ComputeNonbondedBase.h

Go to the documentation of this file.
00001 
00007 // Several special cases are defined:
00008 //   NBTYPE: exclusion method (NBPAIR, NBSELF -- mutually exclusive)
00009 //   FULLELECT full electrostatics calculation?
00010 
// Platform-specific setup.  On POWER we need <builtins.h> for intrinsics
// such as __dcbt (cache-touch) used later in the inner loop.
00011 #ifdef ARCH_POWERPC
00012 #include <builtins.h>
00013 #endif
00014 
// When SSE2 is available (and not explicitly disabled via NAMD_DISABLE_SSE),
// pull in the intrinsics header and define a portable __align(X) wrapper
// around each compiler's alignment syntax.
00015 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00016 #include <emmintrin.h>  // We're using SSE2 intrinsics
00017 #if defined(__INTEL_COMPILER)
00018 #define __align(X) __declspec(align(X) )
00019 #elif defined(__GNUC__) || defined(__PGI)
00020 #define __align(X)  __attribute__((aligned(X) ))
00021 #else
// NOTE(review): unknown compilers fall back to the MSVC-style __declspec
// form -- confirm this is the intended default for toolchains that are
// neither ICC nor GCC/PGI.  Also note "__align" is a reserved identifier
// (leading double underscore); renaming would touch every use site.
00022 #define __align(X) __declspec(align(X) )
00023 #endif
00024 #endif
00025 
// Project headers needed only when this template header is included in
// "definition" mode.  The stray "// (" and "// )" markers are brace-matching
// aids for editors confused by the unbalanced-looking conditional blocks.
00026 #ifdef DEFINITION // (
00027   #include "LJTable.h"
00028   #include "Molecule.h"
00029   #include "ComputeNonbondedUtil.h"
00030 #endif // )
00031   #include "Parameters.h"
// PatchMap is only required by the atom-sorting code path below.
00032 #if NAMD_ComputeNonbonded_SortAtoms != 0
00033   #include "PatchMap.h"
00034 #endif
00035 
// determining class name
//
// This header is included many times with different flag macros set; each
// inclusion generates one specialized function.  NAME expands through a
// chain of *NAME(X) macros (CLASSNAME -> ENERGYNAME -> SLOWONLYNAME -> ...)
// that each paste a suffix onto the function name when their feature flag
// is active, e.g. calc_pair_energy_fullelect.  Every macro is #undef'd
// first so repeated inclusion starts from a clean slate.
00037 #undef NAME
00038 #undef CLASS
00039 #undef CLASSNAME
00040 #define NAME CLASSNAME(calc)
00041 
// PAIR(X)/SELF(X) expand to their argument only in the matching NBTYPE
// mode, letting later code splice in pair-only or self-only statements.
// NBPAIR and NBSELF are mutually exclusive (see the sanity checks below).
00042 #undef PAIR
00043 #if NBTYPE == NBPAIR
00044   #define PAIR(X) X
00045   #define CLASS ComputeNonbondedPair
00046   #define CLASSNAME(X) ENERGYNAME( X ## _pair )
00047 #else
00048   #define PAIR(X)
00049 #endif
00050 
00051 #undef SELF
00052 #if NBTYPE == NBSELF
00053   #define SELF(X) X
00054   #define CLASS ComputeNonbondedSelf
00055   #define CLASSNAME(X) ENERGYNAME( X ## _self )
00056 #else
00057   #define SELF(X)
00058 #endif
00059 
// ENERGY(X)/NOENERGY(X): include X only when energies are (not) being
// accumulated this step; appends "_energy" to the generated name.
00060 #undef ENERGYNAME
00061 #undef ENERGY
00062 #undef NOENERGY
00063 #ifdef CALCENERGY
00064   #define ENERGY(X) X
00065   #define NOENERGY(X)
00066   #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
00067 #else
00068   #define ENERGY(X)
00069   #define NOENERGY(X) X
00070   #define ENERGYNAME(X) SLOWONLYNAME( X )
00071 #endif
00072 
// FAST(X)/NOFAST(X): FAST code is the short-range nonbonded work, which is
// suppressed in "slow only" (long-range-correction-only) builds.
00073 #undef SLOWONLYNAME
00074 #undef FAST
00075 #undef NOFAST
00076 #ifdef SLOWONLY
00077   #define FAST(X)
00078   #define NOFAST(X) X
00079   #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
00080 #else
00081   #define FAST(X) X
00082   #define NOFAST(X)
00083   #define SLOWONLYNAME(X) MERGEELECTNAME( X )
00084 #endif
00085 
// SHORT(X)/NOSHORT(X): SHORT code handles the separate short-range
// electrostatics table; with MERGEELECT the short-range part is merged
// into the full-electrostatics path instead.
00086 #undef MERGEELECTNAME
00087 #undef SHORT
00088 #undef NOSHORT
00089 #ifdef MERGEELECT
00090   #define SHORT(X)
00091   #define NOSHORT(X) X
00092   #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
00093 #else
00094   #define SHORT(X) X
00095   #define NOSHORT(X)
00096   #define MERGEELECTNAME(X) FULLELECTNAME( X )
00097 #endif
00098 
// FULL(X)/NOFULL(X): include X only when a full (long-range) electrostatics
// calculation is (not) compiled in; appends "_fullelect" to the name.
00099 #undef FULLELECTNAME
00100 #undef FULL
00101 #undef NOFULL
00102 #ifdef FULLELECT
00103   #define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect )
00104   #define FULL(X) X
00105   #define NOFULL(X)
00106 #else
00107   #define FULLELECTNAME(X) TABENERGYNAME( X )
00108   #define FULL(X)
00109   #define NOFULL(X) X
00110 #endif
00111 
// TABENERGY(X)/NOTABENERGY(X): tabulated-energy interaction variant;
// appends "_tabener" to the generated name when TABENERGYFLAG is set.
00112 #undef TABENERGYNAME
00113 #undef TABENERGY
00114 #undef NOTABENERGY
00115 #ifdef TABENERGYFLAG
00116   #define TABENERGYNAME(X) FEPNAME( X ## _tabener )
00117   #define TABENERGY(X) X
00118   #define NOTABENERGY(X)
00119 #else
00120   #define TABENERGYNAME(X) FEPNAME( X )
00121   #define TABENERGY(X)
00122   #define NOTABENERGY(X) X
00123 #endif
00124 
00125 // Here are what these macros stand for:
00126 // FEP/NOT_FEP: FEP free energy perturbation is active/inactive 
00127 //      (does NOT use LAM)
00128 // LES: locally-enhanced sampling is active
00129 // LAM: scale the potential by a factor lambda (except FEP)
00130 // INT: measure interaction energies
00131 // PPROF: pressure profiling
00132 
// Reset every feature macro to its "off" (empty-expansion) default; the
// per-flag blocks below then turn individual ones back on.
00133 #undef FEPNAME
00134 #undef FEP
00135 #undef LES
00136 #undef INT
00137 #undef PPROF
00138 #undef LAM
00139 #undef CUDA
00140 #undef ALCH
00141 #undef TI
00142 #undef GO
00143 #define FEPNAME(X) LAST( X )
00144 #define FEP(X)
// NOTE(review): ALCHPAIR/NOT_ALCHPAIR are not in the #undef list above.
// This is only legal on repeated inclusion because each redefinition is
// token-identical; consider adding them to the #undef list for symmetry.
00145 #define ALCHPAIR(X)
00146 #define NOT_ALCHPAIR(X) X
00147 #define LES(X)
00148 #define INT(X)
00149 #define PPROF(X)
00150 #define LAM(X)
00151 #define CUDA(X)
00152 #define ALCH(X)
00153 #define TI(X)
00154 #define GO(X)
// Per-flag overrides: each block re-enables its feature macro(s) and
// appends the corresponding suffix to the generated function name.
// FEP and TI both imply ALCH (alchemical transformations).
00155 #ifdef FEPFLAG
00156   #undef FEPNAME
00157   #undef FEP
00158   #undef ALCH
00159   #define FEPNAME(X) LAST( X ## _fep )
00160   #define FEP(X) X
00161   #define ALCH(X) X
00162 #endif
00163 #ifdef TIFLAG
00164   #undef FEPNAME
00165   #undef TI
00166   #undef ALCH
00167   #define FEPNAME(X) LAST( X ## _ti )
00168   #define TI(X) X
00169   #define ALCH(X) X
00170 #endif
// Locally-enhanced sampling also enables lambda scaling (LAM).
00171 #ifdef LESFLAG
00172   #undef FEPNAME
00173   #undef LES
00174   #undef LAM
00175   #define FEPNAME(X) LAST( X ## _les )
00176   #define LES(X) X
00177   #define LAM(X) X
00178 #endif
00179 #ifdef INTFLAG
00180   #undef FEPNAME
00181   #undef INT
00182   #define FEPNAME(X) LAST( X ## _int )
00183   #define INT(X) X
00184 #endif
// Pressure profiling also enables INT (interaction-energy) code paths.
00185 #ifdef PPROFFLAG
00186   #undef FEPNAME
00187   #undef INT
00188   #undef PPROF
00189   #define FEPNAME(X) LAST( X ## _pprof )
00190   #define INT(X) X
00191   #define PPROF(X) X
00192 #endif
00193 #ifdef GOFORCES
00194   #undef FEPNAME
00195   #undef GO
00196   #define FEPNAME(X) LAST( X ## _go )
00197   #define GO(X) X
00198 #endif
// CUDA builds keep the name unchanged but enable CUDA(X) splices.
00199 #ifdef NAMD_CUDA
00200   #undef CUDA
00201   #define CUDA(X) X
00202 #endif
00203 
// LAST terminates the *NAME(X) macro chain: the fully-suffixed name
// expands to itself.
00204 #define LAST(X) X
00205 
00206 // see if things are really messed up
// Compile-time consistency checks: each line below expands to nothing when
// the flag combination is valid.  If two mutually-exclusive flags are both
// active (e.g. SELF and PAIR, or LES and FEP), the line expands to the
// nonsense tokens "foo bar" and deliberately breaks the build.
00207 SELF( PAIR( foo bar ) )
00208 LES( FEP( foo bar ) )
00209 LES( INT( foo bar ) )
00210 FEP( INT( foo bar ) )
00211 LAM( INT( foo bar ) )
00212 FEP( NOENERGY( foo bar ) )
00213 ENERGY( NOENERGY( foo bar ) )
00214 TABENERGY(NOTABENERGY( foo bar ) )
00215 
// Include the KNL kernel header once in "make depends" mode so build
// dependency scanners see it even when the KNL path is compiled out.
00216 #define KNL_MAKE_DEPENDS_INCLUDE
00217 #include  "ComputeNonbondedBase2KNL.h"
00218 #undef KNL_MAKE_DEPENDS_INCLUDE
00219 
// KNL(X)/NOKNL(X): select the Knights Landing vectorized kernel.  The
// inner #if uses a counting trick: each active feature macro expands
// "1+" so the expression becomes e.g. (1+1+0) -- nonzero when ANY of the
// listed features is enabled, in which case the KNL kernel (which does
// not support them) is disabled and the generic path is used instead.
00220 #undef KNL
00221 #undef NOKNL
00222 #ifdef NAMD_KNL
00223   #if ( TABENERGY(1+) FEP(1+) TI(1+) INT(1+) LES(1+) GO(1+) PPROF(1+) NOFAST(1+) 0 )
00224     #define KNL(X)
00225     #define NOKNL(X) X
00226   #else
00227     #define KNL(X) X
00228     #define NOKNL(X)
00229   #endif
00230 #else
00231   #define KNL(X)
00232   #define NOKNL(X) X
00233 #endif
00234 
// Dot product over _x/_y/_z component-named scalar variables (built via
// token pasting), used by the atom-sorting code to project positions onto
// the inter-patch line.  Only needed for pair computations with sorting on
// (the "( 0 PAIR( + 1 ) )" term is nonzero only in NBPAIR mode).
00235 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00236   #define COMPONENT_DOTPRODUCT(A,B)  ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))
00237 #endif
00238 
00239 
00240 // ************************************************************
00241 // function header
00242 void ComputeNonbondedUtil :: NAME
00243   ( nonbonded *params )
00244 
00245 // function body
00246 {
00247   // int NAME;  // to track errors via unused variable warnings
00248 
00249   if ( ComputeNonbondedUtil::commOnly ) return;
00250 
00251 #ifdef NAMD_CUDA
00252   NOENERGY(return;)
00253 #endif
00254 
00255   // speedup variables
00256   BigReal *reduction = params->reduction;
00257   SimParameters *simParams = params->simParameters;
00258 
00259   PPROF(
00260   BigReal *pressureProfileReduction = params->pressureProfileReduction;
00261   const BigReal invThickness = 1.0 / pressureProfileThickness;
00262   )
00263 
00264   Pairlists &pairlists = *(params->pairlists);
00265 #ifdef NAMD_CUDA
00266   int savePairlists = 0;
00267   int usePairlists = 0;
00268 #else
00269   int savePairlists = params->savePairlists;
00270   int usePairlists = params->usePairlists;
00271 #endif
00272   pairlists.reset();
00273   // PAIR(iout << "--------\n" << endi;)
00274   
00275   // BEGIN LA
00276   const int doLoweAndersen = params->doLoweAndersen;
00277   // END LA
00278 
00279   // local variables
00280   int exclChecksum = 0;
00281   FAST
00282   (
00283    // JLai
00284   ENERGY( BigReal vdwEnergy = 0;
00285           GO( BigReal groLJEnergy = 0;
00286               BigReal groGaussEnergy = 0;
00287               BigReal goEnergyNative = 0;
00288               BigReal goEnergyNonnative = 0; ) )
00289   SHORT
00290   (
00291   ENERGY( BigReal electEnergy = 0; )
00292   Vector f_net = 0.;
00293   )
00294 
00295   FEP
00296   (
00297   ENERGY( BigReal vdwEnergy_s = 0; BigReal vdwEnergy_s_Left = 0; )
00298   SHORT
00299   (
00300   ENERGY( BigReal electEnergy_s = 0; )
00301   )
00302   )
00303   )
00304     
00305 #ifndef A2_QPX
00306   FULL
00307   (
00308   ENERGY( BigReal fullElectEnergy = 0; )
00309   Vector fullf_net = 0.;
00310   FEP
00311   (
00312   ENERGY( BigReal fullElectEnergy_s = 0; )
00313   )
00314   )
00315 #else 
00316   Vector fullf_net = 0.;
00317   BigReal fullElectEnergy = 0;
00318   BigReal fullElectEnergy_s = 0;
00319 #endif
00320 
00321   // Bringing stuff into local namespace for speed.
00322   const BigReal offset_x = params->offset.x;
00323   const BigReal offset_y = params->offset.y;
00324   const BigReal offset_z = params->offset.z;
00325 
00326   // Note that offset_f assumes positions are relative to patch centers
00327   const float offset_x_f = params->offset_f.x;
00328   const float offset_y_f = params->offset_f.y;
00329   const float offset_z_f = params->offset_f.z;
00330 
00331   register const BigReal plcutoff2 = \
00332                         params->plcutoff * params->plcutoff;
00333   register const BigReal groupplcutoff2 = \
00334                         params->groupplcutoff * params->groupplcutoff;
00335   const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00336   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00337   LJTable::TableEntry ljNull;  ljNull.A = 0; ljNull.B = 0;
00338   const LJTable::TableEntry* const lj_null_pars = &ljNull;
00339   const Molecule* const mol = ComputeNonbondedUtil:: mol;
00340   SHORT
00341   (
00342   const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00343   )
00344   FULL
00345   (
00346   SHORT
00347   (
00348   const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00349   )
00350   NOSHORT
00351   (
00352 //#if 1 ALCH(-1)
00353   const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00354 //#else  // have to switch this for ALCH
00355 //  BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00356 //#endif
00357   )
00358   )
00359   BigReal scaling = ComputeNonbondedUtil:: scaling;
00360   const float scaling_f = scaling;
00361 #ifdef  A2_QPX
00362   vector4double scalingv = vec_splats(scaling);
00363 #endif
00364   const BigReal modf_mod = 1.0 - scale14;
00365   FAST
00366   (
00367   const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00368   const float switchOn2_f = switchOn2;
00369   const float cutoff2_f = ComputeNonbondedUtil::cutoff2;
00370   const BigReal c1 = ComputeNonbondedUtil:: c1;
00371   const BigReal c3 = ComputeNonbondedUtil:: c3;
00372   const float c1_f = c1;
00373   const float c3_f = c3;
00374   )
00375   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00376   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00377   // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00378   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00379 
00380   ALCH(
00381     const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00382     const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00383     const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00384     const BigReal alchVdwShiftCoeff = ComputeNonbondedUtil::alchVdwShiftCoeff;
00385     const Bool vdwForceSwitching = ComputeNonbondedUtil::vdwForceSwitching;
00386     const Bool LJcorrection = ComputeNonbondedUtil::LJcorrection;
00387     const Bool Fep_WCA_repuOn = ComputeNonbondedUtil::Fep_WCA_repuOn;
00388     const Bool Fep_WCA_dispOn = ComputeNonbondedUtil::Fep_WCA_dispOn;
00389     const Bool Fep_ElecOn = ComputeNonbondedUtil::Fep_ElecOn;
00390     const Bool Fep_Wham = ComputeNonbondedUtil::Fep_Wham;
00391     const BigReal WCA_rcut1 = ComputeNonbondedUtil::WCA_rcut1;
00392     const BigReal WCA_rcut2 = ComputeNonbondedUtil::WCA_rcut2;
00393     const BigReal WCA_rcut3 = ComputeNonbondedUtil::WCA_rcut3;
00394 
00395     /* BKR - Enable dynamic lambda by passing the timestep to simParams.
00396        The SimParameters object now has all knowledge of the lambda schedule
00397        and all operations should ask _it_ for lambda, rather than recomputing
00398        it.
00399     */
00400     const BigReal alchLambda = simParams->getCurrentLambda(params->step);
00401 
00402     /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00403     BigReal lambdaUp = alchLambda; // needed in ComputeNonbondedBase2.h!
00404     BigReal elecLambdaUp = simParams->getElecLambda(lambdaUp);
00405     BigReal vdwLambdaUp = simParams->getVdwLambda(lambdaUp);
00406     BigReal vdwShiftUp = alchVdwShiftCoeff*(1 - vdwLambdaUp);
00407     FEP(BigReal lambda2Up = ComputeNonbondedUtil::alchLambda2;)
00408     FEP(BigReal elecLambda2Up = simParams->getElecLambda(lambda2Up);)
00409     FEP(BigReal vdwLambda2Up = simParams->getVdwLambda(lambda2Up);)
00410     FEP(BigReal vdwShift2Up = alchVdwShiftCoeff*(1 - vdwLambda2Up);)
00411 
00412     FEP( if( (Fep_Wham) && (Fep_WCA_repuOn) ) {
00413         elecLambdaUp=0.0; 
00414         vdwLambdaUp=ComputeNonbondedUtil::alchRepLambda;
00415     })
00416     FEP( if( (Fep_Wham) && (Fep_WCA_dispOn) ) {
00417         elecLambdaUp=0.0; 
00418         vdwLambdaUp=ComputeNonbondedUtil::alchDispLambda;
00419     })
00420     FEP( if( (Fep_Wham) && (Fep_ElecOn) ) {
00421         elecLambdaUp=ComputeNonbondedUtil::alchElecLambda; 
00422         vdwLambdaUp=1.0;
00423         vdwLambda2Up=1.0;
00424         vdwShiftUp = 0.0;
00425         vdwShift2Up = 0.0;
00426     })
00427         
00428     /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00429     BigReal lambdaDown = 1 - alchLambda; // needed in ComputeNonbondedBase2.h!
00430     BigReal elecLambdaDown = simParams->getElecLambda(lambdaDown);
00431     BigReal vdwLambdaDown = simParams->getVdwLambda(lambdaDown);
00432     BigReal vdwShiftDown = alchVdwShiftCoeff*(1 - vdwLambdaDown);
00433     FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::alchLambda2;)
00434     FEP(BigReal elecLambda2Down = simParams->getElecLambda(lambda2Down);)
00435     FEP(BigReal vdwLambda2Down = simParams->getVdwLambda(lambda2Down);)
00436     FEP(BigReal vdwShift2Down = alchVdwShiftCoeff*(1 - vdwLambda2Down);)
00437 
00438     FEP( if( (Fep_Wham) && (Fep_WCA_repuOn) ) {
00439         elecLambdaDown=0.0; 
00440         vdwLambdaDown = 1.0 - ComputeNonbondedUtil::alchRepLambda;
00441     })
00442     FEP( if( (Fep_Wham) && (Fep_WCA_dispOn) ) {
00443         elecLambdaDown=0.0; 
00444         vdwLambdaDown = 1.0 - ComputeNonbondedUtil::alchDispLambda;
00445     })
00446     FEP( if( (Fep_Wham) && (Fep_ElecOn) ) {
00447         elecLambdaDown = 1.0 - ComputeNonbondedUtil::alchElecLambda; 
00448         vdwLambdaDown = 1.0;
00449         vdwLambda2Down = 1.0;
00450         vdwShiftDown = 0.0;
00451         vdwShift2Down = 0.0;
00452     })
00453   
00454   // Thermodynamic Integration Notes: 
00455   // Separation of atom pairs into different pairlist according to lambda
00456   // coupling, for alchemical free energy calculations. Normal pairlists
00457   // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00458   // (pairlist[nxm][12]_save are created for atoms switched up or down with
00459   // lambda respectively.
00460   // This makes TI coding far easier and more readable, since it's necessary 
00461   // to store dU/dlambda in different variables depending on whether atoms are
00462   // being switched up or down. Further, it allows the prior explicit coding of 
00463   // the separation-shifted vdW potential to stay in place without a big 
00464   // performance hit, since it's no longer necessary to calculate vdW potentials
00465   // explicity for the bulk (non-alchemical) interactions - so that part of the 
00466   // free energy code stays readable and easy to modify. Finally there should
00467   // be some performance gain over the old FEP implementation as we no
00468   // longer have to check the partitions of each atom pair and switch
00469   // parameters accordingly for every single NonbondedBase2.h loop - this is 
00470   // done at the pairlist level
00471   
00472   int pswitchTable[3*3];
00473   // determines which pairlist alchemical pairings get sent to
00474   // 0: non-alchemical pairs, partition 0 <-> partition 0
00475   // 1: atoms scaled up as lambda increases, p0<->p1
00476   // 2: atoms scaled down as lambda increases, p0<->p2
00477   // all p1<->p2 interactions to be dropped (99)
00478   // in general, 'up' refers to 1, 'down' refers to 2
00479   for (int ip=0; ip<3; ++ip) {
00480     for (int jp=0; jp<3; ++jp ) {
00481       pswitchTable[ip+3*jp] = 0;
00482       if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00483       if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00484       if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00485 
00486       if (! ComputeNonbondedUtil::alchDecouple) {
00487         // no decoupling: interactions within a partition are treated like
00488         // normal alchemical pairs
00489         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00490         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00491       }
00492       if (ComputeNonbondedUtil::alchDecouple) {
00493         // decoupling: PME calculates extra grids so that while PME 
00494         // interaction with the full system is switched off, a new PME grid
00495         // containing only alchemical atoms is switched on. Full interactions 
00496         // between alchemical atoms are maintained; potentials within one 
00497         // partition need not be scaled here.
00498         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00499         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00500       }
00501     }
00502   }
00503 
00504   // dU/dlambda variables for thermodynamic integration
00505   TI(
00506       BigReal vdwEnergy_ti_1 = 0;
00507       BigReal vdwEnergy_ti_2 = 0;
00508       SHORT(BigReal electEnergy_ti_1 = 0;
00509       BigReal electEnergy_ti_2 = 0;)
00510       FULL(BigReal fullElectEnergy_ti_1 = 0; 
00511       BigReal fullElectEnergy_ti_2 = 0;) 
00512    )
00513   )
00514         
00515         
00516   const int i_upper = params->numAtoms[0];
00517   register const int j_upper = params->numAtoms[1];
00518   register int j;
00519   register int i;
00520   const CompAtom *p_0 = params->p[0];
00521   const CompAtom *p_1 = params->p[1];
00522   KNL( const CompAtomFlt *pFlt_0 = params->pFlt[0]; )
00523   KNL( const CompAtomFlt *pFlt_1 = params->pFlt[1]; )
00524   const CompAtomExt *pExt_0 = params->pExt[0];
00525   const CompAtomExt *pExt_1 = params->pExt[1];
00526 
00527   char * excl_flags_buff = 0;
00528   const int32 * full_excl = 0;
00529   const int32 * mod_excl = 0;
00530 
00531   plint *pairlistn_save;  int npairn;
00532   plint *pairlistx_save;  int npairx;
00533   plint *pairlistm_save;  int npairm;
00534 
00535   ALCH(
00536   plint *pairlistnA1_save;  int npairnA1;
00537   plint *pairlistxA1_save;  int npairxA1;
00538   plint *pairlistmA1_save;  int npairmA1;
00539   plint *pairlistnA2_save;  int npairnA2;
00540   plint *pairlistxA2_save;  int npairxA2;
00541   plint *pairlistmA2_save;  int npairmA2;
00542   )
00543 
00544   NBWORKARRAYSINIT(params->workArrays);
00545 
00546   int arraysize = j_upper+5;
00547 
00548   NBWORKARRAY(int,pairlisti,arraysize)
00549   NBWORKARRAY(BigReal,r2list,arraysize)
00550   KNL( NBWORKARRAY(float,r2list_f,arraysize) )
00551   KNL( NBWORKARRAY(float,xlist,arraysize) )
00552   KNL( NBWORKARRAY(float,ylist,arraysize) )
00553   KNL( NBWORKARRAY(float,zlist,arraysize) )
00554 
00555   union { double f; int32 i[2]; } byte_order_test;
00556   byte_order_test.f = 1.0;  // should occupy high-order bits only
00557   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00558 
00559   if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00560 
00561 
00562   // DMK - Atom Sort
00563   // 
00564   // Basic Idea: Determine the line between the center of masses of
00565   //   the two patches.  Project and then sort the lists of atoms
00566   //   along this line.  Then, as the pairlists are being generated
00567   //   for the atoms in the first atom list, use the sorted
00568   //   list to only add atoms from the second list that are between
00569   //   +/- ~cutoff from the atoms position on the line.
00570   #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00571 
00572   // NOTE: For the first atom list, just the sort values themselves need to be
00573   // calculated (a BigReal vs. SortEntry data structure).  However, a second
00574   // SortEntry array is needed to perform the merge sort on the second list of
00575   // atoms.  Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00576   // list of atoms and then use the left over array to make a version of the
00577   // list that only includes non-fixed groups (fixg).
00578 
00579   NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00580   NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00581   NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00582 
00583   register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00584   register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00585 
00586   int p_0_sortValues_len = 0;
00587   int p_1_sortValues_len = 0;
00588   int p_1_sortValues_fixg_len = 0;
00589 
00590   // Calculate the distance between to projected points on the line that
00591   //   represents the cutoff distance.
00592   BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00593 
00594   if (savePairlists || !usePairlists) {
00595 
00596     register const BigReal projLineVec_x = params->projLineVec.x;
00597     register const BigReal projLineVec_y = params->projLineVec.y;
00598     register const BigReal projLineVec_z = params->projLineVec.z;
00599 
00600     // Calculate the sort values for the atoms in patch 1
00601     {
00602       register int nbgs = p_1->nonbondedGroupSize;
00603       register BigReal p_x = p_1->position.x;
00604       register BigReal p_y = p_1->position.y;
00605       register BigReal p_z = p_1->position.z;
00606       register int index = 0;
00607 
00608       for (register int j = nbgs; j < j_upper; j += nbgs) {
00609 
00610         // Set p_j_next to point to the atom for the next iteration and begin
00611         //   loading the 'nbgs' value for that atom.
00612         register const CompAtom* p_j_next = p_1 + j;
00613         nbgs = p_j_next->nonbondedGroupSize;
00614 
00615         // Calculate the distance along the projection vector
00616         // NOTE: If the vector from the origin to the point is 'A' and the vector
00617         //   between the patches is 'B' then to project 'A' onto 'B' we take the dot
00618         //   product of the two vectors 'A dot B' divided by the length of 'B'.
00619         // So... projection of A onto B = (A dot B) / length(B), however, note
00620         //   that length(B) == 1, therefore the sort value is simply (A dot B)
00621         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00622 
00623         // Start loading the next iteration's atom's position
00624         p_x = p_j_next->position.x;
00625         p_y = p_j_next->position.y;
00626         p_z = p_j_next->position.z;
00627 
00628         // Store the caclulated sort value into the array of sort values
00629         register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00630         p_1_sortValStorePtr->index = index;
00631         p_1_sortValStorePtr->sortValue = sortVal;
00632         p_1_sortValues_len++;
00633 
00634         // Update index for the next iteration
00635         index = j;       
00636 
00637       } // end while (j < j_upper)
00638 
00639       register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00640 
00641       register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00642       p_1_sortValStorePtr->index = index;
00643       p_1_sortValStorePtr->sortValue = sortVal;
00644       p_1_sortValues_len++;
00645     }
00646 
00647     // NOTE: This list and another version of it with only non-fixed
00648     //   atoms will be used in place of grouplist and fixglist.
00649     #if 0   // Selection Sort
00650       sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00651     #elif 0   // Bubble Sort
00652       sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00653     #else   // Merge Sort
00654       #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00655         sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00656       #else
00657         sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00658       #endif
00659     #endif
00660 
00661     // Calculate the sort values for the atoms in patch 0
00662     {
00663       register int nbgs = p_0->nonbondedGroupSize;
00664       register BigReal p_x = p_0->position.x + offset_x;
00665       register BigReal p_y = p_0->position.y + offset_y;
00666       register BigReal p_z = p_0->position.z + offset_z;
00667       register int index = 0;
00668 
00669       for (register int i = nbgs; i < i_upper; i += nbgs) {
00670 
00671         // Set p_i_next to point to the atom for the next iteration and begin
00672         //   loading the 'nbgs' value for that atom.
00673         register const CompAtom* p_i_next = p_0 + i;
00674         nbgs = p_i_next->nonbondedGroupSize;
00675 
00676         // Calculate the distance along the projection vector
00677         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00678 
00679         // Start loading the next iteration's atom's position
00680         p_x = p_i_next->position.x + offset_x;
00681         p_y = p_i_next->position.y + offset_y;
00682         p_z = p_i_next->position.z + offset_z;
00683 
00684         // Store the calculated sort value into the array of sort values
00685         register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00686         *p_0_sortValStorePtr = sortVal;
00687 
00688         // Update index for the next iteration
00689         index = i;
00690       }
00691 
00692       register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00693 
00694       register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00695       *p_0_sortValStorePtr = sortVal;
00696 
00697       p_0_sortValues_len = i_upper;
00698     }
00699 
00700   }  // end if (savePairlists || !usePairlists)
00701 
00702   #endif  // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00703 
00704   // Atom Sort : The grouplist and fixglist arrays are not needed when the
00705   //   the atom sorting code is in use.
00706   #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00707     NBWORKARRAY(int,grouplist,arraysize);
00708     NBWORKARRAY(int,fixglist,arraysize);
00709   #endif
00710 
00711   NBWORKARRAY(int,goodglist,arraysize);
00712   NBWORKARRAY(plint,pairlistx,arraysize);
00713   NBWORKARRAY(plint,pairlistm,arraysize);
00714   NBWORKARRAY(int,pairlist,arraysize);
00715   NBWORKARRAY(int,pairlist2,arraysize);
00716   ALCH(
00717   NBWORKARRAY(plint,pairlistnA1,arraysize);
00718   NBWORKARRAY(plint,pairlistxA1,arraysize);
00719   NBWORKARRAY(plint,pairlistmA1,arraysize);
00720   NBWORKARRAY(plint,pairlistnA2,arraysize);
00721   NBWORKARRAY(plint,pairlistxA2,arraysize);
00722   NBWORKARRAY(plint,pairlistmA2,arraysize);
00723   )
00724 
00725   int fixg_upper = 0;
00726   int g_upper = 0;
00727 
00728   if ( savePairlists || ! usePairlists ) {
00729 
00730     #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00731 
00732       // Create a sorted list of non-fixed groups
00733       register int fixg = 0;
00734       for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00735         register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00736         register int p_1_index = p_1_sortEntry->index;
00737         if (!pExt_1[p_1_index].groupFixed) {
00738           register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00739           p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00740           p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00741           p_1_sortValues_fixg_len++;
00742         }
00743       }
00744 
00745     #else
00746 
00747       register int g = 0;
00748       for ( j = 0; j < j_upper; ++j ) {
00749         if ( p_1[j].nonbondedGroupSize ) {
00750           grouplist[g++] = j;
00751         }
00752       }
00753       g_upper = g;
00754       if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00755       int fixg = 0;
00756 
00757       if ( fixedAtomsOn ) {
00758         for ( g = 0; g < g_upper; ++g ) {
00759           j = grouplist[g];
00760           if ( ! pExt_1[j].groupFixed ) {
00761             fixglist[fixg++] = j;
00762           }
00763         }
00764       }
00765 
00766       fixg_upper = fixg;
00767       if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00768 
00769     #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00770 
00771     pairlists.addIndex();
00772     pairlists.setIndexValue(i_upper);
00773 
00774   } else { // if ( savePairlists || ! usePairlists )
00775 
00776     if ( pairlists.getIndexValue() != i_upper )
00777       NAMD_bug("pairlist i_upper mismatch!");
00778 
00779   } // if ( savePairlists || ! usePairlists )
00780 
00781   SELF(
00782   int j_hgroup = 0;
00783   int g_lower = 0;
00784   int fixg_lower = 0;
00785   )
00786   int pairlistindex=0;
00787   int pairlistoffset=0;
00788 
00789   
00790 
00791 #if ( SHORT( FAST( 1+ ) ) 0 )
00792     Force *f_0 = params->ff[0];
00793 #if ( PAIR( 1+ ) 0 )
00794     Force *f_1 = params->ff[1];
00795 #else
00796 #define f_1 f_0
00797 #endif
00798 #endif
00799 #if ( FULL( 1+ ) 0 )
00800     Force *fullf_0 = params->fullf[0];
00801 #if ( PAIR( 1+ ) 0 )
00802     Force *fullf_1 = params->fullf[1];
00803 #else
00804 #define fullf_1 fullf_0
00805 #endif
00806 #endif
00807     
00808 
00809   int maxPart = params->numParts - 1;
00810   int groupCount = params->minPart;
00811   PAIR( 
00812     if ( savePairlists || ! usePairlists ) { 
00813       i = 0;
00814       pairlists.addIndex(); 
00815     } else {
00816       i = pairlists.getIndexValue();
00817     }
00818   )
00819    PAIR(for ( ; i < (i_upper);)) SELF(for ( i=0; i < (i_upper- 1);i++))
00820     {
00821     const CompAtom &p_i = p_0[i];
00822     KNL( const CompAtomFlt &pFlt_i = pFlt_0[i]; )
00823     const CompAtomExt &pExt_i = pExt_0[i];
00824 
00825     PAIR(if (savePairlists || ! usePairlists){)
00826     if ( p_i.hydrogenGroupSize ) {
00827       if ( groupCount ) {  // skip this group
00828         --groupCount;
00829         i += p_i.hydrogenGroupSize;
00830 #ifdef ARCH_POWERPC
00831         __dcbt((void *) &(p_0[i]));
00832 #endif
00833         SELF(--i;)
00834         continue;
00835       } else {  // compute this group
00836         groupCount = maxPart;
00837       }
00838     }
00839     PAIR(})
00840 
00841     register const BigReal p_i_x = p_i.position.x + offset_x;
00842     register const BigReal p_i_y = p_i.position.y + offset_y;
00843     register const BigReal p_i_z = p_i.position.z + offset_z;
00844 #if KNL(1)+0
00845     register const BigReal p_i_x_f = pFlt_i.position.x + offset_x_f;
00846     register const BigReal p_i_y_f = pFlt_i.position.y + offset_y_f;
00847     register const BigReal p_i_z_f = pFlt_i.position.z + offset_z_f;
00848 #endif
00849 #ifdef A2_QPX
00850     vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.0};
00851 #endif
00852 
00853     ALCH(const int p_i_partition = p_i.partition;)
00854 
00855     PPROF(
00856         const int p_i_partition = p_i.partition;
00857         int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00858         pp_clamp(n1, pressureProfileSlabs);
00859         )
00860 
00861   SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00862 
00863   if ( savePairlists || ! usePairlists ) {
00864 
    // Establish the exclusion id range [excl_min, excl_max] for atom i and a
    // flags array indexed by absolute atom id within that range.  In the
    // memory-optimized build the ExclusionCheck stores offsets relative to
    // the atom's own id; otherwise it stores absolute ids directly.
00865     #ifdef MEM_OPT_VERSION
00866     const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);        
00867     const int excl_min = pExt_i.id + exclcheck->min;
00868     const int excl_max = pExt_i.id + exclcheck->max;
00869     #else
00870     const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
00871     const int excl_min = exclcheck->min;
00872     const int excl_max = exclcheck->max;
00873     #endif
    // excl_flags_var is biased by -excl_min so it can be indexed by atom id.
00874     const char * excl_flags_var;
00875     if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00876     else {  // need to build list on the fly
00877 
00878     //TODO: Should change later!!!!!!!!!! --Chao Mei
00879     //Now just for the sake of passing compilation
    // NOTE(review): under MEM_OPT_VERSION this branch is empty, leaving
    // excl_flags_var unset — the TODO above acknowledges this; confirm the
    // memory-optimized build never reaches here with flags == NULL.
00880     #ifndef MEM_OPT_VERSION 
    // Lazily allocate one byte per atom; on reuse, clear only the entries
    // set by the previous atom's exclusion lists instead of a full memset.
00881       if ( excl_flags_buff ) {
00882         int nl,l;
00883         nl = full_excl[0] + 1;
00884         for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00885         nl = mod_excl[0] + 1;
00886         for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00887       } else {
00888         excl_flags_buff = new char[mol->numAtoms];
00889         memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00890       }
    // Exclusion lists are length-prefixed: element [0] is the count, followed
    // by that many atom ids.  Mark full and modified exclusions for atom i.
00891       int nl,l;
00892       full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
00893       nl = full_excl[0] + 1;
00894       for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00895       mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
00896       nl = mod_excl[0] + 1;
00897       for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00898       excl_flags_var = excl_flags_buff;
00899     #endif
00900 
00901     }
00902     const char * const excl_flags = excl_flags_var;
00903 
00904   if ( p_i.nonbondedGroupSize ) {
00905 
00906     pairlistindex = 0;  // initialize with 0 elements
00907     pairlistoffset=0;
00908     const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
00909 
00910     // If patch divisions are not made by hydrogen groups, then
00911     // hydrogenGroupSize is set to 1 for all atoms.  Thus we can
00912     // carry on as if we did have groups - only less efficiently.
00913     // An optimization in this case is to not rebuild the temporary
00914     // pairlist but to include every atom in it.  This should be a
00915     // a very minor expense.
00916 
00917     SELF
00918     (
00919       if ( p_i.hydrogenGroupSize ) {
00920         // exclude child hydrogens of i
00921         // j_hgroup = i + p_i.hydrogenGroupSize;  (moved above)
00922         while ( g_lower < g_upper &&
00923                 grouplist[g_lower] < j_hgroup ) ++g_lower;
00924         while ( fixg_lower < fixg_upper &&
00925                 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00926       }
00927       // add all child or sister hydrogens of i
00928       for ( j = i + 1; j < j_hgroup; ++j ) {
00929         pairlist[pairlistindex++] = j;
00930       }
00931     )
00932 
00933     // add remaining atoms to pairlist via hydrogen groups
00934     register int *pli = pairlist + pairlistindex;
00935 
00936     {
00937       // Atom Sort : Modify the values of g and gu based on the added information
00938       //   of the linear projections (sort values) information.
      // Two ways to bound the inner group scan: when atom sorting is enabled
      // (pair computes only), binary-search the sorted projection values for
      // the first entry at or beyond this atom's window; otherwise use the
      // precomputed (fixed vs. mobile) group-list bounds directly.
00939       #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00940 
00941         register int g = 0;
00942         register BigReal p_i_sortValue = p_0_sortValues[i];
00943         const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00944         register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00945 
00946         // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
        // Invariant-free bisection on sortValue; terminates with upper-lower <= 1.
00947         register int lower = 0;
00948         register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00949         while ((upper - lower) > 1) {
00950           register int j = ((lower + upper) >> 1);
00951           register BigReal jSortVal = sortValues[j].sortValue;
00952           if (jSortVal < p_i_sortValue_plus_windowRadius) {
00953             lower = j;
00954           } else {
00955             upper = j;
00956           }
00957         }
        // NOTE(review): sortValues[lower] is read unconditionally — assumes a
        // non-empty sorted list here; confirm callers guarantee len >= 1.
00958         const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00959 
00960       #else
00961 
00962         register int *gli = goodglist;
00963         const int *glist = ( groupfixed ? fixglist : grouplist );
        // SELF computes start past this atom's own hydrogen group (gl advanced
        // above); PAIR computes scan the whole list from 0.
00964         SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00965         const int gu = ( groupfixed ? fixg_upper : g_upper );
00966         register int g = PAIR(0) SELF(gl);
00967 
00968       #endif
00969 
00970 
00971       if ( g < gu ) {
00972         int hu = 0;
00973 #ifndef NAMD_KNL
00974 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00975         if ( gu - g  >  6 ) { 
00976 
00977           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00978             register SortEntry* sortEntry0 = sortValues + g;
00979             register SortEntry* sortEntry1 = sortValues + g + 1;
00980             register int jprev0 = sortEntry0->index;
00981             register int jprev1 = sortEntry1->index;
00982           #else
00983             register int jprev0 = glist[g    ];
00984             register int jprev1 = glist[g + 1];
00985           #endif
00986           
00987           register int j0;
00988           register int j1;
00989 
00990           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00991           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00992           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00993 
00994           // these don't change here, so we could move them into outer scope
00995           const __m128d P_I_X = _mm_set1_pd(p_i_x);
00996           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00997           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00998  
00999           g += 2;
01000           for ( ; g < gu - 2; g +=2 ) {
01001             // compute 1d distance, 2-way parallel       
01002             j0     =  jprev0;
01003             j1     =  jprev1;
01004 
01005             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01006             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01007             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01008             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01009             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01010             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01011             
01012             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01013               sortEntry0 = sortValues + g;
01014               sortEntry1 = sortValues + g + 1;
01015               jprev0 = sortEntry0->index;
01016               jprev1 = sortEntry1->index;
01017             #else
01018               jprev0     =  glist[g  ];
01019               jprev1     =  glist[g+1];
01020             #endif
01021            
01022             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01023             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01024             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01025 
01026             __align(16) double r2_01[2];
01027             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01028 
01029             // XXX these could possibly benefit from SSE-based conditionals
01030             bool test0 = ( r2_01[0] < groupplcutoff2 );
01031             bool test1 = ( r2_01[1] < groupplcutoff2 ); 
01032             
01033             //removing ifs benefits on many architectures
01034             //as the extra stores will only warm the cache up
01035             goodglist [ hu         ] = j0;
01036             goodglist [ hu + test0 ] = j1;
01037             
01038             hu += test0 + test1;
01039           }
01040           g-=2;
01041         }
01042 #elif defined (A2_QPX)
01043         if ( gu - g  >  6 ) { 
01044 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01045           register SortEntry* sortEntry0 = sortValues + g;
01046           register SortEntry* sortEntry1 = sortValues + g + 1;
01047           register int jprev0 = sortEntry0->index;
01048           register int jprev1 = sortEntry1->index;
01049           #else
01050           register int jprev0 = glist[g    ];
01051           register int jprev1 = glist[g + 1];
01052           #endif
01053           
01054           __dcbt ((void*)(p_1 + jprev0));
01055           register  int j0; 
01056           register  int j1;           
01057           vector4double    pj_v_0, pj_v_1; 
01058           vector4double    v_0, v_1;
01059           register BigReal r2_0, r2_1;
01060           
01061           pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01062           pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01063           
01064           g += 2;
01065           for ( ; g < gu - 2; g +=2 ) {
01066             // compute 1d distance, 2-way parallel       
01067             j0     =  jprev0;
01068             j1     =  jprev1;
01069 
01070 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01071             sortEntry0 = sortValues + g;
01072             sortEntry1 = sortValues + g + 1;
01073             jprev0 = sortEntry0->index;
01074             jprev1 = sortEntry1->index;
01075             #else
01076             jprev0     =  glist[g  ];
01077             jprev1     =  glist[g+1];
01078             #endif
01079 
01080             v_0 = vec_sub (p_i_v, pj_v_0);
01081             v_1 = vec_sub (p_i_v, pj_v_1);
01082             v_0 = vec_mul (v_0, v_0);
01083             v_1 = vec_mul (v_1, v_1);
01084 
01085             r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
01086             r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
01087             
01088             pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01089             pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01090             
01091             size_t test0 = ( groupplcutoff2 >  r2_0 );
01092             size_t test1 = ( groupplcutoff2 >  r2_1 ); 
01093             
01094             //removing ifs benefits on many architectures
01095             //as the extra stores will only warm the cache up
01096             goodglist [ hu         ] = j0;
01097             goodglist [ hu + test0 ] = j1;
01098             
01099             hu += test0 + test1;
01100           }
01101           g-=2;
01102         }
01103 #else
01104         if ( gu - g  >  6 ) { 
01105 
01106           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01107             register SortEntry* sortEntry0 = sortValues + g;
01108             register SortEntry* sortEntry1 = sortValues + g + 1;
01109             register int jprev0 = sortEntry0->index;
01110             register int jprev1 = sortEntry1->index;
01111           #else
01112             register int jprev0 = glist[g    ];
01113             register int jprev1 = glist[g + 1];
01114           #endif
01115           
01116           register  int j0; 
01117           register  int j1; 
01118           
01119           register  BigReal pj_x_0, pj_x_1; 
01120           register  BigReal pj_y_0, pj_y_1; 
01121           register  BigReal pj_z_0, pj_z_1; 
01122           register  BigReal t_0, t_1, r2_0, r2_1;
01123           
01124           pj_x_0 = p_1[jprev0].position.x;
01125           pj_x_1 = p_1[jprev1].position.x;  
01126           
01127           pj_y_0 = p_1[jprev0].position.y; 
01128           pj_y_1 = p_1[jprev1].position.y;  
01129           
01130           pj_z_0 = p_1[jprev0].position.z; 
01131           pj_z_1 = p_1[jprev1].position.z;
01132           
01133           g += 2;
01134           for ( ; g < gu - 2; g +=2 ) {
01135             // compute 1d distance, 2-way parallel       
01136             j0     =  jprev0;
01137             j1     =  jprev1;
01138             
01139             t_0    =  p_i_x - pj_x_0;
01140             t_1    =  p_i_x - pj_x_1;
01141             r2_0   =  t_0 * t_0;
01142             r2_1   =  t_1 * t_1;
01143             
01144             t_0    =  p_i_y - pj_y_0;
01145             t_1    =  p_i_y - pj_y_1;
01146             r2_0  +=  t_0 * t_0;
01147             r2_1  +=  t_1 * t_1;
01148             
01149             t_0    =  p_i_z - pj_z_0;
01150             t_1    =  p_i_z - pj_z_1;
01151             r2_0  +=  t_0 * t_0;
01152             r2_1  +=  t_1 * t_1;
01153             
01154             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01155               sortEntry0 = sortValues + g;
01156               sortEntry1 = sortValues + g + 1;
01157               jprev0 = sortEntry0->index;
01158               jprev1 = sortEntry1->index;
01159             #else
01160               jprev0     =  glist[g  ];
01161               jprev1     =  glist[g+1];
01162             #endif
01163             
01164             pj_x_0     =  p_1[jprev0].position.x;
01165             pj_x_1     =  p_1[jprev1].position.x;
01166             pj_y_0     =  p_1[jprev0].position.y; 
01167             pj_y_1     =  p_1[jprev1].position.y;
01168             pj_z_0     =  p_1[jprev0].position.z; 
01169             pj_z_1     =  p_1[jprev1].position.z;
01170             
01171             bool test0 = ( r2_0 < groupplcutoff2 );
01172             bool test1 = ( r2_1 < groupplcutoff2 ); 
01173             
01174             //removing ifs benefits on many architectures
01175             //as the extra stores will only warm the cache up
01176             goodglist [ hu         ] = j0;
01177             goodglist [ hu + test0 ] = j1;
01178             
01179             hu += test0 + test1;
01180           }
01181           g-=2;
01182         }
01183 #endif
01184 #endif // NAMD_KNL
01185         
01186         for (; g < gu; g++) {
01187 
01188           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01189             register SortEntry* sortEntry = sortValues + g;
01190             register int j = sortEntry->index;
01191           #else
01192             int j = glist[g];
01193           #endif
01194 
01195           BigReal p_j_x = p_1[j].position.x;
01196           BigReal p_j_y = p_1[j].position.y;
01197           BigReal p_j_z = p_1[j].position.z;
01198           
01199           BigReal r2 = p_i_x - p_j_x;
01200           r2 *= r2;
01201           BigReal t2 = p_i_y - p_j_y;
01202           r2 += t2 * t2;
01203           t2 = p_i_z - p_j_z;
01204           r2 += t2 * t2;
01205 
01206           if ( r2 <= groupplcutoff2 ) 
01207             goodglist[hu ++] = j; 
01208         }
01209 
        // Expand each surviving group parent j into its member atoms
        // j .. j+nbgs-1 on the temporary pairlist.  Stores are speculative
        // (always write 3, conditionally 5) and the pointer advances by the
        // true group size, so extra writes are harmless cache-warming.
        // NOTE(review): the `nbgs & 4` test only distinguishes sizes 4-5 from
        // 1-3 — correctness relies on nonbondedGroupSize never exceeding 5,
        // as the inline comment below states; pairlist must have >= 4 slots
        // of slack beyond the live region to absorb the over-stores.
00210         for ( int h=0; h<hu; ++h ) {
00211           int j = goodglist[h];
00212           int nbgs = p_1[j].nonbondedGroupSize;
00213           pli[0] = j;   // copy over the next three in any case
00214           pli[1] = j+1;
00215           pli[2] = j+2;
00216           if ( nbgs & 4 ) {  // if nbgs > 3, assume nbgs <= 5
00217             pli[3] = j+3;
00218             pli[4] = j+4;
00219           }
00220           pli += nbgs;
00221         }
01222       }
01223     }
01224 
01225     pairlistindex = pli - pairlist;
01226     // make sure padded element on pairlist points to real data
01227     if ( pairlistindex ) {
01228        pairlist[pairlistindex] = pairlist[pairlistindex-1];
01229     } PAIR( else {  // skip empty loops if no pairs were found
01230        i += p_i.nonbondedGroupSize;
01231        continue;
01232     } )
01233   } // if i is nonbonded group parent
01234   SELF
01235     (
01236     // self-comparisions require list to be incremented
01237     // pair-comparisions use entire list (pairlistoffset is 0)
01238     else pairlistoffset++;
01239     )
01240 
01241     const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
01242 
01243     register int *pli = pairlist2;
01244 
01245     plint *pairlistn = pairlists.newlist(j_upper + 5 + 5 ALCH( + 20 ));
01246     register plint *plin = pairlistn;
01247 
01248     
01249     if ( qmForcesOn ) {
01250         
01251         Real qmGroup_i = mol->get_qmAtomGroup(pExt_i.id);
01252         
01253         for (int k=pairlistoffset; k<pairlistindex; k++) {
01254           j = pairlist[k];
01255           
01256           Real qmGroup_j = mol->get_qmAtomGroup(pExt_1[j].id);
01257           
01258           // There can be no non-bonded interaction between an QM-QM pair of the same QM group
01259           if (qmGroup_i > 0) {
01260               if (qmGroup_i == qmGroup_j) {
01261                 continue;
01262               } else {
01263                   *(pli++) = j;
01264               }
01265           } else {
01266               *(pli++) = j;
01267           }
01268         }
01269       
01270       int npair2_int = pli - pairlist2;
01271       pli = pairlist2;
01272       for (int k=0; k<npair2_int; k++) {
01273           
01274         j = pairlist2[k];
01275         
01276         BigReal p_j_x = p_1[j].position.x;
01277         BigReal r2 = p_i_x - p_j_x;
01278         r2 *= r2;
01279         BigReal p_j_y = p_1[j].position.y;
01280         BigReal t2 = p_i_y - p_j_y;
01281         r2 += t2 * t2;
01282         BigReal p_j_z = p_1[j].position.z;
01283         t2 = p_i_z - p_j_z;
01284         r2 += t2 * t2;
01285         
01286         if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01287           int atom2 = pExt_1[j].id;
01288           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01289           else *(plin++) = j;
01290         }
01291       }
01292     } else
01293     
01294     
01295     
01296     INT(
01297     if ( pairInteractionOn ) {
01298       const int ifep_type = p_i.partition;
01299       if (pairInteractionSelf) {
01300         if (ifep_type != 1) { PAIR(++i;) continue; }
01301         for (int k=pairlistoffset; k<pairlistindex; k++) {
01302           j = pairlist[k];
01303           const int jfep_type = p_1[j].partition;
01304           // for pair-self, both atoms must be in group 1.
01305           if (jfep_type == 1) {
01306             *(pli++) = j;
01307           }
01308         }
01309       } else {
01310         if (ifep_type != 1 && ifep_type != 2) { PAIR(++i;) continue; }
01311         for (int k=pairlistoffset; k<pairlistindex; k++) {
01312           j = pairlist[k];
01313           const int jfep_type = p_1[j].partition;
01314           // for pair, must have one from each group.
01315           if (ifep_type + jfep_type == 3) {
01316             *(pli++) = j;
01317           }
01318         }
01319       }
01320       int npair2_int = pli - pairlist2;
01321       pli = pairlist2;
01322       for (int k=0; k<npair2_int; k++) {
01323         j = pairlist2[k];
01324         BigReal p_j_x = p_1[j].position.x;
01325         BigReal r2 = p_i_x - p_j_x;
01326         r2 *= r2;
01327         BigReal p_j_y = p_1[j].position.y;
01328         BigReal t2 = p_i_y - p_j_y;
01329         r2 += t2 * t2;
01330         BigReal p_j_z = p_1[j].position.z;
01331         t2 = p_i_z - p_j_z;
01332         r2 += t2 * t2;
01333         if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01334           int atom2 = pExt_1[j].id;
01335           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01336           else *(plin++) = j;
01337         }
01338       }
01339     } else
01340     )
01341     if ( atomfixed ) {
01342       for (int k=pairlistoffset; k<pairlistindex; k++) {
01343         j = pairlist[k];
01344         BigReal p_j_x = p_1[j].position.x;
01345         BigReal r2 = p_i_x - p_j_x;
01346         r2 *= r2;
01347         BigReal p_j_y = p_1[j].position.y;
01348         BigReal t2 = p_i_y - p_j_y;
01349         r2 += t2 * t2;
01350         BigReal p_j_z = p_1[j].position.z;
01351         t2 = p_i_z - p_j_z;
01352         r2 += t2 * t2;
01353         if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01354           int atom2 = pExt_1[j].id;
01355           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01356           else *(plin++) = j;
01357         }
01358       }
01359     } else {
01360       int k = pairlistoffset;
01361       int ku = pairlistindex;
01362       if ( k < ku ) {
01363 #ifndef NAMD_KNL
01364 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01365         if ( ku - k  >  6 ) {      
01366           register  int jprev0 = pairlist [k    ];
01367           register  int jprev1 = pairlist [k + 1];
01368           
01369           register  int j0; 
01370           register  int j1; 
01371 
01372           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01373           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01374           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01375 
01376           // these don't change here, so we could move them into outer scope
01377           const __m128d P_I_X = _mm_set1_pd(p_i_x);
01378           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01379           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01380           
01381           int atom2_0 = pExt_1[jprev0].id;
01382           int atom2_1 = pExt_1[jprev1].id;
01383           
01384           k += 2;
01385           for ( ; k < ku - 2; k +=2 ) {
01386             // compute 1d distance, 2-way parallel       
01387             j0     =  jprev0;
01388             j1     =  jprev1;
01389             
01390             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01391             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01392             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01393             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01394             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01395             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01396             
01397             jprev0     =  pairlist[k];
01398             jprev1     =  pairlist[k+1];
01399             
01400             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01401             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01402             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01403 
01404             __align(16) double r2_01[2];
01405             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01406             
01407             if (r2_01[0] <= plcutoff2) {
01408               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01409                 *(pli++) = j0;
01410               else 
01411                 *(plin++) = j0;
01412             }
01413             atom2_0 = pExt_1[jprev0].id;
01414             
01415             if (r2_01[1] <= plcutoff2) {
01416               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01417                 *(pli++) = j1;
01418               else 
01419                 *(plin++) = j1;
01420             }
01421             atom2_1 = pExt_1[jprev1].id;            
01422           }
01423           k-=2;
01424         }       
01425 #elif defined(A2_QPX)
01426         if ( ku - k  >  6 ) {      
01427           register  int jprev0 = pairlist [k];
01428           register  int jprev1 = pairlist [k + 1];
01429           
01430           register  int j0; 
01431           register  int j1; 
01432           vector4double    pj_v_0, pj_v_1; 
01433           vector4double    v_0, v_1;
01434           BigReal          r2_0, r2_1;
01435 
01436           pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01437           pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01438           
01439           int atom2_0 = pExt_1[jprev0].id;
01440           int atom2_1 = pExt_1[jprev1].id;
01441           
01442           k += 2;
01443           for ( ; k < ku - 2; k +=2 ) {
01444             // compute 1d distance, 2-way parallel       
01445             j0     =  jprev0;
01446             j1     =  jprev1;
01447           
01448             v_0 = vec_sub (p_i_v, pj_v_0);
01449             v_1 = vec_sub (p_i_v, pj_v_1);
01450             v_0 = vec_mul (v_0, v_0);
01451             v_1 = vec_mul (v_1, v_1);
01452 
01453             r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
01454             r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
01455 
01456             jprev0     =  pairlist[k];
01457             jprev1     =  pairlist[k+1];
01458             
01459             pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01460             pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01461                     
01462             if (r2_0 <= plcutoff2) {
01463               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01464                 *(pli++) = j0;
01465               else 
01466                 *(plin++) = j0;
01467             }
01468             atom2_0 = pExt_1[jprev0].id;
01469             
01470             if (r2_1 <= plcutoff2) {
01471               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01472                 *(pli++) = j1;
01473               else 
01474                 *(plin++) = j1;
01475             }
01476             atom2_1 = pExt_1[jprev1].id;            
01477           }
01478           k-=2;
01479         }      
01480 #else
01481         if ( ku - k  >  6 ) {      
01482           register  int jprev0 = pairlist [k];
01483           register  int jprev1 = pairlist [k + 1];
01484           
01485           register  int j0; 
01486           register  int j1; 
01487           
01488           register  BigReal pj_x_0, pj_x_1; 
01489           register  BigReal pj_y_0, pj_y_1; 
01490           register  BigReal pj_z_0, pj_z_1; 
01491           register  BigReal t_0, t_1, r2_0, r2_1;
01492           
01493           pj_x_0 = p_1[jprev0].position.x;
01494           pj_x_1 = p_1[jprev1].position.x;  
01495           
01496           pj_y_0 = p_1[jprev0].position.y; 
01497           pj_y_1 = p_1[jprev1].position.y;  
01498           
01499           pj_z_0 = p_1[jprev0].position.z; 
01500           pj_z_1 = p_1[jprev1].position.z;
01501           
01502           int atom2_0 = pExt_1[jprev0].id;
01503           int atom2_1 = pExt_1[jprev1].id;
01504           
01505           k += 2;
01506           for ( ; k < ku - 2; k +=2 ) {
01507             // compute 1d distance, 2-way parallel       
01508             j0     =  jprev0;
01509             j1     =  jprev1;
01510             
01511             t_0    =  p_i_x - pj_x_0;
01512             t_1    =  p_i_x - pj_x_1;
01513             r2_0   =  t_0 * t_0;
01514             r2_1   =  t_1 * t_1;
01515             
01516             t_0    =  p_i_y - pj_y_0;
01517             t_1    =  p_i_y - pj_y_1;
01518             r2_0  +=  t_0 * t_0;
01519             r2_1  +=  t_1 * t_1;
01520             
01521             t_0    =  p_i_z - pj_z_0;
01522             t_1    =  p_i_z - pj_z_1;
01523             r2_0  +=  t_0 * t_0;
01524             r2_1  +=  t_1 * t_1;
01525             
01526             jprev0     =  pairlist[k];
01527             jprev1     =  pairlist[k+1];
01528             
01529             pj_x_0     =  p_1[jprev0].position.x;
01530             pj_x_1     =  p_1[jprev1].position.x;
01531             pj_y_0     =  p_1[jprev0].position.y; 
01532             pj_y_1     =  p_1[jprev1].position.y;
01533             pj_z_0     =  p_1[jprev0].position.z; 
01534             pj_z_1     =  p_1[jprev1].position.z;
01535             
01536             if (r2_0 <= plcutoff2) {
01537               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01538                 *(pli++) = j0;
01539               else 
01540                 *(plin++) = j0;
01541             }
01542             atom2_0 = pExt_1[jprev0].id;
01543             
01544             if (r2_1 <= plcutoff2) {
01545               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01546                 *(pli++) = j1;
01547               else 
01548                 *(plin++) = j1;
01549              }
01550             atom2_1 = pExt_1[jprev1].id;            
01551           }
01552           k-=2;
01553         }       
01554 #endif
01555 #endif // NAMD_KNL
01556 
01557         for (; k < ku; k++) {
01558           int j = pairlist[k];
01559           int atom2 = pExt_1[j].id;
01560           
01561           BigReal p_j_x = p_1[j].position.x;
01562           BigReal p_j_y = p_1[j].position.y;
01563           BigReal p_j_z = p_1[j].position.z;
01564           
01565           BigReal r2 = p_i_x - p_j_x;
01566           r2 *= r2;
01567           BigReal t2 = p_i_y - p_j_y;
01568           r2 += t2 * t2;
01569           t2 = p_i_z - p_j_z;
01570           r2 += t2 * t2;
01571           
01572           if (r2 <= plcutoff2) {
01573             if ( atom2 >= excl_min && atom2 <= excl_max ) 
01574               *(pli++) = j;
01575             else 
01576               *(plin++) = j;
01577           }
01578         }
01579       }
01580     }
01581 
01582     PAIR(
01583     if ( plin == pairlistn && pli == pairlist2 ) {
01584       ++i;
01585       continue;
01586     }
01587     pairlists.setIndexValue(i); 
01588     )
01589 
01590     plint *plix = pairlistx;
01591     plint *plim = pairlistm;
01592     ALCH(
01593     plint *plinA1 = pairlistnA1;
01594     plint *plixA1 = pairlistxA1;
01595     plint *plimA1 = pairlistmA1;
01596     plint *plinA2 = pairlistnA2;
01597     plint *plixA2 = pairlistxA2;
01598     plint *plimA2 = pairlistmA2;
01599     )
01600 
01601     int k;
01602 
01603 #if 0 ALCH(+1)
01604     int unsortedNpairn = plin - pairlistn;
01605     plin = pairlistn;
01606     for ( k=0; k<unsortedNpairn; ++k ) {
01607       int j = pairlistn[k];
01608       switch(pswitchTable[p_i_partition + 3*(p_1[j].partition)]) {
01609         case 0:  *(plin++) = j; break;
01610         case 1:  *(plinA1++) = j; break;
01611         case 2:  *(plinA2++) = j; break;
01612       }
01613     }
01614 #endif
01615 
    // pairlist2 holds the in-cutoff candidates whose ids fall inside this
    // atom's exclusion range.  Classify each by its exclusion flag
    // (0 = normal, 1 = fully excluded, 2 = modified 1-4) and — when
    // alchemy is on — by the partition switch (flag + 3*pswitch), routing
    // it to one of up to nine destination pairlists.
01616     int npair2 = pli - pairlist2;
01617     // if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
01618     // removed code for implicit exclusions within hydrogen groups -JCP
01619     for (k=0; k < npair2; ++k ) {
01620       int j = pairlist2[k];
01621       int atom2 = pExt_1[j].id;
01622       int excl_flag = excl_flags[atom2];
01623       ALCH(int pswitch = pswitchTable[p_i_partition + 3*(p_1[j].partition)];)
01624       switch ( excl_flag ALCH( + 3 * pswitch)) {
01625       case 0:  *(plin++) = j;  break;
01626       case 1:  *(plix++) = j;  break;
01627       case 2:  *(plim++) = j;  break;
01628       ALCH(
      // pswitch 1 selects the A1 lists (cases 3-5), pswitch 2 the A2 lists
      // (cases 6-8); within each, the same normal/excluded/modified split.
01629       case 3:  *(plinA1++) = j; break;
01630       case 6:  *(plinA2++) = j; break;
01631       case 5:  *(plimA1++) = j; break;
01632       case 8:  *(plimA2++) = j; break;
01633       case 4:  *(plixA1++) = j; break;
01634       case 7:  *(plixA2++) = j; break;
01635       )
01636       }
01637     }
01638 
    // Commit the three classified lists (normal, excluded, modified) into
    // pairlist storage.  Each list is padded with one sentinel element — a
    // duplicate of its last entry, or -1 when empty — so downstream
    // unrolled/vectorized consumers may safely read one slot past the end.
01639     npairn = plin - pairlistn;
01640     pairlistn_save = pairlistn;
01641     pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
01642     pairlists.newsize(npairn + 1);
01643 
    // The excluded and modified lists were built in scratch arrays; copy
    // them into freshly reserved pairlist storage before sizing.
01644     npairx = plix - pairlistx;
01645     pairlistx_save = pairlists.newlist();
01646     for ( k=0; k<npairx; ++k ) {
01647       pairlistx_save[k] = pairlistx[k];
01648     }
01649     pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
01650     pairlists.newsize(npairx + 1);
01651 
01652     npairm = plim - pairlistm;
01653     pairlistm_save = pairlists.newlist();
01654     for ( k=0; k<npairm; ++k ) {
01655       pairlistm_save[k] = pairlistm[k];
01656     }
01657     pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
01658     pairlists.newsize(npairm + 1);
01659 
01660 
01661 #if 0 ALCH(+1)
01662 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
01663 { \
01664   (NPAIRS) = (PL2) - (PL1); \
01665   (PLSAVE) = pairlists.newlist(); \
01666   for ( k=0; k<(NPAIRS); ++k ) { \
01667     (PLSAVE)[k] = (PL1)[k]; \
01668   } \
01669   (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
01670   pairlists.newsize((NPAIRS) + 1); \
01671 }
01672   
01673     PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
01674     PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
01675     PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
01676     PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
01677     PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
01678     PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
01679 #undef PAIRLISTFROMARRAY
01680 
01681 #endif
01682     
01683     if ( ! savePairlists ) pairlists.reset();  // limit space usage
01684     PAIR( pairlists.addIndex(); )
01685     
01686         // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
01687   } else { // if ( savePairlists || ! usePairlists )
01688         // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
01689 
01690     pairlists.nextlist(&pairlistn_save,&npairn);  --npairn;
01691     pairlists.nextlist(&pairlistx_save,&npairx);  --npairx;
01692     pairlists.nextlist(&pairlistm_save,&npairm);  --npairm;
01693     ALCH(
01694     pairlists.nextlist(&pairlistnA1_save,&npairnA1);  --npairnA1;
01695     pairlists.nextlist(&pairlistxA1_save,&npairxA1);  --npairxA1;
01696     pairlists.nextlist(&pairlistmA1_save,&npairmA1);  --npairmA1;
01697     pairlists.nextlist(&pairlistnA2_save,&npairnA2);  --npairnA2;
01698     pairlists.nextlist(&pairlistxA2_save,&npairxA2);  --npairxA2;
01699     pairlists.nextlist(&pairlistmA2_save,&npairmA2);  --npairmA2;
01700     )
01701 
01702   } // if ( savePairlists || ! usePairlists )
01703 
01704     LES( BigReal *lambda_table_i =
01705                         lambda_table + (lesFactor+1) * p_i.partition; )
01706 
01707     INT(
01708     const BigReal force_sign = (
01709       ( pairInteractionOn && ! pairInteractionSelf ) ?
01710         ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
01711     );
01712       
01713     )
01714 
01715     const BigReal kq_i = COULOMB * p_i.charge * scaling * dielectric_1;
01716     const LJTable::TableEntry * const lj_row =
01717                 ljTable->table_row(p_i.vdwType);
01718 
01719 #ifdef A2_QPX
01720     vector4double kq_iv = (vector4double)(0.0);
01721     vector4double f_i_v = (vector4double)(0.0);
01722     vector4double fullf_i_v = (vector4double)(0.0);
01723     vector4double full_cnst = (vector4double)(0.);
01724 #define f_i_x       vec_extract(f_i_v, 0)
01725 #define f_i_y       vec_extract(f_i_v, 1)
01726 #define f_i_z       vec_extract(f_i_v, 2)
01727 #define fullf_i_x   vec_extract(fullf_i_v, 0)
01728 #define fullf_i_y   vec_extract(fullf_i_v, 1)
01729 #define fullf_i_z   vec_extract(fullf_i_v, 2)
01730 #else
01731     SHORT( FAST( BigReal f_i_x = 0.; ) )
01732     SHORT( FAST( BigReal f_i_y = 0.; ) )
01733     SHORT( FAST( BigReal f_i_z = 0.; ) )
01734     FULL( BigReal fullf_i_x = 0.; )
01735     FULL( BigReal fullf_i_y = 0.; )
01736     FULL( BigReal fullf_i_z = 0.; )
01737 #endif
01738 
01739     int npairi;
01740     int k;
01741 
01742 #if KNL(1)+0
01743     const float kq_i_f = kq_i;
01744 
01745     npairi = pairlist_from_pairlist_knl(ComputeNonbondedUtil::cutoff2_f,
01746         p_i_x_f, p_i_y_f, p_i_z_f, pFlt_1, pairlistn_save, npairn, pairlisti,
01747         r2list_f, xlist, ylist, zlist);
01748 #else
01749     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01750         p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
01751         r2_delta, r2list);
01752 #endif
01753 
01754     if ( npairi ) {
01755 
01756 // BEGIN NBTHOLE OF DRUDE MODEL
01757 #if (FAST(1+)0)
01758     if (drudeNbthole && p_i.hydrogenGroupSize > 1) {
01759   
01760       Parameters *parameters = params->parameters;
01761       const NbtholePairValue * const nbthole_array = parameters->nbthole_array;
01762       const int NumNbtholePairParams = parameters->NumNbtholePairParams;
01763       BigReal drudeNbtholeCut = simParams -> drudeNbtholeCut;
01764       NOKNL( BigReal drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut) + r2_delta; )
01765       KNL( float drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut); )
01766       BigReal CC = COULOMB * scaling * dielectric_1;
01767       int kk;
01768 
01769       for (k = 0; k < npairi; k++) {
01770             NOKNL( if (r2list[k] > drudeNbtholeCut2) { continue; } )
01771             KNL( if (r2list_f[k] > drudeNbtholeCut2) { continue; } )
01772   
01773             const int j = pairlisti[k];
01774             const CompAtom& p_j = p_1[j];
01775 
01776             if ( p_j.hydrogenGroupSize < 2 ) { continue; }
01777   
01778         for (kk = 0;kk < NumNbtholePairParams; kk++) {
01779 
01780                if (((nbthole_array[kk].ind1 == p_i.vdwType) &&
01781                   (nbthole_array[kk].ind2 == p_j.vdwType)) ||
01782                   ((nbthole_array[kk].ind2 == p_i.vdwType) &&
01783                    (nbthole_array[kk].ind1 == p_j.vdwType))) {
01784                  break;
01785                }
01786         }
01787         if ( kk < NumNbtholePairParams ) {
01788     
01789                 BigReal aprod = mol->GetAtomAlpha(pExt_0[i].id) * mol->GetAtomAlpha(pExt_1[j].id);
01790                 const BigReal aa = nbthole_array[kk].tholeij * powf(aprod, -1.f/6);
01791                 const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
01792                 const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
01793                 const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
01794                 const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;
01795 
01796                 Vector raa = (p_0[i].position + params->offset) - p_1[j].position;
01797                 Vector rad = (p_0[i].position + params->offset) - p_1[j+1].position;
01798                 Vector rda = (p_0[i+1].position + params->offset) - p_1[j].position;
01799                 Vector rdd = (p_0[i+1].position + params->offset) - p_1[j+1].position;
01800 
01801                 BigReal raa_invlen = raa.rlength();  // reciprocal of length
01802                 BigReal rad_invlen = rad.rlength();
01803                 BigReal rda_invlen = rda.rlength();
01804                 BigReal rdd_invlen = rdd.rlength();
01805 
01806                 BigReal auaa = aa / raa_invlen;
01807                 BigReal auad = aa / rad_invlen;
01808                 BigReal auda = aa / rda_invlen;
01809                 BigReal audd = aa / rdd_invlen;
01810 
01811                 BigReal expauaa = exp(-auaa);
01812                 BigReal expauad = exp(-auad);
01813                 BigReal expauda = exp(-auda);
01814                 BigReal expaudd = exp(-audd);
01815 
01816                 BigReal polyauaa = 1 + 0.5*auaa;
01817                 BigReal polyauad = 1 + 0.5*auad;
01818                 BigReal polyauda = 1 + 0.5*auda;
01819                 BigReal polyaudd = 1 + 0.5*audd;
01820 
01821                 BigReal ethole = 0;
01822                 ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
01823                 ethole += qqad * rad_invlen * (- polyauad * expauad);
01824                 ethole += qqda * rda_invlen * (- polyauda * expauda);
01825                 ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);
01826 
01827                 polyauaa = 1 + auaa*polyauaa;
01828                 polyauad = 1 + auad*polyauad;
01829                 polyauda = 1 + auda*polyauda;
01830                 polyaudd = 1 + audd*polyaudd;
01831 
01832                 BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
01833                 BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
01834                 BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
01835                 BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;
01836 
01837                 BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
01838                 BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
01839                 BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
01840                 BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);
01841 
01842                 Vector faa = -dfaa * raa;
01843                 Vector fad = -dfad * rad;
01844                 Vector fda = -dfda * rda;
01845                 Vector fdd = -dfdd * rdd;
01846 
01847                 SHORT(f_net) NOSHORT(fullf_net) += (faa + fad) + (fda + fdd);
01848                 params->ff[0][i] += faa + fad;
01849                 params->ff[0][i+1] += fda + fdd;
01850                 params->ff[1][j] -= faa + fda;
01851                 params->ff[1][j+1] -= fad + fdd;
01852 
01853                 reduction[electEnergyIndex] += ethole;
01854           }
01855        }
01856     }
01857 #endif
01858 // END NBTHOLE OF DRUDE MODEL
01859     
01860     // BEGIN LA
01861 #if (FAST(1+)0)
01862     if (doLoweAndersen && p_i.hydrogenGroupSize) {
01863         BigReal loweAndersenCutoff = simParams->loweAndersenCutoff;
01864         BigReal loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff) + r2_delta;
01865         BigReal loweAndersenProb = simParams->loweAndersenRate * (simParams->dt * simParams->nonbondedFrequency) * 0.001; // loweAndersenRate is in 1/ps
01866         const bool loweAndersenUseCOMvelocity = (simParams->rigidBonds != RIGID_NONE);
01867         const BigReal kbT = BOLTZMANN * (simParams->loweAndersenTemp);
01868         const BigReal dt_inv = TIMEFACTOR / (simParams->dt * simParams->nonbondedFrequency);
01869         //const BigReal dt_inv = 1.0 / simParams->dt;
01870         //BigReal kbT = 8.3145e-7 * (simParams->loweAndersenTemp); // in A^2/fs^2 * K
01871         
01872         const CompAtom* v_0 = params->v[0];
01873         const CompAtom* v_1 = params->v[1];
01874         const CompAtom& v_i = v_0[i];
01875         Mass mass_i = v_i.charge;
01876         Velocity vel_i = v_i.position;
01877         Position pos_i = p_i.position;
01878         int atom_i = pExt_0[i].id;
01879         
01880         if (loweAndersenUseCOMvelocity) {
01881             Mass mass = 0;
01882             Velocity vel = 0;
01883             Position pos = 0;
01884             for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
01885                 vel += v_0[i+l].charge * v_0[i+l].position;
01886                 pos += v_0[i+l].charge * p_0[i+l].position;
01887                 mass += v_0[i+l].charge;
01888             }
01889             vel_i = vel / mass;
01890             pos_i = pos / mass;
01891             mass_i = mass;
01892         }
01893         
01894         // Note v[0].charge is actually mass
01895         //Random rand(CkMyPe()); // ?? OK ?? NO!!!!
01896         Random *rand = params->random;
01897         for (k = 0; k < npairi; k++) {
01898             if (r2list[k] > loweAndersenCutoff2) { continue; }
01899                 
01900             const int j = pairlisti[k];
01901             const CompAtom& v_j = v_1[j];
01902             const CompAtom& p_j = p_1[j];
01903                 
01904             if (!p_j.hydrogenGroupSize) { continue; }
01905             if (rand->uniform() > loweAndersenProb) { continue; }
01906                 
01907             Mass mass_j = v_j.charge;
01908             Velocity vel_j = v_j.position;
01909             Position pos_j = p_j.position;
01910             int atom_j = pExt_1[j].id;
01911                 
01912             if (loweAndersenUseCOMvelocity) {
01913                 Mass mass = 0;
01914                 Velocity vel = 0;
01915                 Position pos = 0;
01916                 for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
01917                     vel += v_1[j+l].charge * v_1[j+l].position;
01918                     pos += v_1[j+l].charge * p_1[j+l].position;
01919                     mass += v_1[j+l].charge;
01920                 }
01921                 vel_j = vel / mass;
01922                 pos_j = pos / mass;
01923                 mass_j = mass;
01924             }
01925                 
01926             //Velocity deltaV = v_i.position - v_j.position;
01927             Velocity deltaV = vel_i - vel_j;
01928             Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
01929             //Vector sep = (p_i.position + params->offset) - p_j.position;
01930             Vector sep = (pos_i + params->offset) - pos_j;
01931             Vector sigma_ij = sep.unit();
01932             BigReal lambda = rand->gaussian() * sqrt(kbT / mu_ij);
01933             Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;
01934                 
01935             //DebugM(1, "atom1 atom2 = " << atom1 << " " << atom2 << " lambda = " << lambda << " force = " << force << " deltaP = " << sep.length() << " sqrt(r2) = " << sqrt(r2list[k]) << "\n");
01936             //DebugM(1, "atom1 atom2 = " << atom_i << " " << atom_j << " mass1 mass2 = " << mass_i << " " << mass_j << " hgrp1 hgrp2 " << p_i.hydrogenGroupSize << " " << p_j.hydrogenGroupSize << "\n");
01937                 
01938             if (loweAndersenUseCOMvelocity) {
01939                 BigReal inv_mass_i = 1.0 / mass_i;
01940                 BigReal inv_mass_j = 1.0 / mass_j;
01941                 for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
01942                     params->ff[0][i+l] += (v_0[i+l].charge * inv_mass_i) * force;
01943                 }
01944                 for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
01945                     params->ff[1][j+l] -= (v_1[j+l].charge * inv_mass_j) * force;
01946                 }
01947             } else {
01948                 params->ff[0][i] += force;
01949                 params->ff[1][j] -= force;
01950             }
01951             SHORT(f_net) NOSHORT(fullf_net) += force;
01952         }
01953     }
01954 #endif
01955     // END LA
01956 
01957 #define NORMAL(X) X
01958 #define EXCLUDED(X)
01959 #define MODIFIED(X)
01960 #define PRAGMA_SIMD
01961 #if KNL(1+)0
01962   switch ( vdw_switch_mode ) {
01963 
01964 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_ENERGY
01965     case VDW_SWITCH_MODE:
01966 #include  "ComputeNonbondedBase2KNL.h"
01967     break;
01968 #undef VDW_SWITCH_MODE
01969 
01970 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_MARTINI
01971     case VDW_SWITCH_MODE:
01972 #include  "ComputeNonbondedBase2KNL.h"
01973     break;
01974 #undef VDW_SWITCH_MODE
01975 
01976 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_FORCE
01977     case VDW_SWITCH_MODE:
01978 #include  "ComputeNonbondedBase2KNL.h"
01979     break;
01980 #undef VDW_SWITCH_MODE
01981 
01982   }
01983 #else
01984 #include  "ComputeNonbondedBase2.h"
01985 #endif
01986 #undef PRAGMA_SIMD
01987 #undef NORMAL
01988 #undef EXCLUDED
01989 #undef MODIFIED
01990 
01991     }  // if ( npairi )
01992 
01993     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01994         p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
01995         r2_delta, r2list);
01996     exclChecksum += npairi;
01997 
01998 #define NORMAL(X)
01999 #define EXCLUDED(X)
02000 #define MODIFIED(X) X
02001 #include  "ComputeNonbondedBase2.h"
02002 #undef NORMAL
02003 #undef EXCLUDED
02004 #undef MODIFIED
02005 
02006 #ifdef FULLELECT
02007     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02008         p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
02009         r2_delta, r2list);
02010     exclChecksum += npairi;
02011 
02012 #undef FAST
02013 #define FAST(X)
02014 #define NORMAL(X)
02015 #define EXCLUDED(X) X
02016 #define MODIFIED(X)
02017 #include  "ComputeNonbondedBase2.h"
02018 #undef FAST
02019 #ifdef SLOWONLY
02020   #define FAST(X)
02021 #else
02022   #define FAST(X) X
02023 #endif
02024 #undef NORMAL
02025 #undef EXCLUDED
02026 #undef MODIFIED
02027 #else
02028     exclChecksum += npairx;
02029 #endif
02030 
02031 #if 0 ALCH(+1)   // nonbondedbase2 for alchemical-separeted pairlists
02032 
02033     #undef ALCHPAIR
02034     #define ALCHPAIR(X) X
02035     #undef NOT_ALCHPAIR
02036     #define NOT_ALCHPAIR(X)
02037     BigReal myLambda; FEP(BigReal myLambda2;)
02038     BigReal myElecLambda;  FEP(BigReal myElecLambda2;)
02039     BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
02040     BigReal myVdwShift; FEP(BigReal myVdwShift2;)
02041     BigReal alch_vdw_energy; BigReal alch_vdw_force;
02042     FEP(BigReal alch_vdw_energy_2; BigReal alch_vdw_energy_2_Left;) TI(BigReal alch_vdw_dUdl;)
02043     BigReal shiftedElec; BigReal shiftedElecForce;
02044     
02045     /********************************************************************/
02046     /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
02047     /********************************************************************/
02048     #define NORMAL(X) X
02049     #define EXCLUDED(X)
02050     #define MODIFIED(X)
02051 
02052     #define ALCH1(X) X
02053     #define ALCH2(X)
02054     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02055             p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
02056             r2_delta, r2list);
02057     #include  "ComputeNonbondedBase2.h" // normal, direction 'up'
02058     #undef ALCH1
02059     #undef ALCH2
02060 
02061     #define ALCH1(X)
02062     #define ALCH2(X) X
02063     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02064             p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
02065             r2_delta, r2list);
02066     #include  "ComputeNonbondedBase2.h" // normal, direction 'down'
02067     #undef ALCH1
02068     #undef ALCH2
02069 
02070     #undef NORMAL
02071     #undef EXCLUDED
02072     #undef MODIFIED
02073     
02074     /********************************************************************/
02075     /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
02076     /********************************************************************/
02077     #define NORMAL(X)
02078     #define EXCLUDED(X)
02079     #define MODIFIED(X) X
02080 
02081     #define ALCH1(X) X
02082     #define ALCH2(X)
02083     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02084             p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
02085             r2_delta, r2list);
02086         exclChecksum += npairi;
02087     #include  "ComputeNonbondedBase2.h" // modified, direction 'up'
02088     #undef ALCH1
02089     #undef ALCH2
02090 
02091     #define ALCH1(X)
02092     #define ALCH2(X) X
02093     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02094             p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
02095             r2_delta, r2list);
02096         exclChecksum += npairi;
02097     #include  "ComputeNonbondedBase2.h" // modified, direction 'down'
02098     #undef ALCH1
02099     #undef ALCH2
02100 
02101     #undef NORMAL
02102     #undef EXCLUDED
02103     #undef MODIFIED
02104     
02105     /********************************************************************/
02106     /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
02107     /********************************************************************/
02108     #ifdef FULLELECT
02109     #undef FAST
02110     #define FAST(X)
02111     #define NORMAL(X)
02112     #define EXCLUDED(X) X
02113     #define MODIFIED(X)
02114 
02115     #define ALCH1(X) X
02116     #define ALCH2(X)
02117     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02118             p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
02119             r2_delta, r2list);
02120         exclChecksum += npairi;
02121     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'up'
02122     #undef ALCH1
02123     #undef ALCH2
02124 
02125     #define ALCH1(X)
02126     #define ALCH2(X) X
02127     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02128             p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
02129             r2_delta, r2list);
02130         exclChecksum += npairi;
02131     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'down'
02132     #undef ALCH1
02133     #undef ALCH2
02134 
02135     #undef FAST
02136     #ifdef SLOWONLY
02137       #define FAST(X)
02138     #else
02139       #define FAST(X) X
02140     #endif
02141     #undef NORMAL
02142     #undef EXCLUDED
02143     #undef MODIFIED
02144     #else
02145         exclChecksum += npairxA1 + npairxA2;
02146     #endif
02147 
02148     #undef ALCHPAIR
02149     #define ALCHPAIR(X)
02150     #undef NOT_ALCHPAIR
02151     #define NOT_ALCHPAIR(X) X
02152 
02153 #endif // end nonbondedbase2.h includes for alchemical pairlists
02154 
02155 #ifdef NETWORK_PROGRESS
02156     CkNetworkProgress();
02157 #endif
02158 
02159 #ifdef ARCH_POWERPC
02160     //data cache block touch the position structure
02161     __dcbt ((void *) &(p_0[i+1]));
02162     //__prefetch_by_load ((void *)&(groupCount));
02163 #endif
02164 
02165 #ifndef NAMD_CUDA
02166     SHORT( FAST( f_net.x += f_i_x; ) )
02167     SHORT( FAST( f_net.y += f_i_y; ) )
02168     SHORT( FAST( f_net.z += f_i_z; ) )
02169     FULL( fullf_net.x += fullf_i_x; )
02170     FULL( fullf_net.y += fullf_i_y; )
02171     FULL( fullf_net.z += fullf_i_z; )
02172     SHORT( FAST( f_0[i].x += f_i_x; ) )
02173     SHORT( FAST( f_0[i].y += f_i_y; ) )
02174     SHORT( FAST( f_0[i].z += f_i_z; ) )
02175     FULL( fullf_0[i].x += fullf_i_x; )
02176     FULL( fullf_0[i].y += fullf_i_y; )
02177     FULL( fullf_0[i].z += fullf_i_z; )
02178 #endif
02179 PAIR(
02180     if ( savePairlists || ! usePairlists ) {
02181       i++;
02182     } else {
02183       i = pairlists.getIndexValue();
02184     }
02185 )
02186 
02187         // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
02188   } // for i
02189 
02190   // PAIR(iout << "++++++++\n" << endi;)
02191   PAIR( if ( savePairlists ) { pairlists.setIndexValue(i); } )
02192 
02193 #ifdef f_1
02194 #undef f_1
02195 #endif
02196 #if ( SHORT( FAST( 1+ ) ) 0 )
02197 #if ( PAIR( 1+ ) 0 )
02198   {
02199 #ifndef NAMD_CUDA
02200     BigReal virial_xx = f_net.x * params->offset_f.x;
02201     BigReal virial_xy = f_net.x * params->offset_f.y;
02202     BigReal virial_xz = f_net.x * params->offset_f.z;
02203     BigReal virial_yy = f_net.y * params->offset_f.y;
02204     BigReal virial_yz = f_net.y * params->offset_f.z;
02205     BigReal virial_zz = f_net.z * params->offset_f.z;
02206 
02207     reduction[virialIndex_XX] += virial_xx;
02208     reduction[virialIndex_XY] += virial_xy;
02209     reduction[virialIndex_XZ] += virial_xz;
02210     reduction[virialIndex_YX] += virial_xy;
02211     reduction[virialIndex_YY] += virial_yy;
02212     reduction[virialIndex_YZ] += virial_yz;
02213     reduction[virialIndex_ZX] += virial_xz;
02214     reduction[virialIndex_ZY] += virial_yz;
02215     reduction[virialIndex_ZZ] += virial_zz;
02216 #endif
02217   }
02218 #endif
02219 #endif
02220 
02221 #ifdef fullf_1
02222 #undef fullf_1
02223 #endif
02224 #if ( FULL( 1+ ) 0 )
02225 #if ( PAIR( 1+ ) 0 )
02226   {
02227 #ifndef NAMD_CUDA
02228     BigReal fullElectVirial_xx = fullf_net.x * params->offset_f.x;
02229     BigReal fullElectVirial_xy = fullf_net.x * params->offset_f.y;
02230     BigReal fullElectVirial_xz = fullf_net.x * params->offset_f.z;
02231     BigReal fullElectVirial_yy = fullf_net.y * params->offset_f.y;
02232     BigReal fullElectVirial_yz = fullf_net.y * params->offset_f.z;
02233     BigReal fullElectVirial_zz = fullf_net.z * params->offset_f.z;
02234 
02235     reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
02236     reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
02237     reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
02238     reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
02239     reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
02240     reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
02241     reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
02242     reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
02243     reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
02244 #endif
02245   }
02246 #endif
02247 #endif
02248 
02249 #ifndef NAMD_CUDA
02250   reduction[exclChecksumIndex] += exclChecksum;
02251 #endif
02252   FAST
02253   (
02254    // JLai
02255   ENERGY( reduction[vdwEnergyIndex] += vdwEnergy;
02256           GO( reduction[groLJEnergyIndex] += groLJEnergy;
02257               reduction[groGaussEnergyIndex] += groGaussEnergy;
02258               reduction[goNativeEnergyIndex] += goEnergyNative;
02259               reduction[goNonnativeEnergyIndex] += goEnergyNonnative; ) )
02260   SHORT
02261   (
02262   ENERGY( reduction[electEnergyIndex] += electEnergy; )
02263   )
02264   ALCH
02265   (
02266     TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;) 
02267     TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;) 
02268     FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
02269     FEP( reduction[vdwEnergyIndex_s_Left] += vdwEnergy_s_Left; )
02270   SHORT
02271   (
02272     FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
02273     TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;) 
02274     TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;) 
02275   )
02276   )
02277   )
02278   FULL
02279   (
02280   ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
02281   ALCH
02282   (
02283     FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
02284     TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;) 
02285     TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;) 
02286   )
02287   )
02288 
02289   delete [] excl_flags_buff;
02290 
02291 }
02292 

Generated on Thu Nov 23 01:17:11 2017 for NAMD by  doxygen 1.4.7