Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedBase.h

Go to the documentation of this file.
00001 
00007 // Several special cases are defined:
00008 //   NBTYPE: exclusion method (NBPAIR, NBSELF -- mutually exclusive)
00009 //   FULLELECT full electrostatics calculation?
00010 
00011 #ifdef ARCH_POWERPC
00012 #include <builtins.h>
00013 #endif
00014 
00015 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00016 #include <emmintrin.h>  // We're using SSE2 intrinsics
00017 #if defined(__INTEL_COMPILER)
00018 #define __align(X) __declspec(align(X) )
00019 #elif defined(__GNUC__) || defined(__PGI)
00020 #define __align(X)  __attribute__((aligned(X) ))
00021 #else
00022 #define __align(X) __declspec(align(X) )
00023 #endif
00024 #endif
00025 
00026 #ifdef DEFINITION // (
00027   #include "LJTable.h"
00028   #include "Molecule.h"
00029   #include "ComputeNonbondedUtil.h"
00030 #endif // )
00031   #include "Parameters.h"
00032 #if NAMD_ComputeNonbonded_SortAtoms != 0
00033   #include "PatchMap.h"
00034 #endif
00035 
00036 // determining class name
00037 #undef NAME
00038 #undef CLASS
00039 #undef CLASSNAME
00040 #define NAME CLASSNAME(calc)
00041 
00042 #undef PAIR
00043 #if NBTYPE == NBPAIR
00044   #define PAIR(X) X
00045   #define CLASS ComputeNonbondedPair
00046   #define CLASSNAME(X) ENERGYNAME( X ## _pair )
00047 #else
00048   #define PAIR(X)
00049 #endif
00050 
00051 #undef SELF
00052 #if NBTYPE == NBSELF
00053   #define SELF(X) X
00054   #define CLASS ComputeNonbondedSelf
00055   #define CLASSNAME(X) ENERGYNAME( X ## _self )
00056 #else
00057   #define SELF(X)
00058 #endif
00059 
00060 #undef ENERGYNAME
00061 #undef ENERGY
00062 #undef NOENERGY
00063 #ifdef CALCENERGY
00064   #define ENERGY(X) X
00065   #define NOENERGY(X)
00066   #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
00067 #else
00068   #define ENERGY(X)
00069   #define NOENERGY(X) X
00070   #define ENERGYNAME(X) SLOWONLYNAME( X )
00071 #endif
00072 
00073 #undef SLOWONLYNAME
00074 #undef FAST
00075 #ifdef SLOWONLY
00076   #define FAST(X)
00077   #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
00078 #else
00079   #define FAST(X) X
00080   #define SLOWONLYNAME(X) MERGEELECTNAME( X )
00081 #endif
00082 
00083 #undef MERGEELECTNAME
00084 #undef SHORT
00085 #undef NOSHORT
00086 #ifdef MERGEELECT
00087   #define SHORT(X)
00088   #define NOSHORT(X) X
00089   #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
00090 #else
00091   #define SHORT(X) X
00092   #define NOSHORT(X)
00093   #define MERGEELECTNAME(X) FULLELECTNAME( X )
00094 #endif
00095 
00096 #undef FULLELECTNAME
00097 #undef FULL
00098 #undef NOFULL
00099 #ifdef FULLELECT
00100   #define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect )
00101   #define FULL(X) X
00102   #define NOFULL(X)
00103 #else
00104   #define FULLELECTNAME(X) TABENERGYNAME( X )
00105   #define FULL(X)
00106   #define NOFULL(X) X
00107 #endif
00108 
00109 #undef TABENERGYNAME
00110 #undef TABENERGY
00111 #undef NOTABENERGY
00112 #ifdef TABENERGYFLAG
00113   #define TABENERGYNAME(X) FEPNAME( X ## _tabener )
00114   #define TABENERGY(X) X
00115   #define NOTABENERGY(X)
00116 #else
00117   #define TABENERGYNAME(X) FEPNAME( X )
00118   #define TABENERGY(X)
00119   #define NOTABENERGY(X) X
00120 #endif
00121 
00122 // Here are what these macros stand for:
00123 // FEP/NOT_FEP: FEP free energy perturbation is active/inactive 
00124 //      (does NOT use LAM)
00125 // LES: locally-enhanced sampling is active
00126 // LAM: scale the potential by a factor lambda (except FEP)
00127 // INT: measure interaction energies
00128 // PPROF: pressure profiling
00129 
00130 #undef FEPNAME
00131 #undef FEP
00132 #undef LES
00133 #undef INT
00134 #undef PPROF
00135 #undef LAM
00136 #undef CUDA
00137 #undef ALCH
00138 #undef TI
00139 #undef GO
00140 #define FEPNAME(X) LAST( X )
00141 #define FEP(X)
00142 #define ALCHPAIR(X)
00143 #define NOT_ALCHPAIR(X) X
00144 #define LES(X)
00145 #define INT(X)
00146 #define PPROF(X)
00147 #define LAM(X)
00148 #define CUDA(X)
00149 #define ALCH(X)
00150 #define TI(X)
00151 #define GO(X)
00152 #ifdef FEPFLAG
00153   #undef FEPNAME
00154   #undef FEP
00155   #undef ALCH
00156   #define FEPNAME(X) LAST( X ## _fep )
00157   #define FEP(X) X
00158   #define ALCH(X) X
00159 #endif
00160 #ifdef TIFLAG
00161   #undef FEPNAME
00162   #undef TI
00163   #undef ALCH
00164   #define FEPNAME(X) LAST( X ## _ti )
00165   #define TI(X) X
00166   #define ALCH(X) X
00167 #endif
00168 #ifdef LESFLAG
00169   #undef FEPNAME
00170   #undef LES
00171   #undef LAM
00172   #define FEPNAME(X) LAST( X ## _les )
00173   #define LES(X) X
00174   #define LAM(X) X
00175 #endif
00176 #ifdef INTFLAG
00177   #undef FEPNAME
00178   #undef INT
00179   #define FEPNAME(X) LAST( X ## _int )
00180   #define INT(X) X
00181 #endif
00182 #ifdef PPROFFLAG
00183   #undef FEPNAME
00184   #undef INT
00185   #undef PPROF
00186   #define FEPNAME(X) LAST( X ## _pprof )
00187   #define INT(X) X
00188   #define PPROF(X) X
00189 #endif
00190 #ifdef GOFORCES
00191   #undef FEPNAME
00192   #undef GO
00193   #define FEPNAME(X) LAST( X ## _go )
00194   #define GO(X) X
00195 #endif
00196 #ifdef NAMD_CUDA
00197   #undef CUDA
00198   #define CUDA(X) X
00199 #endif
00200 
00201 #define LAST(X) X
00202 
00203 // see if things are really messed up
00204 SELF( PAIR( foo bar ) )
00205 LES( FEP( foo bar ) )
00206 LES( INT( foo bar ) )
00207 FEP( INT( foo bar ) )
00208 LAM( INT( foo bar ) )
00209 FEP( NOENERGY( foo bar ) )
00210 ENERGY( NOENERGY( foo bar ) )
00211 TABENERGY(NOTABENERGY( foo bar ) )
00212 
00213 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00214   #define COMPONENT_DOTPRODUCT(A,B)  ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))
00215 #endif
00216 
00217 
00218 // ************************************************************
00219 // function header
00220 void ComputeNonbondedUtil :: NAME
00221   ( nonbonded *params )
00222 
00223 // function body
00224 {
00225   // int NAME;  // to track errors via unused variable warnings
00226 
00227   if ( ComputeNonbondedUtil::commOnly ) return;
00228 
00229 #ifdef NAMD_CUDA
00230   NOENERGY(return;)
00231 #endif
00232 
00233   // speedup variables
00234   BigReal *reduction = params->reduction;
00235   SimParameters *simParams = Node::Object()->simParameters;
00236 
00237   PPROF(
00238   BigReal *pressureProfileReduction = params->pressureProfileReduction;
00239   const BigReal invThickness = 1.0 / pressureProfileThickness;
00240   )
00241 
00242   Pairlists &pairlists = *(params->pairlists);
00243 #ifdef NAMD_CUDA
00244   int savePairlists = 0;
00245   int usePairlists = 0;
00246 #else
00247   int savePairlists = params->savePairlists;
00248   int usePairlists = params->usePairlists;
00249 #endif
00250   pairlists.reset();
00251   // PAIR(iout << "--------\n" << endi;)
00252   
00253   // BEGIN LA
00254   const int doLoweAndersen = params->doLoweAndersen;
00255   // END LA
00256 
00257   // local variables
00258   int exclChecksum = 0;
00259   FAST
00260   (
00261    // JLai
00262   ENERGY( BigReal vdwEnergy = 0;
00263           GO( BigReal goEnergyNative = 0;
00264               BigReal goEnergyNonnative = 0; ) )
00265   SHORT
00266   (
00267   ENERGY( BigReal electEnergy = 0; )
00268   )
00269 
00270   FEP
00271   (
00272   ENERGY( BigReal vdwEnergy_s = 0; )
00273   SHORT
00274   (
00275   ENERGY( BigReal electEnergy_s = 0; )
00276   )
00277   )
00278   
00279   SHORT
00280   (
00281   BigReal virial_xx = 0;
00282   BigReal virial_xy = 0;
00283   BigReal virial_xz = 0;
00284   BigReal virial_yy = 0;
00285   BigReal virial_yz = 0;
00286   BigReal virial_zz = 0;
00287   )
00288   )
00289   FULL
00290   (
00291   ENERGY( BigReal fullElectEnergy = 0; )
00292   FEP
00293   (
00294   ENERGY( BigReal fullElectEnergy_s = 0; )
00295   )
00296   BigReal fullElectVirial_xx = 0;
00297   BigReal fullElectVirial_xy = 0;
00298   BigReal fullElectVirial_xz = 0;
00299   BigReal fullElectVirial_yy = 0;
00300   BigReal fullElectVirial_yz = 0;
00301   BigReal fullElectVirial_zz = 0;
00302   )
00303 
00304   // Bringing stuff into local namespace for speed.
00305 
00306   const BigReal offset_x = params->offset.x;
00307   const BigReal offset_y = params->offset.y;
00308   const BigReal offset_z = params->offset.z;
00309 
00310   register const BigReal plcutoff2 = \
00311                         params->plcutoff * params->plcutoff;
00312   register const BigReal groupplcutoff2 = \
00313                         params->groupplcutoff * params->groupplcutoff;
00314   const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00315   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00316   LJTable::TableEntry ljNull;  ljNull.A = 0; ljNull.B = 0;
00317   const LJTable::TableEntry* const lj_null_pars = &ljNull;
00318   const Molecule* const mol = ComputeNonbondedUtil:: mol;
00319   SHORT
00320   (
00321   const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00322   )
00323   FULL
00324   (
00325   SHORT
00326   (
00327   const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00328   )
00329   NOSHORT
00330   (
00331 //#if 1 ALCH(-1)
00332   const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00333 //#else  // have to switch this for ALCH
00334 //  BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00335 //#endif
00336   )
00337   )
00338   const BigReal scaling = ComputeNonbondedUtil:: scaling;
00339   const BigReal modf_mod = 1.0 - scale14;
00340   FAST
00341   (
00342   const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00343   const BigReal c1 = ComputeNonbondedUtil:: c1;
00344   const BigReal c3 = ComputeNonbondedUtil:: c3;
00345   )
00346   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00347   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00348   // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00349   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00350 
00351   ALCH(
00352     const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00353     const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00354     const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00355     const BigReal alchElecLambdaStart = ComputeNonbondedUtil::alchElecLambdaStart;
00356     const BigReal alchVdwLambdaEnd = ComputeNonbondedUtil::alchVdwLambdaEnd;
00357     const BigReal alchVdwShiftCoeff = ComputeNonbondedUtil::alchVdwShiftCoeff;
00358     const Bool Fep_WCA_repuOn = ComputeNonbondedUtil::Fep_WCA_repuOn;
00359     const Bool Fep_WCA_dispOn = ComputeNonbondedUtil::Fep_WCA_dispOn;
00360     const BigReal WCA_rcut1 = ComputeNonbondedUtil::WCA_rcut1;
00361     const BigReal WCA_rcut2 = ComputeNonbondedUtil::WCA_rcut2;
00362 
00363     /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00364     BigReal lambdaUp = ComputeNonbondedUtil::alchLambda;
00365     BigReal elecLambdaUp =  (lambdaUp <= alchElecLambdaStart)? 0. : \
00366               (lambdaUp - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00367     BigReal vdwLambdaUp = 
00368         (lambdaUp >= alchVdwLambdaEnd)? 1. : lambdaUp / alchVdwLambdaEnd; 
00369     BigReal vdwShiftUp = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaUp);
00370     FEP(BigReal lambda2Up = ComputeNonbondedUtil::alchLambda2;)
00371     FEP(BigReal elecLambda2Up = (lambda2Up <= alchElecLambdaStart)? 0. : \
00372               (lambda2Up - alchElecLambdaStart) / (1. - alchElecLambdaStart);)
00373     FEP(BigReal vdwLambda2Up = 
00374         (lambda2Up >= alchVdwLambdaEnd)? 1. : lambda2Up / alchVdwLambdaEnd;) 
00375     FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Up);)
00376 
00377         
00378     /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00379     BigReal lambdaDown = 1 - ComputeNonbondedUtil::alchLambda;
00380     BigReal elecLambdaDown =  (lambdaDown <= alchElecLambdaStart)? 0. : \
00381               (lambdaDown - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00382     BigReal vdwLambdaDown = 
00383         (lambdaDown >= alchVdwLambdaEnd)? 1. : lambdaDown / alchVdwLambdaEnd; 
00384     BigReal vdwShiftDown = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaDown);
00385     FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::alchLambda2;)
00386     FEP(BigReal elecLambda2Down = (lambda2Down <= alchElecLambdaStart)? 0. : \
00387         (lambda2Down - alchElecLambdaStart) / (1. - alchElecLambdaStart); )
00388     FEP(BigReal vdwLambda2Down = 
00389         (lambda2Down >= alchVdwLambdaEnd)? 1. : lambda2Down / alchVdwLambdaEnd; )
00390     FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Down);)
00391 
00392   
00393   // Thermodynamic Integration Notes: 
00394   // Separation of atom pairs into different pairlist according to lambda
00395   // coupling, for alchemical free energy calculations. Normal pairlists
00396   // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00397   // (pairlist[nxm][12]_save are created for atoms switched up or down with
00398   // lambda respectively.
00399   // This makes TI coding far easier and more readable, since it's necessary 
00400   // to store dU/dlambda in different variables depending on whether atoms are
00401   // being switched up or down. Further, it allows the prior explicit coding of 
00402   // the separation-shifted vdW potential to stay in place without a big 
00403   // performance hit, since it's no longer necessary to calculate vdW potentials
00404   // explicity for the bulk (non-alchemical) interactions - so that part of the 
00405   // free energy code stays readable and easy to modify. Finally there should
00406   // be some performance gain over the old FEP implementation as we no
00407   // longer have to check the partitions of each atom pair and switch
00408   // parameters accordingly for every single NonbondedBase2.h loop - this is 
00409   // done at the pairlist level
00410   
00411   int pswitchTable[3*3];
00412   // determines which pairlist alchemical pairings get sent to
00413   // 0: non-alchemical pairs, partition 0 <-> partition 0
00414   // 1: atoms scaled up as lambda increases, p0<->p1
00415   // 2: atoms scaled down as lambda increases, p0<->p2
00416   // all p1<->p2 interactions to be dropped (99)
00417   // in general, 'up' refers to 1, 'down' refers to 2
00418   for (int ip=0; ip<3; ++ip) {
00419     for (int jp=0; jp<3; ++jp ) {
00420       pswitchTable[ip+3*jp] = 0;
00421       if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00422       if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00423       if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00424 
00425       if (! ComputeNonbondedUtil::alchDecouple) {
00426         // no decoupling: interactions within a partition are treated like
00427         // normal alchemical pairs
00428         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00429         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00430       }
00431       if (ComputeNonbondedUtil::alchDecouple) {
00432         // decoupling: PME calculates extra grids so that while PME 
00433         // interaction with the full system is switched off, a new PME grid
00434         // containing only alchemical atoms is switched on. Full interactions 
00435         // between alchemical atoms are maintained; potentials within one 
00436         // partition need not be scaled here.
00437         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00438         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00439       }
00440     }
00441   }
00442 
00443   // dU/dlambda variables for thermodynamic integration
00444   TI(
00445       BigReal vdwEnergy_ti_1 = 0;
00446       BigReal vdwEnergy_ti_2 = 0;
00447       SHORT(BigReal electEnergy_ti_1 = 0;
00448       BigReal electEnergy_ti_2 = 0;)
00449       FULL(BigReal fullElectEnergy_ti_1 = 0; 
00450       BigReal fullElectEnergy_ti_2 = 0;) 
00451    )
00452   )
00453         
00454         
00455   const int i_upper = params->numAtoms[0];
00456   register const int j_upper = params->numAtoms[1];
00457   register int j;
00458   register int i;
00459   const CompAtom *p_0 = params->p[0];
00460   const CompAtom *p_1 = params->p[1];
00461   const CompAtomExt *pExt_0 = params->pExt[0];
00462   const CompAtomExt *pExt_1 = params->pExt[1];
00463 
00464   char * excl_flags_buff = 0;
00465   const int32 * full_excl = 0;
00466   const int32 * mod_excl = 0;
00467 
00468   plint *pairlistn_save;  int npairn;
00469   plint *pairlistx_save;  int npairx;
00470   plint *pairlistm_save;  int npairm;
00471 
00472   ALCH(
00473   plint *pairlistnA1_save;  int npairnA1;
00474   plint *pairlistxA1_save;  int npairxA1;
00475   plint *pairlistmA1_save;  int npairmA1;
00476   plint *pairlistnA2_save;  int npairnA2;
00477   plint *pairlistxA2_save;  int npairxA2;
00478   plint *pairlistmA2_save;  int npairmA2;
00479   )
00480 
00481   NBWORKARRAYSINIT(params->workArrays);
00482 
00483   int arraysize = j_upper+5;
00484 
00485   NBWORKARRAY(plint,pairlisti,arraysize)
00486   NBWORKARRAY(BigReal,r2list,arraysize)
00487 
00488   union { double f; int32 i[2]; } byte_order_test;
00489   byte_order_test.f = 1.0;  // should occupy high-order bits only
00490   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00491 
00492   if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00493 
00494 
00495   // DMK - Atom Sort
00496   // 
00497   // Basic Idea: Determine the line between the center of masses of
00498   //   the two patches.  Project and then sort the lists of atoms
00499   //   along this line.  Then, as the pairlists are being generated
00500   //   for the atoms in the first atom list, use the sorted
00501   //   list to only add atoms from the second list that are between
00502   //   +/- ~cutoff from the atoms position on the line.
00503   #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00504 
00505   // NOTE: For the first atom list, just the sort values themselves need to be
00506   // calculated (a BigReal vs. SortEntry data structure).  However, a second
00507   // SortEntry array is needed to perform the merge sort on the second list of
00508   // atoms.  Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00509   // list of atoms and then use the left over array to make a version of the
00510   // list that only includes non-fixed groups (fixg).
00511 
00512   NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00513   NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00514   NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00515 
00516   register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00517   register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00518 
00519   int p_0_sortValues_len = 0;
00520   int p_1_sortValues_len = 0;
00521   int p_1_sortValues_fixg_len = 0;
00522 
00523   // Calculate the distance between to projected points on the line that
00524   //   represents the cutoff distance.
00525   BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00526 
00527   if (savePairlists || !usePairlists) {
00528 
00529     register const BigReal projLineVec_x = params->projLineVec.x;
00530     register const BigReal projLineVec_y = params->projLineVec.y;
00531     register const BigReal projLineVec_z = params->projLineVec.z;
00532 
00533     // Calculate the sort values for the atoms in patch 1
00534     {
00535       register int nbgs = p_1->nonbondedGroupSize;
00536       register BigReal p_x = p_1->position.x;
00537       register BigReal p_y = p_1->position.y;
00538       register BigReal p_z = p_1->position.z;
00539       register int index = 0;
00540 
00541       for (register int j = nbgs; j < j_upper; j += nbgs) {
00542 
00543         // Set p_j_next to point to the atom for the next iteration and begin
00544         //   loading the 'nbgs' value for that atom.
00545         register const CompAtom* p_j_next = p_1 + j;
00546         nbgs = p_j_next->nonbondedGroupSize;
00547 
00548         // Calculate the distance along the projection vector
00549         // NOTE: If the vector from the origin to the point is 'A' and the vector
00550         //   between the patches is 'B' then to project 'A' onto 'B' we take the dot
00551         //   product of the two vectors 'A dot B' divided by the length of 'B'.
00552         // So... projection of A onto B = (A dot B) / length(B), however, note
00553         //   that length(B) == 1, therefore the sort value is simply (A dot B)
00554         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00555 
00556         // Start loading the next iteration's atom's position
00557         p_x = p_j_next->position.x;
00558         p_y = p_j_next->position.y;
00559         p_z = p_j_next->position.z;
00560 
00561         // Store the caclulated sort value into the array of sort values
00562         register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00563         p_1_sortValStorePtr->index = index;
00564         p_1_sortValStorePtr->sortValue = sortVal;
00565         p_1_sortValues_len++;
00566 
00567         // Update index for the next iteration
00568         index = j;       
00569 
00570       } // end while (j < j_upper)
00571 
00572       register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00573 
00574       register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00575       p_1_sortValStorePtr->index = index;
00576       p_1_sortValStorePtr->sortValue = sortVal;
00577       p_1_sortValues_len++;
00578     }
00579 
00580     // NOTE: This list and another version of it with only non-fixed
00581     //   atoms will be used in place of grouplist and fixglist.
00582     #if 0   // Selection Sort
00583       sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00584     #elif 0   // Bubble Sort
00585       sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00586     #else   // Merge Sort
00587       #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00588         sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00589       #else
00590         sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00591       #endif
00592     #endif
00593 
00594     // Calculate the sort values for the atoms in patch 0
00595     {
00596       register int nbgs = p_0->nonbondedGroupSize;
00597       register BigReal p_x = p_0->position.x + offset_x;
00598       register BigReal p_y = p_0->position.y + offset_y;
00599       register BigReal p_z = p_0->position.z + offset_z;
00600       register int index = 0;
00601 
00602       for (register int i = nbgs; i < i_upper; i += nbgs) {
00603 
00604         // Set p_i_next to point to the atom for the next iteration and begin
00605         //   loading the 'nbgs' value for that atom.
00606         register const CompAtom* p_i_next = p_0 + i;
00607         nbgs = p_i_next->nonbondedGroupSize;
00608 
00609         // Calculate the distance along the projection vector
00610         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00611 
00612         // Start loading the next iteration's atom's position
00613         p_x = p_i_next->position.x + offset_x;
00614         p_y = p_i_next->position.y + offset_y;
00615         p_z = p_i_next->position.z + offset_z;
00616 
00617         // Store the calculated sort value into the array of sort values
00618         register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00619         *p_0_sortValStorePtr = sortVal;
00620 
00621         // Update index for the next iteration
00622         index = i;
00623       }
00624 
00625       register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00626 
00627       register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00628       *p_0_sortValStorePtr = sortVal;
00629 
00630       p_0_sortValues_len = i_upper;
00631     }
00632 
00633   }  // end if (savePairlists || !usePairlists)
00634 
00635   #endif  // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00636 
00637   // Atom Sort : The grouplist and fixglist arrays are not needed when the
00638   //   the atom sorting code is in use.
00639   #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00640     NBWORKARRAY(plint,grouplist,arraysize);
00641     NBWORKARRAY(plint,fixglist,arraysize);
00642   #endif
00643 
00644   NBWORKARRAY(plint,goodglist,arraysize);
00645   NBWORKARRAY(plint,pairlistx,arraysize);
00646   NBWORKARRAY(plint,pairlistm,arraysize);
00647   NBWORKARRAY(plint,pairlist,arraysize);
00648   NBWORKARRAY(plint,pairlist2,arraysize);
00649   ALCH(
00650   NBWORKARRAY(plint,pairlistnA1,arraysize);
00651   NBWORKARRAY(plint,pairlistxA1,arraysize);
00652   NBWORKARRAY(plint,pairlistmA1,arraysize);
00653   NBWORKARRAY(plint,pairlistnA2,arraysize);
00654   NBWORKARRAY(plint,pairlistxA2,arraysize);
00655   NBWORKARRAY(plint,pairlistmA2,arraysize);
00656   )
00657 
00658   int fixg_upper = 0;
00659   int g_upper = 0;
00660 
00661   if ( savePairlists || ! usePairlists ) {
00662 
00663     #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00664 
00665       // Create a sorted list of non-fixed groups
00666       register int fixg = 0;
00667       for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00668         register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00669         register int p_1_index = p_1_sortEntry->index;
00670         if (!pExt_1[p_1_index].groupFixed) {
00671           register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00672           p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00673           p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00674           p_1_sortValues_fixg_len++;
00675         }
00676       }
00677 
00678     #else
00679 
00680       register int g = 0;
00681       for ( j = 0; j < j_upper; ++j ) {
00682         if ( p_1[j].nonbondedGroupSize ) {
00683           grouplist[g++] = j;
00684         }
00685       }
00686       g_upper = g;
00687       if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00688       int fixg = 0;
00689 
00690       if ( fixedAtomsOn ) {
00691         for ( g = 0; g < g_upper; ++g ) {
00692           j = grouplist[g];
00693           if ( ! pExt_1[j].groupFixed ) {
00694             fixglist[fixg++] = j;
00695           }
00696         }
00697       }
00698 
00699       fixg_upper = fixg;
00700       if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00701 
00702     #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00703 
00704     pairlists.addIndex();
00705     pairlists.setIndexValue(i_upper);
00706 
00707   } else { // if ( savePairlists || ! usePairlists )
00708 
00709     if ( pairlists.getIndexValue() != i_upper )
00710       NAMD_bug("pairlist i_upper mismatch!");
00711 
00712   } // if ( savePairlists || ! usePairlists )
00713 
00714   SELF(
00715   int j_hgroup = 0;
00716   int g_lower = 0;
00717   int fixg_lower = 0;
00718   )
00719   int pairlistindex=0;
00720   int pairlistoffset=0;
00721 
00722   
00723 
00724 #if ( SHORT( FAST( 1+ ) ) 0 )
00725 #if ( PAIR( 1+ ) 0 )
00726     Force *f_0 = params->ff[0];
00727     Force *f_1 = params->ff[1];
00728 #else
00729 #define f_1 f_0
00730     NBWORKARRAY(Force,f_0,i_upper)
00731     memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00732 #endif
00733 #endif
00734 #if ( FULL( 1+ ) 0 )
00735 #if ( PAIR( 1+ ) 0 )
00736     Force *fullf_0 = params->fullf[0];
00737     Force *fullf_1 = params->fullf[1];
00738 #else
00739 #define fullf_1 fullf_0
00740     NBWORKARRAY(Force,fullf_0,i_upper);
00741     memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00742 #endif
00743 #endif
00744     
00745 
00746   int maxPart = params->numParts - 1;
00747   int groupCount = params->minPart;
00748   PAIR( 
00749     if ( savePairlists || ! usePairlists ) { 
00750       i = 0;
00751       pairlists.addIndex(); 
00752     } else {
00753       i = pairlists.getIndexValue();
00754     }
00755   )
00756    PAIR(for ( ; i < (i_upper);)) SELF(for ( i=0; i < (i_upper- 1);i++))
00757     {
00758     const CompAtom &p_i = p_0[i];
00759     const CompAtomExt &pExt_i = pExt_0[i];
00760 
00761     PAIR(if (savePairlists || ! usePairlists){)
00762     if ( p_i.hydrogenGroupSize ) {
00763       if ( groupCount ) {  // skip this group
00764         --groupCount;
00765         i += p_i.hydrogenGroupSize;
00766 #ifdef ARCH_POWERPC
00767         __dcbt((void *) &(p_0[i]));
00768 #endif
00769         SELF(--i;)
00770         continue;
00771       } else {  // compute this group
00772         groupCount = maxPart;
00773       }
00774     }
00775     PAIR(})
00776 
00777     register const BigReal p_i_x = p_i.position.x + offset_x;
00778     register const BigReal p_i_y = p_i.position.y + offset_y;
00779     register const BigReal p_i_z = p_i.position.z + offset_z;
00780 
00781     ALCH(const int p_i_partition = p_i.partition;)
00782 
00783     PPROF(
00784         const int p_i_partition = p_i.partition;
00785         int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00786         pp_clamp(n1, pressureProfileSlabs);
00787         )
00788 
00789   SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00790 
00791   if ( savePairlists || ! usePairlists ) {
00792 
00793     #ifdef MEM_OPT_VERSION
00794     const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);        
00795     const int excl_min = pExt_i.id + exclcheck->min;
00796     const int excl_max = pExt_i.id + exclcheck->max;
00797     #else
00798     const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
00799     const int excl_min = exclcheck->min;
00800     const int excl_max = exclcheck->max;
00801     #endif
00802     const char * excl_flags_var;
00803     if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00804     else {  // need to build list on the fly
00805 
00806     //TODO: Should change later!!!!!!!!!! --Chao Mei
00807     //Now just for the sake of passing compilation
00808     #ifndef MEM_OPT_VERSION 
00809       if ( excl_flags_buff ) {
00810         int nl,l;
00811         nl = full_excl[0] + 1;
00812         for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00813         nl = mod_excl[0] + 1;
00814         for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00815       } else {
00816         excl_flags_buff = new char[mol->numAtoms];
00817         memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00818       }
00819       int nl,l;
00820       full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
00821       nl = full_excl[0] + 1;
00822       for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00823       mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
00824       nl = mod_excl[0] + 1;
00825       for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00826       excl_flags_var = excl_flags_buff;
00827     #endif
00828 
00829     }
00830     const char * const excl_flags = excl_flags_var;
00831 
00832   if ( p_i.nonbondedGroupSize ) {
00833 
00834     pairlistindex = 0;  // initialize with 0 elements
00835     pairlistoffset=0;
00836     const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
00837 
00838     // If patch divisions are not made by hydrogen groups, then
00839     // hydrogenGroupSize is set to 1 for all atoms.  Thus we can
00840     // carry on as if we did have groups - only less efficiently.
00841     // An optimization in this case is to not rebuild the temporary
00842     // pairlist but to include every atom in it.  This should be a
00843     // a very minor expense.
00844 
00845     SELF
00846     (
00847       if ( p_i.hydrogenGroupSize ) {
00848         // exclude child hydrogens of i
00849         // j_hgroup = i + p_i.hydrogenGroupSize;  (moved above)
00850         while ( g_lower < g_upper &&
00851                 grouplist[g_lower] < j_hgroup ) ++g_lower;
00852         while ( fixg_lower < fixg_upper &&
00853                 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00854       }
00855       // add all child or sister hydrogens of i
00856       for ( j = i + 1; j < j_hgroup; ++j ) {
00857         pairlist[pairlistindex++] = j;
00858       }
00859     )
00860 
00861     // add remaining atoms to pairlist via hydrogen groups
00862     register plint *pli = pairlist + pairlistindex;
00863 
00864     {
00865       // Atom Sort : Modify the values of g and gu based on the added information
00866       //   of the linear projections (sort values) information.
00867       #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00868 
00869         register int g = 0;
00870         register BigReal p_i_sortValue = p_0_sortValues[i];
00871         const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00872         register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00873 
00874         // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
00875         register int lower = 0;
00876         register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00877         while ((upper - lower) > 1) {
00878           register int j = ((lower + upper) >> 1);
00879           register BigReal jSortVal = sortValues[j].sortValue;
00880           if (jSortVal < p_i_sortValue_plus_windowRadius) {
00881             lower = j;
00882           } else {
00883             upper = j;
00884           }
00885         }
00886         const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00887 
00888       #else
00889 
00890         register plint *gli = goodglist;
00891         const plint *glist = ( groupfixed ? fixglist : grouplist );
00892         SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00893         const int gu = ( groupfixed ? fixg_upper : g_upper );
00894         register int g = PAIR(0) SELF(gl);
00895 
00896       #endif
00897 
00898 
00899       if ( g < gu ) {
00900         int hu = 0;
00901 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00902         if ( gu - g  >  6 ) { 
00903 
00904           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00905             register SortEntry* sortEntry0 = sortValues + g;
00906             register SortEntry* sortEntry1 = sortValues + g + 1;
00907             register int jprev0 = sortEntry0->index;
00908             register int jprev1 = sortEntry1->index;
00909           #else
00910             register int jprev0 = glist[g    ];
00911             register int jprev1 = glist[g + 1];
00912           #endif
00913           
00914           register int j0;
00915           register int j1;
00916 
00917           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00918           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00919           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00920 
00921           // these don't change here, so we could move them into outer scope
00922           const __m128d P_I_X = _mm_set1_pd(p_i_x);
00923           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00924           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00925  
00926           g += 2;
00927           for ( ; g < gu - 2; g +=2 ) {
00928             // compute 1d distance, 2-way parallel       
00929             j0     =  jprev0;
00930             j1     =  jprev1;
00931 
00932             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00933             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00934             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00935             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00936             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00937             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00938             
00939             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00940               sortEntry0 = sortValues + g;
00941               sortEntry1 = sortValues + g + 1;
00942               jprev0 = sortEntry0->index;
00943               jprev1 = sortEntry1->index;
00944             #else
00945               jprev0     =  glist[g  ];
00946               jprev1     =  glist[g+1];
00947             #endif
00948            
00949             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00950             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00951             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00952 
00953             __align(16) double r2_01[2];
00954             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
00955 
00956             // XXX these could possibly benefit from SSE-based conditionals
00957             bool test0 = ( r2_01[0] < groupplcutoff2 );
00958             bool test1 = ( r2_01[1] < groupplcutoff2 ); 
00959             
00960             //removing ifs benefits on many architectures
00961             //as the extra stores will only warm the cache up
00962             goodglist [ hu         ] = j0;
00963             goodglist [ hu + test0 ] = j1;
00964             
00965             hu += test0 + test1;
00966           }
00967           g-=2;
00968         }
00969 #else
00970         if ( gu - g  >  6 ) { 
00971 
00972           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00973             register SortEntry* sortEntry0 = sortValues + g;
00974             register SortEntry* sortEntry1 = sortValues + g + 1;
00975             register int jprev0 = sortEntry0->index;
00976             register int jprev1 = sortEntry1->index;
00977           #else
00978             register int jprev0 = glist[g    ];
00979             register int jprev1 = glist[g + 1];
00980           #endif
00981           
00982           register  int j0; 
00983           register  int j1; 
00984           
00985           register  BigReal pj_x_0, pj_x_1; 
00986           register  BigReal pj_y_0, pj_y_1; 
00987           register  BigReal pj_z_0, pj_z_1; 
00988           register  BigReal t_0, t_1, r2_0, r2_1;
00989           
00990           pj_x_0 = p_1[jprev0].position.x;
00991           pj_x_1 = p_1[jprev1].position.x;  
00992           
00993           pj_y_0 = p_1[jprev0].position.y; 
00994           pj_y_1 = p_1[jprev1].position.y;  
00995           
00996           pj_z_0 = p_1[jprev0].position.z; 
00997           pj_z_1 = p_1[jprev1].position.z;
00998           
00999           g += 2;
01000           for ( ; g < gu - 2; g +=2 ) {
01001             // compute 1d distance, 2-way parallel       
01002             j0     =  jprev0;
01003             j1     =  jprev1;
01004             
01005             t_0    =  p_i_x - pj_x_0;
01006             t_1    =  p_i_x - pj_x_1;
01007             r2_0   =  t_0 * t_0;
01008             r2_1   =  t_1 * t_1;
01009             
01010             t_0    =  p_i_y - pj_y_0;
01011             t_1    =  p_i_y - pj_y_1;
01012             r2_0  +=  t_0 * t_0;
01013             r2_1  +=  t_1 * t_1;
01014             
01015             t_0    =  p_i_z - pj_z_0;
01016             t_1    =  p_i_z - pj_z_1;
01017             r2_0  +=  t_0 * t_0;
01018             r2_1  +=  t_1 * t_1;
01019             
01020             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01021               sortEntry0 = sortValues + g;
01022               sortEntry1 = sortValues + g + 1;
01023               jprev0 = sortEntry0->index;
01024               jprev1 = sortEntry1->index;
01025             #else
01026               jprev0     =  glist[g  ];
01027               jprev1     =  glist[g+1];
01028             #endif
01029             
01030             pj_x_0     =  p_1[jprev0].position.x;
01031             pj_x_1     =  p_1[jprev1].position.x;
01032             pj_y_0     =  p_1[jprev0].position.y; 
01033             pj_y_1     =  p_1[jprev1].position.y;
01034             pj_z_0     =  p_1[jprev0].position.z; 
01035             pj_z_1     =  p_1[jprev1].position.z;
01036             
01037             bool test0 = ( r2_0 < groupplcutoff2 );
01038             bool test1 = ( r2_1 < groupplcutoff2 ); 
01039             
01040             //removing ifs benefits on many architectures
01041             //as the extra stores will only warm the cache up
01042             goodglist [ hu         ] = j0;
01043             goodglist [ hu + test0 ] = j1;
01044             
01045             hu += test0 + test1;
01046           }
01047           g-=2;
01048         }
01049 #endif
01050         
01051         for (; g < gu; g++) {
01052 
01053           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01054             register SortEntry* sortEntry = sortValues + g;
01055             register int j = sortEntry->index;
01056           #else
01057             int j = glist[g];
01058           #endif
01059 
01060           BigReal p_j_x = p_1[j].position.x;
01061           BigReal p_j_y = p_1[j].position.y;
01062           BigReal p_j_z = p_1[j].position.z;
01063           
01064           BigReal r2 = p_i_x - p_j_x;
01065           r2 *= r2;
01066           BigReal t2 = p_i_y - p_j_y;
01067           r2 += t2 * t2;
01068           t2 = p_i_z - p_j_z;
01069           r2 += t2 * t2;
01070 
01071           if ( r2 <= groupplcutoff2 ) 
01072             goodglist[hu ++] = j; 
01073         }
01074 
01075         for ( int h=0; h<hu; ++h ) {
01076           int j = goodglist[h];
01077           int nbgs = p_1[j].nonbondedGroupSize;
01078           pli[0] = j;   // copy over the next three in any case
01079           pli[1] = j+1;
01080           pli[2] = j+2;
01081           if ( nbgs & 4 ) {  // if nbgs > 3, assume nbgs <= 5
01082             pli[3] = j+3;
01083             pli[4] = j+4;
01084           }
01085           pli += nbgs;
01086         }
01087       }
01088     }
01089 
01090     pairlistindex = pli - pairlist;
01091     // make sure padded element on pairlist points to real data
01092     if ( pairlistindex ) {
01093        pairlist[pairlistindex] = pairlist[pairlistindex-1];
01094     } PAIR( else {  // skip empty loops if no pairs were found
01095        i += p_i.nonbondedGroupSize;
01096        continue;
01097     } )
01098   } // if i is nonbonded group parent
01099   SELF
01100     (
01101     // self-comparisions require list to be incremented
01102     // pair-comparisions use entire list (pairlistoffset is 0)
01103     else pairlistoffset++;
01104     )
01105 
01106     const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
01107 
01108     register plint *pli = pairlist2;
01109 
01110     plint *pairlistn = pairlists.newlist(j_upper + 5 + 5 ALCH( + 20 ));
01111     register plint *plin = pairlistn;
01112 
01113     INT(
01114     if ( pairInteractionOn ) {
01115       const int ifep_type = p_i.partition;
01116       if (pairInteractionSelf) {
01117         if (ifep_type != 1) { PAIR(++i;) continue; }
01118         for (int k=pairlistoffset; k<pairlistindex; k++) {
01119           j = pairlist[k];
01120           const int jfep_type = p_1[j].partition;
01121           // for pair-self, both atoms must be in group 1.
01122           if (jfep_type == 1) {
01123             *(pli++) = j;
01124           }
01125         }
01126       } else {
01127         if (ifep_type != 1 && ifep_type != 2) { PAIR(++i;) continue; }
01128         for (int k=pairlistoffset; k<pairlistindex; k++) {
01129           j = pairlist[k];
01130           const int jfep_type = p_1[j].partition;
01131           // for pair, must have one from each group.
01132           if (ifep_type + jfep_type == 3) {
01133             *(pli++) = j;
01134           }
01135         }
01136       }
01137       int npair2_int = pli - pairlist2;
01138       pli = pairlist2;
01139       for (int k=0; k<npair2_int; k++) {
01140         j = pairlist2[k];
01141         BigReal p_j_x = p_1[j].position.x;
01142         BigReal r2 = p_i_x - p_j_x;
01143         r2 *= r2;
01144         BigReal p_j_y = p_1[j].position.y;
01145         BigReal t2 = p_i_y - p_j_y;
01146         r2 += t2 * t2;
01147         BigReal p_j_z = p_1[j].position.z;
01148         t2 = p_i_z - p_j_z;
01149         r2 += t2 * t2;
01150         if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01151           int atom2 = pExt_1[j].id;
01152           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01153           else *(plin++) = j;
01154         }
01155       }
01156     } else
01157     )
01158     if ( atomfixed ) {
01159       for (int k=pairlistoffset; k<pairlistindex; k++) {
01160         j = pairlist[k];
01161         BigReal p_j_x = p_1[j].position.x;
01162         BigReal r2 = p_i_x - p_j_x;
01163         r2 *= r2;
01164         BigReal p_j_y = p_1[j].position.y;
01165         BigReal t2 = p_i_y - p_j_y;
01166         r2 += t2 * t2;
01167         BigReal p_j_z = p_1[j].position.z;
01168         t2 = p_i_z - p_j_z;
01169         r2 += t2 * t2;
01170         if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01171           int atom2 = pExt_1[j].id;
01172           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01173           else *(plin++) = j;
01174         }
01175       }
01176     } else {
01177       int k = pairlistoffset;
01178       int ku = pairlistindex;
01179       if ( k < ku ) {
01180 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01181         if ( ku - k  >  6 ) {      
01182           register  int jprev0 = pairlist [k    ];
01183           register  int jprev1 = pairlist [k + 1];
01184           
01185           register  int j0; 
01186           register  int j1; 
01187 
01188           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01189           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01190           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01191 
01192           // these don't change here, so we could move them into outer scope
01193           const __m128d P_I_X = _mm_set1_pd(p_i_x);
01194           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01195           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01196           
01197           int atom2_0 = pExt_1[jprev0].id;
01198           int atom2_1 = pExt_1[jprev1].id;
01199           
01200           k += 2;
01201           for ( ; k < ku - 2; k +=2 ) {
01202             // compute 1d distance, 2-way parallel       
01203             j0     =  jprev0;
01204             j1     =  jprev1;
01205             
01206             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01207             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01208             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01209             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01210             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01211             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01212             
01213             jprev0     =  pairlist[k];
01214             jprev1     =  pairlist[k+1];
01215             
01216             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01217             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01218             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01219 
01220             __align(16) double r2_01[2];
01221             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01222             
01223             if (r2_01[0] <= plcutoff2) {
01224               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01225                 *(pli++) = j0;
01226               else 
01227                 *(plin++) = j0;
01228             }
01229             atom2_0 = pExt_1[jprev0].id;
01230             
01231             if (r2_01[1] <= plcutoff2) {
01232               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01233                 *(pli++) = j1;
01234               else 
01235                 *(plin++) = j1;
01236             }
01237             atom2_1 = pExt_1[jprev1].id;            
01238           }
01239           k-=2;
01240         }       
01241 #else
01242         if ( ku - k  >  6 ) {      
01243           register  int jprev0 = pairlist [k];
01244           register  int jprev1 = pairlist [k + 1];
01245           
01246           register  int j0; 
01247           register  int j1; 
01248           
01249           register  BigReal pj_x_0, pj_x_1; 
01250           register  BigReal pj_y_0, pj_y_1; 
01251           register  BigReal pj_z_0, pj_z_1; 
01252           register  BigReal t_0, t_1, r2_0, r2_1;
01253           
01254           pj_x_0 = p_1[jprev0].position.x;
01255           pj_x_1 = p_1[jprev1].position.x;  
01256           
01257           pj_y_0 = p_1[jprev0].position.y; 
01258           pj_y_1 = p_1[jprev1].position.y;  
01259           
01260           pj_z_0 = p_1[jprev0].position.z; 
01261           pj_z_1 = p_1[jprev1].position.z;
01262           
01263           int atom2_0 = pExt_1[jprev0].id;
01264           int atom2_1 = pExt_1[jprev1].id;
01265           
01266           k += 2;
01267           for ( ; k < ku - 2; k +=2 ) {
01268             // compute 1d distance, 2-way parallel       
01269             j0     =  jprev0;
01270             j1     =  jprev1;
01271             
01272             t_0    =  p_i_x - pj_x_0;
01273             t_1    =  p_i_x - pj_x_1;
01274             r2_0   =  t_0 * t_0;
01275             r2_1   =  t_1 * t_1;
01276             
01277             t_0    =  p_i_y - pj_y_0;
01278             t_1    =  p_i_y - pj_y_1;
01279             r2_0  +=  t_0 * t_0;
01280             r2_1  +=  t_1 * t_1;
01281             
01282             t_0    =  p_i_z - pj_z_0;
01283             t_1    =  p_i_z - pj_z_1;
01284             r2_0  +=  t_0 * t_0;
01285             r2_1  +=  t_1 * t_1;
01286             
01287             jprev0     =  pairlist[k];
01288             jprev1     =  pairlist[k+1];
01289             
01290             pj_x_0     =  p_1[jprev0].position.x;
01291             pj_x_1     =  p_1[jprev1].position.x;
01292             pj_y_0     =  p_1[jprev0].position.y; 
01293             pj_y_1     =  p_1[jprev1].position.y;
01294             pj_z_0     =  p_1[jprev0].position.z; 
01295             pj_z_1     =  p_1[jprev1].position.z;
01296             
01297             if (r2_0 <= plcutoff2) {
01298               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01299                 *(pli++) = j0;
01300               else 
01301                 *(plin++) = j0;
01302             }
01303             atom2_0 = pExt_1[jprev0].id;
01304             
01305             if (r2_1 <= plcutoff2) {
01306               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01307                 *(pli++) = j1;
01308               else 
01309                 *(plin++) = j1;
01310              }
01311             atom2_1 = pExt_1[jprev1].id;            
01312           }
01313           k-=2;
01314         }       
01315 #endif
01316 
01317         for (; k < ku; k++) {
01318           int j = pairlist[k];
01319           int atom2 = pExt_1[j].id;
01320           
01321           BigReal p_j_x = p_1[j].position.x;
01322           BigReal p_j_y = p_1[j].position.y;
01323           BigReal p_j_z = p_1[j].position.z;
01324           
01325           BigReal r2 = p_i_x - p_j_x;
01326           r2 *= r2;
01327           BigReal t2 = p_i_y - p_j_y;
01328           r2 += t2 * t2;
01329           t2 = p_i_z - p_j_z;
01330           r2 += t2 * t2;
01331           
01332           if (r2 <= plcutoff2) {
01333             if ( atom2 >= excl_min && atom2 <= excl_max ) 
01334               *(pli++) = j;
01335             else 
01336               *(plin++) = j;
01337           }
01338         }
01339       }
01340     }
01341 
01342     PAIR(
01343     if ( plin == pairlistn && pli == pairlist2 ) {
01344       ++i;
01345       continue;
01346     }
01347     pairlists.setIndexValue(i); 
01348     )
01349 
01350     plint *plix = pairlistx;
01351     plint *plim = pairlistm;
01352     ALCH(
01353     plint *plinA1 = pairlistnA1;
01354     plint *plixA1 = pairlistxA1;
01355     plint *plimA1 = pairlistmA1;
01356     plint *plinA2 = pairlistnA2;
01357     plint *plixA2 = pairlistxA2;
01358     plint *plimA2 = pairlistmA2;
01359     )
01360 
01361     int k;
01362 
01363 #if 0 ALCH(+1)
01364     int unsortedNpairn = plin - pairlistn;
01365     plin = pairlistn;
01366     for ( k=0; k<unsortedNpairn; ++k ) {
01367       int j = pairlistn[k];
01368       switch(pswitchTable[p_i_partition + 3*(p_1[j].partition)]) {
01369         case 0:  *(plin++) = j; break;
01370         case 1:  *(plinA1++) = j; break;
01371         case 2:  *(plinA2++) = j; break;
01372       }
01373     }
01374 #endif
01375 
01376     int npair2 = pli - pairlist2;
01377     // if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
01378     // removed code for implicit exclusions within hydrogen groups -JCP
01379     for (k=0; k < npair2; ++k ) {
01380       int j = pairlist2[k];
01381       int atom2 = pExt_1[j].id;
01382       int excl_flag = excl_flags[atom2];
01383       ALCH(int pswitch = pswitchTable[p_i_partition + 3*(p_1[j].partition)];)
01384       switch ( excl_flag ALCH( + 3 * pswitch)) {
01385       case 0:  *(plin++) = j;  break;
01386       case 1:  *(plix++) = j;  break;
01387       case 2:  *(plim++) = j;  break;
01388       ALCH(
01389       case 3:  *(plinA1++) = j; break;
01390       case 6:  *(plinA2++) = j; break;
01391       case 5:  *(plimA1++) = j; break;
01392       case 8:  *(plimA2++) = j; break;
01393       case 4:  *(plixA1++) = j; break;
01394       case 7:  *(plixA2++) = j; break;
01395       )
01396       }
01397     }
01398 
01399     npairn = plin - pairlistn;
01400     pairlistn_save = pairlistn;
01401     pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
01402     pairlists.newsize(npairn + 1);
01403 
01404     npairx = plix - pairlistx;
01405     pairlistx_save = pairlists.newlist();
01406     for ( k=0; k<npairx; ++k ) {
01407       pairlistx_save[k] = pairlistx[k];
01408     }
01409     pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
01410     pairlists.newsize(npairx + 1);
01411 
01412     npairm = plim - pairlistm;
01413     pairlistm_save = pairlists.newlist();
01414     for ( k=0; k<npairm; ++k ) {
01415       pairlistm_save[k] = pairlistm[k];
01416     }
01417     pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
01418     pairlists.newsize(npairm + 1);
01419 
01420 
01421 #if 0 ALCH(+1)
01422 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
01423 { \
01424   (NPAIRS) = (PL2) - (PL1); \
01425   (PLSAVE) = pairlists.newlist(); \
01426   for ( k=0; k<(NPAIRS); ++k ) { \
01427     (PLSAVE)[k] = (PL1)[k]; \
01428   } \
01429   (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
01430   pairlists.newsize((NPAIRS) + 1); \
01431 }
01432   
01433     PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
01434     PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
01435     PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
01436     PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
01437     PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
01438     PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
01439 #undef PAIRLISTFROMARRAY
01440 
01441 #endif
01442     
01443     if ( ! savePairlists ) pairlists.reset();  // limit space usage
01444     PAIR( pairlists.addIndex(); )
01445     
01446         // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
01447   } else { // if ( savePairlists || ! usePairlists )
01448         // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
01449 
01450     pairlists.nextlist(&pairlistn_save,&npairn);  --npairn;
01451     pairlists.nextlist(&pairlistx_save,&npairx);  --npairx;
01452     pairlists.nextlist(&pairlistm_save,&npairm);  --npairm;
01453     ALCH(
01454     pairlists.nextlist(&pairlistnA1_save,&npairnA1);  --npairnA1;
01455     pairlists.nextlist(&pairlistxA1_save,&npairxA1);  --npairxA1;
01456     pairlists.nextlist(&pairlistmA1_save,&npairmA1);  --npairmA1;
01457     pairlists.nextlist(&pairlistnA2_save,&npairnA2);  --npairnA2;
01458     pairlists.nextlist(&pairlistxA2_save,&npairxA2);  --npairxA2;
01459     pairlists.nextlist(&pairlistmA2_save,&npairmA2);  --npairmA2;
01460     )
01461 
01462   } // if ( savePairlists || ! usePairlists )
01463 
01464     LES( BigReal *lambda_table_i =
01465                         lambda_table + (lesFactor+1) * p_i.partition; )
01466 
01467     INT(
01468     const BigReal force_sign = (
01469       ( pairInteractionOn && ! pairInteractionSelf ) ?
01470         ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
01471     );
01472       
01473     )
01474 
01475     const BigReal kq_i = COULOMB * p_i.charge * scaling * dielectric_1;
01476     const LJTable::TableEntry * const lj_row =
01477                 ljTable->table_row(p_i.vdwType);
01478 
01479     SHORT( FAST( BigReal f_i_x = 0.; ) )
01480     SHORT( FAST( BigReal f_i_y = 0.; ) )
01481     SHORT( FAST( BigReal f_i_z = 0.; ) )
01482     FULL( BigReal fullf_i_x = 0.; )
01483     FULL( BigReal fullf_i_y = 0.; )
01484     FULL( BigReal fullf_i_z = 0.; )
01485 
01486     int npairi;
01487     int k;
01488 
01489     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01490         p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
01491         r2_delta, r2list);
01492 
01493 // BEGIN NBTHOLE OF DRUDE MODEL
01494 #if (FAST(1+)0)
01495     if (drudeNbthole && p_i.hydrogenGroupSize > 1) {
01496   
01497       Parameters *parameters = Node::Object()->parameters;
01498       const NbtholePairValue * const nbthole_array = parameters->nbthole_array;
01499       const int NumNbtholePairParams = parameters->NumNbtholePairParams;
01500       BigReal drudeNbtholeCut = simParams -> drudeNbtholeCut;
01501       BigReal drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut) + r2_delta;
01502       BigReal CC = COULOMB * scaling * dielectric_1;
01503       int kk;
01504 
01505       for (k = 0; k < npairi; k++) {
01506             if (r2list[k] > drudeNbtholeCut2) { continue; }
01507   
01508             const int j = pairlisti[k];
01509             const CompAtom& p_j = p_1[j];
01510 
01511             if ( p_j.hydrogenGroupSize < 2 ) { continue; }
01512   
01513         for (kk = 0;kk < NumNbtholePairParams; kk++) {
01514 
01515                if (((nbthole_array[kk].ind1 == p_i.vdwType) &&
01516                   (nbthole_array[kk].ind2 == p_j.vdwType)) ||
01517                   ((nbthole_array[kk].ind2 == p_i.vdwType) &&
01518                    (nbthole_array[kk].ind1 == p_j.vdwType))) {
01519                  break;
01520                }
01521         }
01522         if ( kk < NumNbtholePairParams ) {
01523     
01524                 BigReal aprod = nbthole_array[kk].alphai * nbthole_array[kk].alphaj;
01525                 const BigReal aa = nbthole_array[kk].tholeij * powf(aprod, -1.f/6);
01526                 const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
01527                 const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
01528                 const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
01529                 const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;
01530 
01531                 Vector raa = (p_0[i].position + params->offset) - p_1[j].position;
01532                 Vector rad = (p_0[i].position + params->offset) - p_1[j+1].position;
01533                 Vector rda = (p_0[i+1].position + params->offset) - p_1[j].position;
01534                 Vector rdd = (p_0[i+1].position + params->offset) - p_1[j+1].position;
01535 
01536                 BigReal raa_invlen = raa.rlength();  // reciprocal of length
01537                 BigReal rad_invlen = rad.rlength();
01538                 BigReal rda_invlen = rda.rlength();
01539                 BigReal rdd_invlen = rdd.rlength();
01540 
01541                 BigReal auaa = aa / raa_invlen;
01542                 BigReal auad = aa / rad_invlen;
01543                 BigReal auda = aa / rda_invlen;
01544                 BigReal audd = aa / rdd_invlen;
01545 
01546                 BigReal expauaa = exp(-auaa);
01547                 BigReal expauad = exp(-auad);
01548                 BigReal expauda = exp(-auda);
01549                 BigReal expaudd = exp(-audd);
01550 
01551                 BigReal polyauaa = 1 + 0.5*auaa;
01552                 BigReal polyauad = 1 + 0.5*auad;
01553                 BigReal polyauda = 1 + 0.5*auda;
01554                 BigReal polyaudd = 1 + 0.5*audd;
01555 
01556                 BigReal ethole = 0;
01557                 ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
01558                 ethole += qqad * rad_invlen * (- polyauad * expauad);
01559                 ethole += qqda * rda_invlen * (- polyauda * expauda);
01560                 ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);
01561 
01562                 polyauaa = 1 + auaa*polyauaa;
01563                 polyauad = 1 + auad*polyauad;
01564                 polyauda = 1 + auda*polyauda;
01565                 polyaudd = 1 + audd*polyaudd;
01566 
01567                 BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
01568                 BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
01569                 BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
01570                 BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;
01571 
01572                 BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
01573                 BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
01574                 BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
01575                 BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);
01576 
01577                 Vector faa = -dfaa * raa;
01578                 Vector fad = -dfad * rad;
01579                 Vector fda = -dfda * rda;
01580                 Vector fdd = -dfdd * rdd;
01581 
01582                 params->ff[0][i] += faa + fad;
01583                 params->ff[0][i+1] += fda + fdd;
01584                 params->ff[1][j] -= faa + fda;
01585                 params->ff[1][j+1] -= fad + fdd;
01586 
01587                 reduction[virialIndex_XX] += faa.x * raa.x + fad.x * rad.x
01588                 + fda.x * rda.x + fdd.x * rdd.x;
01589                 reduction[virialIndex_XY] += faa.x * raa.y + fad.x * rad.y
01590                 + fda.x * rda.y + fdd.x * rdd.y;
01591                 reduction[virialIndex_XZ] += faa.x * raa.z + fad.x * rad.z
01592                 + fda.x * rda.z + fdd.x * rdd.z;
01593                 reduction[virialIndex_YX] += faa.y * raa.x + fad.y * rad.x
01594                 + fda.y * rda.x + fdd.y * rdd.x;
01595                 reduction[virialIndex_YY] += faa.y * raa.y + fad.y * rad.y
01596                 + fda.y * rda.y + fdd.y * rdd.y;
01597                 reduction[virialIndex_YZ] += faa.y * raa.z + fad.y * rad.z
01598                 + fda.y * rda.z + fdd.y * rdd.z;
01599                 reduction[virialIndex_ZX] += faa.z * raa.x + fad.z * rad.x
01600                 + fda.z * rda.x + fdd.z * rdd.x;
01601                 reduction[virialIndex_ZY] += faa.z * raa.y + fad.z * rad.y
01602                 + fda.z * rda.y + fdd.z * rdd.y;
01603                 reduction[virialIndex_ZZ] += faa.z * raa.z + fad.z * rad.z
01604                 + fda.z * rda.z + fdd.z * rdd.z;
01605                 reduction[electEnergyIndex] += ethole;
01606           }
01607        }
01608     }
01609 #endif
01610 // END NBTHOLE OF DRUDE MODEL
01611     
01612     // BEGIN LA
01613 #if (FAST(1+)0)
01614     if (doLoweAndersen && p_i.hydrogenGroupSize) {
01615         BigReal loweAndersenCutoff = simParams->loweAndersenCutoff;
01616         BigReal loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff) + r2_delta;
01617         BigReal loweAndersenProb = simParams->loweAndersenRate * (simParams->dt * simParams->nonbondedFrequency) * 0.001; // loweAndersenRate is in 1/ps
01618         const bool loweAndersenUseCOMvelocity = (simParams->rigidBonds != RIGID_NONE);
01619         const BigReal kbT = BOLTZMANN * (simParams->loweAndersenTemp);
01620         const BigReal dt_inv = TIMEFACTOR / (simParams->dt * simParams->nonbondedFrequency);
01621         //const BigReal dt_inv = 1.0 / simParams->dt;
01622         //BigReal kbT = 8.3145e-7 * (simParams->loweAndersenTemp); // in A^2/fs^2 * K
01623         
01624         const CompAtom* v_0 = params->v[0];
01625         const CompAtom* v_1 = params->v[1];
01626         const CompAtom& v_i = v_0[i];
01627         Mass mass_i = v_i.charge;
01628         Velocity vel_i = v_i.position;
01629         Position pos_i = p_i.position;
01630         int atom_i = pExt_0[i].id;
01631         
01632         if (loweAndersenUseCOMvelocity) {
01633             Mass mass = 0;
01634             Velocity vel = 0;
01635             Position pos = 0;
01636             for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
01637                 vel += v_0[i+l].charge * v_0[i+l].position;
01638                 pos += v_0[i+l].charge * p_0[i+l].position;
01639                 mass += v_0[i+l].charge;
01640             }
01641             vel_i = vel / mass;
01642             pos_i = pos / mass;
01643             mass_i = mass;
01644         }
01645         
01646         // Note v[0].charge is actually mass
01647         //Random rand(CkMyPe()); // ?? OK ?? NO!!!!
01648         Random *rand = Node::Object()->rand;
01649         for (k = 0; k < npairi; k++) {
01650             if (r2list[k] > loweAndersenCutoff2) { continue; }
01651                 
01652             const int j = pairlisti[k];
01653             const CompAtom& v_j = v_1[j];
01654             const CompAtom& p_j = p_1[j];
01655                 
01656             if (!p_j.hydrogenGroupSize) { continue; }
01657             if (rand->uniform() > loweAndersenProb) { continue; }
01658                 
01659             Mass mass_j = v_j.charge;
01660             Velocity vel_j = v_j.position;
01661             Position pos_j = p_j.position;
01662             int atom_j = pExt_1[j].id;
01663                 
01664             if (loweAndersenUseCOMvelocity) {
01665                 Mass mass = 0;
01666                 Velocity vel = 0;
01667                 Position pos = 0;
01668                 for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
01669                     vel += v_1[j+l].charge * v_1[j+l].position;
01670                     pos += v_1[j+l].charge * p_1[j+l].position;
01671                     mass += v_1[j+l].charge;
01672                 }
01673                 vel_j = vel / mass;
01674                 pos_j = pos / mass;
01675                 mass_j = mass;
01676             }
01677                 
01678             //Velocity deltaV = v_i.position - v_j.position;
01679             Velocity deltaV = vel_i - vel_j;
01680             Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
01681             //Vector sep = (p_i.position + params->offset) - p_j.position;
01682             Vector sep = (pos_i + params->offset) - pos_j;
01683             Vector sigma_ij = sep.unit();
01684             BigReal lambda = rand->gaussian() * sqrt(kbT / mu_ij);
01685             Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;
01686                 
01687             //DebugM(1, "atom1 atom2 = " << atom1 << " " << atom2 << " lambda = " << lambda << " force = " << force << " deltaP = " << sep.length() << " sqrt(r2) = " << sqrt(r2list[k]) << "\n");
01688             //DebugM(1, "atom1 atom2 = " << atom_i << " " << atom_j << " mass1 mass2 = " << mass_i << " " << mass_j << " hgrp1 hgrp2 " << p_i.hydrogenGroupSize << " " << p_j.hydrogenGroupSize << "\n");
01689                 
01690             if (loweAndersenUseCOMvelocity) {
01691                 BigReal inv_mass_i = 1.0 / mass_i;
01692                 BigReal inv_mass_j = 1.0 / mass_j;
01693                 for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
01694                     params->ff[0][i+l] += (v_0[i+l].charge * inv_mass_i) * force;
01695                 }
01696                 for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
01697                     params->ff[1][j+l] -= (v_1[j+l].charge * inv_mass_j) * force;
01698                 }
01699             } else {
01700                 params->ff[0][i] += force;
01701                 params->ff[1][j] -= force;
01702             }
01703                 
01704             reduction[virialIndex_XX] += force.x * sep.x;
01705             reduction[virialIndex_XY] += force.x * sep.y;
01706             reduction[virialIndex_XZ] += force.x * sep.z;
01707             reduction[virialIndex_YX] += force.y * sep.x;
01708             reduction[virialIndex_YY] += force.y * sep.y;
01709             reduction[virialIndex_YZ] += force.y * sep.z;
01710             reduction[virialIndex_ZX] += force.z * sep.x;
01711             reduction[virialIndex_ZY] += force.z * sep.y;
01712             reduction[virialIndex_ZZ] += force.z * sep.z;
01713         }
01714     }
01715 #endif
01716     // END LA
01717 
01718 #define NORMAL(X) X
01719 #define EXCLUDED(X)
01720 #define MODIFIED(X)
01721 #include  "ComputeNonbondedBase2.h"
01722 #undef NORMAL
01723 #undef EXCLUDED
01724 #undef MODIFIED
01725 
01726     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01727         p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
01728         r2_delta, r2list);
01729     exclChecksum += npairi;
01730 
01731 #define NORMAL(X)
01732 #define EXCLUDED(X)
01733 #define MODIFIED(X) X
01734 #include  "ComputeNonbondedBase2.h"
01735 #undef NORMAL
01736 #undef EXCLUDED
01737 #undef MODIFIED
01738 
01739 #ifdef FULLELECT
01740     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01741         p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
01742         r2_delta, r2list);
01743     exclChecksum += npairi;
01744 
01745 #undef FAST
01746 #define FAST(X)
01747 #define NORMAL(X)
01748 #define EXCLUDED(X) X
01749 #define MODIFIED(X)
01750 #include  "ComputeNonbondedBase2.h"
01751 #undef FAST
01752 #ifdef SLOWONLY
01753   #define FAST(X)
01754 #else
01755   #define FAST(X) X
01756 #endif
01757 #undef NORMAL
01758 #undef EXCLUDED
01759 #undef MODIFIED
01760 #else
01761     exclChecksum += npairx;
01762 #endif
01763 
01764 #if 0 ALCH(+1)   // nonbondedbase2 for alchemical-separeted pairlists
01765 
01766     #undef ALCHPAIR
01767     #define ALCHPAIR(X) X
01768     #undef NOT_ALCHPAIR
01769     #define NOT_ALCHPAIR(X)
01770     BigReal myLambda; FEP(BigReal myLambda2;)
01771     BigReal myElecLambda;  FEP(BigReal myElecLambda2;)
01772     BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
01773     BigReal myVdwShift; FEP(BigReal myVdwShift2;)
01774     BigReal alch_vdw_energy; BigReal alch_vdw_force;
01775     FEP(BigReal alch_vdw_energy_2;) TI(BigReal alch_vdw_dUdl;)
01776     BigReal shiftedElec; BigReal shiftedElecForce;
01777     
01778     /********************************************************************/
01779     /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
01780     /********************************************************************/
01781     #define NORMAL(X) X
01782     #define EXCLUDED(X)
01783     #define MODIFIED(X)
01784 
01785     #define ALCH1(X) X
01786     #define ALCH2(X)
01787     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01788             p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
01789             r2_delta, r2list);
01790     #include  "ComputeNonbondedBase2.h" // normal, direction 'up'
01791     #undef ALCH1
01792     #undef ALCH2
01793 
01794     #define ALCH1(X)
01795     #define ALCH2(X) X
01796     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01797             p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
01798             r2_delta, r2list);
01799     #include  "ComputeNonbondedBase2.h" // normal, direction 'down'
01800     #undef ALCH1
01801     #undef ALCH2
01802 
01803     #undef NORMAL
01804     #undef EXCLUDED
01805     #undef MODIFIED
01806     
01807     /********************************************************************/
01808     /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
01809     /********************************************************************/
01810     #define NORMAL(X)
01811     #define EXCLUDED(X)
01812     #define MODIFIED(X) X
01813 
01814     #define ALCH1(X) X
01815     #define ALCH2(X)
01816     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01817             p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
01818             r2_delta, r2list);
01819         exclChecksum += npairi;
01820     #include  "ComputeNonbondedBase2.h" // modified, direction 'up'
01821     #undef ALCH1
01822     #undef ALCH2
01823 
01824     #define ALCH1(X)
01825     #define ALCH2(X) X
01826     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01827             p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
01828             r2_delta, r2list);
01829         exclChecksum += npairi;
01830     #include  "ComputeNonbondedBase2.h" // modified, direction 'down'
01831     #undef ALCH1
01832     #undef ALCH2
01833 
01834     #undef NORMAL
01835     #undef EXCLUDED
01836     #undef MODIFIED
01837     
01838     /********************************************************************/
01839     /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
01840     /********************************************************************/
01841     #ifdef FULLELECT
01842     #undef FAST
01843     #define FAST(X)
01844     #define NORMAL(X)
01845     #define EXCLUDED(X) X
01846     #define MODIFIED(X)
01847 
01848     #define ALCH1(X) X
01849     #define ALCH2(X)
01850     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01851             p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
01852             r2_delta, r2list);
01853         exclChecksum += npairi;
01854     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'up'
01855     #undef ALCH1
01856     #undef ALCH2
01857 
01858     #define ALCH1(X)
01859     #define ALCH2(X) X
01860     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01861             p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
01862             r2_delta, r2list);
01863         exclChecksum += npairi;
01864     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'down'
01865     #undef ALCH1
01866     #undef ALCH2
01867 
01868     #undef FAST
01869     #ifdef SLOWONLY
01870       #define FAST(X)
01871     #else
01872       #define FAST(X) X
01873     #endif
01874     #undef NORMAL
01875     #undef EXCLUDED
01876     #undef MODIFIED
01877     #else
01878         exclChecksum += npairxA1 + npairxA2;
01879     #endif
01880 
01881     #undef ALCHPAIR
01882     #define ALCHPAIR(X)
01883     #undef NOT_ALCHPAIR
01884     #define NOT_ALCHPAIR(X) X
01885 
01886 #endif // end nonbondedbase2.h includes for alchemical pairlists
01887 
01888 #ifdef NETWORK_PROGRESS
01889     CkNetworkProgress();
01890 #endif
01891 
01892 #ifdef ARCH_POWERPC
01893     //data cache block touch the position structure
01894     __dcbt ((void *) &(p_0[i+1]));
01895     __prefetch_by_load ((void *)&(groupCount));
01896 #endif
01897 
01898 #ifndef NAMD_CUDA
01899     SHORT( FAST( f_0[i].x += f_i_x; ) )
01900     SHORT( FAST( f_0[i].y += f_i_y; ) )
01901     SHORT( FAST( f_0[i].z += f_i_z; ) )
01902     FULL( fullf_0[i].x += fullf_i_x; )
01903     FULL( fullf_0[i].y += fullf_i_y; )
01904     FULL( fullf_0[i].z += fullf_i_z; )
01905 #endif
01906 PAIR(
01907     if ( savePairlists || ! usePairlists ) {
01908       i++;
01909     } else {
01910       i = pairlists.getIndexValue();
01911     }
01912 )
01913 
01914         // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
01915   } // for i
01916 
01917   // PAIR(iout << "++++++++\n" << endi;)
01918   PAIR( if ( savePairlists ) { pairlists.setIndexValue(i); } )
01919 
01920 #ifdef f_1
01921 #undef f_1
01922 #endif
01923 #if ( SHORT( FAST( 1+ ) ) 0 )
01924 #if ( SELF( 1+ ) 0 )
01925   {
01926     Force *patch_f_0 = params->ff[0];
01927 
01928 #ifndef NAMD_CUDA
01929 #ifndef ARCH_POWERPC 
01930 #pragma ivdep
01931 #endif
01932     for ( int i = 0; i < i_upper; ++i ) {
01933       patch_f_0[i].x += f_0[i].x;
01934       patch_f_0[i].y += f_0[i].y;
01935       patch_f_0[i].z += f_0[i].z;
01936       virial_xx += f_0[i].x * p_0[i].position.x;
01937       virial_xy += f_0[i].x * p_0[i].position.y;
01938       virial_xz += f_0[i].x * p_0[i].position.z;
01939       virial_yy += f_0[i].y * p_0[i].position.y;
01940       virial_yz += f_0[i].y * p_0[i].position.z;
01941       virial_zz += f_0[i].z * p_0[i].position.z;
01942     }
01943 #endif
01944   }
01945 #endif
01946 #endif
01947 #ifdef fullf_1
01948 #undef fullf_1
01949 #endif
01950 #if ( FULL( 1+ ) 0 )
01951 #if ( SELF( 1+ ) 0 )
01952   {
01953     Force *patch_fullf_0 = params->fullf[0];
01954 #ifndef NAMD_CUDA
01955 #ifndef ARCH_POWERPC 
01956 #pragma ivdep
01957 #endif
01958     for ( int i = 0; i < i_upper; ++i ) {
01959       patch_fullf_0[i].x += fullf_0[i].x;
01960       patch_fullf_0[i].y += fullf_0[i].y;
01961       patch_fullf_0[i].z += fullf_0[i].z;
01962       fullElectVirial_xx += fullf_0[i].x * p_0[i].position.x;
01963       fullElectVirial_xy += fullf_0[i].x * p_0[i].position.y;
01964       fullElectVirial_xz += fullf_0[i].x * p_0[i].position.z;
01965       fullElectVirial_yy += fullf_0[i].y * p_0[i].position.y;
01966       fullElectVirial_yz += fullf_0[i].y * p_0[i].position.z;
01967       fullElectVirial_zz += fullf_0[i].z * p_0[i].position.z;
01968     }
01969 #endif
01970   }
01971 #endif
01972 #endif
01973 
01974 #ifndef NAMD_CUDA
01975   reduction[exclChecksumIndex] += exclChecksum;
01976 #endif
01977   FAST
01978   (
01979    // JLai
01980   ENERGY( reduction[vdwEnergyIndex] += vdwEnergy;
01981           GO( reduction[goNativeEnergyIndex] += goEnergyNative;
01982               reduction[goNonnativeEnergyIndex] += goEnergyNonnative; ) )
01983   SHORT
01984   (
01985   ENERGY( reduction[electEnergyIndex] += electEnergy; )
01986   )
01987   ALCH
01988   (
01989     TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;) 
01990     TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;) 
01991     FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
01992   SHORT
01993   (
01994     FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
01995     TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;) 
01996     TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;) 
01997   )
01998   )
01999   SHORT
02000   (
02001   reduction[virialIndex_XX] += virial_xx;
02002   reduction[virialIndex_XY] += virial_xy;
02003   reduction[virialIndex_XZ] += virial_xz;
02004   reduction[virialIndex_YX] += virial_xy;
02005   reduction[virialIndex_YY] += virial_yy;
02006   reduction[virialIndex_YZ] += virial_yz;
02007   reduction[virialIndex_ZX] += virial_xz;
02008   reduction[virialIndex_ZY] += virial_yz;
02009   reduction[virialIndex_ZZ] += virial_zz;
02010   )
02011   )
02012   FULL
02013   (
02014   ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
02015   ALCH
02016   (
02017     FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
02018     TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;) 
02019     TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;) 
02020   )
02021   reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
02022   reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
02023   reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
02024   reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
02025   reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
02026   reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
02027   reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
02028   reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
02029   reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
02030   )
02031 
02032   delete [] excl_flags_buff;
02033 
02034 }
02035 

Generated on Fri May 25 04:07:14 2012 for NAMD by  doxygen 1.3.9.1