Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedBase.h

Go to the documentation of this file.
00001 
00007 // Several special cases are defined:
00008 //   NBTYPE: exclusion method (NBPAIR, NBSELF -- mutually exclusive)
00009 //   FULLELECT full electrostatics calculation?
00010 
00011 #ifdef ARCH_POWERPC
00012 #include <builtins.h>
00013 #endif
00014 
00015 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00016 #include <emmintrin.h>  // We're using SSE2 intrinsics
00017 #if defined(__INTEL_COMPILER)
00018 #define __align(X) __declspec(align(X) )
00019 #elif defined(__GNUC__) || defined(__PGI)
00020 #define __align(X)  __attribute__((aligned(X) ))
00021 #else
00022 #define __align(X) __declspec(align(X) )
00023 #endif
00024 #endif
00025 
00026 #ifdef DEFINITION // (
00027   #include "LJTable.h"
00028   #include "Molecule.h"
00029   #include "ComputeNonbondedUtil.h"
00030 #endif // )
00031 
00032 #if NAMD_ComputeNonbonded_SortAtoms != 0
00033   #include "PatchMap.h"
00034 #endif
00035 
00036 // determining class name
00037 #undef NAME
00038 #undef CLASS
00039 #undef CLASSNAME
00040 #define NAME CLASSNAME(calc)
00041 
00042 #undef PAIR
00043 #if NBTYPE == NBPAIR
00044   #define PAIR(X) X
00045   #define CLASS ComputeNonbondedPair
00046   #define CLASSNAME(X) ENERGYNAME( X ## _pair )
00047 #else
00048   #define PAIR(X)
00049 #endif
00050 
00051 #undef SELF
00052 #if NBTYPE == NBSELF
00053   #define SELF(X) X
00054   #define CLASS ComputeNonbondedSelf
00055   #define CLASSNAME(X) ENERGYNAME( X ## _self )
00056 #else
00057   #define SELF(X)
00058 #endif
00059 
00060 #undef ENERGYNAME
00061 #undef ENERGY
00062 #undef NOENERGY
00063 #ifdef CALCENERGY
00064   #define ENERGY(X) X
00065   #define NOENERGY(X)
00066   #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
00067 #else
00068   #define ENERGY(X)
00069   #define NOENERGY(X) X
00070   #define ENERGYNAME(X) SLOWONLYNAME( X )
00071 #endif
00072 
00073 #undef SLOWONLYNAME
00074 #undef FAST
00075 #ifdef SLOWONLY
00076   #define FAST(X)
00077   #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
00078 #else
00079   #define FAST(X) X
00080   #define SLOWONLYNAME(X) MERGEELECTNAME( X )
00081 #endif
00082 
00083 #undef MERGEELECTNAME
00084 #undef SHORT
00085 #undef NOSHORT
00086 #ifdef MERGEELECT
00087   #define SHORT(X)
00088   #define NOSHORT(X) X
00089   #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
00090 #else
00091   #define SHORT(X) X
00092   #define NOSHORT(X)
00093   #define MERGEELECTNAME(X) FULLELECTNAME( X )
00094 #endif
00095 
00096 #undef FULLELECTNAME
00097 #undef FULL
00098 #undef NOFULL
00099 #ifdef FULLELECT
00100   #define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect )
00101   #define FULL(X) X
00102   #define NOFULL(X)
00103 #else
00104   #define FULLELECTNAME(X) TABENERGYNAME( X )
00105   #define FULL(X)
00106   #define NOFULL(X) X
00107 #endif
00108 
00109 #undef TABENERGYNAME
00110 #undef TABENERGY
00111 #undef NOTABENERGY
00112 #ifdef TABENERGYFLAG
00113   #define TABENERGYNAME(X) FEPNAME( X ## _tabener )
00114   #define TABENERGY(X) X
00115   #define NOTABENERGY(X)
00116 #else
00117   #define TABENERGYNAME(X) FEPNAME( X )
00118   #define TABENERGY(X)
00119   #define NOTABENERGY(X) X
00120 #endif
00121 
00122 // Here are what these macros stand for:
00123 // FEP/NOT_FEP: FEP free energy perturbation is active/inactive 
00124 //      (does NOT use LAM)
00125 // LES: locally-enhanced sampling is active
00126 // LAM: scale the potential by a factor lambda (except FEP)
00127 // INT: measure interaction energies
00128 // PPROF: pressure profiling
00129 
00130 #undef FEPNAME
00131 #undef FEP
00132 #undef LES
00133 #undef INT
00134 #undef PPROF
00135 #undef LAM
00136 #undef CUDA
00137 #undef ALCH
00138 #undef TI
00139 #define FEPNAME(X) LAST( X )
00140 #define FEP(X)
00141 #define ALCHPAIR(X)
00142 #define NOT_ALCHPAIR(X) X
00143 #define LES(X)
00144 #define INT(X)
00145 #define PPROF(X)
00146 #define LAM(X)
00147 #define CUDA(X)
00148 #define ALCH(X)
00149 #define TI(X)
00150 #ifdef FEPFLAG
00151   #undef FEPNAME
00152   #undef FEP
00153   #undef ALCH
00154   #define FEPNAME(X) LAST( X ## _fep )
00155   #define FEP(X) X
00156   #define ALCH(X) X
00157 #endif
00158 #ifdef TIFLAG
00159   #undef FEPNAME
00160   #undef TI
00161   #undef ALCH
00162   #define FEPNAME(X) LAST( X ## _ti )
00163   #define TI(X) X
00164   #define ALCH(X) X
00165 #endif
00166 #ifdef LESFLAG
00167   #undef FEPNAME
00168   #undef LES
00169   #undef LAM
00170   #define FEPNAME(X) LAST( X ## _les )
00171   #define LES(X) X
00172   #define LAM(X) X
00173 #endif
00174 #ifdef INTFLAG
00175   #undef FEPNAME
00176   #undef INT
00177   #define FEPNAME(X) LAST( X ## _int )
00178   #define INT(X) X
00179 #endif
00180 #ifdef PPROFFLAG
00181   #undef FEPNAME
00182   #undef INT
00183   #undef PPROF
00184   #define FEPNAME(X) LAST( X ## _pprof )
00185   #define INT(X) X
00186   #define PPROF(X) X
00187 #endif
00188 #ifdef NAMD_CUDA
00189   #undef CUDA
00190   #define CUDA(X) X
00191 #endif
00192 
00193 #define LAST(X) X
00194 
00195 // see if things are really messed up
00196 SELF( PAIR( foo bar ) )
00197 LES( FEP( foo bar ) )
00198 LES( INT( foo bar ) )
00199 FEP( INT( foo bar ) )
00200 LAM( INT( foo bar ) )
00201 FEP( NOENERGY( foo bar ) )
00202 ENERGY( NOENERGY( foo bar ) )
00203 TABENERGY(NOTABENERGY( foo bar ) )
00204 
00205 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00206   #define COMPONENT_DOTPRODUCT(A,B)  ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))
00207 #endif
00208 
00209 
00210 // ************************************************************
00211 // function header
00212 void ComputeNonbondedUtil :: NAME
00213   ( nonbonded *params )
00214 
00215 // function body
00216 {
00217   // int NAME;  // to track errors via unused variable warnings
00218 
00219   if ( ComputeNonbondedUtil::commOnly ) return;
00220 
00221 #ifdef NAMD_CUDA
00222   NOENERGY(return;)
00223 #endif
00224 
00225   // speedup variables
00226   BigReal *reduction = params->reduction;
00227   SimParameters *simParams = Node::Object()->simParameters;
00228 
00229   PPROF(
00230   BigReal *pressureProfileReduction = params->pressureProfileReduction;
00231   const BigReal invThickness = 1.0 / pressureProfileThickness;
00232   )
00233 
00234   Pairlists &pairlists = *(params->pairlists);
00235 #ifdef NAMD_CUDA
00236   int savePairlists = 0;
00237   int usePairlists = 0;
00238 #else
00239   int savePairlists = params->savePairlists;
00240   int usePairlists = params->usePairlists;
00241 #endif
00242   pairlists.reset();
00243   // PAIR(iout << "--------\n" << endi;)
00244 
00245   // local variables
00246   int exclChecksum = 0;
00247   FAST
00248   (
00249   ENERGY( BigReal vdwEnergy = 0; )
00250   SHORT
00251   (
00252   ENERGY( BigReal electEnergy = 0; )
00253   )
00254 
00255   FEP
00256   (
00257   ENERGY( BigReal vdwEnergy_s = 0; )
00258   SHORT
00259   (
00260   ENERGY( BigReal electEnergy_s = 0; )
00261   )
00262   )
00263   
00264   SHORT
00265   (
00266   BigReal virial_xx = 0;
00267   BigReal virial_xy = 0;
00268   BigReal virial_xz = 0;
00269   BigReal virial_yy = 0;
00270   BigReal virial_yz = 0;
00271   BigReal virial_zz = 0;
00272   )
00273   )
00274   FULL
00275   (
00276   ENERGY( BigReal fullElectEnergy = 0; )
00277   FEP
00278   (
00279   ENERGY( BigReal fullElectEnergy_s = 0; )
00280   )
00281   BigReal fullElectVirial_xx = 0;
00282   BigReal fullElectVirial_xy = 0;
00283   BigReal fullElectVirial_xz = 0;
00284   BigReal fullElectVirial_yy = 0;
00285   BigReal fullElectVirial_yz = 0;
00286   BigReal fullElectVirial_zz = 0;
00287   )
00288 
00289   // Bringing stuff into local namespace for speed.
00290 
00291   const BigReal offset_x = params->offset.x;
00292   const BigReal offset_y = params->offset.y;
00293   const BigReal offset_z = params->offset.z;
00294 
00295   register const BigReal plcutoff2 = \
00296                         params->plcutoff * params->plcutoff;
00297   register const BigReal groupplcutoff2 = \
00298                         params->groupplcutoff * params->groupplcutoff;
00299   const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00300   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00301   LJTable::TableEntry ljNull;  ljNull.A = 0; ljNull.B = 0; TABENERGY(ljNull.tabletype=-1;)
00302   const LJTable::TableEntry* const lj_null_pars = &ljNull;
00303   const Molecule* const mol = ComputeNonbondedUtil:: mol;
00304   SHORT
00305   (
00306   const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00307   )
00308   FULL
00309   (
00310   SHORT
00311   (
00312   const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00313   )
00314   NOSHORT
00315   (
00316 //#if 1 ALCH(-1)
00317   const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00318 //#else  // have to switch this for ALCH
00319 //  BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00320 //#endif
00321   )
00322   )
00323   const BigReal scaling = ComputeNonbondedUtil:: scaling;
00324   const BigReal modf_mod = 1.0 - scale14;
00325   FAST
00326   (
00327   const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00328   const BigReal c1 = ComputeNonbondedUtil:: c1;
00329   const BigReal c3 = ComputeNonbondedUtil:: c3;
00330   )
00331   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00332   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00333   // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00334   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00335 
00336   ALCH(
00337     const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00338     const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00339     const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00340     const BigReal alchElecLambdaStart = ComputeNonbondedUtil::alchElecLambdaStart;
00341     const BigReal alchVdwLambdaEnd = ComputeNonbondedUtil::alchVdwLambdaEnd;
00342     const BigReal alchVdwShiftCoeff = ComputeNonbondedUtil::alchVdwShiftCoeff;
00343 
00344     /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00345     BigReal lambdaUp = ComputeNonbondedUtil::alchLambda;
00346     BigReal elecLambdaUp =  (lambdaUp <= alchElecLambdaStart)? 0. : \
00347               (lambdaUp - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00348     BigReal vdwLambdaUp = 
00349         (lambdaUp >= alchVdwLambdaEnd)? 1. : lambdaUp / alchVdwLambdaEnd; 
00350     BigReal vdwShiftUp = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaUp);
00351     FEP(BigReal lambda2Up = ComputeNonbondedUtil::alchLambda2;)
00352     FEP(BigReal elecLambda2Up = (lambda2Up <= alchElecLambdaStart)? 0. : \
00353               (lambda2Up - alchElecLambdaStart) / (1. - alchElecLambdaStart);)
00354     FEP(BigReal vdwLambda2Up = 
00355         (lambda2Up >= alchVdwLambdaEnd)? 1. : lambda2Up / alchVdwLambdaEnd;) 
00356     FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Up);)
00357 
00358         
00359     /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00360     BigReal lambdaDown = 1 - ComputeNonbondedUtil::alchLambda;
00361     BigReal elecLambdaDown =  (lambdaDown <= alchElecLambdaStart)? 0. : \
00362               (lambdaDown - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00363     BigReal vdwLambdaDown = 
00364         (lambdaDown >= alchVdwLambdaEnd)? 1. : lambdaDown / alchVdwLambdaEnd; 
00365     BigReal vdwShiftDown = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaDown);
00366     FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::alchLambda2;)
00367     FEP(BigReal elecLambda2Down = (lambda2Down <= alchElecLambdaStart)? 0. : \
00368         (lambda2Down - alchElecLambdaStart) / (1. - alchElecLambdaStart); )
00369     FEP(BigReal vdwLambda2Down = 
00370         (lambda2Down >= alchVdwLambdaEnd)? 1. : lambda2Down / alchVdwLambdaEnd; )
00371     FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Down);)
00372 
00373   
00374   // Thermodynamic Integration Notes: 
00375   // Separation of atom pairs into different pairlist according to lambda
00376   // coupling, for alchemical free energy calculations. Normal pairlists
00377   // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00378   // (pairlist[nxm][12]_save are created for atoms switched up or down with
00379   // lambda respectively.
00380   // This makes TI coding far easier and more readable, since it's necessary 
00381   // to store dU/dlambda in different variables depending on whether atoms are
00382   // being switched up or down. Further, it allows the prior explicit coding of 
00383   // the separation-shifted vdW potential to stay in place without a big 
00384   // performance hit, since it's no longer necessary to calculate vdW potentials
00385   // explicity for the bulk (non-alchemical) interactions - so that part of the 
00386   // free energy code stays readable and easy to modify. Finally there should
00387   // be some performance gain over the old FEP implementation as we no
00388   // longer have to check the partitions of each atom pair and switch
00389   // parameters accordingly for every single NonbondedBase2.h loop - this is 
00390   // done at the pairlist level
00391   
00392   int pswitchTable[3*3];
00393   // determines which pairlist alchemical pairings get sent to
00394   // 0: non-alchemical pairs, partition 0 <-> partition 0
00395   // 1: atoms scaled up as lambda increases, p0<->p1
00396   // 2: atoms scaled down as lambda increases, p0<->p2
00397   // all p1<->p2 interactions to be dropped (99)
00398   // in general, 'up' refers to 1, 'down' refers to 2
00399   for (int ip=0; ip<3; ++ip) {
00400     for (int jp=0; jp<3; ++jp ) {
00401       pswitchTable[ip+3*jp] = 0;
00402       if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00403       if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00404       if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00405 
00406       if (! ComputeNonbondedUtil::alchDecouple) {
00407         // no decoupling: interactions within a partition are treated like
00408         // normal alchemical pairs
00409         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00410         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00411       }
00412       if (ComputeNonbondedUtil::alchDecouple) {
00413         // decoupling: PME calculates extra grids so that while PME 
00414         // interaction with the full system is switched off, a new PME grid
00415         // containing only alchemical atoms is switched on. Full interactions 
00416         // between alchemical atoms are maintained; potentials within one 
00417         // partition need not be scaled here.
00418         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00419         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00420       }
00421     }
00422   }
00423 
00424   // dU/dlambda variables for thermodynamic integration
00425   TI(
00426       BigReal vdwEnergy_ti_1 = 0;
00427       BigReal vdwEnergy_ti_2 = 0;
00428       SHORT(BigReal electEnergy_ti_1 = 0;
00429       BigReal electEnergy_ti_2 = 0;)
00430       FULL(BigReal fullElectEnergy_ti_1 = 0; 
00431       BigReal fullElectEnergy_ti_2 = 0;) 
00432    )
00433   )
00434         
00435         
00436   const int i_upper = params->numAtoms[0];
00437   register const int j_upper = params->numAtoms[1];
00438   register int j;
00439   register int i;
00440   const CompAtom *p_0 = params->p[0];
00441   const CompAtom *p_1 = params->p[1];
00442   const CompAtomExt *pExt_0 = params->pExt[0];
00443   const CompAtomExt *pExt_1 = params->pExt[1];
00444 
00445   char * excl_flags_buff = 0;
00446   const int32 * full_excl = 0;
00447   const int32 * mod_excl = 0;
00448 
00449   plint *pairlistn_save;  int npairn;
00450   plint *pairlistx_save;  int npairx;
00451   plint *pairlistm_save;  int npairm;
00452 
00453   ALCH(
00454   plint *pairlistnA1_save;  int npairnA1;
00455   plint *pairlistxA1_save;  int npairxA1;
00456   plint *pairlistmA1_save;  int npairmA1;
00457   plint *pairlistnA2_save;  int npairnA2;
00458   plint *pairlistxA2_save;  int npairxA2;
00459   plint *pairlistmA2_save;  int npairmA2;
00460   )
00461 
00462   NBWORKARRAYSINIT(params->workArrays);
00463 
00464   int arraysize = j_upper+5;
00465 
00466   NBWORKARRAY(plint,pairlisti,arraysize)
00467   NBWORKARRAY(BigReal,r2list,arraysize)
00468 
00469   union { double f; int32 i[2]; } byte_order_test;
00470   byte_order_test.f = 1.0;  // should occupy high-order bits only
00471   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00472 
00473   if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00474 
00475 
00476   // DMK - Atom Sort
00477   // 
00478   // Basic Idea: Determine the line between the center of masses of
00479   //   the two patches.  Project and then sort the lists of atoms
00480   //   along this line.  Then, as the pairlists are being generated
00481   //   for the atoms in the first atom list, use the sorted
00482   //   list to only add atoms from the second list that are between
00483   //   +/- ~cutoff from the atoms position on the line.
00484   #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00485 
00486   // NOTE: For the first atom list, just the sort values themselves need to be
00487   // calculated (a BigReal vs. SortEntry data structure).  However, a second
00488   // SortEntry array is needed to perform the merge sort on the second list of
00489   // atoms.  Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00490   // list of atoms and then use the left over array to make a version of the
00491   // list that only includes non-fixed groups (fixg).
00492 
00493   NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00494   NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00495   NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00496 
00497   register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00498   register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00499 
00500   int p_0_sortValues_len = 0;
00501   int p_1_sortValues_len = 0;
00502   int p_1_sortValues_fixg_len = 0;
00503 
00504   // Calculate the distance between to projected points on the line that
00505   //   represents the cutoff distance.
00506   BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00507 
00508   if (savePairlists || !usePairlists) {
00509 
00510     register const BigReal projLineVec_x = params->projLineVec.x;
00511     register const BigReal projLineVec_y = params->projLineVec.y;
00512     register const BigReal projLineVec_z = params->projLineVec.z;
00513 
00514     // Calculate the sort values for the atoms in patch 1
00515     {
00516       register int j = 0;
00517       register int nbgs = p_1->nonbondedGroupSize;
00518       register BigReal p_x = p_1->position.x;
00519       register BigReal p_y = p_1->position.y;
00520       register BigReal p_z = p_1->position.z;
00521       register int index = j;
00522 
00523       while (j < j_upper) {
00524 
00525         j += nbgs;
00526 
00527         // Set p_j_next to point to the atom for the next iteration and begin
00528         //   loading the 'nbgs' value for that atom.
00529         register const CompAtom* p_j_next = p_1 + j;
00530         nbgs = p_j_next->nonbondedGroupSize;
00531 
00532         // Calculate the distance along the projection vector
00533         // NOTE: If the vector from the origin to the point is 'A' and the vector
00534         //   between the patches is 'B' then to project 'A' onto 'B' we take the dot
00535         //   product of the two vectors 'A dot B' divided by the length of 'B'.
00536         // So... projection of A onto B = (A dot B) / length(B), however, note
00537         //   that length(B) == 1, therefore the sort value is simply (A dot B)
00538         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00539 
00540         // Start loading the next iteration's atom's position
00541         p_x = p_j_next->position.x;
00542         p_y = p_j_next->position.y;
00543         p_z = p_j_next->position.z;
00544 
00545         // Store the caclulated sort value into the array of sort values
00546         register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00547         p_1_sortValStorePtr->index = index;
00548         p_1_sortValStorePtr->sortValue = sortVal;
00549         p_1_sortValues_len++;
00550 
00551         // Update index for the next iteration
00552         index = j;       
00553 
00554       } // end while (j < j_upper)
00555     }
00556 
00557     // NOTE: This list and another version of it with only non-fixed
00558     //   atoms will be used in place of grouplist and fixglist.
00559     #if 0   // Selection Sort
00560       sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00561     #elif 0   // Bubble Sort
00562       sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00563     #else   // Merge Sort
00564       #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00565         sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00566       #else
00567         sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00568       #endif
00569     #endif
00570 
00571     // Calculate the sort values for the atoms in patch 0
00572     {
00573       register int i = 0;
00574       register int nbgs = p_0->nonbondedGroupSize;
00575       register BigReal p_x = p_0->position.x + offset_x;
00576       register BigReal p_y = p_0->position.y + offset_y;
00577       register BigReal p_z = p_0->position.z + offset_z;
00578       register int index = i;
00579 
00580       while (i < i_upper) {
00581 
00582         i += nbgs;
00583 
00584         // Set p_i_next to point to the atom for the next iteration and begin
00585         //   loading the 'nbgs' value for that atom.
00586         register const CompAtom* p_i_next = p_0 + i;
00587         nbgs = p_i_next->nonbondedGroupSize;
00588 
00589         // Calculate the distance along the projection vector
00590         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00591 
00592         // Start loading the next iteration's atom's position
00593         p_x = p_i_next->position.x + offset_x;
00594         p_y = p_i_next->position.y + offset_y;
00595         p_z = p_i_next->position.z + offset_z;
00596 
00597         // Store the calculated sort value into the array of sort values
00598         register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00599         *p_0_sortValStorePtr = sortVal;
00600 
00601         // Update index for the next iteration
00602         index = i;
00603       }
00604 
00605       p_0_sortValues_len = i_upper;
00606     }
00607 
00608   }  // end if (savePairlists || !usePairlists)
00609 
00610   #endif  // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00611 
00612   // Atom Sort : The grouplist and fixglist arrays are not needed when the
00613   //   the atom sorting code is in use.
00614   #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00615     NBWORKARRAY(plint,grouplist,arraysize);
00616     NBWORKARRAY(plint,fixglist,arraysize);
00617   #endif
00618 
00619   NBWORKARRAY(plint,goodglist,arraysize);
00620   NBWORKARRAY(plint,pairlistx,arraysize);
00621   NBWORKARRAY(plint,pairlistm,arraysize);
00622   NBWORKARRAY(plint,pairlist,arraysize);
00623   NBWORKARRAY(plint,pairlist2,arraysize);
00624   ALCH(
00625   NBWORKARRAY(plint,pairlistnAlch,arraysize);
00626   NBWORKARRAY(plint,pairlistnA0,arraysize);
00627   NBWORKARRAY(plint,pairlistnA1,arraysize);
00628   NBWORKARRAY(plint,pairlistxA1,arraysize);
00629   NBWORKARRAY(plint,pairlistmA1,arraysize);
00630   NBWORKARRAY(plint,pairlistnA2,arraysize);
00631   NBWORKARRAY(plint,pairlistxA2,arraysize);
00632   NBWORKARRAY(plint,pairlistmA2,arraysize);
00633   )
00634 
00635   NBWORKARRAY(short,vdwtype_array,j_upper+5);
00636   for (j = 0; j < j_upper; ++j)
00637     vdwtype_array [j] = p_1[j].vdwType;
00638 
00639   int fixg_upper = 0;
00640   int g_upper = 0;
00641 
00642   if ( savePairlists || ! usePairlists ) {
00643 
00644     #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00645 
00646       // Create a sorted list of non-fixed groups
00647       register int fixg = 0;
00648       for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00649         register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00650         register int p_1_index = p_1_sortEntry->index;
00651         if (!pExt_1[p_1_index].groupFixed) {
00652           register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00653           p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00654           p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00655           p_1_sortValues_fixg_len++;
00656         }
00657       }
00658 
00659     #else
00660 
00661       register int g = 0;
00662       for ( j = 0; j < j_upper; ++j ) {
00663         if ( p_1[j].nonbondedGroupSize ) {
00664           grouplist[g++] = j;
00665         }
00666       }
00667       g_upper = g;
00668       if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00669       int fixg = 0;
00670 
00671       if ( fixedAtomsOn ) {
00672         for ( g = 0; g < g_upper; ++g ) {
00673           j = grouplist[g];
00674           if ( ! pExt_1[j].groupFixed ) {
00675             fixglist[fixg++] = j;
00676           }
00677         }
00678       }
00679 
00680       fixg_upper = fixg;
00681       if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00682 
00683     #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00684 
00685     *(pairlists.newlist(1)) = i_upper;
00686     pairlists.newsize(1);
00687 
00688   } else { // if ( savePairlists || ! usePairlists )
00689 
00690     plint *i_upper_check;
00691     int i_upper_check_count;
00692     pairlists.nextlist(&i_upper_check,&i_upper_check_count);
00693     if ( i_upper_check[0] != i_upper )
00694       NAMD_bug("pairlist i_upper mismatch!");
00695 
00696   } // if ( savePairlists || ! usePairlists )
00697 
00698   SELF(
00699   int j_hgroup = 0;
00700   int g_lower = 0;
00701   int fixg_lower = 0;
00702   )
00703   int pairlistindex=0;
00704   int pairlistoffset=0;
00705 
00706   
00707 
00708 #if ( SHORT( FAST( 1+ ) ) 0 )
00709 #if ( PAIR( 1+ ) 0 )
00710     Force *f_0 = params->ff[0];
00711     Force *f_1 = params->ff[1];
00712 #else
00713 #define f_1 f_0
00714     NBWORKARRAY(Force,f_0,i_upper)
00715     memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00716 #endif
00717 #endif
00718 #if ( FULL( 1+ ) 0 )
00719 #if ( PAIR( 1+ ) 0 )
00720     Force *fullf_0 = params->fullf[0];
00721     Force *fullf_1 = params->fullf[1];
00722 #else
00723 #define fullf_1 fullf_0
00724     NBWORKARRAY(Force,fullf_0,i_upper);
00725     memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00726 #endif
00727 #endif
00728     
00729 
00730   int numParts = params->numParts;
00731   int myPart = params->minPart;
00732   int groupCount = 0;
00733 
00734   for ( i = 0; i < (i_upper SELF(- 1)); ++i )
00735   {
00736     const CompAtom &p_i = p_0[i];
00737     const CompAtomExt &pExt_i = pExt_0[i];
00738     if ( p_i.hydrogenGroupSize ) {
00739       //save current group count
00740       int curgrpcount = groupCount;      
00741       //increment group count
00742       groupCount ++;
00743 
00744       if (groupCount >= numParts)
00745         groupCount = 0;
00746       
00747       if ( curgrpcount != myPart ) {
00748         i += p_i.hydrogenGroupSize - 1;
00749         
00750         //Power PC alignment constraint
00751 #ifdef ARCH_POWERPC
00752         __dcbt((void *) &(p_0[i+1]));
00753 #endif
00754         continue;
00755       }
00756     }
00757 
00758     register const BigReal p_i_x = p_i.position.x + offset_x;
00759     register const BigReal p_i_y = p_i.position.y + offset_y;
00760     register const BigReal p_i_z = p_i.position.z + offset_z;
00761 
00762     ALCH(const int p_i_partition = p_i.partition;)
00763 
00764     PPROF(
00765         const int p_i_partition = p_i.partition;
00766         int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00767         pp_clamp(n1, pressureProfileSlabs);
00768         )
00769 
00770   SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00771 
00772   if ( savePairlists || ! usePairlists ) {
00773 
00774     if ( ! savePairlists ) pairlists.reset();  // limit space usage
00775 
00776     #ifdef MEM_OPT_VERSION
00777     const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);        
00778     const int excl_min = pExt_i.id + exclcheck->min;
00779     const int excl_max = pExt_i.id + exclcheck->max;
00780     #else
00781     const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
00782     const int excl_min = exclcheck->min;
00783     const int excl_max = exclcheck->max;
00784     #endif
00785     const char * excl_flags_var;
00786     if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00787     else {  // need to build list on the fly
00788 
00789     //TODO: Should change later!!!!!!!!!! --Chao Mei
00790     //Now just for the sake of passing compilation
00791     #ifndef MEM_OPT_VERSION 
00792       if ( excl_flags_buff ) {
00793         int nl,l;
00794         nl = full_excl[0] + 1;
00795         for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00796         nl = mod_excl[0] + 1;
00797         for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00798       } else {
00799         excl_flags_buff = new char[mol->numAtoms];
00800         memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00801       }
00802       int nl,l;
00803       full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
00804       nl = full_excl[0] + 1;
00805       for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00806       mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
00807       nl = mod_excl[0] + 1;
00808       for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00809       excl_flags_var = excl_flags_buff;
00810     #endif
00811 
00812     }
00813     const char * const excl_flags = excl_flags_var;
00814 
00815   if ( p_i.nonbondedGroupSize ) {
00816 
00817     pairlistindex = 0;  // initialize with 0 elements
00818     pairlistoffset=0;
00819     const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
00820 
00821     // If patch divisions are not made by hydrogen groups, then
00822     // hydrogenGroupSize is set to 1 for all atoms.  Thus we can
00823     // carry on as if we did have groups - only less efficiently.
00824     // An optimization in this case is to not rebuild the temporary
00825     // pairlist but to include every atom in it.  This should be a
00826     // a very minor expense.
00827 
00828     SELF
00829     (
00830       if ( p_i.hydrogenGroupSize ) {
00831         // exclude child hydrogens of i
00832         // j_hgroup = i + p_i.hydrogenGroupSize;  (moved above)
00833         while ( g_lower < g_upper &&
00834                 grouplist[g_lower] < j_hgroup ) ++g_lower;
00835         while ( fixg_lower < fixg_upper &&
00836                 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00837       }
00838       // add all child or sister hydrogens of i
00839       for ( j = i + 1; j < j_hgroup; ++j ) {
00840         pairlist[pairlistindex++] = j;
00841       }
00842     )
00843 
00844     // add remaining atoms to pairlist via hydrogen groups
00845     register plint *pli = pairlist + pairlistindex;
00846 
00847     {
00848       // Atom Sort : Modify the values of g and gu based on the added information
00849       //   of the linear projections (sort values) information.
00850       #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00851 
00852         register int g = 0;
00853         register BigReal p_i_sortValue = p_0_sortValues[i];
00854         const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00855         register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00856 
00857         // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
00858         register int lower = 0;
00859         register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00860         while ((upper - lower) > 1) {
00861           register int j = ((lower + upper) >> 1);
00862           register BigReal jSortVal = sortValues[j].sortValue;
00863           if (jSortVal < p_i_sortValue_plus_windowRadius) {
00864             lower = j;
00865           } else {
00866             upper = j;
00867           }
00868         }
00869         const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00870 
00871       #else
00872 
00873         register plint *gli = goodglist;
00874         const plint *glist = ( groupfixed ? fixglist : grouplist );
00875         SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00876         const int gu = ( groupfixed ? fixg_upper : g_upper );
00877         register int g = PAIR(0) SELF(gl);
00878 
00879       #endif
00880 
00881 
00882       if ( g < gu ) {
00883         int hu = 0;
00884 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00885         if ( gu - g  >  6 ) { 
00886 
00887           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00888             register SortEntry* sortEntry0 = sortValues + g;
00889             register SortEntry* sortEntry1 = sortValues + g + 1;
00890             register int jprev0 = sortEntry0->index;
00891             register int jprev1 = sortEntry1->index;
00892           #else
00893             register int jprev0 = glist[g    ];
00894             register int jprev1 = glist[g + 1];
00895           #endif
00896           
00897           register int j0;
00898           register int j1;
00899 
00900           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00901           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00902           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00903 
00904           // these don't change here, so we could move them into outer scope
00905           const __m128d P_I_X = _mm_set1_pd(p_i_x);
00906           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00907           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00908  
00909           g += 2;
00910           for ( ; g < gu - 2; g +=2 ) {
00911             // compute 1d distance, 2-way parallel       
00912             j0     =  jprev0;
00913             j1     =  jprev1;
00914 
00915             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00916             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00917             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00918             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00919             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00920             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00921             
00922             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00923               sortEntry0 = sortValues + g;
00924               sortEntry1 = sortValues + g + 1;
00925               jprev0 = sortEntry0->index;
00926               jprev1 = sortEntry1->index;
00927             #else
00928               jprev0     =  glist[g  ];
00929               jprev1     =  glist[g+1];
00930             #endif
00931            
00932             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00933             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00934             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00935 
00936             __align(16) double r2_01[2];
00937             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
00938 
00939             // XXX these could possibly benefit from SSE-based conditionals
00940             bool test0 = ( r2_01[0] < groupplcutoff2 );
00941             bool test1 = ( r2_01[1] < groupplcutoff2 ); 
00942             
00943             //removing ifs benefits on many architectures
00944             //as the extra stores will only warm the cache up
00945             goodglist [ hu         ] = j0;
00946             goodglist [ hu + test0 ] = j1;
00947             
00948             hu += test0 + test1;
00949           }
00950           g-=2;
00951         }
00952 #else
00953         if ( gu - g  >  6 ) { 
00954 
00955           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00956             register SortEntry* sortEntry0 = sortValues + g;
00957             register SortEntry* sortEntry1 = sortValues + g + 1;
00958             register int jprev0 = sortEntry0->index;
00959             register int jprev1 = sortEntry1->index;
00960           #else
00961             register int jprev0 = glist[g    ];
00962             register int jprev1 = glist[g + 1];
00963           #endif
00964           
00965           register  int j0; 
00966           register  int j1; 
00967           
00968           register  BigReal pj_x_0, pj_x_1; 
00969           register  BigReal pj_y_0, pj_y_1; 
00970           register  BigReal pj_z_0, pj_z_1; 
00971           register  BigReal t_0, t_1, r2_0, r2_1;
00972           
00973           pj_x_0 = p_1[jprev0].position.x;
00974           pj_x_1 = p_1[jprev1].position.x;  
00975           
00976           pj_y_0 = p_1[jprev0].position.y; 
00977           pj_y_1 = p_1[jprev1].position.y;  
00978           
00979           pj_z_0 = p_1[jprev0].position.z; 
00980           pj_z_1 = p_1[jprev1].position.z;
00981           
00982           g += 2;
00983           for ( ; g < gu - 2; g +=2 ) {
00984             // compute 1d distance, 2-way parallel       
00985             j0     =  jprev0;
00986             j1     =  jprev1;
00987             
00988             t_0    =  p_i_x - pj_x_0;
00989             t_1    =  p_i_x - pj_x_1;
00990             r2_0   =  t_0 * t_0;
00991             r2_1   =  t_1 * t_1;
00992             
00993             t_0    =  p_i_y - pj_y_0;
00994             t_1    =  p_i_y - pj_y_1;
00995             r2_0  +=  t_0 * t_0;
00996             r2_1  +=  t_1 * t_1;
00997             
00998             t_0    =  p_i_z - pj_z_0;
00999             t_1    =  p_i_z - pj_z_1;
01000             r2_0  +=  t_0 * t_0;
01001             r2_1  +=  t_1 * t_1;
01002             
01003             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01004               sortEntry0 = sortValues + g;
01005               sortEntry1 = sortValues + g + 1;
01006               jprev0 = sortEntry0->index;
01007               jprev1 = sortEntry1->index;
01008             #else
01009               jprev0     =  glist[g  ];
01010               jprev1     =  glist[g+1];
01011             #endif
01012             
01013             pj_x_0     =  p_1[jprev0].position.x;
01014             pj_x_1     =  p_1[jprev1].position.x;
01015             pj_y_0     =  p_1[jprev0].position.y; 
01016             pj_y_1     =  p_1[jprev1].position.y;
01017             pj_z_0     =  p_1[jprev0].position.z; 
01018             pj_z_1     =  p_1[jprev1].position.z;
01019             
01020             bool test0 = ( r2_0 < groupplcutoff2 );
01021             bool test1 = ( r2_1 < groupplcutoff2 ); 
01022             
01023             //removing ifs benefits on many architectures
01024             //as the extra stores will only warm the cache up
01025             goodglist [ hu         ] = j0;
01026             goodglist [ hu + test0 ] = j1;
01027             
01028             hu += test0 + test1;
01029           }
01030           g-=2;
01031         }
01032 #endif
01033         
01034         for (; g < gu; g++) {
01035 
01036           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01037             register SortEntry* sortEntry = sortValues + g;
01038             register int j = sortEntry->index;
01039           #else
01040             int j = glist[g];
01041           #endif
01042 
01043           BigReal p_j_x = p_1[j].position.x;
01044           BigReal p_j_y = p_1[j].position.y;
01045           BigReal p_j_z = p_1[j].position.z;
01046           
01047           BigReal r2 = p_i_x - p_j_x;
01048           r2 *= r2;
01049           BigReal t2 = p_i_y - p_j_y;
01050           r2 += t2 * t2;
01051           t2 = p_i_z - p_j_z;
01052           r2 += t2 * t2;
01053 
01054           if ( r2 <= groupplcutoff2 ) 
01055             goodglist[hu ++] = j; 
01056         }
01057 
01058         for ( int h=0; h<hu; ++h ) {
01059           int j = goodglist[h];
01060           int nbgs = p_1[j].nonbondedGroupSize;
01061           pli[0] = j;   // copy over the next four in any case
01062           pli[1] = j+1;
01063           pli[2] = j+2;
01064           pli[3] = j+3; // assume nbgs <= 4
01065           pli += nbgs;
01066         }
01067       }
01068     }
01069 
01070     pairlistindex = pli - pairlist;
01071     // make sure padded element on pairlist points to real data
01072     if ( pairlistindex ) {
01073        pairlist[pairlistindex] = pairlist[pairlistindex-1];
01074     } /* PAIR( else {  // skip empty loops if no pairs were found
01075        int nbgs = p_i.nonbondedGroupSize;
01076        i += nbgs - 1;
01077        continue;
01078     } ) */
01079   } // if i is hydrogen group parent
01080   SELF
01081     (
01082     // self-comparisions require list to be incremented
01083     // pair-comparisions use entire list (pairlistoffset is 0)
01084     else pairlistoffset++;
01085     )
01086 
01087     const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
01088 
01089     register plint *pli = pairlist2;
01090 #if 1 ALCH(-1)
01091     plint *pairlistn = pairlists.newlist(j_upper + 5 + 1 + 5) SELF(+ 1);
01092 #else
01093     plint *pairlistn = pairlistnAlch;
01094 #endif
01095     SELF( plint &pairlistn_skip = *(pairlistn-1); )
01096     register plint *plin = pairlistn;
01097 
01098 
01099     INT(
01100     if ( pairInteractionOn ) {
01101       const int ifep_type = p_i.partition;
01102       if (pairInteractionSelf) {
01103         if (ifep_type != 1) continue;
01104         for (int k=pairlistoffset; k<pairlistindex; k++) {
01105           j = pairlist[k];
01106           const int jfep_type = p_1[j].partition;
01107           // for pair-self, both atoms must be in group 1.
01108           if (jfep_type == 1) {
01109             *(pli++) = j;
01110           }
01111         }
01112       } else {
01113         if (ifep_type != 1 && ifep_type != 2) continue;
01114         for (int k=pairlistoffset; k<pairlistindex; k++) {
01115           j = pairlist[k];
01116           const int jfep_type = p_1[j].partition;
01117           // for pair, must have one from each group.
01118           if (ifep_type + jfep_type == 3) {
01119             *(pli++) = j;
01120           }
01121         }
01122       }
01123       int npair2_int = pli - pairlist2;
01124       pli = pairlist2;
01125       for (int k=0; k<npair2_int; k++) {
01126         j = pairlist2[k];
01127         BigReal p_j_x = p_1[j].position.x;
01128         BigReal r2 = p_i_x - p_j_x;
01129         r2 *= r2;
01130         BigReal p_j_y = p_1[j].position.y;
01131         BigReal t2 = p_i_y - p_j_y;
01132         r2 += t2 * t2;
01133         BigReal p_j_z = p_1[j].position.z;
01134         t2 = p_i_z - p_j_z;
01135         r2 += t2 * t2;
01136         if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01137           int atom2 = pExt_1[j].id;
01138           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01139           else *(plin++) = j;
01140         }
01141       }
01142     } else
01143     )
01144     if ( atomfixed ) {
01145       for (int k=pairlistoffset; k<pairlistindex; k++) {
01146         j = pairlist[k];
01147         BigReal p_j_x = p_1[j].position.x;
01148         BigReal r2 = p_i_x - p_j_x;
01149         r2 *= r2;
01150         BigReal p_j_y = p_1[j].position.y;
01151         BigReal t2 = p_i_y - p_j_y;
01152         r2 += t2 * t2;
01153         BigReal p_j_z = p_1[j].position.z;
01154         t2 = p_i_z - p_j_z;
01155         r2 += t2 * t2;
01156         if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01157           int atom2 = pExt_1[j].id;
01158           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01159           else *(plin++) = j;
01160         }
01161       }
01162     } else {
01163       int k = pairlistoffset;
01164       int ku = pairlistindex;
01165       if ( k < ku ) {
01166 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01167         if ( ku - k  >  6 ) {      
01168           register  int jprev0 = pairlist [k    ];
01169           register  int jprev1 = pairlist [k + 1];
01170           
01171           register  int j0; 
01172           register  int j1; 
01173 
01174           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01175           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01176           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01177 
01178           // these don't change here, so we could move them into outer scope
01179           const __m128d P_I_X = _mm_set1_pd(p_i_x);
01180           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01181           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01182           
01183           int atom2_0 = pExt_1[jprev0].id;
01184           int atom2_1 = pExt_1[jprev1].id;
01185           
01186           k += 2;
01187           for ( ; k < ku - 2; k +=2 ) {
01188             // compute 1d distance, 2-way parallel       
01189             j0     =  jprev0;
01190             j1     =  jprev1;
01191             
01192             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01193             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01194             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01195             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01196             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01197             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01198             
01199             jprev0     =  pairlist[k];
01200             jprev1     =  pairlist[k+1];
01201             
01202             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01203             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01204             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01205 
01206             __align(16) double r2_01[2];
01207             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01208             
01209             if (r2_01[0] <= plcutoff2) {
01210               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01211                 *(pli++) = j0;
01212               else 
01213                 *(plin++) = j0;
01214             }
01215             atom2_0 = pExt_1[jprev0].id;
01216             
01217             if (r2_01[1] <= plcutoff2) {
01218               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01219                 *(pli++) = j1;
01220               else 
01221                 *(plin++) = j1;
01222             }
01223             atom2_1 = pExt_1[jprev1].id;            
01224           }
01225           k-=2;
01226         }       
01227 #else
01228         if ( ku - k  >  6 ) {      
01229           register  int jprev0 = pairlist [k];
01230           register  int jprev1 = pairlist [k + 1];
01231           
01232           register  int j0; 
01233           register  int j1; 
01234           
01235           register  BigReal pj_x_0, pj_x_1; 
01236           register  BigReal pj_y_0, pj_y_1; 
01237           register  BigReal pj_z_0, pj_z_1; 
01238           register  BigReal t_0, t_1, r2_0, r2_1;
01239           
01240           pj_x_0 = p_1[jprev0].position.x;
01241           pj_x_1 = p_1[jprev1].position.x;  
01242           
01243           pj_y_0 = p_1[jprev0].position.y; 
01244           pj_y_1 = p_1[jprev1].position.y;  
01245           
01246           pj_z_0 = p_1[jprev0].position.z; 
01247           pj_z_1 = p_1[jprev1].position.z;
01248           
01249           int atom2_0 = pExt_1[jprev0].id;
01250           int atom2_1 = pExt_1[jprev1].id;
01251           
01252           k += 2;
01253           for ( ; k < ku - 2; k +=2 ) {
01254             // compute 1d distance, 2-way parallel       
01255             j0     =  jprev0;
01256             j1     =  jprev1;
01257             
01258             t_0    =  p_i_x - pj_x_0;
01259             t_1    =  p_i_x - pj_x_1;
01260             r2_0   =  t_0 * t_0;
01261             r2_1   =  t_1 * t_1;
01262             
01263             t_0    =  p_i_y - pj_y_0;
01264             t_1    =  p_i_y - pj_y_1;
01265             r2_0  +=  t_0 * t_0;
01266             r2_1  +=  t_1 * t_1;
01267             
01268             t_0    =  p_i_z - pj_z_0;
01269             t_1    =  p_i_z - pj_z_1;
01270             r2_0  +=  t_0 * t_0;
01271             r2_1  +=  t_1 * t_1;
01272             
01273             jprev0     =  pairlist[k];
01274             jprev1     =  pairlist[k+1];
01275             
01276             pj_x_0     =  p_1[jprev0].position.x;
01277             pj_x_1     =  p_1[jprev1].position.x;
01278             pj_y_0     =  p_1[jprev0].position.y; 
01279             pj_y_1     =  p_1[jprev1].position.y;
01280             pj_z_0     =  p_1[jprev0].position.z; 
01281             pj_z_1     =  p_1[jprev1].position.z;
01282             
01283             if (r2_0 <= plcutoff2) {
01284               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01285                 *(pli++) = j0;
01286               else 
01287                 *(plin++) = j0;
01288             }
01289             atom2_0 = pExt_1[jprev0].id;
01290             
01291             if (r2_1 <= plcutoff2) {
01292               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01293                 *(pli++) = j1;
01294               else 
01295                 *(plin++) = j1;
01296              }
01297             atom2_1 = pExt_1[jprev1].id;            
01298           }
01299           k-=2;
01300         }       
01301 #endif
01302 
01303         for (; k < ku; k++) {
01304           int j = pairlist[k];
01305           int atom2 = pExt_1[j].id;
01306           
01307           BigReal p_j_x = p_1[j].position.x;
01308           BigReal p_j_y = p_1[j].position.y;
01309           BigReal p_j_z = p_1[j].position.z;
01310           
01311           BigReal r2 = p_i_x - p_j_x;
01312           r2 *= r2;
01313           BigReal t2 = p_i_y - p_j_y;
01314           r2 += t2 * t2;
01315           t2 = p_i_z - p_j_z;
01316           r2 += t2 * t2;
01317           
01318           if (r2 <= plcutoff2) {
01319             if ( atom2 >= excl_min && atom2 <= excl_max ) 
01320               *(pli++) = j;
01321             else 
01322               *(plin++) = j;
01323           }
01324         }
01325       }
01326     }
01327     int npair2 = pli - pairlist2;
01328     if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
01329 
01330     plint *plix = pairlistx;
01331     plint *plim = pairlistm;
01332     plint *pln = pairlistn;
01333     ALCH(
01334     plint *plinA1 = pairlistnA1;
01335     plint *plixA1 = pairlistxA1;
01336     plint *plimA1 = pairlistmA1;
01337     plint *plinA2 = pairlistnA2;
01338     plint *plixA2 = pairlistxA2;
01339     plint *plimA2 = pairlistmA2;
01340     )
01341     int k=0;
01342 #if 1 ALCH(-1)
01343     SELF(
01344     for (; pln < plin && *pln < j_hgroup; ++pln) {
01345       *(plix++) = *pln;  // --exclChecksum;
01346     }
01347     pairlistn_skip = pln - pairlistn;
01348     for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01349       *(plix++) = pairlist2[k];  // --exclChecksum;
01350     }
01351     )
01352 #endif
01353 ALCH(
01354     SELF(
01355     for (; pln < plin && *pln < j_hgroup; ++pln) {
01356       switch (pswitchTable[4*p_i_partition]) { //p_i_partition + 3*p_1[i].partition
01357       case 0: *(plix++) = *pln;  break;
01358       case 1: *(plixA1++) = *pln; break;
01359       case 2: *(plixA2++) = *pln; break;
01360       default: NAMD_die("Alchemical pairlist error"); break;
01361       }
01362     }
01363     for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01364       switch (pswitchTable[4*p_i_partition]) {
01365       case 0: *(plix++) = pairlist2[k];  break;
01366       case 1: *(plixA1++) = pairlist2[k]; break;
01367       case 2: *(plixA2++) = pairlist2[k]; break;
01368       default: NAMD_die("Alchemical pairlist error"); break;
01369       }
01370     }
01371     )
01372 )
01373     for (; k < npair2; ++k ) {
01374       int j = pairlist2[k];
01375       int atom2 = pExt_1[j].id;
01376       int excl_flag = excl_flags[atom2];
01377       ALCH(int pswitch = pswitchTable[p_i_partition + 3*(p_1[j].partition)];)
01378       switch ( excl_flag ALCH( + 3 * pswitch)) {
01379       case 0:  *(plin++) = j;  break;
01380       case 1:  *(plix++) = j;  break;
01381       case 2:  *(plim++) = j;  break;
01382       ALCH(
01383       case 3:  *(plinA1++) = j; break;
01384       case 6:  *(plinA2++) = j; break;
01385       case 5:  *(plimA1++) = j; break;
01386       case 8:  *(plimA2++) = j; break;
01387       case 4:  *(plixA1++) = j; break;
01388       case 7:  *(plixA2++) = j; break;
01389       )
01390       }
01391     }
01392     // exclChecksum += (plix - pairlistx);
01393     // exclChecksum += (plim - pairlistm);
01394 
01395 
01396 #if 1 ALCH(-1)
01397     npairn = plin - pln;
01398     pairlistn_save = pln;
01399     pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
01400     pairlists.newsize(plin - pairlistn SELF(+ 1) + 1);
01401 #else
01402     plint *plinA0 = pairlistnA0;
01403     int unsortedNpairn = plin - pln;
01404     for ( k=0; k<unsortedNpairn; ++k ) {
01405       int j = pln[k];
01406       switch(pswitchTable[p_i_partition + 3*(p_1[j].partition)]) {
01407         case 0:  *(plinA0++) = j; break;
01408         case 1:  *(plinA1++) = j; break;
01409         case 2:  *(plinA2++) = j; break;
01410       }
01411     }
01412     
01413     npairn = plinA0 - pairlistnA0;
01414     // FB preallocation (incl extra for overhead) seems to be necessary
01415     pairlistn_save = pairlists.newlist(j_upper + 30);
01416     for ( k=0; k<npairn; ++k ) {
01417       pairlistn_save[k] = pairlistnA0[k];
01418     }
01419     pairlistn_save[k] = k ? pairlistn_save[k-1] : -1;
01420     pairlists.newsize(npairn + 1);
01421 
01422 #endif
01423     
01424     npairx = plix - pairlistx;
01425     pairlistx_save = pairlists.newlist(npairx + 1);
01426     for ( k=0; k<npairx; ++k ) {
01427       pairlistx_save[k] = pairlistx[k];
01428     }
01429     pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
01430     pairlists.newsize(npairx + 1);
01431 
01432     npairm = plim - pairlistm;
01433     pairlistm_save = pairlists.newlist(npairm + 1);
01434     for ( k=0; k<npairm; ++k ) {
01435       pairlistm_save[k] = pairlistm[k];
01436     }
01437     pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
01438     pairlists.newsize(npairm + 1);
01439 
01440     
01441 #if 0 ALCH(+1)
01442 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
01443 { \
01444   (NPAIRS) = (PL2) - (PL1); \
01445   (PLSAVE) = pairlists.newlist((NPAIRS) + 1); \
01446   for ( k=0; k<(NPAIRS); ++k ) { \
01447     (PLSAVE)[k] = (PL1)[k]; \
01448   } \
01449   (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
01450   pairlists.newsize((NPAIRS) + 1); \
01451 }
01452   
01453     PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
01454     PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
01455     PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
01456     PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
01457     PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
01458     PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
01459 #undef PAIRLISTFROMARRAY
01460 
01461 #endif
01462     
01463     
01464         // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
01465   } else { // if ( savePairlists || ! usePairlists )
01466         // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
01467 
01468     pairlists.nextlist(&pairlistn_save,&npairn);  --npairn;
01469     //if ( npairn > 1000 )
01470 //      iout << i << " " << i_upper << " " << npairn << " n\n" << endi;
01471 #if 1 ALCH(-1)
01472     SELF(
01473     int pairlistn_skip = *pairlistn_save;
01474     pairlistn_save += (pairlistn_skip + 1);
01475     npairn -= (pairlistn_skip + 1);
01476     )
01477 #endif
01478 
01479     pairlists.nextlist(&pairlistx_save,&npairx);  --npairx;
01480 
01481     //if ( npairx > 1000 )
01482 //      iout << i << " " << i_upper << " " << npairx << " x\n" << endi;
01483     // exclChecksum += npairx;
01484     pairlists.nextlist(&pairlistm_save,&npairm);  --npairm;
01485     ALCH(
01486     pairlists.nextlist(&pairlistnA1_save,&npairnA1);  --npairnA1;
01487     pairlists.nextlist(&pairlistxA1_save,&npairxA1);  --npairxA1;
01488     pairlists.nextlist(&pairlistmA1_save,&npairmA1);  --npairmA1;
01489     pairlists.nextlist(&pairlistnA2_save,&npairnA2);  --npairnA2;
01490     pairlists.nextlist(&pairlistxA2_save,&npairxA2);  --npairxA2;
01491     pairlists.nextlist(&pairlistmA2_save,&npairmA2);  --npairmA2;
01492     )
01493     //if ( npairm > 1000 )
01494 //      iout << i << " " << i_upper << " " << npairm << " m\n" << endi;
01495     // exclChecksum += npairm;
01496 
01497   } // if ( savePairlists || ! usePairlists )
01498 
01499     LES( BigReal *lambda_table_i =
01500                         lambda_table + (lesFactor+1) * p_i.partition; )
01501 
01502     INT(
01503     const BigReal force_sign = (
01504       ( pairInteractionOn && ! pairInteractionSelf ) ?
01505         ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
01506     );
01507       
01508     )
01509 
01510     const BigReal kq_i = COLOUMB * p_i.charge * scaling * dielectric_1;
01511     const LJTable::TableEntry * const lj_row =
01512                 ljTable->table_row(p_i.vdwType);
01513 
01514     SHORT( FAST( BigReal f_i_x = 0.; ) )
01515     SHORT( FAST( BigReal f_i_y = 0.; ) )
01516     SHORT( FAST( BigReal f_i_z = 0.; ) )
01517     FULL( BigReal fullf_i_x = 0.; )
01518     FULL( BigReal fullf_i_y = 0.; )
01519     FULL( BigReal fullf_i_z = 0.; )
01520 
01521     int npairi;
01522     int k;
01523 
01524     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01525         p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
01526         r2_delta, r2list);
01527 
01528 #define NORMAL(X) X
01529 #define EXCLUDED(X)
01530 #define MODIFIED(X)
01531 #include  "ComputeNonbondedBase2.h"
01532 #undef NORMAL
01533 #undef EXCLUDED
01534 #undef MODIFIED
01535 
01536     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01537         p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
01538         r2_delta, r2list);
01539     exclChecksum += npairi;
01540 
01541 #define NORMAL(X)
01542 #define EXCLUDED(X)
01543 #define MODIFIED(X) X
01544 #include  "ComputeNonbondedBase2.h"
01545 #undef NORMAL
01546 #undef EXCLUDED
01547 #undef MODIFIED
01548 
01549 #ifdef FULLELECT
01550     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01551         p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
01552         r2_delta, r2list);
01553     exclChecksum += npairi;
01554 #ifndef NAMD_CUDA
01555     SELF(
01556     for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01557     )
01558 #endif
01559 
01560 #undef FAST
01561 #define FAST(X)
01562 #define NORMAL(X)
01563 #define EXCLUDED(X) X
01564 #define MODIFIED(X)
01565 #include  "ComputeNonbondedBase2.h"
01566 #undef FAST
01567 #ifdef SLOWONLY
01568   #define FAST(X)
01569 #else
01570   #define FAST(X) X
01571 #endif
01572 #undef NORMAL
01573 #undef EXCLUDED
01574 #undef MODIFIED
01575 #else
01576     exclChecksum += npairx;
01577 #ifndef NAMD_CUDA
01578     SELF(
01579       for (k=0; k<npairx && pairlistx_save[k] < j_hgroup; ++k) --exclChecksum;
01580     )
01581 #endif
01582 #endif
01583 
01584 #if 0 ALCH(+1)   // nonbondedbase2 for alchemical-separeted pairlists
01585 
01586     #undef ALCHPAIR
01587     #define ALCHPAIR(X) X
01588     #undef NOT_ALCHPAIR
01589     #define NOT_ALCHPAIR(X)
01590     BigReal myLambda; FEP(BigReal myLambda2;)
01591     BigReal myElecLambda;  FEP(BigReal myElecLambda2;)
01592     BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
01593     BigReal myVdwShift; FEP(BigReal myVdwShift2;)
01594     BigReal alch_vdw_energy; BigReal alch_vdw_force;
01595     FEP(BigReal alch_vdw_energy_2;) TI(BigReal alch_vdw_dUdl;)
01596     BigReal shiftedElec; BigReal shiftedElecForce;
01597     
01598     /********************************************************************/
01599     /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
01600     /********************************************************************/
01601     #define NORMAL(X) X
01602     #define EXCLUDED(X)
01603     #define MODIFIED(X)
01604 
01605     #define ALCH1(X) X
01606     #define ALCH2(X)
01607     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01608             p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
01609             r2_delta, r2list);
01610     #include  "ComputeNonbondedBase2.h" // normal, direction 'up'
01611     #undef ALCH1
01612     #undef ALCH2
01613 
01614     #define ALCH1(X)
01615     #define ALCH2(X) X
01616     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01617             p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
01618             r2_delta, r2list);
01619     #include  "ComputeNonbondedBase2.h" // normal, direction 'down'
01620     #undef ALCH1
01621     #undef ALCH2
01622 
01623     #undef NORMAL
01624     #undef EXCLUDED
01625     #undef MODIFIED
01626     
01627     /********************************************************************/
01628     /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
01629     /********************************************************************/
01630     #define NORMAL(X)
01631     #define EXCLUDED(X)
01632     #define MODIFIED(X) X
01633 
01634     #define ALCH1(X) X
01635     #define ALCH2(X)
01636     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01637             p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
01638             r2_delta, r2list);
01639         exclChecksum += npairi;
01640     #include  "ComputeNonbondedBase2.h" // modified, direction 'up'
01641     #undef ALCH1
01642     #undef ALCH2
01643 
01644     #define ALCH1(X)
01645     #define ALCH2(X) X
01646     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01647             p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
01648             r2_delta, r2list);
01649         exclChecksum += npairi;
01650     #include  "ComputeNonbondedBase2.h" // modified, direction 'down'
01651     #undef ALCH1
01652     #undef ALCH2
01653 
01654     #undef NORMAL
01655     #undef EXCLUDED
01656     #undef MODIFIED
01657     
01658     /********************************************************************/
01659     /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
01660     /********************************************************************/
01661     #ifdef FULLELECT
01662     #undef FAST
01663     #define FAST(X)
01664     #define NORMAL(X)
01665     #define EXCLUDED(X) X
01666     #define MODIFIED(X)
01667 
01668     #define ALCH1(X) X
01669     #define ALCH2(X)
01670     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01671             p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
01672             r2_delta, r2list);
01673         exclChecksum += npairi;
01674         SELF(
01675         for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01676         )
01677     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'up'
01678     #undef ALCH1
01679     #undef ALCH2
01680 
01681     #define ALCH1(X)
01682     #define ALCH2(X) X
01683     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01684             p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
01685             r2_delta, r2list);
01686         exclChecksum += npairi;
01687         SELF(
01688         for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01689         )
01690     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'down'
01691     #undef ALCH1
01692     #undef ALCH2
01693 
01694     #undef FAST
01695     #ifdef SLOWONLY
01696       #define FAST(X)
01697     #else
01698       #define FAST(X) X
01699     #endif
01700     #undef NORMAL
01701     #undef EXCLUDED
01702     #undef MODIFIED
01703     #else
01704         exclChecksum += npairxA1 + npairxA2;
01705         SELF(
01706           for (k=0; k<npairxA1 && pairlistxA1_save[k] < j_hgroup; ++k) --exclChecksum;
01707           for (k=0; k<npairxA2 && pairlistxA2_save[k] < j_hgroup; ++k) --exclChecksum;
01708         )
01709     #endif
01710 
01711     #undef ALCHPAIR
01712     #define ALCHPAIR(X)
01713     #undef NOT_ALCHPAIR
01714     #define NOT_ALCHPAIR(X) X
01715 
01716 #endif // end nonbondedbase2.h includes for alchemical pairlists
01717 
01718 #ifdef NETWORK_PROGRESS
01719     CkNetworkProgress();
01720 #endif
01721 
01722 #ifdef ARCH_POWERPC
01723     //data cache block touch the position structure
01724     __dcbt ((void *) &(p_0[i+1]));
01725     __prefetch_by_load ((void *)&(groupCount));
01726 #endif
01727 
01728 #ifndef NAMD_CUDA
01729     SHORT( FAST( f_0[i].x += f_i_x; ) )
01730     SHORT( FAST( f_0[i].y += f_i_y; ) )
01731     SHORT( FAST( f_0[i].z += f_i_z; ) )
01732     FULL( fullf_0[i].x += fullf_i_x; )
01733     FULL( fullf_0[i].y += fullf_i_y; )
01734     FULL( fullf_0[i].z += fullf_i_z; )
01735 #endif
01736 
01737         // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
01738   } // for i
01739 
01740   // PAIR(iout << "++++++++\n" << endi;)
01741 
01742 #ifdef f_1
01743 #undef f_1
01744 #endif
01745 #if ( SHORT( FAST( 1+ ) ) 0 )
01746 #if ( SELF( 1+ ) 0 )
01747   {
01748     Force *patch_f_0 = params->ff[0];
01749 
01750 #ifndef NAMD_CUDA
01751 #ifndef ARCH_POWERPC 
01752 #pragma ivdep
01753 #endif
01754     for ( int i = 0; i < i_upper; ++i ) {
01755       patch_f_0[i].x += f_0[i].x;
01756       patch_f_0[i].y += f_0[i].y;
01757       patch_f_0[i].z += f_0[i].z;
01758       virial_xx += f_0[i].x * p_0[i].position.x;
01759       virial_xy += f_0[i].x * p_0[i].position.y;
01760       virial_xz += f_0[i].x * p_0[i].position.z;
01761       virial_yy += f_0[i].y * p_0[i].position.y;
01762       virial_yz += f_0[i].y * p_0[i].position.z;
01763       virial_zz += f_0[i].z * p_0[i].position.z;
01764     }
01765 #endif
01766   }
01767 #endif
01768 #endif
01769 #ifdef fullf_1
01770 #undef fullf_1
01771 #endif
01772 #if ( FULL( 1+ ) 0 )
01773 #if ( SELF( 1+ ) 0 )
01774   {
01775     Force *patch_fullf_0 = params->fullf[0];
01776 #ifndef NAMD_CUDA
01777 #ifndef ARCH_POWERPC 
01778 #pragma ivdep
01779 #endif
01780     for ( int i = 0; i < i_upper; ++i ) {
01781       patch_fullf_0[i].x += fullf_0[i].x;
01782       patch_fullf_0[i].y += fullf_0[i].y;
01783       patch_fullf_0[i].z += fullf_0[i].z;
01784       fullElectVirial_xx += fullf_0[i].x * p_0[i].position.x;
01785       fullElectVirial_xy += fullf_0[i].x * p_0[i].position.y;
01786       fullElectVirial_xz += fullf_0[i].x * p_0[i].position.z;
01787       fullElectVirial_yy += fullf_0[i].y * p_0[i].position.y;
01788       fullElectVirial_yz += fullf_0[i].y * p_0[i].position.z;
01789       fullElectVirial_zz += fullf_0[i].z * p_0[i].position.z;
01790     }
01791 #endif
01792   }
01793 #endif
01794 #endif
01795 
01796 #ifndef NAMD_CUDA
01797   reduction[exclChecksumIndex] += exclChecksum;
01798 #endif
01799   FAST
01800   (
01801   ENERGY( reduction[vdwEnergyIndex] += vdwEnergy; )
01802   SHORT
01803   (
01804   ENERGY( reduction[electEnergyIndex] += electEnergy; )
01805   )
01806   ALCH
01807   (
01808     TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;) 
01809     TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;) 
01810     FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
01811   SHORT
01812   (
01813     FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
01814     TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;) 
01815     TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;) 
01816   )
01817   )
01818   SHORT
01819   (
01820   reduction[virialIndex_XX] += virial_xx;
01821   reduction[virialIndex_XY] += virial_xy;
01822   reduction[virialIndex_XZ] += virial_xz;
01823   reduction[virialIndex_YX] += virial_xy;
01824   reduction[virialIndex_YY] += virial_yy;
01825   reduction[virialIndex_YZ] += virial_yz;
01826   reduction[virialIndex_ZX] += virial_xz;
01827   reduction[virialIndex_ZY] += virial_yz;
01828   reduction[virialIndex_ZZ] += virial_zz;
01829   )
01830   )
01831   FULL
01832   (
01833   ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
01834   ALCH
01835   (
01836     FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
01837     TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;) 
01838     TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;) 
01839   )
01840   reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
01841   reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
01842   reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
01843   reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
01844   reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
01845   reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
01846   reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
01847   reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
01848   reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
01849   )
01850 
01851   delete [] excl_flags_buff;
01852 
01853 }
01854 

Generated on Mon Nov 23 04:59:19 2009 for NAMD by  doxygen 1.3.9.1