Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedBase.h

Go to the documentation of this file.
00001 
00007 // Several special cases are defined:
00008 //   NBTYPE: exclusion method (NBPAIR, NBSELF -- mutually exclusive)
00009 //   FULLELECT full electrostatics calculation?
00010 
00011 #ifdef ARCH_POWERPC
00012 #include <builtins.h>
00013 #endif
00014 
00015 #if defined(NAMD_SSE) && defined(__INTEL_COMPILER) && defined(__SSE2__)
00016 #include <emmintrin.h>  // We're using SSE2 intrinsics
00017 #endif
00018 
00019 #ifdef DEFINITION // (
00020   #include "LJTable.h"
00021   #include "Molecule.h"
00022   #include "ComputeNonbondedUtil.h"
00023 #endif // )
00024 
00025 // determining class name
00026 #undef NAME
00027 #undef CLASS
00028 #undef CLASSNAME
00029 #define NAME CLASSNAME(calc)
00030 
00031 #undef PAIR
00032 #if NBTYPE == NBPAIR
00033   #define PAIR(X) X
00034   #define CLASS ComputeNonbondedPair
00035   #define CLASSNAME(X) ENERGYNAME( X ## _pair )
00036 #else
00037   #define PAIR(X)
00038 #endif
00039 
00040 #undef SELF
00041 #if NBTYPE == NBSELF
00042   #define SELF(X) X
00043   #define CLASS ComputeNonbondedSelf
00044   #define CLASSNAME(X) ENERGYNAME( X ## _self )
00045 #else
00046   #define SELF(X)
00047 #endif
00048 
00049 #undef ENERGYNAME
00050 #undef ENERGY
00051 #undef NOENERGY
00052 #ifdef CALCENERGY
00053   #define ENERGY(X) X
00054   #define NOENERGY(X)
00055   #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
00056 #else
00057   #define ENERGY(X)
00058   #define NOENERGY(X) X
00059   #define ENERGYNAME(X) SLOWONLYNAME( X )
00060 #endif
00061 
00062 #undef SLOWONLYNAME
00063 #undef FAST
00064 #ifdef SLOWONLY
00065   #define FAST(X)
00066   #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
00067 #else
00068   #define FAST(X) X
00069   #define SLOWONLYNAME(X) MERGEELECTNAME( X )
00070 #endif
00071 
00072 #undef MERGEELECTNAME
00073 #undef SHORT
00074 #undef NOSHORT
00075 #ifdef MERGEELECT
00076   #define SHORT(X)
00077   #define NOSHORT(X) X
00078   #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
00079 #else
00080   #define SHORT(X) X
00081   #define NOSHORT(X)
00082   #define MERGEELECTNAME(X) FULLELECTNAME( X )
00083 #endif
00084 
00085 #undef FULLELECTNAME
00086 #undef FULL
00087 #undef NOFULL
00088 #ifdef FULLELECT
00089   #define FULLELECTNAME(X) FEPNAME( X ## _fullelect )
00090   #define FULL(X) X
00091   #define NOFULL(X)
00092 #else
00093   #define FULLELECTNAME(X) FEPNAME( X )
00094   #define FULL(X)
00095   #define NOFULL(X) X
00096 #endif
00097 
00098 // Here are what these macros stand for:
00099 // FEP/NOT_FEP: FEP free energy perturbation is active/inactive 
00100 //      (does NOT use LAM)
00101 // LES: locally-enhanced sampling is active
00102 // LAM: scale the potential by a factor lambda (except FEP)
00103 // INT: measure interaction energies
00104 // PPROF: pressure profiling
00105 
00106 #undef FEPNAME
00107 #undef FEP
00108 #undef LES
00109 #undef INT
00110 #undef PPROF
00111 #undef LAM
00112 #undef ALCH
00113 #undef TI
00114 #define FEPNAME(X) LAST( X )
00115 #define FEP(X)
00116 #define ALCHPAIR(X)
00117 #define NOT_ALCHPAIR(X) X
00118 #define LES(X)
00119 #define INT(X)
00120 #define PPROF(X)
00121 #define LAM(X)
00122 #define ALCH(X)
00123 #define TI(X)
00124 #ifdef FEPFLAG
00125   #undef FEPNAME
00126   #undef FEP
00127   #undef ALCH
00128   #define FEPNAME(X) LAST( X ## _fep )
00129   #define FEP(X) X
00130   #define ALCH(X) X
00131 #endif
00132 #ifdef TIFLAG
00133   #undef FEPNAME
00134   #undef TI
00135   #undef ALCH
00136   #define FEPNAME(X) LAST( X ## _ti )
00137   #define TI(X) X
00138   #define ALCH(X) X
00139 #endif
00140 #ifdef LESFLAG
00141   #undef FEPNAME
00142   #undef LES
00143   #undef LAM
00144   #define FEPNAME(X) LAST( X ## _les )
00145   #define LES(X) X
00146   #define LAM(X) X
00147 #endif
00148 #ifdef INTFLAG
00149   #undef FEPNAME
00150   #undef INT
00151   #define FEPNAME(X) LAST( X ## _int )
00152   #define INT(X) X
00153 #endif
00154 #ifdef PPROFFLAG
00155   #undef FEPNAME
00156   #undef INT
00157   #undef PPROF
00158   #define FEPNAME(X) LAST( X ## _pprof )
00159   #define INT(X) X
00160   #define PPROF(X) X
00161 #endif
00162 
00163 #define LAST(X) X
00164 
00165 // see if things are really messed up
00166 SELF( PAIR( foo bar ) )
00167 LES( FEP( foo bar ) )
00168 LES( INT( foo bar ) )
00169 FEP( INT( foo bar ) )
00170 LAM( INT( foo bar ) )
00171 FEP( NOENERGY( foo bar ) )
00172 ENERGY( NOENERGY( foo bar ) )
00173 
00174 
00175 // ************************************************************
00176 // function header
00177 void ComputeNonbondedUtil :: NAME
00178   ( nonbonded *params )
00179 
00180 // function body
00181 {
00182   // int NAME;  // to track errors via unused variable warnings
00183 
00184   if ( ComputeNonbondedUtil::commOnly ) return;
00185 
00186   // speedup variables
00187   BigReal *reduction = params->reduction;
00188 
00189   PPROF(
00190   BigReal *pressureProfileReduction = params->pressureProfileReduction;
00191   const BigReal invThickness = 1.0 / pressureProfileThickness;
00192   )
00193 
00194   Pairlists &pairlists = *(params->pairlists);
00195   int savePairlists = params->savePairlists;
00196   int usePairlists = params->usePairlists;
00197   pairlists.reset();
00198   // PAIR(iout << "--------\n" << endi;)
00199 
00200   // local variables
00201   int exclChecksum = 0;
00202   FAST
00203   (
00204   ENERGY( BigReal vdwEnergy = 0; )
00205   SHORT
00206   (
00207   ENERGY( BigReal electEnergy = 0; )
00208   )
00209 
00210   FEP
00211   (
00212   ENERGY( BigReal vdwEnergy_s = 0; )
00213   SHORT
00214   (
00215   ENERGY( BigReal electEnergy_s = 0; )
00216   )
00217   )
00218   
00219   SHORT
00220   (
00221   BigReal virial_xx = 0;
00222   BigReal virial_xy = 0;
00223   BigReal virial_xz = 0;
00224   BigReal virial_yy = 0;
00225   BigReal virial_yz = 0;
00226   BigReal virial_zz = 0;
00227   )
00228   )
00229   FULL
00230   (
00231   ENERGY( BigReal fullElectEnergy = 0; )
00232   FEP
00233   (
00234   ENERGY( BigReal fullElectEnergy_s = 0; )
00235   )
00236   BigReal fullElectVirial_xx = 0;
00237   BigReal fullElectVirial_xy = 0;
00238   BigReal fullElectVirial_xz = 0;
00239   BigReal fullElectVirial_yy = 0;
00240   BigReal fullElectVirial_yz = 0;
00241   BigReal fullElectVirial_zz = 0;
00242   )
00243 
00244   // Bringing stuff into local namespace for speed.
00245 
00246   const BigReal offset_x = params->offset.x;
00247   const BigReal offset_y = params->offset.y;
00248   const BigReal offset_z = params->offset.z;
00249 
00250   register const BigReal plcutoff2 = \
00251                         params->plcutoff * params->plcutoff;
00252   register const BigReal groupplcutoff2 = \
00253                         params->groupplcutoff * params->groupplcutoff;
00254   const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00255   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00256   LJTable::TableEntry ljNull;  ljNull.A = 0; ljNull.B = 0;
00257   const LJTable::TableEntry* const lj_null_pars = &ljNull;
00258   const Molecule* const mol = ComputeNonbondedUtil:: mol;
00259   SHORT
00260   (
00261   const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00262   )
00263   FULL
00264   (
00265   SHORT
00266   (
00267   const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00268   )
00269   NOSHORT
00270   (
00271 //#if 1 ALCH(-1)
00272   const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00273 //#else  // have to switch this for ALCH
00274 //  BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00275 //#endif
00276   )
00277   )
00278   const BigReal scaling = ComputeNonbondedUtil:: scaling;
00279   const BigReal modf_mod = 1.0 - scale14;
00280   FAST
00281   (
00282   const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00283   const BigReal c1 = ComputeNonbondedUtil:: c1;
00284   const BigReal c3 = ComputeNonbondedUtil:: c3;
00285   )
00286   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00287   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00288   // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00289   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00290 
00291   ALCH(
00292     const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00293     const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00294     const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00295     const BigReal fepElecLambdaStart = ComputeNonbondedUtil::fepElecLambdaStart;
00296     const BigReal fepVdwLambdaEnd = ComputeNonbondedUtil::fepVdwLambdaEnd;
00297     const BigReal fepVdwShiftCoeff = ComputeNonbondedUtil::fepVdwShiftCoeff;
00298 
00299     /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00300     BigReal lambdaUp = ComputeNonbondedUtil::lambda;
00301     BigReal elecLambdaUp =  (lambdaUp <= fepElecLambdaStart)? 0. : \
00302               (lambdaUp - fepElecLambdaStart) / (1. - fepElecLambdaStart);
00303     BigReal vdwLambdaUp = 
00304         (lambdaUp >= fepVdwLambdaEnd)? 1. : lambdaUp / fepVdwLambdaEnd; 
00305     BigReal vdwShiftUp = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambdaUp);
00306     FEP(BigReal lambda2Up = ComputeNonbondedUtil::lambda2;)
00307     FEP(BigReal elecLambda2Up = (lambda2Up <= fepElecLambdaStart)? 0. : \
00308               (lambda2Up - fepElecLambdaStart) / (1. - fepElecLambdaStart);)
00309     FEP(BigReal vdwLambda2Up = 
00310         (lambda2Up >= fepVdwLambdaEnd)? 1. : lambda2Up / fepVdwLambdaEnd;) 
00311     FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambda2Up);)
00312 
00313         
00314     /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00315     BigReal lambdaDown = 1 - ComputeNonbondedUtil::lambda;
00316     BigReal elecLambdaDown =  (lambdaDown <= fepElecLambdaStart)? 0. : \
00317               (lambdaDown - fepElecLambdaStart) / (1. - fepElecLambdaStart);
00318     BigReal vdwLambdaDown = 
00319         (lambdaDown >= fepVdwLambdaEnd)? 1. : lambdaDown / fepVdwLambdaEnd; 
00320     BigReal vdwShiftDown = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambdaDown);
00321     FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::lambda2;)
00322     FEP(BigReal elecLambda2Down = (lambda2Down <= fepElecLambdaStart)? 0. : \
00323               (lambda2Down - fepElecLambdaStart) / (1. - fepElecLambdaStart); )
00324     FEP(BigReal vdwLambda2Down = 
00325         (lambda2Down >= fepVdwLambdaEnd)? 1. : lambda2Down / fepVdwLambdaEnd; )
00326     FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambda2Down);)
00327 
00328   
00329   // Thermodynamic Integration Notes (F. Buelens): 
00330   // Separation of atom pairs into different pairlist according to lambda
00331   // coupling, for alchemical free energy calculations. Normal pairlists
00332   // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00333   // (pairlist[nxm][12]_save are created for atoms switched up or down with
00334   // lambda respectively.
00335   // This makes TI coding far easier and more readable, since it's necessary 
00336   // to store dU/dlambda in different variables depending on whether atoms are
00337   // being switched up or down. Further, it allows Jordi's explicit coding of 
00338   // the separation-shifted vdW potential to stay in place without a big 
00339   // performance hit, since it's no longer necessary to calculate vdW potentials
00340   // explicity for the bulk (non-alchemical) interactions - so that part of the 
00341   // free energy code stays readable and easy to modify. Finally there should
00342   // be some performance gain over the old FEP implementation as we no
00343   // longer have to check the partitions of each atom pair and switch
00344   // parameters accordingly for every single NonbondedBase2.h loop - this is 
00345   // done at the pairlist level
00346   
00347   int pswitchTable[3*3];
00348   // determines which pairlist alchemical pairings get sent to
00349   // 0: non-alchemical pairs, partition 0 <-> partition 0
00350   // 1: atoms scaled up as lambda increases, p0<->p1
00351   // 2: atoms scaled down as lambda increases, p0<->p2
00352   // all p1<->p2 interactions to be dropped (99)
00353   // in general, 'up' refers to 1, 'down' refers to 2
00354   for (int ip=0; ip<3; ++ip) {
00355     for (int jp=0; jp<3; ++jp ) {
00356       pswitchTable[ip+3*jp] = 0;
00357       if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00358       if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00359       if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00360 
00361       if (! ComputeNonbondedUtil::decouple) {
00362         // no decoupling: interactions within a partition are treated like
00363         // normal alchemical pairs
00364         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00365         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00366       }
00367       if (ComputeNonbondedUtil::decouple) {
00368         // decoupling: PME calculates extra grids so that while PME 
00369         // interaction with the full system is switched off, a new PME grid
00370         // containing only alchemical atoms is switched on. Full interactions 
00371         // between alchemical atoms are maintained; potentials within one 
00372         // partition need not be scaled here.
00373         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00374         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00375       }
00376     }
00377   }
00378 
00379   // dU/dlambda variables for thermodynamic integration
00380   TI(
00381       BigReal vdwEnergy_ti_1 = 0;
00382       BigReal vdwEnergy_ti_2 = 0;
00383       SHORT(BigReal electEnergy_ti_1 = 0;
00384       BigReal electEnergy_ti_2 = 0;)
00385       FULL(BigReal fullElectEnergy_ti_1 = 0; 
00386       BigReal fullElectEnergy_ti_2 = 0;) 
00387    )
00388   )
00389         
00390         
00391   const int i_upper = params->numAtoms[0];
00392   register const int j_upper = params->numAtoms[1];
00393   register int j;
00394   register int i;
00395   const CompAtom *p_0 = params->p[0];
00396   const CompAtom *p_1 = params->p[1];
00397 #ifdef MEM_OPT_VERSION
00398   const CompAtomExt *pExt_0 = params->pExt[0];
00399   const CompAtomExt *pExt_1 = params->pExt[1];
00400 #endif
00401 
00402   char * excl_flags_buff = 0;
00403   const int32 * full_excl = 0;
00404   const int32 * mod_excl = 0;
00405 
00406   plint *pairlistn_save;  int npairn;
00407   plint *pairlistx_save;  int npairx;
00408   plint *pairlistm_save;  int npairm;
00409 
00410   ALCH(
00411   plint *pairlistnA1_save;  int npairnA1;
00412   plint *pairlistxA1_save;  int npairxA1;
00413   plint *pairlistmA1_save;  int npairmA1;
00414   plint *pairlistnA2_save;  int npairnA2;
00415   plint *pairlistxA2_save;  int npairxA2;
00416   plint *pairlistmA2_save;  int npairmA2;
00417   )
00418 
00419   NBWORKARRAYSINIT(params->workArrays);
00420 
00421   int arraysize = j_upper+5;
00422 
00423   NBWORKARRAY(plint,pairlisti,arraysize)
00424   NBWORKARRAY(BigReal,r2list,arraysize)
00425 
00426   union { double f; int32 i[2]; } byte_order_test;
00427   byte_order_test.f = 1.0;  // should occupy high-order bits only
00428   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00429 
00430   if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00431 
00432 
00433   // DMK - Atom Sort
00434   // 
00435   // Basic Idea: Determine the line between the center of masses of
00436   //   the two patches.  Project and then sort the lists of atoms
00437   //   along this line.  Then, as the pairlists are being generated
00438   //   for the atoms in the first atom list, use the sorted
00439   //   list to only add atoms from the second list that are between
00440   //   +/- ~cutoff from the atoms position on the line.
00441   #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00442 
00443   // NOTE: For the first atom list, just the sort values themselves need to be
00444   // calculated (a BigReal vs. SortEntry data structure).  However, a second
00445   // SortEntry array is needed to perform the merge sort on the second list of
00446   // atoms.  Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00447   // list of atoms and then use the left over array to make a version of the
00448   // list that only includes non-fixed groups (fixg).
00449 
00450   NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00451   NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00452   NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00453 
00454   register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00455   register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00456 
00457   int p_0_sortValues_len = 0;
00458   int p_1_sortValues_len = 0;
00459   int p_1_sortValues_fixg_len = 0;
00460 
00461   BigReal atomSort_windowRadius = 0.0;
00462 
00463   if (savePairlists || !usePairlists) {
00464 
00466 
00467     // Center of mass for the first patch's atoms
00468     register BigReal p_0_avgX = 0.0;
00469     register BigReal p_0_avgY = 0.0;
00470     register BigReal p_0_avgZ = 0.0;
00471     {
00472       register int atomCount = 0;
00473       register int i = 0;
00474 
00475       register BigReal p_x = p_0->position.x;
00476       register BigReal p_y = p_0->position.y;
00477       register BigReal p_z = p_0->position.z;
00478       register int hgs = p_0->hydrogenGroupSize;
00479 
00480       while (i < i_upper) {
00481 
00482         i += hgs;
00483         register const CompAtom* p_i = p_0 + i;
00484         hgs = p_i->hydrogenGroupSize;
00485 
00486         p_0_avgX += p_x;
00487         p_0_avgY += p_y;
00488         p_0_avgZ += p_z;
00489         p_x = p_i->position.x;
00490         p_y = p_i->position.y;
00491         p_z = p_i->position.z;
00492 
00493         atomCount++;
00494       }
00495 
00496       p_0_avgX = p_0_avgX / ((double)atomCount);
00497       p_0_avgY = p_0_avgY / ((double)atomCount);
00498       p_0_avgZ = p_0_avgZ / ((double)atomCount);
00499     }
00500 
00501     // Center of mass for the second patch's atoms
00502     register BigReal p_1_avgX = 0.0;
00503     register BigReal p_1_avgY = 0.0;
00504     register BigReal p_1_avgZ = 0.0;
00505     {
00506       register int atomCount = 0;
00507       register int i = 0;
00508 
00509       register BigReal p_x = p_1->position.x;
00510       register BigReal p_y = p_1->position.y;
00511       register BigReal p_z = p_1->position.z;
00512       register int hgs = p_1->hydrogenGroupSize;
00513 
00514       while (i < j_upper) {
00515 
00516         i += hgs;
00517         register const CompAtom* p_i = p_1 + i;
00518         hgs = p_i->hydrogenGroupSize;
00519 
00520         p_1_avgX += p_x;
00521         p_1_avgY += p_y;
00522         p_1_avgZ += p_z;
00523         p_x = p_i->position.x;
00524         p_y = p_i->position.y;
00525         p_z = p_i->position.z;
00526 
00527         atomCount++;
00528       }
00529 
00530       p_1_avgX = p_1_avgX / ((double)atomCount);
00531       p_1_avgY = p_1_avgY / ((double)atomCount);
00532       p_1_avgZ = p_1_avgZ / ((double)atomCount);
00533     }
00534 
00535     // Need to move the points away from each other (so all of the
00536     // projected positions are between the two points).
00537     //   P0 = ( p_0_avgX, p_0_avgY, p_0_avgZ )
00538     //   P1 = ( p_1_avgX, p_1_avgY, p_1_avgZ )
00539     // P1 - P0 = V... so, V points from P0 towards P1
00540     //   Scale V so that |V| > corner to opposite corner distance of Patch
00541     //   NOTE: For now, scale to cutoff * 3
00542     register BigReal V_length;   // = |V|    (length of V)
00543     register BigReal V_length2;  // = |V|^2  (length of V squared)
00544     {
00545       register BigReal V_X = p_1_avgX - p_0_avgX;
00546       register BigReal V_Y = p_1_avgY - p_0_avgY;
00547       register BigReal V_Z = p_1_avgZ - p_0_avgZ;
00548       V_length2 = (V_X * V_X) + (V_Y * V_Y) + (V_Z * V_Z);
00549       V_length = sqrt(V_length2);
00550       register BigReal cutoff = ComputeNonbondedUtil::cutoff;
00551       register BigReal scaleFactor = (3.0 * cutoff) / V_length;
00552       V_X *= scaleFactor;
00553       V_Y *= scaleFactor;
00554       V_Z *= scaleFactor;
00555 
00556       // Move the points away from each other (P0 -= V, P1 += V)
00557       p_0_avgX -= V_X;
00558       p_0_avgY -= V_Y;
00559       p_0_avgZ -= V_Z;
00560       p_1_avgX += V_X;
00561       p_1_avgY += V_Y;
00562       p_1_avgZ += V_Z;
00563 
00564       // Recalculate V_length2 and V_length now that the points have moved
00565       V_length += 2.0 * (scaleFactor * V_length);
00566       V_length2 = V_length * V_length;
00567     }
00568 
00570 
00571     // For every atom, determine the sort value (i.e., the distance
00572     //   along 'the line, L, between P0 and P1' from P0 to the projection of
00573     //   the atom on L).
00574     // Let PA = position of atom in the patch space
00575     //   a = | P0 - PA |
00576     //   b = | P1 - PA |
00577     //   c = | P0 - P1 | = V_length
00578     // Case:  Triangle formed by P0, P1, and PA (position of atom).
00579     //   Projection of PA on the line L  between P0 and P1 (call this point
00580     //   PAp, note that line between P0 and P1 is perpendicular to the
00581     //   line between PA and PAp AND PAp is between P0 and P1 on L).
00582     // then ... 'distance between P0 and PAp' = (a^2 - b^2 + c^2) / (2 * c)
00583     // NOTE:  The '1 / (2 * c)' portion of this is constant since P0 and P1
00584     //   are now fixed.  Place this constant in a register to avoid repeating
00585     //   the divide calculation.
00586     register const BigReal V_length_multiplier = (1.0 / (2.0 * V_length));
00587 
00588     // NOTE: The actual test below is 'distance^2 + r2_delta < groupplcutoff2'
00589     //  or... 'distance^2 < groupplcutoff2 - r2_delta'
00590     //  so... window radius needs to be 'distance < sqrt(groupplcutoff2 - r2_delta)'
00591     atomSort_windowRadius = sqrt(groupplcutoff2 - r2_delta);
00592 
00593     // Atom list 1
00594     {
00595       register int i = 0;
00596       register unsigned int ngia = p_1->nonbondedGroupIsAtom;
00597       register unsigned int hgs = p_1->hydrogenGroupSize;
00598       register BigReal p_x = p_1->position.x;
00599       register BigReal p_y = p_1->position.y;
00600       register BigReal p_z = p_1->position.z;
00601       register int index = 0;
00602 
00603       // Try to take advantage of multiply-add instructions by pulling out the
00604       //   'V_length2 * V_length_multiplier' term in the sortVal calculation.
00605       //   So...  '(a2 - b2 + V_length2) * V_length_multiplier' becomes...
00606       //          '((a2 - b2) * V_length_multiplier) + (V_length2 * V_length_multiplier)'
00607       register const BigReal sortVal_addAmmount = V_length2 * V_length_multiplier;
00608 
00609       while (i < j_upper) {
00610 
00611         // Advance i... NOTE: ngia is either 0 or 1 so if ngia is set '1' is
00612         //   added to 'i'.  Otherwise, the value of 'hgs' is added to 'i'.
00613         i += (ngia) + ((1 - ngia) * hgs);
00614 
00615         // Update p_i_next to point to the atom for the next iteration and begin
00616         //   loading the 'ngia' and 'hgs' values for that atom.
00617         register const CompAtom* p_i_next = p_1 + i;
00618         ngia = p_i_next->nonbondedGroupIsAtom;
00619         hgs = p_i_next->hydrogenGroupSize;
00620 
00621         // Use the position values then start loading the next atom's position values.
00622         register BigReal a_x_diff = (p_0_avgX - p_x);
00623         register BigReal a_y_diff = (p_0_avgY - p_y);
00624         register BigReal a_z_diff = (p_0_avgZ - p_z);
00625         register BigReal b_x_diff = (p_1_avgX - p_x);
00626         register BigReal b_y_diff = (p_1_avgY - p_y);
00627         register BigReal b_z_diff = (p_1_avgZ - p_z);
00628         p_x = p_i_next->position.x;
00629         p_y = p_i_next->position.y;
00630         p_z = p_i_next->position.z;
00631 
00632         // Calculate and store the sort value for this atom (NOTE: c = V_length and c^2 = V_length2)
00633         register BigReal a2 = (a_x_diff * a_x_diff) + (a_y_diff * a_y_diff) + (a_z_diff * a_z_diff);
00634         register BigReal b2 = (b_x_diff * b_x_diff) + (b_y_diff * b_y_diff) + (b_z_diff * b_z_diff);
00635         register BigReal sortVal = (a2 - b2) * V_length_multiplier + sortVal_addAmmount;
00636         register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00637         p_1_sortValStorePtr->index = index;
00638         p_1_sortValStorePtr->sortValue = sortVal;
00639         p_1_sortValues_len++;
00640         index = i;
00641       }
00642     }
00643 
00644     // NOTE: This list and another version of it with only non-fixed
00645     //   atoms will be used in place of grouplist and fixglist.
00646     {
00647       #if 0   // Selection Sort
00648 
00649         for (int i = 0; i < p_1_sortValues_len; i++) {
00650 
00651           // Search through the remaining elements, finding the lowest
00652           //   value, and then swap it with the first remaining element.
00653           //   Start by assuming the first element is the smallest.
00654           register int smallestIndex = i;
00655           register BigReal smallestValue = p_1_sortValues[i].sortValue;
00656           for (int j = i + 1; j < p_1_sortValues_len; j++) {
00657             register BigReal currentValue = p_1_sortValues[j].sortValue;
00658             if (currentValue < smallestValue) {
00659               smallestIndex = j;
00660               smallestValue = currentValue;
00661             }
00662           }
00663 
00664           // Swap the first remaining element with the smallest element
00665           if (smallestIndex != i) {
00666             register SortEntry* entryA = p_1_sortValues + i;
00667             register SortEntry* entryB = p_1_sortValues + smallestIndex;
00668             register unsigned int tmpIndex = entryA->index;
00669             register BigReal tmpSortValue = entryA->sortValue;
00670             entryA->index = entryB->index;
00671             entryA->sortValue = entryB->sortValue;
00672             entryB->index = tmpIndex;
00673             entryB->sortValue = tmpSortValue;
00674           }
00675         }
00676 
00677       #elif 0   // Bubble Sort
00678 
00679         register int keepSorting = 0;
00680         do {
00681 
00682           // Reset the keepSorting flag (assume no swaps will occur)
00683           keepSorting = 0;
00684 
00685           // Loop through the pairs and swap if needed
00686           register SortEntry* sortEntry1 = p_1_sortValues;
00687           for (int i = 1; i < p_1_sortValues_len; i++) {
00688             register SortEntry* sortEntry0 = sortEntry1;
00689             sortEntry1 = p_1_sortValues + i;
00690             register BigReal sortEntry0_sortValue = sortEntry0->sortValue;
00691             register BigReal sortEntry1_sortValue = sortEntry1->sortValue;
00692             if (sortEntry0_sortValue > sortEntry1_sortValue) {
00693               register int sortEntry0_index = sortEntry0->index;
00694               register int sortEntry1_index = sortEntry1->index;
00695               sortEntry0->index = sortEntry1_index;
00696               sortEntry0->sortValue = sortEntry1_sortValue;
00697               sortEntry1->index = sortEntry0_index;
00698               sortEntry1->sortValue = sortEntry0_sortValue;
00699               keepSorting = 1;
00700             }
00701           }
00702 
00703         } while (keepSorting != 0);  // Loop again if at least one set of
00704                                      //   elements was swapped.
00705       #else   // Merge Sort
00706 
00707         #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00708 
00709         register SortEntry* srcArray = p_1_sortValues;
00710         register SortEntry* dstArray = p_1_sortValues_fixg; //tmpArray;
00711 
00712         // Start with each element being a separate list.  Start
00713         //   merging the "lists" into larger lists.
00714         register int subListSize = 1;
00715         while (subListSize < p_1_sortValues_len) {
00716 
00717           // NOTE: This iteration consumes sublists of length
00718           //   subListSize and produces sublists of length
00719           //   (2*subListSize).  So, keep looping while the length of a
00720           //   single sorted sublist is not the size of the entire array.
00721 
00722           // Iterate through the lists, merging each consecutive pair of lists.
00723           register int firstListOffset = 0;
00724           while (firstListOffset < p_1_sortValues_len) {
00725 
00727             register int numElements = min(2 * subListSize, p_1_sortValues_len - firstListOffset);
00728             register int list0len;
00729             register int list1len;
00730             if (numElements > subListSize) {
00731               list0len = subListSize;                // First list full
00732               list1len = numElements - subListSize;  // 1+ elements in second list
00733             } else {
00734               list0len = numElements;                // 1+ elements in first list
00735               list1len = 0;                          // Zero elements in second list
00736             }
00737 
00738             register SortEntry* list0ptr = srcArray + firstListOffset;
00739             register SortEntry* list1ptr = list0ptr + subListSize;
00740             register SortEntry* dstptr = dstArray + firstListOffset;
00741 
00743 
00744             // While there are elements in both lists, pick from one
00745             while (list0len > 0 && list1len > 0) {
00746 
00747               register BigReal sortValue0 = list0ptr->sortValue;
00748               register BigReal sortValue1 = list1ptr->sortValue;
00749 
00750               if (sortValue0 < sortValue1) {  // choose first list (list0)
00751 
00752                 // Copy the value from srcArray to dstArray
00753                 register int index0 = list0ptr->index;
00754                 dstptr->sortValue = sortValue0;
00755                 dstptr->index = index0;
00756 
00757                 // Move the pointers forward for the sublists
00758                 dstptr++;
00759                 list0ptr++;
00760                 list0len--;
00761 
00762               } else {                        // choose second list (list1)
00763 
00764                 // Copy the value from srcArray to dstArray
00765                 register int index1 = list1ptr->index;
00766                 dstptr->sortValue = sortValue1;
00767                 dstptr->index = index1;
00768 
00769                 // Move the pointers forward for the sublists
00770                 dstptr++;
00771                 list1ptr++;
00772                 list1len--;
00773               }
00774 
00775             } // end while (list0len > 0 && list1len > 0)
00776 
00777             // NOTE: Either list0len or list1len is zero at this point
00778             //   so only one of the following loops should execute.
00779 
00780             // Drain remaining elements from the first list (list0)
00781             while (list0len > 0) {
00782 
00783               // Copy the value from srcArray to dstArray
00784               register BigReal sortValue0 = list0ptr->sortValue;
00785               register int index0 = list0ptr->index;
00786               dstptr->sortValue = sortValue0;
00787               dstptr->index = index0;
00788 
00789               // Move the pointers forward for the sublists
00790               dstptr++;
00791               list0ptr++;
00792               list0len--;
00793 
00794             } // end while (list0len > 0)
00795 
00796             // Drain remaining elements from the first list (list1)
00797             while (list1len > 0) {
00798 
00799               // Copy the value from srcArray to dstArray
00800               register BigReal sortValue1 = list1ptr->sortValue;
00801               register int index1 = list1ptr->index;
00802               dstptr->sortValue = sortValue1;
00803               dstptr->index = index1;
00804 
00805               // Move the pointers forward for the sublists
00806               dstptr++;
00807               list1ptr++;
00808               list1len--;
00809 
00810             } // end while (list1len > 0)
00811 
00812             // Move forward to the next pair of sub-lists
00813             firstListOffset += (2 * subListSize);
00814 
00815           } // end while (firstListOffset < p_1_sortValues_len) {
00816 
00817           // Swap the dstArray and srcArray pointers
00818           register SortEntry* tmpPtr = dstArray;
00819           dstArray = srcArray;
00820           srcArray = tmpPtr;
00821 
00822           // Double the subListSize
00823           subListSize <<= 1;
00824 
00825         }  // end while (subListSize < p_1_sortValues_len)
00826 
00827         // Set the sort values pointers (NOTE: srcArray and dstArray are
00828         //   swapped at the end of each iteration of the merge sort outer-loop).
00829         p_1_sortValues_fixg = dstArray;
00830         p_1_sortValues = srcArray;
00831 
00832         #else
00833 
00834         // NOTE: This macro "returns" either val0 (if test == 0) or val1 (if
00835         // test == 1).  It expects test to be either 0 or 1 (no other values).
00836         #define TERNARY_ASSIGN(test, val0, val1)   ((test * val0) + ((1 - test) * val1))
00837 
00838         register SortEntry* srcArray = p_1_sortValues;
00839         register SortEntry* dstArray = p_1_sortValues_fixg; //tmpArray;
00840 
00841         // Start with each element being a separate list.  Start
00842         //   merging the "lists" into larger lists.
00843         register int subListSize = 1;
00844         while (subListSize < p_1_sortValues_len) {
00845 
00846           // NOTE: This iteration consumes sublists of length
00847           //   subListSize and produces sublists of length
00848           //   (2*subListSize).  So, keep looping while the length of a
00849           //   single sorted sublist is not the size of the entire array.
00850 
00851           // Iterate through the lists, merging each consecutive pair of lists.
00852           register int firstListOffset = 0;
00853           while (firstListOffset < p_1_sortValues_len) {
00854 
00856 
00857             // Calculate the number of elements for both sublists...
00858             //   min(2 * subListSize, p_1_sortValues_len - firstListOffset);
00859             register int numElements;
00860             {
00861               register int numElements_val0 = 2 * subListSize;
00862               register int numElements_val1 = p_1_sortValues_len - firstListOffset;
00863               register bool numElements_test = (numElements_val0 < numElements_val1);
00864               numElements = TERNARY_ASSIGN(numElements_test, numElements_val0, numElements_val1);
00865             }
00866 
00867             // Setup the pointers for the source and destination arrays
00868             register SortEntry* dstptr = dstArray + firstListOffset;    // destination array pointer
00869             register SortEntry* list0ptr = srcArray + firstListOffset;  // source list 0 pointer
00870             register SortEntry* list1ptr = list0ptr + subListSize;      // source list 1 pointer
00871             register SortEntry* list0ptr_end;  // pointer to end of source list0's elements (element after last)
00872             register SortEntry* list1ptr_end;  // pointer to end of source list1's elements (element after last)
00873             {
00874               register bool lenTest = (numElements > subListSize);
00875               register int list0len_val0 = subListSize;
00876               register int list1len_val0 = numElements - subListSize;
00877               register int list0len_val1 = numElements;  // NOTE: list1len_val1 = 0
00878               register int list0len = TERNARY_ASSIGN(lenTest, list0len_val0, list0len_val1);
00879               register int list1len = TERNARY_ASSIGN(lenTest, list1len_val0, 0);
00880               list0ptr_end = list0ptr + list0len;
00881               list1ptr_end = list1ptr + list1len;
00882             }
00883 
00884             // The firstListOffset variable won't be used again until the next
00885             //   iteration, so go ahead and update it now...
00886             //   Move forward to the next pair of sub-lists
00887             firstListOffset += (2 * subListSize);
00888 
00890 
00891             // Pre-load values from both source arrays
00892             register BigReal sortValue0 = list0ptr->sortValue;
00893             register BigReal sortValue1 = list1ptr->sortValue;
00894             register int index0 = list0ptr->index;
00895             register int index1 = list1ptr->index;
00896 
00897             // While both lists have at least one element in them, compare the
00898             //   heads of each list and place the smaller of the two in the
00899             //   destination array.
00900             while (list0ptr < list0ptr_end && list1ptr < list1ptr_end) {
00901 
00902               // Compare the values
00903               register bool test = (sortValue0 < sortValue1);
00904 
00905               // Place the "winner" in the destination array
00906               dstptr->sortValue = TERNARY_ASSIGN(test, sortValue0, sortValue1);
00907               dstptr->index = TERNARY_ASSIGN(test, index0, index1);
00908               dstptr++;
00909 
00910               // Update the pointers
00911               list0ptr += TERNARY_ASSIGN(test, 1, 0);
00912               list1ptr += TERNARY_ASSIGN(test, 0, 1);
00913 
00914               // Refill the sortValue and index register
00915               // NOTE: These memory locations are likely to be in cache
00916               sortValue0 = list0ptr->sortValue;
00917               sortValue1 = list1ptr->sortValue;
00918               index0 = list0ptr->index;
00919               index1 = list1ptr->index;
00920 
00921             } // end while (list0ptr < list0ptr_end && list1ptr < list1ptr_end)
00922 
00923             // NOTE: At this point, at least one of the lists is empty so no
00924             //   more than one of the loops will be executed.
00925 
00926             // Drain the remaining elements from list0
00927             while (list0ptr < list0ptr_end) {
00928 
00929               // Place the value into the destination array
00930               dstptr->sortValue = sortValue0;
00931               dstptr->index = index0;
00932               dstptr++;
00933 
00934               // Load the next entry in list0
00935               list0ptr++;
00936               sortValue0 = list0ptr->sortValue;
00937               index0 = list0ptr->index;
00938 
00939             } // end while (list0ptr < list0ptr_end)
00940 
00941             // Drain the remaining elements from list1
00942             while (list1ptr < list1ptr_end) {
00943 
00944               // Place the value into the destination array
00945               dstptr->sortValue = sortValue1;
00946               dstptr->index = index1;
00947               dstptr++;
00948 
00949               // Load the next entry in list1
00950               list1ptr++;
00951               sortValue1 = list1ptr->sortValue;
00952               index1 = list1ptr->index;
00953 
00954             } // end while (list1ptr < list1ptr_end)
00955 
00956           } // end while (firstListOffset < p_1_sortValues_len) {
00957 
00958           // Swap the dstArray and srcArray pointers
00959           register SortEntry* tmpPtr = dstArray;
00960           dstArray = srcArray;
00961           srcArray = tmpPtr;
00962 
00963           // Double the subListSize
00964           subListSize <<= 1;
00965 
00966         }  // end while (subListSize < p_1_sortValues_len)
00967 
00968         // Set the sort values pointers (NOTE: srcArray and dstArray are
00969         //   swapped at the end of each iteration of the merge sort outer-loop).
00970         p_1_sortValues_fixg = dstArray;
00971         p_1_sortValues = srcArray;
00972 
00973         #endif
00974 
00975       #endif
00976     }
00977 
00978 
00979     // Atom list 0
00980     {
00981       // Loop through the first list of atoms (p[0])
00982       // NOTE: This list will NOT be sorted, so the indexes into p_0
00983       //   and p_0_sortValues match up.  Also, all atoms in this list
00984       //   (the outer-loop list, need a sort value so do all atoms).
00985 
00986       register int i = 0;
00987       register unsigned int ngia = p_0->nonbondedGroupIsAtom;
00988       register unsigned int hgs = p_0->hydrogenGroupSize;
00989       register BigReal p_x = p_0->position.x;
00990       register BigReal p_y = p_0->position.y;
00991       register BigReal p_z = p_0->position.z;
00992 
00993       // Try to take advantage of multiply-add instructions by pulling out the
00994       //   'V_length2 * V_length_multiplier' term in the sortVal calculation.
00995       //   So...  '(a2 - b2 + V_length2) * V_length_multiplier' becomes...
00996       //          '((a2 - b2) * V_length_multiplier) + (V_length2 * V_length_multiplier)'
00997       register const BigReal sortVal_addAmmount = V_length2 * V_length_multiplier;
00998 
00999       while (i < i_upper) {
01000 
01001         // Advance i... NOTE: ngia is either 0 or 1 so if ngia is set '1' is
01002         //   added to 'i'.  Otherwise, the value of 'hgs' is added to 'i'.
01003         register BigReal* p_0_sortValStorePtr = p_0_sortValues + i;
01004         i += (ngia) + ((1 - ngia) * hgs);
01005 
01006         // Update p_i_next to point to the atom for the next iteration and begin
01007         //   loading the 'ngia' and 'hgs' values for that atom.
01008         register const CompAtom* p_i_next = p_0 + i;
01009         ngia = p_i_next->nonbondedGroupIsAtom;
01010         hgs = p_i_next->hydrogenGroupSize;
01011 
01012         // Use the position values then start loading the next atom's position values.
01013         register BigReal a_x_diff = (p_0_avgX - p_x);
01014         register BigReal a_y_diff = (p_0_avgY - p_y);
01015         register BigReal a_z_diff = (p_0_avgZ - p_z);
01016         register BigReal b_x_diff = (p_1_avgX - p_x);
01017         register BigReal b_y_diff = (p_1_avgY - p_y);
01018         register BigReal b_z_diff = (p_1_avgZ - p_z);
01019         p_x = p_i_next->position.x;
01020         p_y = p_i_next->position.y;
01021         p_z = p_i_next->position.z;
01022 
01023         // Calculate and store the sort value for this atom (NOTE: c = V_length and c^2 = V_length2)
01024         register BigReal a2 = (a_x_diff * a_x_diff) + (a_y_diff * a_y_diff) + (a_z_diff * a_z_diff);
01025         register BigReal b2 = (b_x_diff * b_x_diff) + (b_y_diff * b_y_diff) + (b_z_diff * b_z_diff);
01026         register BigReal sortVal = (a2 - b2) * V_length_multiplier + sortVal_addAmmount;
01027         *p_0_sortValStorePtr = sortVal;
01028       }
01029 
01030       p_0_sortValues_len = i_upper;
01031     }
01032 
01033   }  // end if (savePairlists || !usePairlists)
01034   #endif
01035 
01036 
01037   // DMK - Atom Sort
01038   // NOTE: These arrays aren't used for pair computes that are spacially
01039   //   sorting atoms.
01040   #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
01041     NBWORKARRAY(plint,grouplist,arraysize);
01042     NBWORKARRAY(plint,fixglist,arraysize);
01043   #endif
01044 
01045   NBWORKARRAY(plint,goodglist,arraysize);
01046   NBWORKARRAY(plint,pairlistx,arraysize);
01047   NBWORKARRAY(plint,pairlistm,arraysize);
01048   NBWORKARRAY(plint,pairlist,arraysize);
01049   NBWORKARRAY(plint,pairlist2,arraysize);
01050   ALCH(
01051   NBWORKARRAY(plint,pairlistnAlch,arraysize);
01052   NBWORKARRAY(plint,pairlistnA0,arraysize);
01053   NBWORKARRAY(plint,pairlistnA1,arraysize);
01054   NBWORKARRAY(plint,pairlistxA1,arraysize);
01055   NBWORKARRAY(plint,pairlistmA1,arraysize);
01056   NBWORKARRAY(plint,pairlistnA2,arraysize);
01057   NBWORKARRAY(plint,pairlistxA2,arraysize);
01058   NBWORKARRAY(plint,pairlistmA2,arraysize);
01059   )
01060 
01061   NBWORKARRAY(short,vdwtype_array,j_upper+5);
01062 
01063   
01064 #ifdef MEM_OPT_VERSION
01065   for (j = 0; j < j_upper; ++j){
01066     vdwtype_array[j] = pExt_1[j].vdwType;
01067   }
01068 #else
01069   const Atom *atomlist = mol->getAtoms();
01070 #ifdef ARCH_POWERPC
01071 #pragma disjoint (*atomlist, *vdwtype_array)
01072 #pragma disjoint (*p_1, *vdwtype_array)
01073 #pragma unroll(4)
01074 #endif
01075   for (j = 0; j < j_upper; ++j) {
01076     int id = p_1[j].id;
01077     vdwtype_array [j] = atomlist[id].vdw_type;
01078   }
01079 #endif
01080 
01081   int fixg_upper = 0;
01082   int g_upper = 0;
01083 
01084   if ( savePairlists || ! usePairlists ) {
01085 
01086 
01087     // DMK - Atom Sort
01088     #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01089 
01090       // Create a sorted list of non-fixed groups
01091       register int fixg = 0;
01092       for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
01093         register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
01094         register int p_1_index = p_1_sortEntry->index;
01095         if (!p_1[p_1_index].groupFixed) {
01096           register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
01097           p_1_sortEntry_fixg->index = p_1_sortEntry->index;
01098           p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
01099           p_1_sortValues_fixg_len++;
01100         }
01101       }
01102 
01103     #else
01104 
01105       register int g = 0;
01106       for ( j = 0; j < j_upper; ++j ) {
01107         if ( p_1[j].hydrogenGroupSize || p_1[j].nonbondedGroupIsAtom ) {
01108           grouplist[g++] = j;
01109         }
01110       }
01111       g_upper = g;
01112       if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
01113       int fixg = 0;
01114 
01115       if ( fixedAtomsOn ) {
01116         for ( g = 0; g < g_upper; ++g ) {
01117           j = grouplist[g];
01118           if ( ! p_1[j].groupFixed ) {
01119             fixglist[fixg++] = j;
01120           }
01121         }
01122       }
01123 
01124       fixg_upper = fixg;
01125       if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
01126 
01127     #endif // NAMD_ComputeNonbonded_AtomSort != 0
01128 
01129 
01130 
01131     *(pairlists.newlist(1)) = i_upper;
01132     pairlists.newsize(1);
01133 
01134   } else { // if ( savePairlists || ! usePairlists )
01135 
01136     plint *i_upper_check;
01137     int i_upper_check_count;
01138     pairlists.nextlist(&i_upper_check,&i_upper_check_count);
01139     if ( i_upper_check[0] != i_upper )
01140       NAMD_bug("pairlist i_upper mismatch!");
01141 
01142   } // if ( savePairlists || ! usePairlists )
01143 
01144   SELF(
01145   int j_hgroup = 0;
01146   int g_lower = 0;
01147   int fixg_lower = 0;
01148   )
01149   int pairlistindex=0;
01150   int pairlistoffset=0;
01151 
01152   
01153 
01154 #if ( SHORT( FAST( 1+ ) ) 0 )
01155 #if ( PAIR( 1+ ) 0 )
01156     Force *f_0 = params->ff[0];
01157     Force *f_1 = params->ff[1];
01158 #else
01159 #define f_1 f_0
01160     NBWORKARRAY(Force,f_0,i_upper)
01161     memset( (void*) f_0, 0, i_upper * sizeof(Force) );
01162 #endif
01163 #endif
01164 #if ( FULL( 1+ ) 0 )
01165 #if ( PAIR( 1+ ) 0 )
01166     Force *fullf_0 = params->fullf[0];
01167     Force *fullf_1 = params->fullf[1];
01168 #else
01169 #define fullf_1 fullf_0
01170     NBWORKARRAY(Force,fullf_0,i_upper);
01171     memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
01172 #endif
01173 #endif
01174     
01175 
01176   int numParts = params->numParts;
01177   int myPart = params->minPart;
01178   int groupCount = 0;
01179 
01180   for ( i = 0; i < (i_upper SELF(- 1)); ++i )
01181   {
01182     const CompAtom &p_i = p_0[i];
01183 #ifdef MEM_OPT_VERSION
01184     const CompAtomExt &pExt_i = pExt_0[i];
01185 #endif
01186     if ( p_i.hydrogenGroupSize ) {
01187       //save current group count
01188       int curgrpcount = groupCount;      
01189       //increment group count
01190       groupCount ++;
01191 
01192       if (groupCount >= numParts)
01193         groupCount = 0;
01194       
01195       if ( curgrpcount != myPart ) {
01196         i += p_i.hydrogenGroupSize - 1;
01197         
01198         //Power PC alignment constraint
01199 #ifdef ARCH_POWERPC
01200         __dcbt((void *) &(p_0[i+1]));
01201 #endif
01202         continue;
01203       }
01204     }
01205 
01206     register const BigReal p_i_x = p_i.position.x + offset_x;
01207     register const BigReal p_i_y = p_i.position.y + offset_y;
01208     register const BigReal p_i_z = p_i.position.z + offset_z;
01209 
01210     ALCH(const int p_i_partition = p_i.partition;)
01211 
01212     PPROF(
01213         const int p_i_partition = p_i.partition;
01214         int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
01215         pp_clamp(n1, pressureProfileSlabs);
01216         )
01217 
01218   SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
01219 
01220   if ( savePairlists || ! usePairlists ) {
01221 
01222     if ( ! savePairlists ) pairlists.reset();  // limit space usage
01223 
01224     #ifdef MEM_OPT_VERSION
01225     const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);        
01226     const int excl_min = p_i.id + exclcheck->min;
01227     const int excl_max = p_i.id + exclcheck->max;
01228     #else
01229     const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(p_i.id);
01230     const int excl_min = exclcheck->min;
01231     const int excl_max = exclcheck->max;
01232     #endif
01233     const char * excl_flags_var;
01234     if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
01235     else {  // need to build list on the fly
01236 
01237     //TODO: Should change later!!!!!!!!!! --Chao Mei
01238     //Now just for the sake of passing compilation
01239     #ifndef MEM_OPT_VERSION 
01240       if ( excl_flags_buff ) {
01241         int nl,l;
01242         nl = full_excl[0] + 1;
01243         for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
01244         nl = mod_excl[0] + 1;
01245         for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
01246       } else {
01247         excl_flags_buff = new char[mol->numAtoms];
01248         memset( (void*) excl_flags_buff, 0, mol->numAtoms);
01249       }
01250       int nl,l;
01251       full_excl = mol->get_full_exclusions_for_atom(p_i.id);
01252       nl = full_excl[0] + 1;
01253       for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
01254       mod_excl = mol->get_mod_exclusions_for_atom(p_i.id);
01255       nl = mod_excl[0] + 1;
01256       for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
01257       excl_flags_var = excl_flags_buff;
01258     #endif
01259 
01260     }
01261     const char * const excl_flags = excl_flags_var;
01262 
01263   if (p_i.hydrogenGroupSize || p_i.nonbondedGroupIsAtom) {
01264 
01265     pairlistindex = 0;  // initialize with 0 elements
01266     pairlistoffset=0;
01267     const int groupfixed = ( fixedAtomsOn && p_i.groupFixed );
01268 
01269     // If patch divisions are not made by hydrogen groups, then
01270     // hydrogenGroupSize is set to 1 for all atoms.  Thus we can
01271     // carry on as if we did have groups - only less efficiently.
01272     // An optimization in this case is to not rebuild the temporary
01273     // pairlist but to include every atom in it.  This should be a
01274     // a very minor expense.
01275 
01276     SELF
01277     (
01278       if ( p_i.hydrogenGroupSize ) {
01279         // exclude child hydrogens of i
01280         // j_hgroup = i + p_i.hydrogenGroupSize;  (moved above)
01281         while ( g_lower < g_upper &&
01282                 grouplist[g_lower] < j_hgroup ) ++g_lower;
01283         while ( fixg_lower < fixg_upper &&
01284                 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
01285       }
01286       // add all child or sister hydrogens of i
01287       for ( j = i + 1; j < j_hgroup; ++j ) {
01288         pairlist[pairlistindex++] = j;
01289       }
01290     )
01291 
01292     // add remaining atoms to pairlist via hydrogen groups
01293     register plint *pli = pairlist + pairlistindex;
01294 
01295     {
01296       // DMK - Atom Sort
01297       // Modify the values of g and gu based on the added information
01298       //   of the linear projections (sort values) information.
01299       #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01300 
01301         register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
01302         register int g = 0;
01303         const int gu = ( groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len );
01304 
01305         register BigReal p_i_sortValue = p_0_sortValues[i];
01306         const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
01307 
01308       #else
01309 
01310         register plint *gli = goodglist;
01311         const plint *glist = ( groupfixed ? fixglist : grouplist );
01312         SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
01313         const int gu = ( groupfixed ? fixg_upper : g_upper );
01314         register int g = PAIR(0) SELF(gl);
01315 
01316       #endif
01317 
01318 
01319       if ( g < gu ) {
01320         int hu = 0;
01321 #if defined(NAMD_SSE) && defined(__INTEL_COMPILER) && defined(__SSE2__)
01322         if ( gu - g  >  6 ) { 
01323 
01324           // DMK - Atom Sort
01325           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01326 
01327             register SortEntry* sortEntry0 = sortValues + g;
01328             register SortEntry* sortEntry1 = sortValues + g + 1;
01329             register int jprev0 = sortEntry0->index;
01330             register int jprev1 = sortEntry1->index;
01331 
01332             // Test if we need to loop again
01333             // Only if both of the atoms in the next iteration are
01334             //   outside of the bounds.  If one is within, it must
01335             //   be added to goodglist next iteration so keep going.
01336             bool test2 = ((p_i_sortValue_plus_windowRadius < sortEntry0->sortValue)
01337                        && (p_i_sortValue_plus_windowRadius < sortEntry1->sortValue));
01338             g += (test2 * gu);  // Either add '0' or 'gu'
01339 
01340           #else
01341 
01342             register int jprev0 = glist[g    ];
01343             register int jprev1 = glist[g + 1];
01344 
01345           #endif
01346           
01347           register int j0;
01348           register int j1;
01349 
01350           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01351           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01352           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01353 
01354           // these don't change here, so we could move them into outer scope
01355           const __m128d R2_DELTA = _mm_set1_pd(r2_delta);        
01356           const __m128d P_I_X = _mm_set1_pd(p_i_x);
01357           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01358           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01359  
01360           g += 2;
01361           for ( ; g < gu - 2; g +=2 ) {
01362             // compute 1d distance, 2-way parallel       
01363             j0     =  jprev0;
01364             j1     =  jprev1;
01365 
01366             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01367             __m128d R2_01 = _mm_add_pd(_mm_mul_pd(T_01, T_01), R2_DELTA);
01368             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01369             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01370             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01371             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01372             
01373             // DMK - Atom Sort
01374             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01375 
01376               sortEntry0 = sortValues + g;
01377               sortEntry1 = sortValues + g + 1;
01378               jprev0 = sortEntry0->index;
01379               jprev1 = sortEntry1->index;
01380 
01381               // Test if we need to loop again
01382               // Only if both of the atoms in the next iteration are
01383               //   outside of the bounds.  If one is within, it must
01384               //   be added to goodglist next iteration so keep going.
01385               bool test2 = ((p_i_sortValue_plus_windowRadius < sortEntry0->sortValue)
01386                          && (p_i_sortValue_plus_windowRadius < sortEntry1->sortValue));
01387               g += (test2 * gu);  // Either add '0' or 'gu'
01388 
01389             #else
01390 
01391               jprev0     =  glist[g  ];
01392               jprev1     =  glist[g+1];
01393 
01394             #endif
01395            
01396             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01397             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01398             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01399 
01400             __declspec(align(16)) double r2_01[2];
01401             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01402 
01403             // XXX these could possibly benefit from SSE-based conditionals
01404             bool test0 = ( r2_01[0] < groupplcutoff2 );
01405             bool test1 = ( r2_01[1] < groupplcutoff2 ); 
01406             
01407             //removing ifs benefits on many architectures
01408             //as the extra stores will only warm the cache up
01409             goodglist [ hu         ] = j0;
01410             goodglist [ hu + test0 ] = j1;
01411             
01412             hu += test0 + test1;
01413           }
01414           g-=2;
01415         }
01416 #else
01417         if ( gu - g  >  6 ) { 
01418 
01419           // DMK - Atom Sort
01420           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01421 
01422             register SortEntry* sortEntry0 = sortValues + g;
01423             register SortEntry* sortEntry1 = sortValues + g + 1;
01424             register int jprev0 = sortEntry0->index;
01425             register int jprev1 = sortEntry1->index;
01426 
01427             // Test if we need to loop again
01428             // Only if both of the atoms in the next iteration are
01429             //   outside of the bounds.  If one is within, it must
01430             //   be added to goodglist next iteration so keep going.
01431             bool test2 = ((p_i_sortValue_plus_windowRadius < sortEntry0->sortValue)
01432                        && (p_i_sortValue_plus_windowRadius < sortEntry1->sortValue));
01433             g += (test2 * gu);  // Either add '0' or 'gu'
01434 
01435           #else
01436 
01437             register int jprev0 = glist[g    ];
01438             register int jprev1 = glist[g + 1];
01439 
01440           #endif
01441           
01442           register  int j0; 
01443           register  int j1; 
01444           
01445           register  BigReal pj_x_0, pj_x_1; 
01446           register  BigReal pj_y_0, pj_y_1; 
01447           register  BigReal pj_z_0, pj_z_1; 
01448           register  BigReal t_0, t_1, r2_0, r2_1;
01449           
01450           pj_x_0 = p_1[jprev0].position.x;
01451           pj_x_1 = p_1[jprev1].position.x;  
01452           
01453           pj_y_0 = p_1[jprev0].position.y; 
01454           pj_y_1 = p_1[jprev1].position.y;  
01455           
01456           pj_z_0 = p_1[jprev0].position.z; 
01457           pj_z_1 = p_1[jprev1].position.z;
01458           
01459           g += 2;
01460           for ( ; g < gu - 2; g +=2 ) {
01461             // compute 1d distance, 2-way parallel       
01462             j0     =  jprev0;
01463             j1     =  jprev1;
01464             
01465             t_0    =  p_i_x - pj_x_0;
01466             t_1    =  p_i_x - pj_x_1;
01467             r2_0   =  t_0 * t_0 + r2_delta;
01468             r2_1   =  t_1 * t_1 + r2_delta;
01469             
01470             t_0    =  p_i_y - pj_y_0;
01471             t_1    =  p_i_y - pj_y_1;
01472             r2_0  +=  t_0 * t_0;
01473             r2_1  +=  t_1 * t_1;
01474             
01475             t_0    =  p_i_z - pj_z_0;
01476             t_1    =  p_i_z - pj_z_1;
01477             r2_0  +=  t_0 * t_0;
01478             r2_1  +=  t_1 * t_1;
01479             
01480             // DMK - Atom Sort
01481             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01482 
01483               sortEntry0 = sortValues + g;
01484               sortEntry1 = sortValues + g + 1;
01485               jprev0 = sortEntry0->index;
01486               jprev1 = sortEntry1->index;
01487 
01488               // Test if we need to loop again
01489               // Only if both of the atoms in the next iteration are
01490               //   outside of the bounds.  If one is within, it must
01491               //   be added to goodglist next iteration so keep going.
01492               bool test2 = ((p_i_sortValue_plus_windowRadius < sortEntry0->sortValue)
01493                          && (p_i_sortValue_plus_windowRadius < sortEntry1->sortValue));
01494               g += (test2 * gu);  // Either add '0' or 'gu'
01495 
01496             #else
01497 
01498               jprev0     =  glist[g  ];
01499               jprev1     =  glist[g+1];
01500 
01501             #endif
01502             
01503             pj_x_0     =  p_1[jprev0].position.x;
01504             pj_x_1     =  p_1[jprev1].position.x;
01505             pj_y_0     =  p_1[jprev0].position.y; 
01506             pj_y_1     =  p_1[jprev1].position.y;
01507             pj_z_0     =  p_1[jprev0].position.z; 
01508             pj_z_1     =  p_1[jprev1].position.z;
01509             
01510             bool test0 = ( r2_0 < groupplcutoff2 );
01511             bool test1 = ( r2_1 < groupplcutoff2 ); 
01512             
01513             //removing ifs benefits on many architectures
01514             //as the extra stores will only warm the cache up
01515             goodglist [ hu         ] = j0;
01516             goodglist [ hu + test0 ] = j1;
01517             
01518             hu += test0 + test1;
01519           }
01520           g-=2;
01521         }
01522 #endif
01523         
01524         for (; g < gu; g++) {
01525 
01526           // DMK - Atom Sort
01527           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01528 
01529             register SortEntry* sortEntry = sortValues + g;
01530             register int j = sortEntry->index;
01531 
01532             // Test if we need to loop again
01533             // NOTE: To avoid a branch, always do this iteration's distance
01534             //   calculation and just test this sort value to see if the
01535             //   rest of the iterations can be skiped (vs. a 'break' if
01536             //   this one can be skiped).
01537             bool test = (p_i_sortValue_plus_windowRadius < sortEntry->sortValue);
01538             g += (test * gu);  // Either add '0' or 'gu'
01539 
01540           #else
01541 
01542             int j = glist[g];
01543 
01544           #endif
01545 
01546           BigReal p_j_x = p_1[j].position.x;
01547           BigReal p_j_y = p_1[j].position.y;
01548           BigReal p_j_z = p_1[j].position.z;
01549           
01550           BigReal r2 = p_i_x - p_j_x;
01551           r2 *= r2;
01552           BigReal t2 = p_i_y - p_j_y;
01553           r2 += t2 * t2;
01554           t2 = p_i_z - p_j_z;
01555           r2 += t2 * t2;
01556 
01557           if ( r2 <= groupplcutoff2 ) 
01558             goodglist[hu ++] = j; 
01559         }
01560 
01561         for ( int h=0; h<hu; ++h ) {
01562           int j = goodglist[h];
01563           int hgs = ( p_1[j].nonbondedGroupIsAtom ? 1 :
01564                       p_1[j].hydrogenGroupSize );
01565           pli[0] = j;   // copy over the next four in any case
01566           pli[1] = j+1;
01567           pli[2] = j+2;
01568           pli[3] = j+3; // assume hgs <= 4
01569           pli += hgs;
01570         }
01571       }
01572     }
01573 
01574     pairlistindex = pli - pairlist;
01575     // make sure padded element on pairlist points to real data
01576     if ( pairlistindex ) {
01577        pairlist[pairlistindex] = pairlist[pairlistindex-1];
01578     } /* PAIR( else {  // skip empty loops if no pairs were found
01579        int hgs = ( p_i.nonbondedGroupIsAtom ? 1 : p_i.hydrogenGroupSize );
01580        i += hgs - 1;
01581        continue;
01582     } ) */
01583   } // if i is hydrogen group parent
01584   SELF
01585     (
01586     // self-comparisions require list to be incremented
01587     // pair-comparisions use entire list (pairlistoffset is 0)
01588     else pairlistoffset++;
01589     )
01590 
01591     const int atomfixed = ( fixedAtomsOn && p_i.atomFixed );
01592 
01593     register plint *pli = pairlist2;
01594 #if 1 ALCH(-1)
01595     plint *pairlistn = pairlists.newlist(j_upper + 5 + 1 + 5) SELF(+ 1);
01596 #else
01597     plint *pairlistn = pairlistnAlch;
01598 #endif
01599     SELF( plint &pairlistn_skip = *(pairlistn-1); )
01600     register plint *plin = pairlistn;
01601 
01602 
01603