Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedBase.h File Reference

#include "ComputeNonbondedBase2.h"
#include "PatchMap.h"

Go to the source code of this file.

Defines

#define NAME   CLASSNAME(calc)
#define PAIR(X)   X
#define CLASS   ComputeNonbondedPair
#define CLASSNAME(X)   ENERGYNAME( X ## _pair )
#define SELF(X)   X
#define CLASS   ComputeNonbondedSelf
#define CLASSNAME(X)   ENERGYNAME( X ## _self )
#define ENERGY(X)
#define NOENERGY(X)   X
#define ENERGYNAME(X)   SLOWONLYNAME( X )
#define FAST(X)   X
#define SLOWONLYNAME(X)   MERGEELECTNAME( X )
#define SHORT(X)   X
#define NOSHORT(X)
#define MERGEELECTNAME(X)   FULLELECTNAME( X )
#define FULLELECTNAME(X)   FEPNAME( X )
#define FULL(X)
#define NOFULL(X)   X
#define FEPNAME(X)   LAST( X )
#define FEP(X)
#define ALCHPAIR(X)
#define NOT_ALCHPAIR(X)   X
#define LES(X)
#define INT(X)
#define PPROF(X)
#define LAM(X)
#define ALCH(X)
#define TI(X)
#define LAST(X)   X
#define NORMAL(X)   X
#define EXCLUDED(X)
#define MODIFIED(X)
#define NORMAL(X)
#define EXCLUDED(X)
#define MODIFIED(X)   X

Functions

 SELF (PAIR(foo bar)) LES(FEP(foo bar)) LES(INT(foo bar)) FEP(INT(foo bar)) LAM(INT(foo bar)) FEP(NOENERGY(foo bar)) ENERGY(NOENERGY(foo bar)) void ComputeNonbondedUtil


Define Documentation

#define ALCH  ) 
 

Definition at line 131 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define ALCHPAIR  ) 
 

Definition at line 125 of file ComputeNonbondedBase.h.

#define CLASS   ComputeNonbondedSelf
 

Definition at line 52 of file ComputeNonbondedBase.h.

#define CLASS   ComputeNonbondedPair
 

Definition at line 52 of file ComputeNonbondedBase.h.

#define CLASSNAME  )     ENERGYNAME( X ## _self )
 

Definition at line 53 of file ComputeNonbondedBase.h.

#define CLASSNAME  )     ENERGYNAME( X ## _pair )
 

Definition at line 53 of file ComputeNonbondedBase.h.

#define ENERGY  ) 
 

Definition at line 66 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define ENERGYNAME  )     SLOWONLYNAME( X )
 

Definition at line 68 of file ComputeNonbondedBase.h.

#define EXCLUDED  ) 
 

#define EXCLUDED  ) 
 

#define FAST  )     X
 

Definition at line 77 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define FEP  ) 
 

Definition at line 124 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define FEPNAME  )     LAST( X )
 

Definition at line 123 of file ComputeNonbondedBase.h.

#define FULL  ) 
 

Definition at line 103 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define FULLELECTNAME  )     FEPNAME( X )
 

Definition at line 102 of file ComputeNonbondedBase.h.

#define INT  ) 
 

Definition at line 128 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define LAM  ) 
 

Definition at line 130 of file ComputeNonbondedBase.h.

#define LAST  )     X
 

Definition at line 172 of file ComputeNonbondedBase.h.

#define LES  ) 
 

Definition at line 127 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define MERGEELECTNAME  )     FULLELECTNAME( X )
 

Definition at line 91 of file ComputeNonbondedBase.h.

#define MODIFIED  )     X
 

#define MODIFIED  ) 
 

#define NAME   CLASSNAME(calc)
 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 38 of file ComputeNonbondedBase.h.

#define NOENERGY  )     X
 

Definition at line 67 of file ComputeNonbondedBase.h.

#define NOFULL  )     X
 

Definition at line 104 of file ComputeNonbondedBase.h.

#define NORMAL  ) 
 

#define NORMAL  )     X
 

#define NOSHORT  ) 
 

Definition at line 90 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define NOT_ALCHPAIR  )     X
 

Definition at line 126 of file ComputeNonbondedBase.h.

#define PAIR  )     X
 

Definition at line 42 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define PPROF  ) 
 

Definition at line 129 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define SELF  )     X
 

Definition at line 51 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define SHORT  )     X
 

Definition at line 89 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define SLOWONLYNAME  )     MERGEELECTNAME( X )
 

Definition at line 78 of file ComputeNonbondedBase.h.

#define TI  ) 
 

Definition at line 132 of file ComputeNonbondedBase.h.

Referenced by SELF().


Function Documentation

SELF PAIR(foo bar)   ) 
 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 175 of file ComputeNonbondedBase.h.

References LJTable::TableEntry::A, ALCH, Atom, CompAtom::atomFixed, Molecule::atomvdwtype(), LJTable::TableEntry::B, BigReal, CompAtom::charge, COLOUMB, ENERGY, CompAtomExt::exclId, FAST, FEP, ExclusionCheck::flags, Force, FULL, Molecule::get_excl_check_for_atom(), Molecule::get_full_exclusions_for_atom(), Molecule::get_mod_exclusions_for_atom(), Molecule::getAtoms(), CompAtom::groupFixed, CompAtom::hydrogenGroupSize, CompAtom::id, __sort_entry::index, INT, int32, LES, ExclusionCheck::max, ExclusionCheck::min, NAMD_bug(), NAMD_ComputeNonbonded_SortAtoms, NAMD_die(), NBWORKARRAY, NBWORKARRAYSINIT, Pairlists::newlist(), Pairlists::newsize(), Pairlists::nextlist(), CompAtom::nonbondedGroupIsAtom, NOSHORT, Molecule::numAtoms, PAIR, pairlist_from_pairlist(), CompAtom::partition, plint, CompAtom::position, pp_clamp(), PPROF, Pairlists::reset(), SELF, SHORT, sortEntries_bubbleSort(), sortEntries_mergeSort_v1(), sortEntries_mergeSort_v2(), sortEntries_selectionSort(), SortEntry, __sort_entry::sortValue, LJTable::table_row(), TI, atom_constants::vdw_type, CompAtomExt::vdwType, Vector::x, Vector::y, and Vector::z.

00195 {
00196   // int NAME;  // to track errors via unused variable warnings
00197 
00198   if ( ComputeNonbondedUtil::commOnly ) return;
00199 
00200   // speedup variables
00201   BigReal *reduction = params->reduction;
00202 
00203   PPROF(
00204   BigReal *pressureProfileReduction = params->pressureProfileReduction;
00205   const BigReal invThickness = 1.0 / pressureProfileThickness;
00206   )
00207 
00208   Pairlists &pairlists = *(params->pairlists);
00209   int savePairlists = params->savePairlists;
00210   int usePairlists = params->usePairlists;
00211   pairlists.reset();
00212   // PAIR(iout << "--------\n" << endi;)
00213 
00214   // local variables
00215   int exclChecksum = 0;
00216   FAST
00217   (
00218   ENERGY( BigReal vdwEnergy = 0; )
00219   SHORT
00220   (
00221   ENERGY( BigReal electEnergy = 0; )
00222   )
00223 
00224   FEP
00225   (
00226   ENERGY( BigReal vdwEnergy_s = 0; )
00227   SHORT
00228   (
00229   ENERGY( BigReal electEnergy_s = 0; )
00230   )
00231   )
00232   
00233   SHORT
00234   (
00235   BigReal virial_xx = 0;
00236   BigReal virial_xy = 0;
00237   BigReal virial_xz = 0;
00238   BigReal virial_yy = 0;
00239   BigReal virial_yz = 0;
00240   BigReal virial_zz = 0;
00241   )
00242   )
00243   FULL
00244   (
00245   ENERGY( BigReal fullElectEnergy = 0; )
00246   FEP
00247   (
00248   ENERGY( BigReal fullElectEnergy_s = 0; )
00249   )
00250   BigReal fullElectVirial_xx = 0;
00251   BigReal fullElectVirial_xy = 0;
00252   BigReal fullElectVirial_xz = 0;
00253   BigReal fullElectVirial_yy = 0;
00254   BigReal fullElectVirial_yz = 0;
00255   BigReal fullElectVirial_zz = 0;
00256   )
00257 
00258   // Bringing stuff into local namespace for speed.
00259 
00260   const BigReal offset_x = params->offset.x;
00261   const BigReal offset_y = params->offset.y;
00262   const BigReal offset_z = params->offset.z;
00263 
00264   register const BigReal plcutoff2 = \
00265                         params->plcutoff * params->plcutoff;
00266   register const BigReal groupplcutoff2 = \
00267                         params->groupplcutoff * params->groupplcutoff;
00268   const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00269   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00270   LJTable::TableEntry ljNull;  ljNull.A = 0; ljNull.B = 0;
00271   const LJTable::TableEntry* const lj_null_pars = &ljNull;
00272   const Molecule* const mol = ComputeNonbondedUtil:: mol;
00273   SHORT
00274   (
00275   const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00276   )
00277   FULL
00278   (
00279   SHORT
00280   (
00281   const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00282   )
00283   NOSHORT
00284   (
00285 //#if 1 ALCH(-1)
00286   const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00287 //#else  // have to switch this for ALCH
00288 //  BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00289 //#endif
00290   )
00291   )
00292   const BigReal scaling = ComputeNonbondedUtil:: scaling;
00293   const BigReal modf_mod = 1.0 - scale14;
00294   FAST
00295   (
00296   const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00297   const BigReal c1 = ComputeNonbondedUtil:: c1;
00298   const BigReal c3 = ComputeNonbondedUtil:: c3;
00299   )
00300   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00301   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00302   // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00303   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00304 
00305   ALCH(
00306     const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00307     const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00308     const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00309     const BigReal fepElecLambdaStart = ComputeNonbondedUtil::fepElecLambdaStart;
00310     const BigReal fepVdwLambdaEnd = ComputeNonbondedUtil::fepVdwLambdaEnd;
00311     const BigReal fepVdwShiftCoeff = ComputeNonbondedUtil::fepVdwShiftCoeff;
00312 
00313     /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00314     BigReal lambdaUp = ComputeNonbondedUtil::lambda;
00315     BigReal elecLambdaUp =  (lambdaUp <= fepElecLambdaStart)? 0. : \
00316               (lambdaUp - fepElecLambdaStart) / (1. - fepElecLambdaStart);
00317     BigReal vdwLambdaUp = 
00318         (lambdaUp >= fepVdwLambdaEnd)? 1. : lambdaUp / fepVdwLambdaEnd; 
00319     BigReal vdwShiftUp = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambdaUp);
00320     FEP(BigReal lambda2Up = ComputeNonbondedUtil::lambda2;)
00321     FEP(BigReal elecLambda2Up = (lambda2Up <= fepElecLambdaStart)? 0. : \
00322               (lambda2Up - fepElecLambdaStart) / (1. - fepElecLambdaStart);)
00323     FEP(BigReal vdwLambda2Up = 
00324         (lambda2Up >= fepVdwLambdaEnd)? 1. : lambda2Up / fepVdwLambdaEnd;) 
00325     FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambda2Up);)
00326 
00327         
00328     /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00329     BigReal lambdaDown = 1 - ComputeNonbondedUtil::lambda;
00330     BigReal elecLambdaDown =  (lambdaDown <= fepElecLambdaStart)? 0. : \
00331               (lambdaDown - fepElecLambdaStart) / (1. - fepElecLambdaStart);
00332     BigReal vdwLambdaDown = 
00333         (lambdaDown >= fepVdwLambdaEnd)? 1. : lambdaDown / fepVdwLambdaEnd; 
00334     BigReal vdwShiftDown = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambdaDown);
00335     FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::lambda2;)
00336     FEP(BigReal elecLambda2Down = (lambda2Down <= fepElecLambdaStart)? 0. : \
00337               (lambda2Down - fepElecLambdaStart) / (1. - fepElecLambdaStart); )
00338     FEP(BigReal vdwLambda2Down = 
00339         (lambda2Down >= fepVdwLambdaEnd)? 1. : lambda2Down / fepVdwLambdaEnd; )
00340     FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambda2Down);)
00341 
00342   
00343   // Thermodynamic Integration Notes (F. Buelens): 
00344   // Separation of atom pairs into different pairlist according to lambda
00345   // coupling, for alchemical free energy calculations. Normal pairlists
00346   // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00347   // (pairlist[nxm][12]_save are created for atoms switched up or down with
00348   // lambda respectively.
00349   // This makes TI coding far easier and more readable, since it's necessary 
00350   // to store dU/dlambda in different variables depending on whether atoms are
00351   // being switched up or down. Further, it allows Jordi's explicit coding of 
00352   // the separation-shifted vdW potential to stay in place without a big 
00353   // performance hit, since it's no longer necessary to calculate vdW potentials
00354   // explicity for the bulk (non-alchemical) interactions - so that part of the 
00355   // free energy code stays readable and easy to modify. Finally there should
00356   // be some performance gain over the old FEP implementation as we no
00357   // longer have to check the partitions of each atom pair and switch
00358   // parameters accordingly for every single NonbondedBase2.h loop - this is 
00359   // done at the pairlist level
00360   
00361   int pswitchTable[3*3];
00362   // determines which pairlist alchemical pairings get sent to
00363   // 0: non-alchemical pairs, partition 0 <-> partition 0
00364   // 1: atoms scaled up as lambda increases, p0<->p1
00365   // 2: atoms scaled down as lambda increases, p0<->p2
00366   // all p1<->p2 interactions to be dropped (99)
00367   // in general, 'up' refers to 1, 'down' refers to 2
00368   for (int ip=0; ip<3; ++ip) {
00369     for (int jp=0; jp<3; ++jp ) {
00370       pswitchTable[ip+3*jp] = 0;
00371       if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00372       if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00373       if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00374 
00375       if (! ComputeNonbondedUtil::decouple) {
00376         // no decoupling: interactions within a partition are treated like
00377         // normal alchemical pairs
00378         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00379         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00380       }
00381       if (ComputeNonbondedUtil::decouple) {
00382         // decoupling: PME calculates extra grids so that while PME 
00383         // interaction with the full system is switched off, a new PME grid
00384         // containing only alchemical atoms is switched on. Full interactions 
00385         // between alchemical atoms are maintained; potentials within one 
00386         // partition need not be scaled here.
00387         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00388         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00389       }
00390     }
00391   }
00392 
00393   // dU/dlambda variables for thermodynamic integration
00394   TI(
00395       BigReal vdwEnergy_ti_1 = 0;
00396       BigReal vdwEnergy_ti_2 = 0;
00397       SHORT(BigReal electEnergy_ti_1 = 0;
00398       BigReal electEnergy_ti_2 = 0;)
00399       FULL(BigReal fullElectEnergy_ti_1 = 0; 
00400       BigReal fullElectEnergy_ti_2 = 0;) 
00401    )
00402   )
00403         
00404         
00405   const int i_upper = params->numAtoms[0];
00406   register const int j_upper = params->numAtoms[1];
00407   register int j;
00408   register int i;
00409   const CompAtom *p_0 = params->p[0];
00410   const CompAtom *p_1 = params->p[1];
00411 #ifdef MEM_OPT_VERSION
00412   const CompAtomExt *pExt_0 = params->pExt[0];
00413   const CompAtomExt *pExt_1 = params->pExt[1];
00414 #endif
00415 
00416   char * excl_flags_buff = 0;
00417   const int32 * full_excl = 0;
00418   const int32 * mod_excl = 0;
00419 
00420   plint *pairlistn_save;  int npairn;
00421   plint *pairlistx_save;  int npairx;
00422   plint *pairlistm_save;  int npairm;
00423 
00424   ALCH(
00425   plint *pairlistnA1_save;  int npairnA1;
00426   plint *pairlistxA1_save;  int npairxA1;
00427   plint *pairlistmA1_save;  int npairmA1;
00428   plint *pairlistnA2_save;  int npairnA2;
00429   plint *pairlistxA2_save;  int npairxA2;
00430   plint *pairlistmA2_save;  int npairmA2;
00431   )
00432 
00433   NBWORKARRAYSINIT(params->workArrays);
00434 
00435   int arraysize = j_upper+5;
00436 
00437   NBWORKARRAY(plint,pairlisti,arraysize)
00438   NBWORKARRAY(BigReal,r2list,arraysize)
00439 
00440   union { double f; int32 i[2]; } byte_order_test;
00441   byte_order_test.f = 1.0;  // should occupy high-order bits only
00442   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00443 
00444   if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00445 
00446 
00447   // DMK - Atom Sort
00448   // 
00449   // Basic Idea: Determine the line between the center of masses of
00450   //   the two patches.  Project and then sort the lists of atoms
00451   //   along this line.  Then, as the pairlists are being generated
00452   //   for the atoms in the first atom list, use the sorted
00453   //   list to only add atoms from the second list that are between
00454   //   +/- ~cutoff from the atoms position on the line.
00455   #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00456 
00457   // NOTE: For the first atom list, just the sort values themselves need to be
00458   // calculated (a BigReal vs. SortEntry data structure).  However, a second
00459   // SortEntry array is needed to perform the merge sort on the second list of
00460   // atoms.  Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00461   // list of atoms and then use the left over array to make a version of the
00462   // list that only includes non-fixed groups (fixg).
00463 
00464   NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00465   NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00466   NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00467 
00468   register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00469   register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00470 
00471   int p_0_sortValues_len = 0;
00472   int p_1_sortValues_len = 0;
00473   int p_1_sortValues_fixg_len = 0;
00474 
00475   // Calculate the distance between to projected points on the line that
00476   //   represents the cutoff distance.
00477   BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00478 
00479   if (savePairlists || !usePairlists) {
00480 
00481     register const BigReal projLineVec_x = params->projLineVec.x;
00482     register const BigReal projLineVec_y = params->projLineVec.y;
00483     register const BigReal projLineVec_z = params->projLineVec.z;
00484 
00485     // Calculate the sort values for the atoms in patch 1
00486     {
00487       register int j = 0;
00488       register unsigned int ngia = p_1->nonbondedGroupIsAtom;
00489       register unsigned int hgs = p_1->hydrogenGroupSize;
00490       register BigReal p_x = p_1->position.x;
00491       register BigReal p_y = p_1->position.y;
00492       register BigReal p_z = p_1->position.z;
00493       register int index = j;
00494 
00495       while (j < j_upper) {
00496 
00497         // Advance j... NOTE: ngia is either 0 or 1, so if ngia is set '1'
00498         // is added to 'j'.  Otherwise, the value of 'hgs' is added to j.
00499         j += ((ngia) + ((1 - ngia) * hgs));
00500 
00501         // Set p_j_next to point to the atom for the next iteration and begin
00502         //   loading the 'ngia' and 'hgs' values for that atom.
00503         register const CompAtom* p_j_next = p_1 + j;
00504         ngia = p_j_next->nonbondedGroupIsAtom;
00505         hgs = p_j_next->hydrogenGroupSize;
00506 
00507         // Calculate the distance along the projection vector
00508         // NOTE: If the vector from the origin to the point is 'A' and the vector
00509         //   between the patches is 'B' then to project 'A' onto 'B' we take the dot
00510         //   product of the two vectors 'A dot B' divided by the length of 'B'.
00511         // So... projection of A onto B = (A dot B) / length(B), however, note
00512         //   that length(B) == 1, therefore the sort value is simply (A dot B)
00513         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00514 
00515         // Start loading the next iteration's atom's position
00516         p_x = p_j_next->position.x;
00517         p_y = p_j_next->position.y;
00518         p_z = p_j_next->position.z;
00519 
00520         // Store the caclulated sort value into the array of sort values
00521         register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00522         p_1_sortValStorePtr->index = index;
00523         p_1_sortValStorePtr->sortValue = sortVal;
00524         p_1_sortValues_len++;
00525 
00526         // Update index for the next iteration
00527         index = j;       
00528 
00529       } // end while (j < j_upper)
00530     }
00531 
00532     // NOTE: This list and another version of it with only non-fixed
00533     //   atoms will be used in place of grouplist and fixglist.
00534     #if 0   // Selection Sort
00535       sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00536     #elif 0   // Bubble Sort
00537       sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00538     #else   // Merge Sort
00539       #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00540         sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00541       #else
00542         sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00543       #endif
00544     #endif
00545 
00546     // Calculate the sort values for the atoms in patch 0
00547     {
00548       register int i = 0;
00549       register unsigned int ngia = p_0->nonbondedGroupIsAtom;
00550       register unsigned int hgs = p_0->hydrogenGroupSize;
00551       register BigReal p_x = p_0->position.x;
00552       register BigReal p_y = p_0->position.y;
00553       register BigReal p_z = p_0->position.z;
00554       register int index = i;
00555 
00556       while (i < i_upper) {
00557 
00558         // Advance i... NOTE: ngia is either 0 or 1, so if ngia is set '1'
00559         //   is added to 'i'.  Otherwise, the value of 'hgs' is added to i.
00560         i += ((ngia) + ((1 - ngia) * hgs));
00561 
00562         // Set p_i_next to point to the atom for the next iteration and begin
00563         //   loading the 'ngia' and 'hgs' values for that atom.
00564         register const CompAtom* p_i_next = p_0 + i;
00565         ngia = p_i_next->nonbondedGroupIsAtom;
00566         hgs = p_i_next->hydrogenGroupSize;
00567 
00568         // Calculate the distance along the projection vector
00569         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00570 
00571         // Start loading the next iteration's atom's position
00572         p_x = p_i_next->position.x;
00573         p_y = p_i_next->position.y;
00574         p_z = p_i_next->position.z;
00575 
00576         // Store the calculated sort value into the array of sort values
00577         register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00578         *p_0_sortValStorePtr = sortVal;
00579 
00580         // Update index for the next iteration
00581         index = i;
00582       }
00583 
00584       p_0_sortValues_len = i_upper;
00585     }
00586 
00587   }  // end if (savePairlists || !usePairlists)
00588 
00589   #endif  // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00590 
00591   // Atom Sort : The grouplist and fixglist arrays are not needed when the
00592   //   the atom sorting code is in use.
00593   #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00594     NBWORKARRAY(plint,grouplist,arraysize);
00595     NBWORKARRAY(plint,fixglist,arraysize);
00596   #endif
00597 
00598   NBWORKARRAY(plint,goodglist,arraysize);
00599   NBWORKARRAY(plint,pairlistx,arraysize);
00600   NBWORKARRAY(plint,pairlistm,arraysize);
00601   NBWORKARRAY(plint,pairlist,arraysize);
00602   NBWORKARRAY(plint,pairlist2,arraysize);
00603   ALCH(
00604   NBWORKARRAY(plint,pairlistnAlch,arraysize);
00605   NBWORKARRAY(plint,pairlistnA0,arraysize);
00606   NBWORKARRAY(plint,pairlistnA1,arraysize);
00607   NBWORKARRAY(plint,pairlistxA1,arraysize);
00608   NBWORKARRAY(plint,pairlistmA1,arraysize);
00609   NBWORKARRAY(plint,pairlistnA2,arraysize);
00610   NBWORKARRAY(plint,pairlistxA2,arraysize);
00611   NBWORKARRAY(plint,pairlistmA2,arraysize);
00612   )
00613 
00614   NBWORKARRAY(short,vdwtype_array,j_upper+5);
00615 
00616   
00617 #ifdef MEM_OPT_VERSION
00618   for (j = 0; j < j_upper; ++j){
00619     vdwtype_array[j] = pExt_1[j].vdwType;
00620   }
00621 #else
00622   const Atom *atomlist = mol->getAtoms();
00623 #ifdef ARCH_POWERPC
00624 #pragma disjoint (*atomlist, *vdwtype_array)
00625 #pragma disjoint (*p_1, *vdwtype_array)
00626 #pragma unroll(4)
00627 #endif
00628   for (j = 0; j < j_upper; ++j) {
00629     int id = p_1[j].id;
00630     vdwtype_array [j] = atomlist[id].vdw_type;
00631   }
00632 #endif
00633 
00634   int fixg_upper = 0;
00635   int g_upper = 0;
00636 
00637   if ( savePairlists || ! usePairlists ) {
00638 
00639     #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00640 
00641       // Create a sorted list of non-fixed groups
00642       register int fixg = 0;
00643       for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00644         register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00645         register int p_1_index = p_1_sortEntry->index;
00646         if (!p_1[p_1_index].groupFixed) {
00647           register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00648           p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00649           p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00650           p_1_sortValues_fixg_len++;
00651         }
00652       }
00653 
00654     #else
00655 
00656       register int g = 0;
00657       for ( j = 0; j < j_upper; ++j ) {
00658         if ( p_1[j].hydrogenGroupSize || p_1[j].nonbondedGroupIsAtom ) {
00659           grouplist[g++] = j;
00660         }
00661       }
00662       g_upper = g;
00663       if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00664       int fixg = 0;
00665 
00666       if ( fixedAtomsOn ) {
00667         for ( g = 0; g < g_upper; ++g ) {
00668           j = grouplist[g];
00669           if ( ! p_1[j].groupFixed ) {
00670             fixglist[fixg++] = j;
00671           }
00672         }
00673       }
00674 
00675       fixg_upper = fixg;
00676       if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00677 
00678     #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00679 
00680     *(pairlists.newlist(1)) = i_upper;
00681     pairlists.newsize(1);
00682 
00683   } else { // if ( savePairlists || ! usePairlists )
00684 
00685     plint *i_upper_check;
00686     int i_upper_check_count;
00687     pairlists.nextlist(&i_upper_check,&i_upper_check_count);
00688     if ( i_upper_check[0] != i_upper )
00689       NAMD_bug("pairlist i_upper mismatch!");
00690 
00691   } // if ( savePairlists || ! usePairlists )
00692 
00693   SELF(
00694   int j_hgroup = 0;
00695   int g_lower = 0;
00696   int fixg_lower = 0;
00697   )
00698   int pairlistindex=0;
00699   int pairlistoffset=0;
00700 
00701   
00702 
00703 #if ( SHORT( FAST( 1+ ) ) 0 )
00704 #if ( PAIR( 1+ ) 0 )
00705     Force *f_0 = params->ff[0];
00706     Force *f_1 = params->ff[1];
00707 #else
00708 #define f_1 f_0
00709     NBWORKARRAY(Force,f_0,i_upper)
00710     memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00711 #endif
00712 #endif
00713 #if ( FULL( 1+ ) 0 )
00714 #if ( PAIR( 1+ ) 0 )
00715     Force *fullf_0 = params->fullf[0];
00716     Force *fullf_1 = params->fullf[1];
00717 #else
00718 #define fullf_1 fullf_0
00719     NBWORKARRAY(Force,fullf_0,i_upper);
00720     memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00721 #endif
00722 #endif
00723     
00724 
00725   int numParts = params->numParts;
00726   int myPart = params->minPart;
00727   int groupCount = 0;
00728 
00729   for ( i = 0; i < (i_upper SELF(- 1)); ++i )
00730   {
00731     const CompAtom &p_i = p_0[i];
00732 #ifdef MEM_OPT_VERSION
00733     const CompAtomExt &pExt_i = pExt_0[i];
00734 #endif
00735     if ( p_i.hydrogenGroupSize ) {
00736       //save current group count
00737       int curgrpcount = groupCount;      
00738       //increment group count
00739       groupCount ++;
00740 
00741       if (groupCount >= numParts)
00742         groupCount = 0;
00743       
00744       if ( curgrpcount != myPart ) {
00745         i += p_i.hydrogenGroupSize - 1;
00746         
00747         //Power PC alignment constraint
00748 #ifdef ARCH_POWERPC
00749         __dcbt((void *) &(p_0[i+1]));
00750 #endif
00751         continue;
00752       }
00753     }
00754 
00755     register const BigReal p_i_x = p_i.position.x + offset_x;
00756     register const BigReal p_i_y = p_i.position.y + offset_y;
00757     register const BigReal p_i_z = p_i.position.z + offset_z;
00758 
00759     ALCH(const int p_i_partition = p_i.partition;)
00760 
00761     PPROF(
00762         const int p_i_partition = p_i.partition;
00763         int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00764         pp_clamp(n1, pressureProfileSlabs);
00765         )
00766 
00767   SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00768 
00769   if ( savePairlists || ! usePairlists ) {
00770 
00771     if ( ! savePairlists ) pairlists.reset();  // limit space usage
00772 
00773     #ifdef MEM_OPT_VERSION
00774     const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);        
00775     const int excl_min = p_i.id + exclcheck->min;
00776     const int excl_max = p_i.id + exclcheck->max;
00777     #else
00778     const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(p_i.id);
00779     const int excl_min = exclcheck->min;
00780     const int excl_max = exclcheck->max;
00781     #endif
00782     const char * excl_flags_var;
00783     if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00784     else {  // need to build list on the fly
00785 
00786     //TODO: Should change later!!!!!!!!!! --Chao Mei
00787     //Now just for the sake of passing compilation
00788     #ifndef MEM_OPT_VERSION 
00789       if ( excl_flags_buff ) {
00790         int nl,l;
00791         nl = full_excl[0] + 1;
00792         for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00793         nl = mod_excl[0] + 1;
00794         for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00795       } else {
00796         excl_flags_buff = new char[mol->numAtoms];
00797         memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00798       }
00799       int nl,l;
00800       full_excl = mol->get_full_exclusions_for_atom(p_i.id);
00801       nl = full_excl[0] + 1;
00802       for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00803       mod_excl = mol->get_mod_exclusions_for_atom(p_i.id);
00804       nl = mod_excl[0] + 1;
00805       for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00806       excl_flags_var = excl_flags_buff;
00807     #endif
00808 
00809     }
00810     const char * const excl_flags = excl_flags_var;
00811 
00812   if (p_i.hydrogenGroupSize || p_i.nonbondedGroupIsAtom) {
00813 
00814     pairlistindex = 0;  // initialize with 0 elements
00815     pairlistoffset=0;
00816     const int groupfixed = ( fixedAtomsOn && p_i.groupFixed );
00817 
00818     // If patch divisions are not made by hydrogen groups, then
00819     // hydrogenGroupSize is set to 1 for all atoms.  Thus we can
00820     // carry on as if we did have groups - only less efficiently.
00821     // An optimization in this case is to not rebuild the temporary
00822     // pairlist but to include every atom in it.  This should be a
00823     // a very minor expense.
00824 
00825     SELF
00826     (
00827       if ( p_i.hydrogenGroupSize ) {
00828         // exclude child hydrogens of i
00829         // j_hgroup = i + p_i.hydrogenGroupSize;  (moved above)
00830         while ( g_lower < g_upper &&
00831                 grouplist[g_lower] < j_hgroup ) ++g_lower;
00832         while ( fixg_lower < fixg_upper &&
00833                 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00834       }
00835       // add all child or sister hydrogens of i
00836       for ( j = i + 1; j < j_hgroup; ++j ) {
00837         pairlist[pairlistindex++] = j;
00838       }
00839     )
00840 
00841     // add remaining atoms to pairlist via hydrogen groups
00842     register plint *pli = pairlist + pairlistindex;
00843 
00844     {
00845       // Atom Sort : Modify the values of g and gu based on the added information
00846       //   of the linear projections (sort values) information.
00847       #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00848 
00849         register int g = 0;
00850         register BigReal p_i_sortValue = p_0_sortValues[i];
00851         const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00852         register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00853 
00854         // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
00855         register int lower = 0;
00856         register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00857         while ((upper - lower) > 1) {
00858           register int j = ((lower + upper) >> 1);
00859           register BigReal jSortVal = sortValues[j].sortValue;
00860           if (jSortVal < p_i_sortValue_plus_windowRadius) {
00861             lower = j;
00862           } else {
00863             upper = j;
00864           }
00865         }
00866         const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00867 
00868       #else
00869 
00870         register plint *gli = goodglist;
00871         const plint *glist = ( groupfixed ? fixglist : grouplist );
00872         SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00873         const int gu = ( groupfixed ? fixg_upper : g_upper );
00874         register int g = PAIR(0) SELF(gl);
00875 
00876       #endif
00877 
00878 
00879       if ( g < gu ) {
00880         int hu = 0;
00881 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00882         if ( gu - g  >  6 ) { 
00883 
00884           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00885             register SortEntry* sortEntry0 = sortValues + g;
00886             register SortEntry* sortEntry1 = sortValues + g + 1;
00887             register int jprev0 = sortEntry0->index;
00888             register int jprev1 = sortEntry1->index;
00889           #else
00890             register int jprev0 = glist[g    ];
00891             register int jprev1 = glist[g + 1];
00892           #endif
00893           
00894           register int j0;
00895           register int j1;
00896 
00897           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00898           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00899           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00900 
00901           // these don't change here, so we could move them into outer scope
00902           const __m128d P_I_X = _mm_set1_pd(p_i_x);
00903           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00904           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00905  
00906           g += 2;
00907           for ( ; g < gu - 2; g +=2 ) {
00908             // compute 1d distance, 2-way parallel       
00909             j0     =  jprev0;
00910             j1     =  jprev1;
00911 
00912             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00913             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00914             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00915             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00916             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00917             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00918             
00919             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00920               sortEntry0 = sortValues + g;
00921               sortEntry1 = sortValues + g + 1;
00922               jprev0 = sortEntry0->index;
00923               jprev1 = sortEntry1->index;
00924             #else
00925               jprev0     =  glist[g  ];
00926               jprev1     =  glist[g+1];
00927             #endif
00928            
00929             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00930             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00931             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00932 
00933             __align(16) double r2_01[2];
00934             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
00935 
00936             // XXX these could possibly benefit from SSE-based conditionals
00937             bool test0 = ( r2_01[0] < groupplcutoff2 );
00938             bool test1 = ( r2_01[1] < groupplcutoff2 ); 
00939             
00940             //removing ifs benefits on many architectures
00941             //as the extra stores will only warm the cache up
00942             goodglist [ hu         ] = j0;
00943             goodglist [ hu + test0 ] = j1;
00944             
00945             hu += test0 + test1;
00946           }
00947           g-=2;
00948         }
00949 #else
00950         if ( gu - g  >  6 ) { 
00951 
00952           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00953             register SortEntry* sortEntry0 = sortValues + g;
00954             register SortEntry* sortEntry1 = sortValues + g + 1;
00955             register int jprev0 = sortEntry0->index;
00956             register int jprev1 = sortEntry1->index;
00957           #else
00958             register int jprev0 = glist[g    ];
00959             register int jprev1 = glist[g + 1];
00960           #endif
00961           
00962           register  int j0; 
00963           register  int j1; 
00964           
00965           register  BigReal pj_x_0, pj_x_1; 
00966           register  BigReal pj_y_0, pj_y_1; 
00967           register  BigReal pj_z_0, pj_z_1; 
00968           register  BigReal t_0, t_1, r2_0, r2_1;
00969           
00970           pj_x_0 = p_1[jprev0].position.x;
00971           pj_x_1 = p_1[jprev1].position.x;  
00972           
00973           pj_y_0 = p_1[jprev0].position.y; 
00974           pj_y_1 = p_1[jprev1].position.y;  
00975           
00976           pj_z_0 = p_1[jprev0].position.z; 
00977           pj_z_1 = p_1[jprev1].position.z;
00978           
00979           g += 2;
00980           for ( ; g < gu - 2; g +=2 ) {
00981             // compute 1d distance, 2-way parallel       
00982             j0     =  jprev0;
00983             j1     =  jprev1;
00984             
00985             t_0    =  p_i_x - pj_x_0;
00986             t_1    =  p_i_x - pj_x_1;
00987             r2_0   =  t_0 * t_0;
00988             r2_1   =  t_1 * t_1;
00989             
00990             t_0    =  p_i_y - pj_y_0;
00991             t_1    =  p_i_y - pj_y_1;
00992             r2_0  +=  t_0 * t_0;
00993             r2_1  +=  t_1 * t_1;
00994             
00995             t_0    =  p_i_z - pj_z_0;
00996             t_1    =  p_i_z - pj_z_1;
00997             r2_0  +=  t_0 * t_0;
00998             r2_1  +=  t_1 * t_1;
00999             
01000             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01001               sortEntry0 = sortValues + g;
01002               sortEntry1 = sortValues + g + 1;
01003               jprev0 = sortEntry0->index;
01004               jprev1 = sortEntry1->index;
01005             #else
01006               jprev0     =  glist[g  ];
01007               jprev1     =  glist[g+1];
01008             #endif
01009             
01010             pj_x_0     =  p_1[jprev0].position.x;
01011             pj_x_1     =  p_1[jprev1].position.x;
01012             pj_y_0     =  p_1[jprev0].position.y; 
01013             pj_y_1     =  p_1[jprev1].position.y;
01014             pj_z_0     =  p_1[jprev0].position.z; 
01015             pj_z_1     =  p_1[jprev1].position.z;
01016             
01017             bool test0 = ( r2_0 < groupplcutoff2 );
01018             bool test1 = ( r2_1 < groupplcutoff2 ); 
01019             
01020             //removing ifs benefits on many architectures
01021             //as the extra stores will only warm the cache up
01022             goodglist [ hu         ] = j0;
01023             goodglist [ hu + test0 ] = j1;
01024             
01025             hu += test0 + test1;
01026           }
01027           g-=2;
01028         }
01029 #endif
01030         
01031         for (; g < gu; g++) {
01032 
01033           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01034             register SortEntry* sortEntry = sortValues + g;
01035             register int j = sortEntry->index;
01036           #else
01037             int j = glist[g];
01038           #endif
01039 
01040           BigReal p_j_x = p_1[j].position.x;
01041           BigReal p_j_y = p_1[j].position.y;
01042           BigReal p_j_z = p_1[j].position.z;
01043           
01044           BigReal r2 = p_i_x - p_j_x;
01045           r2 *= r2;
01046           BigReal t2 = p_i_y - p_j_y;
01047           r2 += t2 * t2;
01048           t2 = p_i_z - p_j_z;
01049           r2 += t2 * t2;
01050 
01051           if ( r2 <= groupplcutoff2 ) 
01052             goodglist[hu ++] = j; 
01053         }
01054 
01055         for ( int h=0; h<hu; ++h ) {
01056           int j = goodglist[h];
01057           int hgs = ( p_1[j].nonbondedGroupIsAtom ? 1 :
01058                       p_1[j].hydrogenGroupSize );
01059           pli[0] = j;   // copy over the next four in any case
01060           pli[1] = j+1;
01061           pli[2] = j+2;
01062           pli[3] = j+3; // assume hgs <= 4
01063           pli += hgs;
01064         }
01065       }
01066     }
01067 
01068     pairlistindex = pli - pairlist;
01069     // make sure padded element on pairlist points to real data
01070     if ( pairlistindex ) {
01071        pairlist[pairlistindex] = pairlist[pairlistindex-1];
01072     } /* PAIR( else {  // skip empty loops if no pairs were found
01073        int hgs = ( p_i.nonbondedGroupIsAtom ? 1 : p_i.hydrogenGroupSize );
01074        i += hgs - 1;
01075        continue;
01076     } ) */
01077   } // if i is hydrogen group parent
01078   SELF
01079     (
01080     // self-comparisions require list to be incremented
01081     // pair-comparisions use entire list (pairlistoffset is 0)
01082     else pairlistoffset++;
01083     )
01084 
01085     const int atomfixed = ( fixedAtomsOn && p_i.atomFixed );
01086 
01087     register plint *pli = pairlist2;
01088 #if 1 ALCH(-1)
01089     plint *pairlistn = pairlists.newlist(j_upper + 5 + 1 + 5) SELF(+ 1);
01090 #else
01091     plint *pairlistn = pairlistnAlch;
01092 #endif
01093     SELF( plint &pairlistn_skip = *(pairlistn-1); )
01094     register plint *plin = pairlistn;
01095 
01096 
01097     INT(
01098     if ( pairInteractionOn ) {
01099       const int ifep_type = p_i.partition;
01100       if (pairInteractionSelf) {
01101         if (ifep_type != 1) continue;
01102         for (int k=pairlistoffset; k<pairlistindex; k++) {
01103           j = pairlist[k];
01104           const int jfep_type = p_1[j].partition;
01105           // for pair-self, both atoms must be in group 1.
01106           if (jfep_type == 1) {
01107             *(pli++) = j;
01108           }
01109         }
01110       } else {
01111         if (ifep_type != 1 && ifep_type != 2) continue;
01112         for (int k=pairlistoffset; k<pairlistindex; k++) {
01113           j = pairlist[k];
01114           const int jfep_type = p_1[j].partition;
01115           // for pair, must have one from each group.
01116           if (ifep_type + jfep_type == 3) {
01117             *(pli++) = j;
01118           }
01119         }
01120       }
01121       int npair2_int = pli - pairlist2;
01122       pli = pairlist2;
01123       for (int k=0; k<npair2_int; k++) {
01124         j = pairlist2[k];
01125         BigReal p_j_x = p_1[j].position.x;
01126         BigReal r2 = p_i_x - p_j_x;
01127         r2 *= r2;
01128         BigReal p_j_y = p_1[j].position.y;
01129         BigReal t2 = p_i_y - p_j_y;
01130         r2 += t2 * t2;
01131         BigReal p_j_z = p_1[j].position.z;
01132         t2 = p_i_z - p_j_z;
01133         r2 += t2 * t2;
01134         if ( ( ! (atomfixed && p_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01135           int atom2 = p_1[j].id;
01136           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01137           else *(plin++) = j;
01138         }
01139       }
01140     } else
01141     )
01142     if ( atomfixed ) {
01143       for (int k=pairlistoffset; k<pairlistindex; k++) {
01144         j = pairlist[k];
01145         BigReal p_j_x = p_1[j].position.x;
01146         BigReal r2 = p_i_x - p_j_x;
01147         r2 *= r2;
01148         BigReal p_j_y = p_1[j].position.y;
01149         BigReal t2 = p_i_y - p_j_y;
01150         r2 += t2 * t2;
01151         BigReal p_j_z = p_1[j].position.z;
01152         t2 = p_i_z - p_j_z;
01153         r2 += t2 * t2;
01154         if ( (! p_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01155           int atom2 = p_1[j].id;
01156           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01157           else *(plin++) = j;
01158         }
01159       }
01160     } else {
01161       int k = pairlistoffset;
01162       int ku = pairlistindex;
01163       if ( k < ku ) {
01164 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01165         if ( ku - k  >  6 ) {      
01166           register  int jprev0 = pairlist [k    ];
01167           register  int jprev1 = pairlist [k + 1];
01168           
01169           register  int j0; 
01170           register  int j1; 
01171 
01172           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01173           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01174           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01175 
01176           // these don't change here, so we could move them into outer scope
01177           const __m128d P_I_X = _mm_set1_pd(p_i_x);
01178           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01179           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01180           
01181           int atom2_0 = p_1[jprev0].id;
01182           int atom2_1 = p_1[jprev1].id;
01183           
01184           k += 2;
01185           for ( ; k < ku - 2; k +=2 ) {
01186             // compute 1d distance, 2-way parallel       
01187             j0     =  jprev0;
01188             j1     =  jprev1;
01189             
01190             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01191             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01192             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01193             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01194             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01195             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01196             
01197             jprev0     =  pairlist[k];
01198             jprev1     =  pairlist[k+1];
01199             
01200             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01201             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01202             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01203 
01204             __align(16) double r2_01[2];
01205             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01206             
01207             if (r2_01[0] <= plcutoff2) {
01208               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01209                 *(pli++) = j0;
01210               else 
01211                 *(plin++) = j0;
01212             }
01213             ato