Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedBase.h File Reference

#include "ComputeNonbondedBase2.h"
#include "PatchMap.h"

Go to the source code of this file.

Defines

#define NAME   CLASSNAME(calc)
#define PAIR(X)   X
#define CLASS   ComputeNonbondedPair
#define CLASSNAME(X)   ENERGYNAME( X ## _pair )
#define SELF(X)   X
#define CLASS   ComputeNonbondedSelf
#define CLASSNAME(X)   ENERGYNAME( X ## _self )
#define ENERGY(X)
#define NOENERGY(X)   X
#define ENERGYNAME(X)   SLOWONLYNAME( X )
#define FAST(X)   X
#define SLOWONLYNAME(X)   MERGEELECTNAME( X )
#define SHORT(X)   X
#define NOSHORT(X)
#define MERGEELECTNAME(X)   FULLELECTNAME( X )
#define FULLELECTNAME(X)   TABENERGYNAME( X )
#define FULL(X)
#define NOFULL(X)   X
#define TABENERGYNAME(X)   FEPNAME( X )
#define TABENERGY(X)
#define NOTABENERGY(X)   X
#define FEPNAME(X)   LAST( X )
#define FEP(X)
#define ALCHPAIR(X)
#define NOT_ALCHPAIR(X)   X
#define LES(X)
#define INT(X)
#define PPROF(X)
#define LAM(X)
#define CUDA(X)
#define ALCH(X)
#define TI(X)
#define CUDA(X)   X
#define LAST(X)   X
#define NORMAL(X)   X
#define EXCLUDED(X)
#define MODIFIED(X)
#define NORMAL(X)
#define EXCLUDED(X)
#define MODIFIED(X)   X

Functions

 SELF (PAIR(foo bar)) LES(FEP(foo bar)) LES(INT(foo bar)) FEP(INT(foo bar)) LAM(INT(foo bar)) FEP(NOENERGY(foo bar)) ENERGY(NOENERGY(foo bar)) TABENERGY(NOTABENERGY(foo bar)) void ComputeNonbondedUtil


Define Documentation

#define ALCH  ) 
 

Definition at line 148 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define ALCHPAIR  ) 
 

Definition at line 141 of file ComputeNonbondedBase.h.

#define CLASS   ComputeNonbondedSelf
 

Definition at line 54 of file ComputeNonbondedBase.h.

#define CLASS   ComputeNonbondedPair
 

Definition at line 54 of file ComputeNonbondedBase.h.

#define CLASSNAME  )     ENERGYNAME( X ## _self )
 

Definition at line 55 of file ComputeNonbondedBase.h.

#define CLASSNAME  )     ENERGYNAME( X ## _pair )
 

Definition at line 55 of file ComputeNonbondedBase.h.

#define CUDA  )     X
 

Definition at line 190 of file ComputeNonbondedBase.h.

#define CUDA  ) 
 

Definition at line 190 of file ComputeNonbondedBase.h.

#define ENERGY  ) 
 

Definition at line 68 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define ENERGYNAME  )     SLOWONLYNAME( X )
 

Definition at line 70 of file ComputeNonbondedBase.h.

#define EXCLUDED  ) 
 

#define EXCLUDED  ) 
 

#define FAST  )     X
 

Definition at line 79 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define FEP  ) 
 

Definition at line 140 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define FEPNAME  )     LAST( X )
 

Definition at line 139 of file ComputeNonbondedBase.h.

#define FULL  ) 
 

Definition at line 105 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define FULLELECTNAME  )     TABENERGYNAME( X )
 

Definition at line 104 of file ComputeNonbondedBase.h.

#define INT  ) 
 

Definition at line 144 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define LAM  ) 
 

Definition at line 146 of file ComputeNonbondedBase.h.

#define LAST  )     X
 

Definition at line 193 of file ComputeNonbondedBase.h.

#define LES  ) 
 

Definition at line 143 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define MERGEELECTNAME  )     FULLELECTNAME( X )
 

Definition at line 93 of file ComputeNonbondedBase.h.

#define MODIFIED  )     X
 

#define MODIFIED  ) 
 

#define NAME   CLASSNAME(calc)
 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 40 of file ComputeNonbondedBase.h.

#define NOENERGY  )     X
 

Definition at line 69 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define NOFULL  )     X
 

Definition at line 106 of file ComputeNonbondedBase.h.

#define NORMAL  ) 
 

#define NORMAL  )     X
 

#define NOSHORT  ) 
 

Definition at line 92 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define NOT_ALCHPAIR  )     X
 

Definition at line 142 of file ComputeNonbondedBase.h.

#define NOTABENERGY  )     X
 

Definition at line 119 of file ComputeNonbondedBase.h.

#define PAIR  )     X
 

Definition at line 44 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define PPROF  ) 
 

Definition at line 145 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define SELF  )     X
 

Definition at line 53 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define SHORT  )     X
 

Definition at line 91 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define SLOWONLYNAME  )     MERGEELECTNAME( X )
 

Definition at line 80 of file ComputeNonbondedBase.h.

#define TABENERGY  ) 
 

Definition at line 118 of file ComputeNonbondedBase.h.

Referenced by SELF().

#define TABENERGYNAME  )     FEPNAME( X )
 

Definition at line 117 of file ComputeNonbondedBase.h.

#define TI  ) 
 

Definition at line 149 of file ComputeNonbondedBase.h.

Referenced by SELF().


Function Documentation

SELF PAIR(foo bar)   ) 
 

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 196 of file ComputeNonbondedBase.h.

References LJTable::TableEntry::A, ALCH, CompAtomExt::atomFixed, LJTable::TableEntry::B, BigReal, CompAtom::charge, COLOUMB, ENERGY, FAST, FEP, ExclusionCheck::flags, Force, FULL, Molecule::get_excl_check_for_atom(), Molecule::get_full_exclusions_for_atom(), Molecule::get_mod_exclusions_for_atom(), CompAtomExt::groupFixed, CompAtom::hydrogenGroupSize, CompAtomExt::id, __sort_entry::index, INT, int32, j, LES, ExclusionCheck::max, ExclusionCheck::min, NAMD_bug(), NAMD_ComputeNonbonded_SortAtoms, NAMD_die(), NBWORKARRAY, NBWORKARRAYSINIT, Pairlists::newlist(), Pairlists::newsize(), Pairlists::nextlist(), NOENERGY, CompAtom::nonbondedGroupSize, NOSHORT, Molecule::numAtoms, Node::Object(), PAIR, pairlist_from_pairlist(), CompAtom::partition, plint, CompAtom::position, pp_clamp(), PPROF, Pairlists::reset(), SELF, SHORT, Node::simParameters, simParams, sortEntries_bubbleSort(), sortEntries_mergeSort_v1(), sortEntries_mergeSort_v2(), sortEntries_selectionSort(), SortEntry, __sort_entry::sortValue, TABENERGY, LJTable::table_row(), LJTable::TableEntry::tabletype, TI, SimParameters::usePairlists, CompAtom::vdwType, Vector::x, Vector::y, and Vector::z.

00216 {
00217   // int NAME;  // to track errors via unused variable warnings
00218 
00219   if ( ComputeNonbondedUtil::commOnly ) return;
00220 
00221 #ifdef NAMD_CUDA
00222   NOENERGY(return;)
00223 #endif
00224 
00225   // speedup variables
00226   BigReal *reduction = params->reduction;
00227   SimParameters *simParams = Node::Object()->simParameters;
00228 
00229   PPROF(
00230   BigReal *pressureProfileReduction = params->pressureProfileReduction;
00231   const BigReal invThickness = 1.0 / pressureProfileThickness;
00232   )
00233 
00234   Pairlists &pairlists = *(params->pairlists);
00235 #ifdef NAMD_CUDA
00236   int savePairlists = 0;
00237   int usePairlists = 0;
00238 #else
00239   int savePairlists = params->savePairlists;
00240   int usePairlists = params->usePairlists;
00241 #endif
00242   pairlists.reset();
00243   // PAIR(iout << "--------\n" << endi;)
00244 
00245   // local variables
00246   int exclChecksum = 0;
00247   FAST
00248   (
00249   ENERGY( BigReal vdwEnergy = 0; )
00250   SHORT
00251   (
00252   ENERGY( BigReal electEnergy = 0; )
00253   )
00254 
00255   FEP
00256   (
00257   ENERGY( BigReal vdwEnergy_s = 0; )
00258   SHORT
00259   (
00260   ENERGY( BigReal electEnergy_s = 0; )
00261   )
00262   )
00263   
00264   SHORT
00265   (
00266   BigReal virial_xx = 0;
00267   BigReal virial_xy = 0;
00268   BigReal virial_xz = 0;
00269   BigReal virial_yy = 0;
00270   BigReal virial_yz = 0;
00271   BigReal virial_zz = 0;
00272   )
00273   )
00274   FULL
00275   (
00276   ENERGY( BigReal fullElectEnergy = 0; )
00277   FEP
00278   (
00279   ENERGY( BigReal fullElectEnergy_s = 0; )
00280   )
00281   BigReal fullElectVirial_xx = 0;
00282   BigReal fullElectVirial_xy = 0;
00283   BigReal fullElectVirial_xz = 0;
00284   BigReal fullElectVirial_yy = 0;
00285   BigReal fullElectVirial_yz = 0;
00286   BigReal fullElectVirial_zz = 0;
00287   )
00288 
00289   // Bringing stuff into local namespace for speed.
00290 
00291   const BigReal offset_x = params->offset.x;
00292   const BigReal offset_y = params->offset.y;
00293   const BigReal offset_z = params->offset.z;
00294 
00295   register const BigReal plcutoff2 = \
00296                         params->plcutoff * params->plcutoff;
00297   register const BigReal groupplcutoff2 = \
00298                         params->groupplcutoff * params->groupplcutoff;
00299   const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00300   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00301   LJTable::TableEntry ljNull;  ljNull.A = 0; ljNull.B = 0; TABENERGY(ljNull.tabletype=-1;)
00302   const LJTable::TableEntry* const lj_null_pars = &ljNull;
00303   const Molecule* const mol = ComputeNonbondedUtil:: mol;
00304   SHORT
00305   (
00306   const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00307   )
00308   FULL
00309   (
00310   SHORT
00311   (
00312   const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00313   )
00314   NOSHORT
00315   (
00316 //#if 1 ALCH(-1)
00317   const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00318 //#else  // have to switch this for ALCH
00319 //  BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00320 //#endif
00321   )
00322   )
00323   const BigReal scaling = ComputeNonbondedUtil:: scaling;
00324   const BigReal modf_mod = 1.0 - scale14;
00325   FAST
00326   (
00327   const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00328   const BigReal c1 = ComputeNonbondedUtil:: c1;
00329   const BigReal c3 = ComputeNonbondedUtil:: c3;
00330   )
00331   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00332   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00333   // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00334   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00335 
00336   ALCH(
00337     const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00338     const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00339     const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00340     const BigReal alchElecLambdaStart = ComputeNonbondedUtil::alchElecLambdaStart;
00341     const BigReal alchVdwLambdaEnd = ComputeNonbondedUtil::alchVdwLambdaEnd;
00342     const BigReal alchVdwShiftCoeff = ComputeNonbondedUtil::alchVdwShiftCoeff;
00343 
00344     /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00345     BigReal lambdaUp = ComputeNonbondedUtil::alchLambda;
00346     BigReal elecLambdaUp =  (lambdaUp <= alchElecLambdaStart)? 0. : \
00347               (lambdaUp - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00348     BigReal vdwLambdaUp = 
00349         (lambdaUp >= alchVdwLambdaEnd)? 1. : lambdaUp / alchVdwLambdaEnd; 
00350     BigReal vdwShiftUp = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaUp);
00351     FEP(BigReal lambda2Up = ComputeNonbondedUtil::alchLambda2;)
00352     FEP(BigReal elecLambda2Up = (lambda2Up <= alchElecLambdaStart)? 0. : \
00353               (lambda2Up - alchElecLambdaStart) / (1. - alchElecLambdaStart);)
00354     FEP(BigReal vdwLambda2Up = 
00355         (lambda2Up >= alchVdwLambdaEnd)? 1. : lambda2Up / alchVdwLambdaEnd;) 
00356     FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Up);)
00357 
00358         
00359     /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00360     BigReal lambdaDown = 1 - ComputeNonbondedUtil::alchLambda;
00361     BigReal elecLambdaDown =  (lambdaDown <= alchElecLambdaStart)? 0. : \
00362               (lambdaDown - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00363     BigReal vdwLambdaDown = 
00364         (lambdaDown >= alchVdwLambdaEnd)? 1. : lambdaDown / alchVdwLambdaEnd; 
00365     BigReal vdwShiftDown = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaDown);
00366     FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::alchLambda2;)
00367     FEP(BigReal elecLambda2Down = (lambda2Down <= alchElecLambdaStart)? 0. : \
00368         (lambda2Down - alchElecLambdaStart) / (1. - alchElecLambdaStart); )
00369     FEP(BigReal vdwLambda2Down = 
00370         (lambda2Down >= alchVdwLambdaEnd)? 1. : lambda2Down / alchVdwLambdaEnd; )
00371     FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Down);)
00372 
00373   
00374   // Thermodynamic Integration Notes: 
00375   // Separation of atom pairs into different pairlist according to lambda
00376   // coupling, for alchemical free energy calculations. Normal pairlists
00377   // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00378   // (pairlist[nxm][12]_save are created for atoms switched up or down with
00379   // lambda respectively.
00380   // This makes TI coding far easier and more readable, since it's necessary 
00381   // to store dU/dlambda in different variables depending on whether atoms are
00382   // being switched up or down. Further, it allows the prior explicit coding of 
00383   // the separation-shifted vdW potential to stay in place without a big 
00384   // performance hit, since it's no longer necessary to calculate vdW potentials
00385   // explicity for the bulk (non-alchemical) interactions - so that part of the 
00386   // free energy code stays readable and easy to modify. Finally there should
00387   // be some performance gain over the old FEP implementation as we no
00388   // longer have to check the partitions of each atom pair and switch
00389   // parameters accordingly for every single NonbondedBase2.h loop - this is 
00390   // done at the pairlist level
00391   
00392   int pswitchTable[3*3];
00393   // determines which pairlist alchemical pairings get sent to
00394   // 0: non-alchemical pairs, partition 0 <-> partition 0
00395   // 1: atoms scaled up as lambda increases, p0<->p1
00396   // 2: atoms scaled down as lambda increases, p0<->p2
00397   // all p1<->p2 interactions to be dropped (99)
00398   // in general, 'up' refers to 1, 'down' refers to 2
00399   for (int ip=0; ip<3; ++ip) {
00400     for (int jp=0; jp<3; ++jp ) {
00401       pswitchTable[ip+3*jp] = 0;
00402       if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00403       if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00404       if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00405 
00406       if (! ComputeNonbondedUtil::alchDecouple) {
00407         // no decoupling: interactions within a partition are treated like
00408         // normal alchemical pairs
00409         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00410         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00411       }
00412       if (ComputeNonbondedUtil::alchDecouple) {
00413         // decoupling: PME calculates extra grids so that while PME 
00414         // interaction with the full system is switched off, a new PME grid
00415         // containing only alchemical atoms is switched on. Full interactions 
00416         // between alchemical atoms are maintained; potentials within one 
00417         // partition need not be scaled here.
00418         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00419         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00420       }
00421     }
00422   }
00423 
00424   // dU/dlambda variables for thermodynamic integration
00425   TI(
00426       BigReal vdwEnergy_ti_1 = 0;
00427       BigReal vdwEnergy_ti_2 = 0;
00428       SHORT(BigReal electEnergy_ti_1 = 0;
00429       BigReal electEnergy_ti_2 = 0;)
00430       FULL(BigReal fullElectEnergy_ti_1 = 0; 
00431       BigReal fullElectEnergy_ti_2 = 0;) 
00432    )
00433   )
00434         
00435         
00436   const int i_upper = params->numAtoms[0];
00437   register const int j_upper = params->numAtoms[1];
00438   register int j;
00439   register int i;
00440   const CompAtom *p_0 = params->p[0];
00441   const CompAtom *p_1 = params->p[1];
00442   const CompAtomExt *pExt_0 = params->pExt[0];
00443   const CompAtomExt *pExt_1 = params->pExt[1];
00444 
00445   char * excl_flags_buff = 0;
00446   const int32 * full_excl = 0;
00447   const int32 * mod_excl = 0;
00448 
00449   plint *pairlistn_save;  int npairn;
00450   plint *pairlistx_save;  int npairx;
00451   plint *pairlistm_save;  int npairm;
00452 
00453   ALCH(
00454   plint *pairlistnA1_save;  int npairnA1;
00455   plint *pairlistxA1_save;  int npairxA1;
00456   plint *pairlistmA1_save;  int npairmA1;
00457   plint *pairlistnA2_save;  int npairnA2;
00458   plint *pairlistxA2_save;  int npairxA2;
00459   plint *pairlistmA2_save;  int npairmA2;
00460   )
00461 
00462   NBWORKARRAYSINIT(params->workArrays);
00463 
00464   int arraysize = j_upper+5;
00465 
00466   NBWORKARRAY(plint,pairlisti,arraysize)
00467   NBWORKARRAY(BigReal,r2list,arraysize)
00468 
00469   union { double f; int32 i[2]; } byte_order_test;
00470   byte_order_test.f = 1.0;  // should occupy high-order bits only
00471   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00472 
00473   if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00474 
00475 
00476   // DMK - Atom Sort
00477   // 
00478   // Basic Idea: Determine the line between the center of masses of
00479   //   the two patches.  Project and then sort the lists of atoms
00480   //   along this line.  Then, as the pairlists are being generated
00481   //   for the atoms in the first atom list, use the sorted
00482   //   list to only add atoms from the second list that are between
00483   //   +/- ~cutoff from the atoms position on the line.
00484   #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00485 
00486   // NOTE: For the first atom list, just the sort values themselves need to be
00487   // calculated (a BigReal vs. SortEntry data structure).  However, a second
00488   // SortEntry array is needed to perform the merge sort on the second list of
00489   // atoms.  Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00490   // list of atoms and then use the left over array to make a version of the
00491   // list that only includes non-fixed groups (fixg).
00492 
00493   NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00494   NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00495   NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00496 
00497   register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00498   register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00499 
00500   int p_0_sortValues_len = 0;
00501   int p_1_sortValues_len = 0;
00502   int p_1_sortValues_fixg_len = 0;
00503 
00504   // Calculate the distance between to projected points on the line that
00505   //   represents the cutoff distance.
00506   BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00507 
00508   if (savePairlists || !usePairlists) {
00509 
00510     register const BigReal projLineVec_x = params->projLineVec.x;
00511     register const BigReal projLineVec_y = params->projLineVec.y;
00512     register const BigReal projLineVec_z = params->projLineVec.z;
00513 
00514     // Calculate the sort values for the atoms in patch 1
00515     {
00516       register int j = 0;
00517       register int nbgs = p_1->nonbondedGroupSize;
00518       register BigReal p_x = p_1->position.x;
00519       register BigReal p_y = p_1->position.y;
00520       register BigReal p_z = p_1->position.z;
00521       register int index = j;
00522 
00523       while (j < j_upper) {
00524 
00525         j += nbgs;
00526 
00527         // Set p_j_next to point to the atom for the next iteration and begin
00528         //   loading the 'nbgs' value for that atom.
00529         register const CompAtom* p_j_next = p_1 + j;
00530         nbgs = p_j_next->nonbondedGroupSize;
00531 
00532         // Calculate the distance along the projection vector
00533         // NOTE: If the vector from the origin to the point is 'A' and the vector
00534         //   between the patches is 'B' then to project 'A' onto 'B' we take the dot
00535         //   product of the two vectors 'A dot B' divided by the length of 'B'.
00536         // So... projection of A onto B = (A dot B) / length(B), however, note
00537         //   that length(B) == 1, therefore the sort value is simply (A dot B)
00538         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00539 
00540         // Start loading the next iteration's atom's position
00541         p_x = p_j_next->position.x;
00542         p_y = p_j_next->position.y;
00543         p_z = p_j_next->position.z;
00544 
00545         // Store the caclulated sort value into the array of sort values
00546         register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00547         p_1_sortValStorePtr->index = index;
00548         p_1_sortValStorePtr->sortValue = sortVal;
00549         p_1_sortValues_len++;
00550 
00551         // Update index for the next iteration
00552         index = j;       
00553 
00554       } // end while (j < j_upper)
00555     }
00556 
00557     // NOTE: This list and another version of it with only non-fixed
00558     //   atoms will be used in place of grouplist and fixglist.
00559     #if 0   // Selection Sort
00560       sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00561     #elif 0   // Bubble Sort
00562       sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00563     #else   // Merge Sort
00564       #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00565         sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00566       #else
00567         sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00568       #endif
00569     #endif
00570 
00571     // Calculate the sort values for the atoms in patch 0
00572     {
00573       register int i = 0;
00574       register int nbgs = p_0->nonbondedGroupSize;
00575       register BigReal p_x = p_0->position.x + offset_x;
00576       register BigReal p_y = p_0->position.y + offset_y;
00577       register BigReal p_z = p_0->position.z + offset_z;
00578       register int index = i;
00579 
00580       while (i < i_upper) {
00581 
00582         i += nbgs;
00583 
00584         // Set p_i_next to point to the atom for the next iteration and begin
00585         //   loading the 'nbgs' value for that atom.
00586         register const CompAtom* p_i_next = p_0 + i;
00587         nbgs = p_i_next->nonbondedGroupSize;
00588 
00589         // Calculate the distance along the projection vector
00590         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00591 
00592         // Start loading the next iteration's atom's position
00593         p_x = p_i_next->position.x + offset_x;
00594         p_y = p_i_next->position.y + offset_y;
00595         p_z = p_i_next->position.z + offset_z;
00596 
00597         // Store the calculated sort value into the array of sort values
00598         register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00599         *p_0_sortValStorePtr = sortVal;
00600 
00601         // Update index for the next iteration
00602         index = i;
00603       }
00604 
00605       p_0_sortValues_len = i_upper;
00606     }
00607 
00608   }  // end if (savePairlists || !usePairlists)
00609 
00610   #endif  // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00611 
00612   // Atom Sort : The grouplist and fixglist arrays are not needed when the
00613   //   the atom sorting code is in use.
00614   #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00615     NBWORKARRAY(plint,grouplist,arraysize);
00616     NBWORKARRAY(plint,fixglist,arraysize);
00617   #endif
00618 
00619   NBWORKARRAY(plint,goodglist,arraysize);
00620   NBWORKARRAY(plint,pairlistx,arraysize);
00621   NBWORKARRAY(plint,pairlistm,arraysize);
00622   NBWORKARRAY(plint,pairlist,arraysize);
00623   NBWORKARRAY(plint,pairlist2,arraysize);
00624   ALCH(
00625   NBWORKARRAY(plint,pairlistnAlch,arraysize);
00626   NBWORKARRAY(plint,pairlistnA0,arraysize);
00627   NBWORKARRAY(plint,pairlistnA1,arraysize);
00628   NBWORKARRAY(plint,pairlistxA1,arraysize);
00629   NBWORKARRAY(plint,pairlistmA1,arraysize);
00630   NBWORKARRAY(plint,pairlistnA2,arraysize);
00631   NBWORKARRAY(plint,pairlistxA2,arraysize);
00632   NBWORKARRAY(plint,pairlistmA2,arraysize);
00633   )
00634 
00635   NBWORKARRAY(short,vdwtype_array,j_upper+5);
00636   for (j = 0; j < j_upper; ++j)
00637     vdwtype_array [j] = p_1[j].vdwType;
00638 
00639   int fixg_upper = 0;
00640   int g_upper = 0;
00641 
00642   if ( savePairlists || ! usePairlists ) {
00643 
00644     #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00645 
00646       // Create a sorted list of non-fixed groups
00647       register int fixg = 0;
00648       for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00649         register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00650         register int p_1_index = p_1_sortEntry->index;
00651         if (!pExt_1[p_1_index].groupFixed) {
00652           register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00653           p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00654           p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00655           p_1_sortValues_fixg_len++;
00656         }
00657       }
00658 
00659     #else
00660 
00661       register int g = 0;
00662       for ( j = 0; j < j_upper; ++j ) {
00663         if ( p_1[j].nonbondedGroupSize ) {
00664           grouplist[g++] = j;
00665         }
00666       }
00667       g_upper = g;
00668       if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00669       int fixg = 0;
00670 
00671       if ( fixedAtomsOn ) {
00672         for ( g = 0; g < g_upper; ++g ) {
00673           j = grouplist[g];
00674           if ( ! pExt_1[j].groupFixed ) {
00675             fixglist[fixg++] = j;
00676           }
00677         }
00678       }
00679 
00680       fixg_upper = fixg;
00681       if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00682 
00683     #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00684 
00685     *(pairlists.newlist(1)) = i_upper;
00686     pairlists.newsize(1);
00687 
00688   } else { // if ( savePairlists || ! usePairlists )
00689 
00690     plint *i_upper_check;
00691     int i_upper_check_count;
00692     pairlists.nextlist(&i_upper_check,&i_upper_check_count);
00693     if ( i_upper_check[0] != i_upper )
00694       NAMD_bug("pairlist i_upper mismatch!");
00695 
00696   } // if ( savePairlists || ! usePairlists )
00697 
00698   SELF(
00699   int j_hgroup = 0;
00700   int g_lower = 0;
00701   int fixg_lower = 0;
00702   )
00703   int pairlistindex=0;
00704   int pairlistoffset=0;
00705 
00706   
00707 
00708 #if ( SHORT( FAST( 1+ ) ) 0 )
00709 #if ( PAIR( 1+ ) 0 )
00710     Force *f_0 = params->ff[0];
00711     Force *f_1 = params->ff[1];
00712 #else
00713 #define f_1 f_0
00714     NBWORKARRAY(Force,f_0,i_upper)
00715     memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00716 #endif
00717 #endif
00718 #if ( FULL( 1+ ) 0 )
00719 #if ( PAIR( 1+ ) 0 )
00720     Force *fullf_0 = params->fullf[0];
00721     Force *fullf_1 = params->fullf[1];
00722 #else
00723 #define fullf_1 fullf_0
00724     NBWORKARRAY(Force,fullf_0,i_upper);
00725     memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00726 #endif
00727 #endif
00728     
00729 
00730   int numParts = params->numParts;
00731   int myPart = params->minPart;
00732   int groupCount = 0;
00733 
00734   for ( i = 0; i < (i_upper SELF(- 1)); ++i )
00735   {
00736     const CompAtom &p_i = p_0[i];
00737     const CompAtomExt &pExt_i = pExt_0[i];
00738     if ( p_i.hydrogenGroupSize ) {
00739       //save current group count
00740       int curgrpcount = groupCount;      
00741       //increment group count
00742       groupCount ++;
00743 
00744       if (groupCount >= numParts)
00745         groupCount = 0;
00746       
00747       if ( curgrpcount != myPart ) {
00748         i += p_i.hydrogenGroupSize - 1;
00749         
00750         //Power PC alignment constraint
00751 #ifdef ARCH_POWERPC
00752         __dcbt((void *) &(p_0[i+1]));
00753 #endif
00754         continue;
00755       }
00756     }
00757 
00758     register const BigReal p_i_x = p_i.position.x + offset_x;
00759     register const BigReal p_i_y = p_i.position.y + offset_y;
00760     register const BigReal p_i_z = p_i.position.z + offset_z;
00761 
00762     ALCH(const int p_i_partition = p_i.partition;)
00763 
00764     PPROF(
00765         const int p_i_partition = p_i.partition;
00766         int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00767         pp_clamp(n1, pressureProfileSlabs);
00768         )
00769 
00770   SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00771 
00772   if ( savePairlists || ! usePairlists ) {
00773 
00774     if ( ! savePairlists ) pairlists.reset();  // limit space usage
00775 
00776     #ifdef MEM_OPT_VERSION
00777     const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);        
00778     const int excl_min = pExt_i.id + exclcheck->min;
00779     const int excl_max = pExt_i.id + exclcheck->max;
00780     #else
00781     const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
00782     const int excl_min = exclcheck->min;
00783     const int excl_max = exclcheck->max;
00784     #endif
00785     const char * excl_flags_var;
00786     if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00787     else {  // need to build list on the fly
00788 
00789     //TODO: Should change later!!!!!!!!!! --Chao Mei
00790     //Now just for the sake of passing compilation
00791     #ifndef MEM_OPT_VERSION 
00792       if ( excl_flags_buff ) {
00793         int nl,l;
00794         nl = full_excl[0] + 1;
00795         for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00796         nl = mod_excl[0] + 1;
00797         for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00798       } else {
00799         excl_flags_buff = new char[mol->numAtoms];
00800         memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00801       }
00802       int nl,l;
00803       full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
00804       nl = full_excl[0] + 1;
00805       for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00806       mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
00807       nl = mod_excl[0] + 1;
00808       for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00809       excl_flags_var = excl_flags_buff;
00810     #endif
00811 
00812     }
00813     const char * const excl_flags = excl_flags_var;
00814 
00815   if ( p_i.nonbondedGroupSize ) {
00816 
00817     pairlistindex = 0;  // initialize with 0 elements
00818     pairlistoffset=0;
00819     const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
00820 
00821     // If patch divisions are not made by hydrogen groups, then
00822     // hydrogenGroupSize is set to 1 for all atoms.  Thus we can
00823     // carry on as if we did have groups - only less efficiently.
00824     // An optimization in this case is to not rebuild the temporary
00825     // pairlist but to include every atom in it.  This should be a
00826     // a very minor expense.
00827 
00828     SELF
00829     (
00830       if ( p_i.hydrogenGroupSize ) {
00831         // exclude child hydrogens of i
00832         // j_hgroup = i + p_i.hydrogenGroupSize;  (moved above)
00833         while ( g_lower < g_upper &&
00834                 grouplist[g_lower] < j_hgroup ) ++g_lower;
00835         while ( fixg_lower < fixg_upper &&
00836                 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00837       }
00838       // add all child or sister hydrogens of i
00839       for ( j = i + 1; j < j_hgroup; ++j ) {
00840         pairlist[pairlistindex++] = j;
00841       }
00842     )
00843 
00844     // add remaining atoms to pairlist via hydrogen groups
00845     register plint *pli = pairlist + pairlistindex;
00846 
00847     {
00848       // Atom Sort : Modify the values of g and gu based on the added information
00849       //   of the linear projections (sort values) information.
00850       #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00851 
00852         register int g = 0;
00853         register BigReal p_i_sortValue = p_0_sortValues[i];
00854         const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00855         register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00856 
00857         // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
00858         register int lower = 0;
00859         register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00860         while ((upper - lower) > 1) {
00861           register int j = ((lower + upper) >> 1);
00862           register BigReal jSortVal = sortValues[j].sortValue;
00863           if (jSortVal < p_i_sortValue_plus_windowRadius) {
00864             lower = j;
00865           } else {
00866             upper = j;
00867           }
00868         }
00869         const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00870 
00871       #else
00872 
00873         register plint *gli = goodglist;
00874         const plint *glist = ( groupfixed ? fixglist : grouplist );
00875         SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00876         const int gu = ( groupfixed ? fixg_upper : g_upper );
00877         register int g = PAIR(0) SELF(gl);
00878 
00879       #endif
00880 
00881 
00882       if ( g < gu ) {
00883         int hu = 0;
00884 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00885         if ( gu - g  >  6 ) { 
00886 
00887           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00888             register SortEntry* sortEntry0 = sortValues + g;
00889             register SortEntry* sortEntry1 = sortValues + g + 1;
00890             register int jprev0 = sortEntry0->index;
00891             register int jprev1 = sortEntry1->index;
00892           #else
00893             register int jprev0 = glist[g    ];
00894             register int jprev1 = glist[g + 1];
00895           #endif
00896           
00897           register int j0;
00898           register int j1;
00899 
00900           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00901           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00902           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00903 
00904           // these don't change here, so we could move them into outer scope
00905           const __m128d P_I_X = _mm_set1_pd(p_i_x);
00906           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00907           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00908  
00909           g += 2;
00910           for ( ; g < gu - 2; g +=2 ) {
00911             // compute 1d distance, 2-way parallel       
00912             j0     =  jprev0;
00913             j1     =  jprev1;
00914 
00915             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00916             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00917             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00918             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00919             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00920             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00921             
00922             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00923               sortEntry0 = sortValues + g;
00924               sortEntry1 = sortValues + g + 1;
00925               jprev0 = sortEntry0->index;
00926               jprev1 = sortEntry1->index;
00927             #else
00928               jprev0     =  glist[g  ];
00929               jprev1     =  glist[g+1];
00930             #endif
00931            
00932             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00933             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00934             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00935 
00936             __align(16) double r2_01[2];
00937             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
00938 
00939             // XXX these could possibly benefit from SSE-based conditionals
00940             bool test0 = ( r2_01[0] < groupplcutoff2 );
00941             bool test1 = ( r2_01[1] < groupplcutoff2 ); 
00942             
00943             //removing ifs benefits on many architectures
00944             //as the extra stores will only warm the cache up
00945             goodglist [ hu         ] = j0;
00946             goodglist [ hu + test0 ] = j1;
00947             
00948             hu += test0 + test1;
00949           }
00950           g-=2;
00951         }
00952 #else
00953         if ( gu - g  >  6 ) { 
00954 
00955           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00956             register SortEntry* sortEntry0 = sortValues + g;
00957             register SortEntry* sortEntry1 = sortValues + g + 1;
00958             register int jprev0 = sortEntry0->index;
00959             register int jprev1 = sortEntry1->index;
00960           #else
00961             register int jprev0 = glist[g    ];
00962             register int jprev1 = glist[g + 1];
00963           #endif
00964           
00965           register  int j0; 
00966           register  int j1; 
00967           
00968           register  BigReal pj_x_0, pj_x_1; 
00969           register  BigReal pj_y_0, pj_y_1; 
00970           register  BigReal pj_z_0, pj_z_1; 
00971           register  BigReal t_0, t_1, r2_0, r2_1;
00972           
00973           pj_x_0 = p_1[jprev0].position.x;
00974           pj_x_1 = p_1[jprev1].position.x;  
00975           
00976           pj_y_0 = p_1[jprev0].position.y; 
00977           pj_y_1 = p_1[jprev1].position.y;  
00978           
00979           pj_z_0 = p_1[jprev0].position.z; 
00980           pj_z_1 = p_1[jprev1].position.z;
00981           
00982           g += 2;
00983           for ( ; g < gu - 2; g +=2 ) {
00984             // compute 1d distance, 2-way parallel       
00985             j0     =  jprev0;
00986             j1     =  jprev1;
00987             
00988             t_0    =  p_i_x - pj_x_0;
00989             t_1    =  p_i_x - pj_x_1;
00990             r2_0   =  t_0 * t_0;
00991             r2_1   =  t_1 * t_1;
00992             
00993             t_0    =  p_i_y - pj_y_0;
00994             t_1    =  p_i_y - pj_y_1;
00995             r2_0  +=  t_0 * t_0;
00996             r2_1  +=  t_1 * t_1;
00997             
00998             t_0    =  p_i_z - pj_z_0;
00999             t_1    =  p_i_z - pj_z_1;
01000             r2_0  +=  t_0 * t_0;
01001             r2_1  +=  t_1 * t_1;
01002             
01003             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01004               sortEntry0 = sortValues + g;
01005               sortEntry1 = sortValues + g + 1;
01006               jprev0 = sortEntry0->index;
01007               jprev1 = sortEntry1->index;
01008             #else
01009               jprev0     =  glist[g  ];
01010               jprev1     =  glist[g+1];
01011             #endif
01012             
01013             pj_x_0     =  p_1[jprev0].position.x;
01014             pj_x_1     =  p_1[jprev1].position.x;
01015             pj_y_0     =  p_1[jprev0].position.y; 
01016             pj_y_1     =  p_1[jprev1].position.y;
01017             pj_z_0     =  p_1[jprev0].position.z; 
01018             pj_z_1     =  p_1[jprev1].position.z;
01019             
01020             bool test0 = ( r2_0 < groupplcutoff2 );
01021             bool test1 = ( r2_1 < groupplcutoff2 ); 
01022             
01023             //removing ifs benefits on many architectures
01024             //as the extra stores will only warm the cache up
01025             goodglist [ hu         ] = j0;
01026             goodglist [ hu + test0 ] = j1;
01027             
01028             hu += test0 + test1;
01029           }
01030           g-=2;
01031         }
01032 #endif
01033         
01034         for (; g < gu; g++) {
01035 
01036           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01037             register SortEntry* sortEntry = sortValues + g;
01038             register int j = sortEntry->index;
01039           #else
01040             int j = glist[g];
01041           #endif
01042 
01043           BigReal p_j_x = p_1[j].position.x;
01044           BigReal p_j_y = p_1[j].position.y;
01045           BigReal p_j_z = p_1[j].position.z;
01046           
01047           BigReal r2 = p_i_x - p_j_x;
01048           r2 *= r2;
01049           BigReal t2 = p_i_y - p_j_y;
01050           r2 += t2 * t2;
01051           t2 = p_i_z - p_j_z;
01052           r2 += t2 * t2;
01053 
01054           if ( r2 <= groupplcutoff2 ) 
01055             goodglist[hu ++] = j; 
01056         }
01057 
01058         for ( int h=0; h<hu; ++h ) {
01059           int j = goodglist[h];
01060           int nbgs = p_1[j].nonbondedGroupSize;
01061           pli[0] = j;   // copy over the next four in any case
01062           pli[1] = j+1;
01063           pli[2] = j+2;
01064           pli[3] = j+3; // assume nbgs <= 4
01065           pli += nbgs;
01066         }
01067       }
01068     }
01069 
01070     pairlistindex = pli - pairlist;
01071     // make sure padded element on pairlist points to real data
01072     if ( pairlistindex ) {
01073        pairlist[pairlistindex] = pairlist[pairlistindex-1];
01074     } /* PAIR( else {  // skip empty loops if no pairs were found
01075        int nbgs = p_i.nonbondedGroupSize;
01076        i += nbgs - 1;
01077        continue;
01078     } ) */
01079   } // if i is hydrogen group parent
01080   SELF
01081     (
01082     // self-comparisions require list to be incremented
01083     // pair-comparisions use entire list (pairlistoffset is 0)
01084     else pairlistoffset++;
01085     )
01086 
01087     const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
01088 
01089     register plint *pli = pairlist2;
01090 #if 1 ALCH(-1)
01091     plint *pairlistn = pairlists.newlist(j_upper + 5 + 1 + 5) SELF(+ 1);
01092 #else
01093     plint *pairlistn = pairlistnAlch;
01094 #endif
01095     SELF( plint &pairlistn_skip = *(pairlistn-1); )
01096     register plint *plin = pairlistn;
01097 
01098 
01099     INT(
01100     if ( pairInteractionOn ) {
01101       const int ifep_type = p_i.partition;
01102       if (pairInteractionSelf) {
01103         if (ifep_type != 1) continue;
01104         for (int k=pairlistoffset; k<pairlistindex; k++) {
01105           j = pairlist[k];
01106           const int jfep_type = p_1[j].partition;
01107           // for pair-self, both atoms must be in group 1.
01108           if (jfep_type == 1) {
01109             *(pli++) = j;
01110           }
01111         }
01112       } else {
01113         if (ifep_type != 1 && ifep_type != 2) continue;
01114         for (int k=pairlistoffset; k<pairlistindex; k++) {
01115           j = pairlist[k];
01116           const int jfep_type = p_1[j].partition;
01117           // for pair, must have one from each group.
01118           if (ifep_type + jfep_type == 3) {
01119             *(pli++) = j;
01120           }
01121         }
01122       }
01123       int npair2_int = pli - pairlist2;
01124       pli = pairlist2;
01125       for (int k=0; k<npair2_int; k++) {
01126         j = pairlist2[k];
01127         BigReal p_j_x = p_1[j].position.x;
01128         BigReal r2 = p_i_x - p_j_x;
01129         r2 *= r2;
01130         BigReal p_j_y = p_1[j].position.y;
01131         BigReal t2 = p_i_y - p_j_y;
01132         r2 += t2 * t2;
01133         BigReal p_j_z = p_1[j].position.z;
01134         t2 = p_i_z - p_j_z;
01135         r2 += t2 * t2;
01136         if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01137           int atom2 = pExt_1[j].id;
01138           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01139           else *(plin++) = j;
01140         }
01141       }
01142     } else
01143     )
01144     if ( atomfixed ) {
01145       for (int k=pairlistoffset; k<pairlistindex; k++) {
01146         j = pairlist[k];
01147         BigReal p_j_x = p_1[j].position.x;
01148         BigReal r2 = p_i_x - p_j_x;
01149         r2 *= r2;
01150         BigReal p_j_y = p_1[j].position.y;
01151         BigReal t2 = p_i_y - p_j_y;
01152         r2 += t2 * t2;
01153         BigReal p_j_z = p_1[j].position.z;
01154         t2 = p_i_z - p_j_z;
01155         r2 += t2 * t2;
01156         if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01157           int atom2 = pExt_1[j].id;
01158           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01159           else *(plin++) = j;
01160         }
01161       }
01162     } else {
01163       int k = pairlistoffset;
01164       int ku = pairlistindex;
01165       if ( k < ku ) {
01166 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01167         if ( ku - k  >  6 ) {      
01168           register  int jprev0 = pairlist [k    ];
01169           register  int jprev1 = pairlist [k + 1];
01170           
01171           register  int j0; 
01172           register  int j1; 
01173 
01174           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01175           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01176           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01177 
01178           // these don't change here, so we could move them into outer scope
01179           const __m128d P_I_X = _mm_set1_pd(p_i_x);
01180           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01181           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01182           
01183           int atom2_0 = pExt_1[jprev0].id;
01184           int atom2_1 = pExt_1[jprev1].id;
01185           
01186           k += 2;
01187           for ( ; k < ku - 2; k +=2 ) {
01188             // compute 1d distance, 2-way parallel       
01189             j0     =  jprev0;
01190             j1     =  jprev1;
01191             
01192             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01193             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01194             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01195             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01196             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01197             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01198             
01199             jprev0     =  pairlist[k];
01200             jprev1     =  pairlist[k+1];
01201             
01202             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01203             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01204             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01205 
01206             __align(16) double r2_01[2];
01207             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01208             
01209             if (r2_01[0] <= plcutoff2) {
01210               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01211                 *(pli++) = j0;
01212               else 
01213                 *(plin++) = j0;
01214             }
01215             atom2_0 = pExt_1[jprev0].id;
01216             
01217             if (r2_01[1] <= plcutoff2) {
01218               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01219                 *(pli++) = j1;
01220               else 
01221                 *(plin++) = j1;
01222             }
01223             atom2_1 = pExt_1[jprev1].id;            
01224           }
01225           k-=2;
01226         }       
01227 #else
01228         if ( ku - k  >  6 ) {      
01229           register  int jprev0 = pairlist [k];
01230           register  int jprev1 = pairlist [k + 1];
01231           
01232           register  int j0; 
01233           register  int j1; 
01234           
01235           register  BigReal pj_x_0, pj_x_1; 
01236           register  BigReal pj_y_0, pj_y_1; 
01237           register  BigReal pj_z_0, pj_z_1; 
01238           register  BigReal t_0, t_1, r2_0, r2_1;
01239           
01240           pj_x_0 = p_1[jprev0].position.x;
01241           pj_x_1 = p_1[jprev1].position.x;  
01242           
01243           pj_y_0 = p_1[jprev0].position.y; 
01244           pj_y_1 = p_1[jprev1].position.y;  
01245           
01246           pj_z_0 = p_1[jprev0].position.z; 
01247           pj_z_1 = p_1[jprev1].position.z;
01248           
01249           int atom2_0 = pExt_1[jprev0].id;
01250           int atom2_1 = pExt_1[jprev1].id;
01251           
01252           k += 2;
01253           for ( ; k < ku - 2; k +=2 ) {
01254             // compute 1d distance, 2-way parallel       
01255             j0     =  jprev0;
01256             j1     =  jprev1;
01257             
01258             t_0    =  p_i_x - pj_x_0;
01259             t_1    =  p_i_x - pj_x_1;
01260             r2_0   =  t_0 * t_0;
01261             r2_1   =  t_1 * t_1;
01262             
01263             t_0    =  p_i_y - pj_y_0;
01264             t_1    =  p_i_y - pj_y_1;
01265             r2_0  +=  t_0 * t_0;
01266             r2_1  +=  t_1 * t_1;
01267             
01268             t_0    =  p_i_z - pj_z_0;
01269             t_1    =  p_i_z - pj_z_1;
01270             r2_0  +=  t_0 * t_0;
01271             r2_1  +=  t_1 * t_1;
01272             
01273             jprev0     =  pairlist[k];
01274             jprev1     =  pairlist[k+1];
01275             
01276             pj_x_0     =  p_1[jprev0].position.x;
01277             pj_x_1     =  p_1[jprev1].position.x;
01278             pj_y_0     =  p_1[jprev0].position.y; 
01279             pj_y_1     =  p_1[jprev1].position.y;
01280             pj_z_0     =  p_1[jprev0].position.z; 
01281             pj_z_1     =  p_1[jprev1].position.z;
01282             
01283             if (r2_0 <= plcutoff2) {
01284               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01285                 *(pli++) = j0;
01286               else 
01287                 *(plin++) = j0;
01288             }
01289             atom2_0 = pExt_1[jprev0].id;
01290             
01291             if (r2_1 <= plcutoff2) {
01292               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01293                 *(pli++) = j1;
01294               else 
01295                 *(plin++) = j1;
01296              }
01297             atom2_1 = pExt_1[jprev1].id;            
01298           }
01299           k-=2;
01300         }       
01301 #endif
01302 
01303         for (; k < ku; k++) {
01304           int j = pairlist[k];
01305           int atom2 = pExt_1[j].id;
01306           
01307           BigReal p_j_x = p_1[j].position.x;
01308           BigReal p_j_y = p_1[j].position.y;
01309           BigReal p_j_z = p_1[j].position.z;
01310           
01311           BigReal r2 = p_i_x - p_j_x;
01312           r2 *= r2;
01313           BigReal t2 = p_i_y - p_j_y;
01314           r2 += t2 * t2;
01315           t2 = p_i_z - p_j_z;
01316           r2 += t2 * t2;
01317           
01318           if (r2 <= plcutoff2) {
01319             if ( atom2 >= excl_min && atom2 <= excl_max ) 
01320               *(pli++) = j;
01321             else 
01322               *(plin++) = j;
01323           }
01324         }
01325       }
01326     }
01327     int npair2 = pli - pairlist2;
01328     if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
01329 
01330     plint *plix = pairlistx;
01331     plint *plim = pairlistm;
01332     plint *pln = pairlistn;
01333     ALCH(
01334     plint *plinA1 = pairlistnA1;
01335     plint *plixA1 = pairlistxA1;
01336     plint *plimA1 = pairlistmA1;
01337     plint *plinA2 = pairlistnA2;
01338     plint *plixA2 = pairlistxA2;
01339     plint *plimA2 = pairlistmA2;
01340     )
01341     int k=0;
01342 #if 1 ALCH(-1)
01343     SELF(
01344     for (; pln < plin && *pln < j_hgroup; ++pln) {
01345       *(plix++) = *pln;  // --exclChecksum;
01346     }
01347     pairlistn_skip = pln - pairlistn;
01348     for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01349       *(plix++) = pairlist2[k];  // --exclChecksum;
01350     }
01351     )
01352 #endif
01353 ALCH(
01354     SELF(
01355     for (; pln < plin && *pln < j_hgroup; ++pln) {
01356       switch (pswitchTable[4*p_i_partition]) { //p_i_partition + 3*p_1[i].partition
01357       case 0: *(plix++) = *pln;  break;
01358       case 1: *(plixA1++) = *pln; break;
01359       case 2: *(plixA2++) = *pln; break;
01360       default: NAMD_die("Alchemical pairlist error"); break;
01361       }
01362     }
01363     for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01364       switch (pswitchTable[4*p_i_partition]) {
01365       case 0: *(plix++) = pairlist2[k];  break;
01366       case 1: *(plixA1++) = pairlist2[k]; break;
01367       case 2: *(plixA2++) = pairlist2[k]; break;
01368       default: NAMD_die("Alchemical pairlist error"); break;
01369       }
01370     }
01371     )
01372 )
01373     for (; k < npair2; ++k ) {
01374       int j = pairlist2[k];
01375       int atom2 = pExt_1[j].id;
01376       int excl_flag = excl_flags[atom2];
01377       ALCH(int pswitch = pswitchTable[p_i_partition + 3*(p_1[j].partition)];)
01378       switch ( excl_flag ALCH( + 3 * pswitch)) {
01379       case 0:  *(plin++) = j;  break;
01380       case 1:  *(plix++) = j;  break;
01381       case 2:  *(plim++) = j;  break;
01382       ALCH(
01383       case 3:  *(plinA1++) = j; break;
01384       case 6:  *(plinA2++) = j; break;
01385       case 5:  *(plimA1++) = j; break;
01386       case 8:  *(plimA2++) = j; break;
01387       case 4:  *(plixA1++) = j; break;
01388       case 7:  *(plixA2++) = j; break;
01389       )
01390       }
01391     }
01392     // exclChecksum += (plix - pairlistx);
01393     // exclChecksum += (plim - pairlistm);
01394 
01395 
01396 #if 1 ALCH(-1)
01397     npairn = plin - pln;
01398     pairlistn_save = pln;
01399     pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
01400     pairlists.newsize(plin - pairlistn SELF(+ 1) + 1);
01401 #else
01402     plint *plinA0 = pairlistnA0;
01403     int unsortedNpairn = plin - pln;
01404     for ( k=0; k<unsortedNpairn; ++k ) {
01405       int j = pln[k];
01406       switch(pswitchTable[p_i_partition + 3*(p_1[j].partition)]) {
01407         case 0:  *(plinA0++) = j; break;
01408         case 1:  *(plinA1++) = j; break;
01409         case 2:  *(plinA2++) = j; break;
01410       }
01411     }
01412     
01413     npairn = plinA0 - pairlistnA0;
01414     // FB preallocation (incl extra for overhead) seems to be necessary
01415     pairlistn_save = pairlists.newlist(j_upper + 30);
01416     for ( k=0; k<npairn; ++k ) {
01417       pairlistn_save[k] = pairlistnA0[k];
01418     }
01419     pairlistn_save[k] = k ? pairlistn_save[k-1] : -1;
01420     pairlists.newsize(npairn + 1);
01421 
01422 #endif
01423     
01424     npairx = plix - pairlistx;
01425     pairlistx_save = pairlists.newlist(npairx + 1);
01426     for ( k=0; k<npairx; ++k ) {
01427       pairlistx_save[k] = pairlistx[k];
01428     }
01429     pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
01430     pairlists.newsize(npairx + 1);
01431 
01432     npairm = plim - pairlistm;
01433     pairlistm_save = pairlists.newlist(npairm + 1);
01434     for ( k=0; k<npairm; ++k ) {
01435       pairlistm_save[k] = pairlistm[k];
01436     }
01437     pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
01438     pairlists.newsize(npairm + 1);
01439 
01440     
01441 #if 0 ALCH(+1)
01442 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
01443 { \
01444   (NPAIRS) = (PL2) - (PL1); \
01445   (PLSAVE) = pairlists.newlist((NPAIRS) + 1); \
01446   for ( k=0; k<(NPAIRS); ++k ) { \
01447     (PLSAVE)[k] = (PL1)[k]; \
01448   } \
01449   (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
01450   pairlists.newsize((NPAIRS) + 1); \
01451 }
01452   
01453     PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
01454     PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
01455     PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
01456     PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
01457     PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
01458     PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
01459 #undef PAIRLISTFROMARRAY
01460 
01461 #endif
01462     
01463     
01464         // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
01465   } else { // if ( savePairlists || ! usePairlists )
01466         // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
01467 
01468     pairlists.nextlist(&pairlistn_save,&npairn);  --npairn;
01469     //if ( npairn > 1000 )
01470 //      iout << i << " " << i_upper << " " << npairn << " n\n" << endi;
01471 #if 1 ALCH(-1)
01472     SELF(
01473     int pairlistn_skip = *pairlistn_save;
01474     pairlistn_save += (pairlistn_skip + 1);
01475     npairn -= (pairlistn_skip + 1);
01476     )
01477 #endif
01478 
01479     pairlists.nextlist(&pairlistx_save,&npairx);  --npairx;
01480 
01481     //if ( npairx > 1000 )
01482 //      iout << i << " " << i_upper << " " << npairx << " x\n" << endi;
01483     // exclChecksum += npairx;
01484     pairlists.nextlist(&pairlistm_save,&npairm);  --npairm;
01485     ALCH(
01486     pairlists.nextlist(&pairlistnA1_save,&npairnA1);  --npairnA1;
01487     pairlists.nextlist(&pairlistxA1_save,&npairxA1);  --npairxA1;
01488     pairlists.nextlist(&pairlistmA1_save,&npairmA1);  --npairmA1;
01489     pairlists.nextlist(&pairlistnA2_save,&npairnA2);  --npairnA2;
01490     pairlists.nextlist(&pairlistxA2_save,&npairxA2);  --npairxA2;
01491     pairlists.nextlist(&pairlistmA2_save,&npairmA2);  --npairmA2;
01492     )
01493     //if ( npairm > 1000 )
01494 //      iout << i << " " << i_upper << " " << npairm << " m\n" << endi;
01495     // exclChecksum += npairm;
01496 
01497   } // if ( savePairlists || ! usePairlists )
01498 
01499     LES( BigReal *lambda_table_i =
01500                         lambda_table + (lesFactor+1) * p_i.partition; )
01501 
01502     INT(
01503     const BigReal force_sign = (
01504       ( pairInteractionOn && ! pairInteractionSelf ) ?
01505         ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
01506     );
01507       
01508     )
01509 
01510     const BigReal kq_i = COLOUMB * p_i.charge * scaling * dielectric_1;
01511     const LJTable::TableEntry * const lj_row =
01512                 ljTable->table_row(p_i.vdwType);
01513 
01514     SHORT( FAST( BigReal f_i_x = 0.; ) )
01515     SHORT( FAST( BigReal f_i_y = 0.; ) )
01516     SHORT( FAST( BigReal f_i_z = 0.; ) )
01517     FULL( BigReal fullf_i_x = 0.; )
01518     FULL( BigReal fullf_i_y = 0.; )
01519     FULL( BigReal fullf_i_z = 0.; )
01520 
01521     int npairi;
01522     int k;
01523 
01524     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01525         p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
01526         r2_delta, r2list);
01527 
01528 #define NORMAL(X) X
01529 #define EXCLUDED(X)
01530 #define MODIFIED(X)
01531 #include  "ComputeNonbondedBase2.h"
01532 #undef NORMAL
01533 #undef EXCLUDED
01534 #undef MODIFIED
01535 
01536     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01537         p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
01538         r2_delta, r2list);
01539     exclChecksum += npairi;
01540 
01541 #define NORMAL(X)
01542 #define EXCLUDED(X)
01543 #define MODIFIED(X) X
01544 #include  "ComputeNonbondedBase2.h"
01545 #undef NORMAL
01546 #undef EXCLUDED
01547 #undef MODIFIED
01548 
01549 #ifdef FULLELECT
01550     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01551         p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
01552         r2_delta, r2list);
01553     exclChecksum += npairi;
01554 #ifndef NAMD_CUDA
01555     SELF(
01556     for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01557     )
01558 #endif
01559 
01560 #undef FAST
01561 #define FAST(X)
01562 #define NORMAL(X)
01563 #define EXCLUDED(X) X
01564 #define MODIFIED(X)
01565 #include  "ComputeNonbondedBase2.h"
01566 #undef FAST
01567 #ifdef SLOWONLY
01568   #define FAST(X)
01569 #else
01570   #define FAST(X) X
01571 #endif
01572 #undef NORMAL
01573 #undef EXCLUDED
01574 #undef MODIFIED
01575 #else
01576     exclChecksum += npairx;
01577 #ifndef NAMD_CUDA
01578     SELF(
01579       for (k=0; k<npairx && pairlistx_save[k] < j_hgroup; ++k) --exclChecksum;
01580     )
01581 #endif
01582 #endif
01583 
01584 #if 0 ALCH(+1)   // nonbondedbase2 for alchemical-separeted pairlists
01585 
01586     #undef ALCHPAIR
01587     #define ALCHPAIR(X) X
01588     #undef NOT_ALCHPAIR
01589     #define NOT_ALCHPAIR(X)
01590     BigReal myLambda; FEP(BigReal myLambda2;)
01591     BigReal myElecLambda;  FEP(BigReal myElecLambda2;)
01592     BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
01593     BigReal myVdwShift; FEP(BigReal myVdwShift2;)
01594     BigReal alch_vdw_energy; BigReal alch_vdw_force;
01595     FEP(BigReal alch_vdw_energy_2;) TI(BigReal alch_vdw_dUdl;)
01596     BigReal shiftedElec; BigReal shiftedElecForce;
01597     
01598     /********************************************************************/
01599     /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
01600     /********************************************************************/
01601     #define NORMAL(X) X
01602     #define EXCLUDED(X)
01603     #define MODIFIED(X)
01604 
01605     #define ALCH1(X) X
01606     #define ALCH2(X)
01607     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01608             p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
01609             r2_delta, r2list);
01610     #include  "ComputeNonbondedBase2.h" // normal, direction 'up'
01611     #undef ALCH1
01612     #undef ALCH2
01613 
01614     #define ALCH1(X)
01615     #define ALCH2(X) X
01616     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01617             p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
01618             r2_delta, r2list);
01619     #include  "ComputeNonbondedBase2.h" // normal, direction 'down'
01620     #undef ALCH1
01621     #undef ALCH2
01622 
01623     #undef NORMAL
01624     #undef EXCLUDED
01625     #undef MODIFIED
01626     
01627     /********************************************************************/
01628     /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
01629     /********************************************************************/
01630     #define NORMAL(X)
01631     #define EXCLUDED(X)
01632     #define MODIFIED(X) X
01633 
01634     #define ALCH1(X) X
01635     #define ALCH2(X)
01636     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01637             p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
01638             r2_delta, r2list);
01639         exclChecksum += npairi;
01640     #include  "ComputeNonbondedBase2.h" // modified, direction 'up'
01641     #undef ALCH1
01642     #undef ALCH2
01643 
01644     #define ALCH1(X)
01645     #define ALCH2(X) X
01646     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01647             p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
01648             r2_delta, r2list);
01649         exclChecksum += npairi;
01650     #include  "ComputeNonbondedBase2.h" // modified, direction 'down'
01651     #undef ALCH1
01652     #undef ALCH2
01653 
01654     #undef NORMAL
01655     #undef EXCLUDED
01656     #undef MODIFIED
01657     
01658     /********************************************************************/
01659     /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
01660     /********************************************************************/
01661     #ifdef FULLELECT
01662     #undef FAST
01663     #define FAST(X)
01664     #define NORMAL(X)
01665     #define EXCLUDED(X) X
01666     #define MODIFIED(X)
01667 
01668     #define ALCH1(X) X
01669     #define ALCH2(X)
01670     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01671             p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
01672             r2_delta, r2list);
01673         exclChecksum += npairi;
01674         SELF(
01675         for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01676         )
01677     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'up'
01678     #undef ALCH1
01679     #undef ALCH2
01680 
01681     #define ALCH1(X)
01682     #define ALCH2(X) X
01683     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01684             p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
01685             r2_delta, r2list);
01686         exclChecksum += npairi;
01687         SELF(
01688         for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01689         )
01690     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'down'
01691     #undef ALCH1
01692     #undef ALCH2
01693 
01694     #undef FAST
01695     #ifdef SLOWONLY
01696       #define FAST(X)
01697     #else
01698       #define FAST(X) X
01699     #endif
01700     #undef NORMAL
01701     #undef EXCLUDED
01702     #undef MODIFIED
01703     #else
01704         exclChecksum += npairxA1 + npairxA2;
01705         SELF(
01706           for (k=0; k<npairxA1 && pairlistxA1_save[k] < j_hgroup; ++k) --exclChecksum;
01707           for (k=0; k<npairxA2 && pairlistxA2_save[k] < j_hgroup; ++k) --exclChecksum;
01708         )
01709     #endif
01710 
01711     #undef ALCHPAIR
01712     #define ALCHPAIR(X)
01713     #undef NOT_ALCHPAIR
01714     #define NOT_ALCHPAIR(X) X
01715 
01716 #endif // end nonbondedbase2.h includes for alchemical pairlists
01717 
01718 #ifdef NETWORK_PROGRESS
01719     CkNetworkProgress();
01720 #endif
01721 
01722 #ifdef ARCH_POWERPC
01723     //data cache block touch the position structure
01724     __dcbt ((void *) &(p_0[i+1]));
01725     __prefetch_by_load ((void *)&(groupCount));
01726 #endif
01727 
01728 #ifndef NAMD_CUDA
01729     SHORT( FAST( f_0[i].x += f_i_x; ) )
01730     SHORT( FAST( f_0[i].y += f_i_y; ) )
01731     SHORT( FAST( f_0[i].z += f_i_z; ) )
01732     FULL( fullf_0[i].x += fullf_i_x; )
01733     FULL( fullf_0[i].y += fullf_i_y; )
01734     FULL( fullf_0[i].z += fullf_i_z; )
01735 #endif
01736 
01737         // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
01738   } // for i
01739 
01740   // PAIR(iout << "++++++++\n" << endi;)
01741 
01742 #ifdef f_1
01743 #undef f_1
01744 #endif
01745 #if ( SHORT( FAST( 1+ ) ) 0 )
01746 #if ( SELF( 1+ ) 0 )
01747   {
01748     Force *patch_f_0 = params->ff[0];
01749 
01750 #ifndef NAMD_CUDA
01751 #ifndef ARCH_POWERPC 
01752 #pragma ivdep
01753 #endif
01754     for ( int i = 0; i < i_upper; ++i ) {
01755       patch_f_0[i].x += f_0[i].x;
01756       patch_f_0[i].y += f_0[i].y;
01757       patch_f_0[i].z += f_0[i].z;
01758       virial_xx += f_0[i].x * p_0[i].position.x;
01759       virial_xy += f_0[i].x * p_0[i].position.y;
01760       virial_xz += f_0[i].x * p_0[i].position.z;
01761       virial_yy += f_0[i].y * p_0[i].position.y;
01762       virial_yz += f_0[i].y * p_0[i].position.z;
01763       virial_zz += f_0[i].z * p_0[i].position.z;
01764     }
01765 #endif
01766   }
01767 #endif
01768 #endif
01769 #ifdef fullf_1
01770 #undef fullf_1
01771 #endif
01772 #if ( FULL( 1+ ) 0 )
01773 #if ( SELF( 1+ ) 0 )
01774   {
01775     Force *patch_fullf_0 = params->fullf[0];
01776 #ifndef NAMD_CUDA
01777 #ifndef ARCH_POWERPC 
01778 #pragma ivdep
01779 #endif
01780     for ( int i = 0; i < i_upper; ++i ) {
01781       patch_fullf_0[i].x += fullf_0[i].x;
01782       patch_fullf_0[i].y += fullf_0[i].y;
01783       patch_fullf_0[i].z += fullf_0[i].z;
01784       fullElectVirial_xx += fullf_0[i].x * p_0[i].position.x;
01785       fullElectVirial_xy += fullf_0[i].x * p_0[i].position.y;
01786       fullElectVirial_xz += fullf_0[i].x * p_0[i].position.z;
01787       fullElectVirial_yy += fullf_0[i].y * p_0[i].position.y;
01788       fullElectVirial_yz += fullf_0[i].y * p_0[i].position.z;
01789       fullElectVirial_zz += fullf_0[i].z * p_0[i].position.z;
01790     }
01791 #endif
01792   }
01793 #endif
01794 #endif
01795 
01796 #ifndef NAMD_CUDA
01797   reduction[exclChecksumIndex] += exclChecksum;
01798 #endif
01799   FAST
01800   (
01801   ENERGY( reduction[vdwEnergyIndex] += vdwEnergy; )
01802   SHORT
01803   (
01804   ENERGY( reduction[electEnergyIndex] += electEnergy; )
01805   )
01806   ALCH
01807   (
01808     TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;) 
01809     TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;) 
01810     FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
01811   SHORT
01812   (
01813     FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
01814     TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;) 
01815     TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;) 
01816   )
01817   )
01818   SHORT
01819   (
01820   reduction[virialIndex_XX] += virial_xx;
01821   reduction[virialIndex_XY] += virial_xy;
01822   reduction[virialIndex_XZ] += virial_xz;
01823   reduction[virialIndex_YX] += virial_xy;
01824   reduction[virialIndex_YY] += virial_yy;
01825   reduction[virialIndex_YZ] += virial_yz;
01826   reduction[virialIndex_ZX] += virial_xz;
01827   reduction[virialIndex_ZY] += virial_yz;
01828   reduction[virialIndex_ZZ] += virial_zz;
01829   )
01830   )
01831   FULL
01832   (
01833   ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
01834   ALCH
01835   (
01836     FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
01837     TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;) 
01838     TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;) 
01839   )
01840   reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
01841   reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
01842   reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
01843   reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
01844   reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
01845   reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
01846   reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
01847   reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
01848   reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
01849   )
01850 
01851   delete [] excl_flags_buff;
01852 
01853 }


Generated on Tue Nov 24 04:07:46 2009 for NAMD by  doxygen 1.3.9.1