Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

ComputeNonbondedBase.h

Go to the documentation of this file.
00001 
00007 // Several special cases are defined:
00008 //   NBTYPE: exclusion method (NBPAIR, NBSELF -- mutually exclusive)
00009 //   FULLELECT full electrostatics calculation?
00010 
00011 #ifdef ARCH_POWERPC
00012 #include <builtins.h>
00013 #endif
00014 
00015 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00016 #include <emmintrin.h>  // We're using SSE2 intrinsics
00017 #if defined(__INTEL_COMPILER)
00018 #define __align(X) __declspec(align(X) )
00019 #elif defined(__GNUC__) || defined(__PGI)
00020 #define __align(X)  __attribute__((aligned(X) ))
00021 #else
00022 #define __align(X) __declspec(align(X) )
00023 #endif
00024 #endif
00025 
00026 #ifdef DEFINITION // (
00027   #include "LJTable.h"
00028   #include "Molecule.h"
00029   #include "ComputeNonbondedUtil.h"
00030 #endif // )
00031   #include "Parameters.h"
00032 #if NAMD_ComputeNonbonded_SortAtoms != 0
00033   #include "PatchMap.h"
00034 #endif
00035 
00036 // determining class name
00037 #undef NAME
00038 #undef CLASS
00039 #undef CLASSNAME
00040 #define NAME CLASSNAME(calc)
00041 
00042 #undef PAIR
00043 #if NBTYPE == NBPAIR
00044   #define PAIR(X) X
00045   #define CLASS ComputeNonbondedPair
00046   #define CLASSNAME(X) ENERGYNAME( X ## _pair )
00047 #else
00048   #define PAIR(X)
00049 #endif
00050 
00051 #undef SELF
00052 #if NBTYPE == NBSELF
00053   #define SELF(X) X
00054   #define CLASS ComputeNonbondedSelf
00055   #define CLASSNAME(X) ENERGYNAME( X ## _self )
00056 #else
00057   #define SELF(X)
00058 #endif
00059 
00060 #undef ENERGYNAME
00061 #undef ENERGY
00062 #undef NOENERGY
00063 #ifdef CALCENERGY
00064   #define ENERGY(X) X
00065   #define NOENERGY(X)
00066   #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
00067 #else
00068   #define ENERGY(X)
00069   #define NOENERGY(X) X
00070   #define ENERGYNAME(X) SLOWONLYNAME( X )
00071 #endif
00072 
00073 #undef SLOWONLYNAME
00074 #undef FAST
00075 #ifdef SLOWONLY
00076   #define FAST(X)
00077   #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
00078 #else
00079   #define FAST(X) X
00080   #define SLOWONLYNAME(X) MERGEELECTNAME( X )
00081 #endif
00082 
00083 #undef MERGEELECTNAME
00084 #undef SHORT
00085 #undef NOSHORT
00086 #ifdef MERGEELECT
00087   #define SHORT(X)
00088   #define NOSHORT(X) X
00089   #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
00090 #else
00091   #define SHORT(X) X
00092   #define NOSHORT(X)
00093   #define MERGEELECTNAME(X) FULLELECTNAME( X )
00094 #endif
00095 
00096 #undef FULLELECTNAME
00097 #undef FULL
00098 #undef NOFULL
00099 #ifdef FULLELECT
00100   #define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect )
00101   #define FULL(X) X
00102   #define NOFULL(X)
00103 #else
00104   #define FULLELECTNAME(X) TABENERGYNAME( X )
00105   #define FULL(X)
00106   #define NOFULL(X) X
00107 #endif
00108 
00109 #undef TABENERGYNAME
00110 #undef TABENERGY
00111 #undef NOTABENERGY
00112 #ifdef TABENERGYFLAG
00113   #define TABENERGYNAME(X) FEPNAME( X ## _tabener )
00114   #define TABENERGY(X) X
00115   #define NOTABENERGY(X)
00116 #else
00117   #define TABENERGYNAME(X) FEPNAME( X )
00118   #define TABENERGY(X)
00119   #define NOTABENERGY(X) X
00120 #endif
00121 
00122 // Here are what these macros stand for:
00123 // FEP/NOT_FEP: FEP free energy perturbation is active/inactive 
00124 //      (does NOT use LAM)
00125 // LES: locally-enhanced sampling is active
00126 // LAM: scale the potential by a factor lambda (except FEP)
00127 // INT: measure interaction energies
00128 // PPROF: pressure profiling
00129 
00130 #undef FEPNAME
00131 #undef FEP
00132 #undef LES
00133 #undef INT
00134 #undef PPROF
00135 #undef LAM
00136 #undef CUDA
00137 #undef ALCH
00138 #undef TI
00139 #undef GO
00140 #define FEPNAME(X) LAST( X )
00141 #define FEP(X)
00142 #define ALCHPAIR(X)
00143 #define NOT_ALCHPAIR(X) X
00144 #define LES(X)
00145 #define INT(X)
00146 #define PPROF(X)
00147 #define LAM(X)
00148 #define CUDA(X)
00149 #define ALCH(X)
00150 #define TI(X)
00151 #define GO(X)
00152 #ifdef FEPFLAG
00153   #undef FEPNAME
00154   #undef FEP
00155   #undef ALCH
00156   #define FEPNAME(X) LAST( X ## _fep )
00157   #define FEP(X) X
00158   #define ALCH(X) X
00159 #endif
00160 #ifdef TIFLAG
00161   #undef FEPNAME
00162   #undef TI
00163   #undef ALCH
00164   #define FEPNAME(X) LAST( X ## _ti )
00165   #define TI(X) X
00166   #define ALCH(X) X
00167 #endif
00168 #ifdef LESFLAG
00169   #undef FEPNAME
00170   #undef LES
00171   #undef LAM
00172   #define FEPNAME(X) LAST( X ## _les )
00173   #define LES(X) X
00174   #define LAM(X) X
00175 #endif
00176 #ifdef INTFLAG
00177   #undef FEPNAME
00178   #undef INT
00179   #define FEPNAME(X) LAST( X ## _int )
00180   #define INT(X) X
00181 #endif
00182 #ifdef PPROFFLAG
00183   #undef FEPNAME
00184   #undef INT
00185   #undef PPROF
00186   #define FEPNAME(X) LAST( X ## _pprof )
00187   #define INT(X) X
00188   #define PPROF(X) X
00189 #endif
00190 #ifdef GOFORCES
00191   #undef FEPNAME
00192   #undef GO
00193   #define FEPNAME(X) LAST( X ## _go )
00194   #define GO(X) X
00195 #endif
00196 #ifdef NAMD_CUDA
00197   #undef CUDA
00198   #define CUDA(X) X
00199 #endif
00200 
00201 #define LAST(X) X
00202 
00203 // see if things are really messed up
00204 SELF( PAIR( foo bar ) )
00205 LES( FEP( foo bar ) )
00206 LES( INT( foo bar ) )
00207 FEP( INT( foo bar ) )
00208 LAM( INT( foo bar ) )
00209 FEP( NOENERGY( foo bar ) )
00210 ENERGY( NOENERGY( foo bar ) )
00211 TABENERGY(NOTABENERGY( foo bar ) )
00212 
00213 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00214   #define COMPONENT_DOTPRODUCT(A,B)  ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))
00215 #endif
00216 
00217 
00218 // ************************************************************
00219 // function header
00220 void ComputeNonbondedUtil :: NAME
00221   ( nonbonded *params )
00222 
00223 // function body
00224 {
00225   // int NAME;  // to track errors via unused variable warnings
00226 
00227   if ( ComputeNonbondedUtil::commOnly ) return;
00228 
00229 #ifdef NAMD_CUDA
00230   NOENERGY(return;)
00231 #endif
00232 
00233   // speedup variables
00234   BigReal *reduction = params->reduction;
00235   SimParameters *simParams = Node::Object()->simParameters;
00236 
00237   PPROF(
00238   BigReal *pressureProfileReduction = params->pressureProfileReduction;
00239   const BigReal invThickness = 1.0 / pressureProfileThickness;
00240   )
00241 
00242   Pairlists &pairlists = *(params->pairlists);
00243 #ifdef NAMD_CUDA
00244   int savePairlists = 0;
00245   int usePairlists = 0;
00246 #else
00247   int savePairlists = params->savePairlists;
00248   int usePairlists = params->usePairlists;
00249 #endif
00250   pairlists.reset();
00251   // PAIR(iout << "--------\n" << endi;)
00252   
00253   // BEGIN LA
00254   const int doLoweAndersen = params->doLoweAndersen;
00255   // END LA
00256 
00257   // local variables
00258   int exclChecksum = 0;
00259   FAST
00260   (
00261    // JLai
00262   ENERGY( BigReal vdwEnergy = 0;
00263           GO( BigReal groLJEnergy = 0;
00264               BigReal groGaussEnergy = 0;
00265               BigReal goEnergyNative = 0;
00266               BigReal goEnergyNonnative = 0; ) )
00267   SHORT
00268   (
00269   ENERGY( BigReal electEnergy = 0; )
00270   )
00271 
00272   FEP
00273   (
00274   ENERGY( BigReal vdwEnergy_s = 0; BigReal vdwEnergy_s_Left = 0; )
00275   SHORT
00276   (
00277   ENERGY( BigReal electEnergy_s = 0; )
00278   )
00279   )
00280   )
00281     
00282 #ifndef A2_QPX
00283   FAST 
00284   (
00285   SHORT
00286   (
00287   BigReal virial_xx = 0;
00288   BigReal virial_xy = 0;
00289   BigReal virial_xz = 0;
00290   BigReal virial_yy = 0;
00291   BigReal virial_yz = 0;
00292   BigReal virial_zz = 0;
00293   )
00294   )
00295   FULL
00296   (
00297   ENERGY( BigReal fullElectEnergy = 0; )
00298   FEP
00299   (
00300   ENERGY( BigReal fullElectEnergy_s = 0; )
00301   )
00302   BigReal fullElectVirial_xx = 0;
00303   BigReal fullElectVirial_xy = 0;
00304   BigReal fullElectVirial_xz = 0;
00305   BigReal fullElectVirial_yy = 0;
00306   BigReal fullElectVirial_yz = 0;
00307   BigReal fullElectVirial_zz = 0;
00308   )
00309 #else 
00310   vector4double virial_v0   = (vector4double)(0.0);
00311   vector4double virial_v1   = (vector4double)(0.0);
00312   vector4double virial_v2   = (vector4double)(0.0);
00313   vector4double fullvirial_v0   = (vector4double) (0.0);
00314   vector4double fullvirial_v1   = (vector4double) (0.0);
00315   vector4double fullvirial_v2   = (vector4double) (0.0);
00316   BigReal fullElectEnergy = 0;
00317   BigReal fullElectEnergy_s = 0;
00318 #endif
00319 
00320   // Bringing stuff into local namespace for speed.
00321   const BigReal offset_x = params->offset.x;
00322   const BigReal offset_y = params->offset.y;
00323   const BigReal offset_z = params->offset.z;
00324 
00325   register const BigReal plcutoff2 = \
00326                         params->plcutoff * params->plcutoff;
00327   register const BigReal groupplcutoff2 = \
00328                         params->groupplcutoff * params->groupplcutoff;
00329   const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00330   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00331   LJTable::TableEntry ljNull;  ljNull.A = 0; ljNull.B = 0;
00332   const LJTable::TableEntry* const lj_null_pars = &ljNull;
00333   const Molecule* const mol = ComputeNonbondedUtil:: mol;
00334   SHORT
00335   (
00336   const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00337   )
00338   FULL
00339   (
00340   SHORT
00341   (
00342   const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00343   )
00344   NOSHORT
00345   (
00346 //#if 1 ALCH(-1)
00347   const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00348 //#else  // have to switch this for ALCH
00349 //  BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00350 //#endif
00351   )
00352   )
00353   BigReal scaling = ComputeNonbondedUtil:: scaling;
00354 #ifdef  A2_QPX
00355   vector4double scalingv = vec_splats(scaling);
00356 #endif
00357   const BigReal modf_mod = 1.0 - scale14;
00358   FAST
00359   (
00360   const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00361   const BigReal c1 = ComputeNonbondedUtil:: c1;
00362   const BigReal c3 = ComputeNonbondedUtil:: c3;
00363   )
00364   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00365   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00366   // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00367   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00368 
00369   ALCH(
00370     const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00371     const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00372     const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00373     const BigReal alchElecLambdaStart = ComputeNonbondedUtil::alchElecLambdaStart;
00374     const BigReal alchVdwLambdaEnd = ComputeNonbondedUtil::alchVdwLambdaEnd;
00375     const BigReal alchVdwShiftCoeff = ComputeNonbondedUtil::alchVdwShiftCoeff;
00376     const Bool Fep_WCA_repuOn = ComputeNonbondedUtil::Fep_WCA_repuOn;
00377     const Bool Fep_WCA_dispOn = ComputeNonbondedUtil::Fep_WCA_dispOn;
00378     const Bool Fep_ElecOn = ComputeNonbondedUtil::Fep_ElecOn;
00379     const Bool Fep_Wham = ComputeNonbondedUtil::Fep_Wham;
00380     const BigReal WCA_rcut1 = ComputeNonbondedUtil::WCA_rcut1;
00381     const BigReal WCA_rcut2 = ComputeNonbondedUtil::WCA_rcut2;
00382     const BigReal WCA_rcut3 = ComputeNonbondedUtil::WCA_rcut3;
00383 
00384     /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00385     BigReal lambdaUp = ComputeNonbondedUtil::alchLambda;
00386     BigReal elecLambdaUp =  (lambdaUp <= alchElecLambdaStart)? 0. : \
00387               (lambdaUp - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00388     BigReal vdwLambdaUp = 
00389         (lambdaUp >= alchVdwLambdaEnd)? 1. : lambdaUp / alchVdwLambdaEnd; 
00390     BigReal vdwShiftUp = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaUp);
00391     FEP(BigReal lambda2Up = ComputeNonbondedUtil::alchLambda2;)
00392     FEP(BigReal elecLambda2Up = (lambda2Up <= alchElecLambdaStart)? 0. : \
00393               (lambda2Up - alchElecLambdaStart) / (1. - alchElecLambdaStart);)
00394     FEP(BigReal vdwLambda2Up = 
00395         (lambda2Up >= alchVdwLambdaEnd)? 1. : lambda2Up / alchVdwLambdaEnd;) 
00396     FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Up);)
00397 
00398 
00399     FEP( if( (Fep_Wham) && (Fep_WCA_repuOn) ) {
00400         elecLambdaUp=0.0; 
00401         vdwLambdaUp=ComputeNonbondedUtil::alchRepLambda;
00402     })
00403     FEP( if( (Fep_Wham) && (Fep_WCA_dispOn) ) {
00404         elecLambdaUp=0.0; 
00405         vdwLambdaUp=ComputeNonbondedUtil::alchDispLambda;
00406     })
00407     FEP( if( (Fep_Wham) && (Fep_ElecOn) ) {
00408         elecLambdaUp=ComputeNonbondedUtil::alchElecLambda; 
00409         vdwLambdaUp=1.0;
00410         vdwLambda2Up=1.0;
00411             vdwShiftUp = 0.0;
00412             vdwShift2Up = 0.0;
00413     })
00414         
00415     /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00416     BigReal lambdaDown = 1 - ComputeNonbondedUtil::alchLambda;
00417     BigReal elecLambdaDown =  (lambdaDown <= alchElecLambdaStart)? 0. : \
00418               (lambdaDown - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00419     BigReal vdwLambdaDown = 
00420         (lambdaDown >= alchVdwLambdaEnd)? 1. : lambdaDown / alchVdwLambdaEnd; 
00421     BigReal vdwShiftDown = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaDown);
00422     FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::alchLambda2;)
00423     FEP(BigReal elecLambda2Down = (lambda2Down <= alchElecLambdaStart)? 0. : \
00424         (lambda2Down - alchElecLambdaStart) / (1. - alchElecLambdaStart); )
00425     FEP(BigReal vdwLambda2Down = 
00426         (lambda2Down >= alchVdwLambdaEnd)? 1. : lambda2Down / alchVdwLambdaEnd; )
00427     FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Down);)
00428 
00429 
00430     FEP( if( (Fep_Wham) && (Fep_WCA_repuOn) ) {
00431         elecLambdaDown=0.0; 
00432         vdwLambdaDown = 1.0 - ComputeNonbondedUtil::alchRepLambda;
00433     })
00434     FEP( if( (Fep_Wham) && (Fep_WCA_dispOn) ) {
00435         elecLambdaDown=0.0; 
00436         vdwLambdaDown = 1.0 - ComputeNonbondedUtil::alchDispLambda;
00437     })
00438     FEP( if( (Fep_Wham) && (Fep_ElecOn) ) {
00439         elecLambdaDown = 1.0 - ComputeNonbondedUtil::alchElecLambda; 
00440         vdwLambdaDown = 1.0;
00441         vdwLambda2Down = 1.0;
00442             vdwShiftDown = 0.0;
00443             vdwShift2Down = 0.0;
00444     })
00445   
00446   // Thermodynamic Integration Notes: 
00447   // Separation of atom pairs into different pairlist according to lambda
00448   // coupling, for alchemical free energy calculations. Normal pairlists
00449   // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00450   // (pairlist[nxm][12]_save are created for atoms switched up or down with
00451   // lambda respectively.
00452   // This makes TI coding far easier and more readable, since it's necessary 
00453   // to store dU/dlambda in different variables depending on whether atoms are
00454   // being switched up or down. Further, it allows the prior explicit coding of 
00455   // the separation-shifted vdW potential to stay in place without a big 
00456   // performance hit, since it's no longer necessary to calculate vdW potentials
00457   // explicity for the bulk (non-alchemical) interactions - so that part of the 
00458   // free energy code stays readable and easy to modify. Finally there should
00459   // be some performance gain over the old FEP implementation as we no
00460   // longer have to check the partitions of each atom pair and switch
00461   // parameters accordingly for every single NonbondedBase2.h loop - this is 
00462   // done at the pairlist level
00463   
00464   int pswitchTable[3*3];
00465   // determines which pairlist alchemical pairings get sent to
00466   // 0: non-alchemical pairs, partition 0 <-> partition 0
00467   // 1: atoms scaled up as lambda increases, p0<->p1
00468   // 2: atoms scaled down as lambda increases, p0<->p2
00469   // all p1<->p2 interactions to be dropped (99)
00470   // in general, 'up' refers to 1, 'down' refers to 2
00471   for (int ip=0; ip<3; ++ip) {
00472     for (int jp=0; jp<3; ++jp ) {
00473       pswitchTable[ip+3*jp] = 0;
00474       if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00475       if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00476       if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00477 
00478       if (! ComputeNonbondedUtil::alchDecouple) {
00479         // no decoupling: interactions within a partition are treated like
00480         // normal alchemical pairs
00481         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00482         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00483       }
00484       if (ComputeNonbondedUtil::alchDecouple) {
00485         // decoupling: PME calculates extra grids so that while PME 
00486         // interaction with the full system is switched off, a new PME grid
00487         // containing only alchemical atoms is switched on. Full interactions 
00488         // between alchemical atoms are maintained; potentials within one 
00489         // partition need not be scaled here.
00490         if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00491         if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00492       }
00493     }
00494   }
00495 
00496   // dU/dlambda variables for thermodynamic integration
00497   TI(
00498       BigReal vdwEnergy_ti_1 = 0;
00499       BigReal vdwEnergy_ti_2 = 0;
00500       SHORT(BigReal electEnergy_ti_1 = 0;
00501       BigReal electEnergy_ti_2 = 0;)
00502       FULL(BigReal fullElectEnergy_ti_1 = 0; 
00503       BigReal fullElectEnergy_ti_2 = 0;) 
00504    )
00505   )
00506         
00507         
00508   const int i_upper = params->numAtoms[0];
00509   register const int j_upper = params->numAtoms[1];
00510   register int j;
00511   register int i;
00512   const CompAtom *p_0 = params->p[0];
00513   const CompAtom *p_1 = params->p[1];
00514   const CompAtomExt *pExt_0 = params->pExt[0];
00515   const CompAtomExt *pExt_1 = params->pExt[1];
00516 
00517   char * excl_flags_buff = 0;
00518   const int32 * full_excl = 0;
00519   const int32 * mod_excl = 0;
00520 
00521   plint *pairlistn_save;  int npairn;
00522   plint *pairlistx_save;  int npairx;
00523   plint *pairlistm_save;  int npairm;
00524 
00525   ALCH(
00526   plint *pairlistnA1_save;  int npairnA1;
00527   plint *pairlistxA1_save;  int npairxA1;
00528   plint *pairlistmA1_save;  int npairmA1;
00529   plint *pairlistnA2_save;  int npairnA2;
00530   plint *pairlistxA2_save;  int npairxA2;
00531   plint *pairlistmA2_save;  int npairmA2;
00532   )
00533 
00534   NBWORKARRAYSINIT(params->workArrays);
00535 
00536   int arraysize = j_upper+5;
00537 
00538   NBWORKARRAY(plint,pairlisti,arraysize)
00539   NBWORKARRAY(BigReal,r2list,arraysize)
00540 
00541   union { double f; int32 i[2]; } byte_order_test;
00542   byte_order_test.f = 1.0;  // should occupy high-order bits only
00543   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00544 
00545   if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00546 
00547 
00548   // DMK - Atom Sort
00549   // 
00550   // Basic Idea: Determine the line between the center of masses of
00551   //   the two patches.  Project and then sort the lists of atoms
00552   //   along this line.  Then, as the pairlists are being generated
00553   //   for the atoms in the first atom list, use the sorted
00554   //   list to only add atoms from the second list that are between
00555   //   +/- ~cutoff from the atoms position on the line.
00556   #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00557 
00558   // NOTE: For the first atom list, just the sort values themselves need to be
00559   // calculated (a BigReal vs. SortEntry data structure).  However, a second
00560   // SortEntry array is needed to perform the merge sort on the second list of
00561   // atoms.  Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00562   // list of atoms and then use the left over array to make a version of the
00563   // list that only includes non-fixed groups (fixg).
00564 
00565   NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00566   NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00567   NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00568 
00569   register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00570   register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00571 
00572   int p_0_sortValues_len = 0;
00573   int p_1_sortValues_len = 0;
00574   int p_1_sortValues_fixg_len = 0;
00575 
00576   // Calculate the distance between to projected points on the line that
00577   //   represents the cutoff distance.
00578   BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00579 
00580   if (savePairlists || !usePairlists) {
00581 
00582     register const BigReal projLineVec_x = params->projLineVec.x;
00583     register const BigReal projLineVec_y = params->projLineVec.y;
00584     register const BigReal projLineVec_z = params->projLineVec.z;
00585 
00586     // Calculate the sort values for the atoms in patch 1
00587     {
00588       register int nbgs = p_1->nonbondedGroupSize;
00589       register BigReal p_x = p_1->position.x;
00590       register BigReal p_y = p_1->position.y;
00591       register BigReal p_z = p_1->position.z;
00592       register int index = 0;
00593 
00594       for (register int j = nbgs; j < j_upper; j += nbgs) {
00595 
00596         // Set p_j_next to point to the atom for the next iteration and begin
00597         //   loading the 'nbgs' value for that atom.
00598         register const CompAtom* p_j_next = p_1 + j;
00599         nbgs = p_j_next->nonbondedGroupSize;
00600 
00601         // Calculate the distance along the projection vector
00602         // NOTE: If the vector from the origin to the point is 'A' and the vector
00603         //   between the patches is 'B' then to project 'A' onto 'B' we take the dot
00604         //   product of the two vectors 'A dot B' divided by the length of 'B'.
00605         // So... projection of A onto B = (A dot B) / length(B), however, note
00606         //   that length(B) == 1, therefore the sort value is simply (A dot B)
00607         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00608 
00609         // Start loading the next iteration's atom's position
00610         p_x = p_j_next->position.x;
00611         p_y = p_j_next->position.y;
00612         p_z = p_j_next->position.z;
00613 
00614         // Store the caclulated sort value into the array of sort values
00615         register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00616         p_1_sortValStorePtr->index = index;
00617         p_1_sortValStorePtr->sortValue = sortVal;
00618         p_1_sortValues_len++;
00619 
00620         // Update index for the next iteration
00621         index = j;       
00622 
00623       } // end while (j < j_upper)
00624 
00625       register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00626 
00627       register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00628       p_1_sortValStorePtr->index = index;
00629       p_1_sortValStorePtr->sortValue = sortVal;
00630       p_1_sortValues_len++;
00631     }
00632 
00633     // NOTE: This list and another version of it with only non-fixed
00634     //   atoms will be used in place of grouplist and fixglist.
00635     #if 0   // Selection Sort
00636       sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00637     #elif 0   // Bubble Sort
00638       sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00639     #else   // Merge Sort
00640       #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00641         sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00642       #else
00643         sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00644       #endif
00645     #endif
00646 
00647     // Calculate the sort values for the atoms in patch 0
00648     {
00649       register int nbgs = p_0->nonbondedGroupSize;
00650       register BigReal p_x = p_0->position.x + offset_x;
00651       register BigReal p_y = p_0->position.y + offset_y;
00652       register BigReal p_z = p_0->position.z + offset_z;
00653       register int index = 0;
00654 
00655       for (register int i = nbgs; i < i_upper; i += nbgs) {
00656 
00657         // Set p_i_next to point to the atom for the next iteration and begin
00658         //   loading the 'nbgs' value for that atom.
00659         register const CompAtom* p_i_next = p_0 + i;
00660         nbgs = p_i_next->nonbondedGroupSize;
00661 
00662         // Calculate the distance along the projection vector
00663         register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00664 
00665         // Start loading the next iteration's atom's position
00666         p_x = p_i_next->position.x + offset_x;
00667         p_y = p_i_next->position.y + offset_y;
00668         p_z = p_i_next->position.z + offset_z;
00669 
00670         // Store the calculated sort value into the array of sort values
00671         register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00672         *p_0_sortValStorePtr = sortVal;
00673 
00674         // Update index for the next iteration
00675         index = i;
00676       }
00677 
00678       register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00679 
00680       register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00681       *p_0_sortValStorePtr = sortVal;
00682 
00683       p_0_sortValues_len = i_upper;
00684     }
00685 
00686   }  // end if (savePairlists || !usePairlists)
00687 
00688   #endif  // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00689 
00690   // Atom Sort : The grouplist and fixglist arrays are not needed when the
00691   //   the atom sorting code is in use.
00692   #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00693     NBWORKARRAY(plint,grouplist,arraysize);
00694     NBWORKARRAY(plint,fixglist,arraysize);
00695   #endif
00696 
00697   NBWORKARRAY(plint,goodglist,arraysize);
00698   NBWORKARRAY(plint,pairlistx,arraysize);
00699   NBWORKARRAY(plint,pairlistm,arraysize);
00700   NBWORKARRAY(plint,pairlist,arraysize);
00701   NBWORKARRAY(plint,pairlist2,arraysize);
00702   ALCH(
00703   NBWORKARRAY(plint,pairlistnA1,arraysize);
00704   NBWORKARRAY(plint,pairlistxA1,arraysize);
00705   NBWORKARRAY(plint,pairlistmA1,arraysize);
00706   NBWORKARRAY(plint,pairlistnA2,arraysize);
00707   NBWORKARRAY(plint,pairlistxA2,arraysize);
00708   NBWORKARRAY(plint,pairlistmA2,arraysize);
00709   )
00710 
00711   int fixg_upper = 0;
00712   int g_upper = 0;
00713 
00714   if ( savePairlists || ! usePairlists ) {
00715 
00716     #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00717 
00718       // Create a sorted list of non-fixed groups
00719       register int fixg = 0;
00720       for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00721         register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00722         register int p_1_index = p_1_sortEntry->index;
00723         if (!pExt_1[p_1_index].groupFixed) {
00724           register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00725           p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00726           p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00727           p_1_sortValues_fixg_len++;
00728         }
00729       }
00730 
00731     #else
00732 
00733       register int g = 0;
00734       for ( j = 0; j < j_upper; ++j ) {
00735         if ( p_1[j].nonbondedGroupSize ) {
00736           grouplist[g++] = j;
00737         }
00738       }
00739       g_upper = g;
00740       if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00741       int fixg = 0;
00742 
00743       if ( fixedAtomsOn ) {
00744         for ( g = 0; g < g_upper; ++g ) {
00745           j = grouplist[g];
00746           if ( ! pExt_1[j].groupFixed ) {
00747             fixglist[fixg++] = j;
00748           }
00749         }
00750       }
00751 
00752       fixg_upper = fixg;
00753       if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00754 
00755     #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00756 
00757     pairlists.addIndex();
00758     pairlists.setIndexValue(i_upper);
00759 
00760   } else { // if ( savePairlists || ! usePairlists )
00761 
00762     if ( pairlists.getIndexValue() != i_upper )
00763       NAMD_bug("pairlist i_upper mismatch!");
00764 
00765   } // if ( savePairlists || ! usePairlists )
00766 
00767   SELF(
00768   int j_hgroup = 0;
00769   int g_lower = 0;
00770   int fixg_lower = 0;
00771   )
00772   int pairlistindex=0;
00773   int pairlistoffset=0;
00774 
00775   
00776 
00777 #if ( SHORT( FAST( 1+ ) ) 0 )
00778 #if ( PAIR( 1+ ) 0 )
00779     Force *f_0 = params->ff[0];
00780     Force *f_1 = params->ff[1];
00781 #else
00782 #define f_1 f_0
00783     NBWORKARRAY(Force,f_0,i_upper)
00784     memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00785 #endif
00786 #endif
00787 #if ( FULL( 1+ ) 0 )
00788 #if ( PAIR( 1+ ) 0 )
00789     Force *fullf_0 = params->fullf[0];
00790     Force *fullf_1 = params->fullf[1];
00791 #else
00792 #define fullf_1 fullf_0
00793     NBWORKARRAY(Force,fullf_0,i_upper);
00794     memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00795 #endif
00796 #endif
00797     
00798 
00799   int maxPart = params->numParts - 1;
00800   int groupCount = params->minPart;
00801   PAIR( 
00802     if ( savePairlists || ! usePairlists ) { 
00803       i = 0;
00804       pairlists.addIndex(); 
00805     } else {
00806       i = pairlists.getIndexValue();
00807     }
00808   )
00809    PAIR(for ( ; i < (i_upper);)) SELF(for ( i=0; i < (i_upper- 1);i++))
00810     {
00811     const CompAtom &p_i = p_0[i];
00812     const CompAtomExt &pExt_i = pExt_0[i];
00813 
00814     PAIR(if (savePairlists || ! usePairlists){)
00815     if ( p_i.hydrogenGroupSize ) {
00816       if ( groupCount ) {  // skip this group
00817         --groupCount;
00818         i += p_i.hydrogenGroupSize;
00819 #ifdef ARCH_POWERPC
00820         __dcbt((void *) &(p_0[i]));
00821 #endif
00822         SELF(--i;)
00823         continue;
00824       } else {  // compute this group
00825         groupCount = maxPart;
00826       }
00827     }
00828     PAIR(})
00829 
00830     register const BigReal p_i_x = p_i.position.x + offset_x;
00831     register const BigReal p_i_y = p_i.position.y + offset_y;
00832     register const BigReal p_i_z = p_i.position.z + offset_z;
00833 #ifdef A2_QPX
00834     vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.0};
00835 #endif
00836 
00837     ALCH(const int p_i_partition = p_i.partition;)
00838 
00839     PPROF(
00840         const int p_i_partition = p_i.partition;
00841         int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00842         pp_clamp(n1, pressureProfileSlabs);
00843         )
00844 
00845   SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00846 
00847   if ( savePairlists || ! usePairlists ) {
00848 
00849     #ifdef MEM_OPT_VERSION
00850     const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);        
00851     const int excl_min = pExt_i.id + exclcheck->min;
00852     const int excl_max = pExt_i.id + exclcheck->max;
00853     #else
00854     const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
00855     const int excl_min = exclcheck->min;
00856     const int excl_max = exclcheck->max;
00857     #endif
00858     const char * excl_flags_var;
00859     if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00860     else {  // need to build list on the fly
00861 
00862     //TODO: Should change later!!!!!!!!!! --Chao Mei
00863     //Now just for the sake of passing compilation
00864     #ifndef MEM_OPT_VERSION 
00865       if ( excl_flags_buff ) {
00866         int nl,l;
00867         nl = full_excl[0] + 1;
00868         for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00869         nl = mod_excl[0] + 1;
00870         for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00871       } else {
00872         excl_flags_buff = new char[mol->numAtoms];
00873         memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00874       }
00875       int nl,l;
00876       full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
00877       nl = full_excl[0] + 1;
00878       for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00879       mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
00880       nl = mod_excl[0] + 1;
00881       for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00882       excl_flags_var = excl_flags_buff;
00883     #endif
00884 
00885     }
00886     const char * const excl_flags = excl_flags_var;
00887 
00888   if ( p_i.nonbondedGroupSize ) {
00889 
00890     pairlistindex = 0;  // initialize with 0 elements
00891     pairlistoffset=0;
00892     const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
00893 
00894     // If patch divisions are not made by hydrogen groups, then
00895     // hydrogenGroupSize is set to 1 for all atoms.  Thus we can
00896     // carry on as if we did have groups - only less efficiently.
00897     // An optimization in this case is to not rebuild the temporary
00898     // pairlist but to include every atom in it.  This should be a
00899     // a very minor expense.
00900 
00901     SELF
00902     (
00903       if ( p_i.hydrogenGroupSize ) {
00904         // exclude child hydrogens of i
00905         // j_hgroup = i + p_i.hydrogenGroupSize;  (moved above)
00906         while ( g_lower < g_upper &&
00907                 grouplist[g_lower] < j_hgroup ) ++g_lower;
00908         while ( fixg_lower < fixg_upper &&
00909                 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00910       }
00911       // add all child or sister hydrogens of i
00912       for ( j = i + 1; j < j_hgroup; ++j ) {
00913         pairlist[pairlistindex++] = j;
00914       }
00915     )
00916 
00917     // add remaining atoms to pairlist via hydrogen groups
00918     register plint *pli = pairlist + pairlistindex;
00919 
00920     {
00921       // Atom Sort : Modify the values of g and gu based on the added information
00922       //   of the linear projections (sort values) information.
00923       #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00924 
00925         register int g = 0;
00926         register BigReal p_i_sortValue = p_0_sortValues[i];
00927         const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00928         register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00929 
00930         // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
00931         register int lower = 0;
00932         register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00933         while ((upper - lower) > 1) {
00934           register int j = ((lower + upper) >> 1);
00935           register BigReal jSortVal = sortValues[j].sortValue;
00936           if (jSortVal < p_i_sortValue_plus_windowRadius) {
00937             lower = j;
00938           } else {
00939             upper = j;
00940           }
00941         }
00942         const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00943 
00944       #else
00945 
00946         register plint *gli = goodglist;
00947         const plint *glist = ( groupfixed ? fixglist : grouplist );
00948         SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00949         const int gu = ( groupfixed ? fixg_upper : g_upper );
00950         register int g = PAIR(0) SELF(gl);
00951 
00952       #endif
00953 
00954 
00955       if ( g < gu ) {
00956         int hu = 0;
00957 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00958         if ( gu - g  >  6 ) { 
00959 
00960           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00961             register SortEntry* sortEntry0 = sortValues + g;
00962             register SortEntry* sortEntry1 = sortValues + g + 1;
00963             register int jprev0 = sortEntry0->index;
00964             register int jprev1 = sortEntry1->index;
00965           #else
00966             register int jprev0 = glist[g    ];
00967             register int jprev1 = glist[g + 1];
00968           #endif
00969           
00970           register int j0;
00971           register int j1;
00972 
00973           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00974           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00975           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00976 
00977           // these don't change here, so we could move them into outer scope
00978           const __m128d P_I_X = _mm_set1_pd(p_i_x);
00979           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00980           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00981  
00982           g += 2;
00983           for ( ; g < gu - 2; g +=2 ) {
00984             // compute 1d distance, 2-way parallel       
00985             j0     =  jprev0;
00986             j1     =  jprev1;
00987 
00988             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00989             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00990             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00991             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00992             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00993             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00994             
00995             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00996               sortEntry0 = sortValues + g;
00997               sortEntry1 = sortValues + g + 1;
00998               jprev0 = sortEntry0->index;
00999               jprev1 = sortEntry1->index;
01000             #else
01001               jprev0     =  glist[g  ];
01002               jprev1     =  glist[g+1];
01003             #endif
01004            
01005             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01006             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01007             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01008 
01009             __align(16) double r2_01[2];
01010             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01011 
01012             // XXX these could possibly benefit from SSE-based conditionals
01013             bool test0 = ( r2_01[0] < groupplcutoff2 );
01014             bool test1 = ( r2_01[1] < groupplcutoff2 ); 
01015             
01016             //removing ifs benefits on many architectures
01017             //as the extra stores will only warm the cache up
01018             goodglist [ hu         ] = j0;
01019             goodglist [ hu + test0 ] = j1;
01020             
01021             hu += test0 + test1;
01022           }
01023           g-=2;
01024         }
01025 #elif defined (A2_QPX)
01026         if ( gu - g  >  6 ) { 
01027 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01028           register SortEntry* sortEntry0 = sortValues + g;
01029           register SortEntry* sortEntry1 = sortValues + g + 1;
01030           register int jprev0 = sortEntry0->index;
01031           register int jprev1 = sortEntry1->index;
01032           #else
01033           register int jprev0 = glist[g    ];
01034           register int jprev1 = glist[g + 1];
01035           #endif
01036           
01037           __dcbt ((void*)(p_1 + jprev0));
01038           register  int j0; 
01039           register  int j1;           
01040           vector4double    pj_v_0, pj_v_1; 
01041           vector4double    v_0, v_1;
01042           register BigReal r2_0, r2_1;
01043           
01044           pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01045           pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01046           
01047           g += 2;
01048           for ( ; g < gu - 2; g +=2 ) {
01049             // compute 1d distance, 2-way parallel       
01050             j0     =  jprev0;
01051             j1     =  jprev1;
01052 
01053 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01054             sortEntry0 = sortValues + g;
01055             sortEntry1 = sortValues + g + 1;
01056             jprev0 = sortEntry0->index;
01057             jprev1 = sortEntry1->index;
01058             #else
01059             jprev0     =  glist[g  ];
01060             jprev1     =  glist[g+1];
01061             #endif
01062 
01063             v_0 = vec_sub (p_i_v, pj_v_0);
01064             v_1 = vec_sub (p_i_v, pj_v_1);
01065             v_0 = vec_mul (v_0, v_0);
01066             v_1 = vec_mul (v_1, v_1);
01067 
01068             r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
01069             r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
01070             
01071             pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01072             pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01073             
01074             size_t test0 = ( groupplcutoff2 >  r2_0 );
01075             size_t test1 = ( groupplcutoff2 >  r2_1 ); 
01076             
01077             //removing ifs benefits on many architectures
01078             //as the extra stores will only warm the cache up
01079             goodglist [ hu         ] = j0;
01080             goodglist [ hu + test0 ] = j1;
01081             
01082             hu += test0 + test1;
01083           }
01084           g-=2;
01085         }
01086 #else
01087         if ( gu - g  >  6 ) { 
01088 
01089           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01090             register SortEntry* sortEntry0 = sortValues + g;
01091             register SortEntry* sortEntry1 = sortValues + g + 1;
01092             register int jprev0 = sortEntry0->index;
01093             register int jprev1 = sortEntry1->index;
01094           #else
01095             register int jprev0 = glist[g    ];
01096             register int jprev1 = glist[g + 1];
01097           #endif
01098           
01099           register  int j0; 
01100           register  int j1; 
01101           
01102           register  BigReal pj_x_0, pj_x_1; 
01103           register  BigReal pj_y_0, pj_y_1; 
01104           register  BigReal pj_z_0, pj_z_1; 
01105           register  BigReal t_0, t_1, r2_0, r2_1;
01106           
01107           pj_x_0 = p_1[jprev0].position.x;
01108           pj_x_1 = p_1[jprev1].position.x;  
01109           
01110           pj_y_0 = p_1[jprev0].position.y; 
01111           pj_y_1 = p_1[jprev1].position.y;  
01112           
01113           pj_z_0 = p_1[jprev0].position.z; 
01114           pj_z_1 = p_1[jprev1].position.z;
01115           
01116           g += 2;
01117           for ( ; g < gu - 2; g +=2 ) {
01118             // compute 1d distance, 2-way parallel       
01119             j0     =  jprev0;
01120             j1     =  jprev1;
01121             
01122             t_0    =  p_i_x - pj_x_0;
01123             t_1    =  p_i_x - pj_x_1;
01124             r2_0   =  t_0 * t_0;
01125             r2_1   =  t_1 * t_1;
01126             
01127             t_0    =  p_i_y - pj_y_0;
01128             t_1    =  p_i_y - pj_y_1;
01129             r2_0  +=  t_0 * t_0;
01130             r2_1  +=  t_1 * t_1;
01131             
01132             t_0    =  p_i_z - pj_z_0;
01133             t_1    =  p_i_z - pj_z_1;
01134             r2_0  +=  t_0 * t_0;
01135             r2_1  +=  t_1 * t_1;
01136             
01137             #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01138               sortEntry0 = sortValues + g;
01139               sortEntry1 = sortValues + g + 1;
01140               jprev0 = sortEntry0->index;
01141               jprev1 = sortEntry1->index;
01142             #else
01143               jprev0     =  glist[g  ];
01144               jprev1     =  glist[g+1];
01145             #endif
01146             
01147             pj_x_0     =  p_1[jprev0].position.x;
01148             pj_x_1     =  p_1[jprev1].position.x;
01149             pj_y_0     =  p_1[jprev0].position.y; 
01150             pj_y_1     =  p_1[jprev1].position.y;
01151             pj_z_0     =  p_1[jprev0].position.z; 
01152             pj_z_1     =  p_1[jprev1].position.z;
01153             
01154             bool test0 = ( r2_0 < groupplcutoff2 );
01155             bool test1 = ( r2_1 < groupplcutoff2 ); 
01156             
01157             //removing ifs benefits on many architectures
01158             //as the extra stores will only warm the cache up
01159             goodglist [ hu         ] = j0;
01160             goodglist [ hu + test0 ] = j1;
01161             
01162             hu += test0 + test1;
01163           }
01164           g-=2;
01165         }
01166 #endif
01167         
01168         for (; g < gu; g++) {
01169 
01170           #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01171             register SortEntry* sortEntry = sortValues + g;
01172             register int j = sortEntry->index;
01173           #else
01174             int j = glist[g];
01175           #endif
01176 
01177           BigReal p_j_x = p_1[j].position.x;
01178           BigReal p_j_y = p_1[j].position.y;
01179           BigReal p_j_z = p_1[j].position.z;
01180           
01181           BigReal r2 = p_i_x - p_j_x;
01182           r2 *= r2;
01183           BigReal t2 = p_i_y - p_j_y;
01184           r2 += t2 * t2;
01185           t2 = p_i_z - p_j_z;
01186           r2 += t2 * t2;
01187 
01188           if ( r2 <= groupplcutoff2 ) 
01189             goodglist[hu ++] = j; 
01190         }
01191 
01192         for ( int h=0; h<hu; ++h ) {
01193           int j = goodglist[h];
01194           int nbgs = p_1[j].nonbondedGroupSize;
01195           pli[0] = j;   // copy over the next three in any case
01196           pli[1] = j+1;
01197           pli[2] = j+2;
01198           if ( nbgs & 4 ) {  // if nbgs > 3, assume nbgs <= 5
01199             pli[3] = j+3;
01200             pli[4] = j+4;
01201           }
01202           pli += nbgs;
01203         }
01204       }
01205     }
01206 
01207     pairlistindex = pli - pairlist;
01208     // make sure padded element on pairlist points to real data
01209     if ( pairlistindex ) {
01210        pairlist[pairlistindex] = pairlist[pairlistindex-1];
01211     } PAIR( else {  // skip empty loops if no pairs were found
01212        i += p_i.nonbondedGroupSize;
01213        continue;
01214     } )
01215   } // if i is nonbonded group parent
01216   SELF
01217     (
01218     // self-comparisions require list to be incremented
01219     // pair-comparisions use entire list (pairlistoffset is 0)
01220     else pairlistoffset++;
01221     )
01222 
01223     const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
01224 
01225     register plint *pli = pairlist2;
01226 
01227     plint *pairlistn = pairlists.newlist(j_upper + 5 + 5 ALCH( + 20 ));
01228     register plint *plin = pairlistn;
01229 
01230     INT(
01231     if ( pairInteractionOn ) {
01232       const int ifep_type = p_i.partition;
01233       if (pairInteractionSelf) {
01234         if (ifep_type != 1) { PAIR(++i;) continue; }
01235         for (int k=pairlistoffset; k<pairlistindex; k++) {
01236           j = pairlist[k];
01237           const int jfep_type = p_1[j].partition;
01238           // for pair-self, both atoms must be in group 1.
01239           if (jfep_type == 1) {
01240             *(pli++) = j;
01241           }
01242         }
01243       } else {
01244         if (ifep_type != 1 && ifep_type != 2) { PAIR(++i;) continue; }
01245         for (int k=pairlistoffset; k<pairlistindex; k++) {
01246           j = pairlist[k];
01247           const int jfep_type = p_1[j].partition;
01248           // for pair, must have one from each group.
01249           if (ifep_type + jfep_type == 3) {
01250             *(pli++) = j;
01251           }
01252         }
01253       }
01254       int npair2_int = pli - pairlist2;
01255       pli = pairlist2;
01256       for (int k=0; k<npair2_int; k++) {
01257         j = pairlist2[k];
01258         BigReal p_j_x = p_1[j].position.x;
01259         BigReal r2 = p_i_x - p_j_x;
01260         r2 *= r2;
01261         BigReal p_j_y = p_1[j].position.y;
01262         BigReal t2 = p_i_y - p_j_y;
01263         r2 += t2 * t2;
01264         BigReal p_j_z = p_1[j].position.z;
01265         t2 = p_i_z - p_j_z;
01266         r2 += t2 * t2;
01267         if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01268           int atom2 = pExt_1[j].id;
01269           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01270           else *(plin++) = j;
01271         }
01272       }
01273     } else
01274     )
01275     if ( atomfixed ) {
01276       for (int k=pairlistoffset; k<pairlistindex; k++) {
01277         j = pairlist[k];
01278         BigReal p_j_x = p_1[j].position.x;
01279         BigReal r2 = p_i_x - p_j_x;
01280         r2 *= r2;
01281         BigReal p_j_y = p_1[j].position.y;
01282         BigReal t2 = p_i_y - p_j_y;
01283         r2 += t2 * t2;
01284         BigReal p_j_z = p_1[j].position.z;
01285         t2 = p_i_z - p_j_z;
01286         r2 += t2 * t2;
01287         if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01288           int atom2 = pExt_1[j].id;
01289           if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01290           else *(plin++) = j;
01291         }
01292       }
01293     } else {
01294       int k = pairlistoffset;
01295       int ku = pairlistindex;
01296       if ( k < ku ) {
01297 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01298         if ( ku - k  >  6 ) {      
01299           register  int jprev0 = pairlist [k    ];
01300           register  int jprev1 = pairlist [k + 1];
01301           
01302           register  int j0; 
01303           register  int j1; 
01304 
01305           __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01306           __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01307           __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01308 
01309           // these don't change here, so we could move them into outer scope
01310           const __m128d P_I_X = _mm_set1_pd(p_i_x);
01311           const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01312           const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01313           
01314           int atom2_0 = pExt_1[jprev0].id;
01315           int atom2_1 = pExt_1[jprev1].id;
01316           
01317           k += 2;
01318           for ( ; k < ku - 2; k +=2 ) {
01319             // compute 1d distance, 2-way parallel       
01320             j0     =  jprev0;
01321             j1     =  jprev1;
01322             
01323             __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01324             __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01325             T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01326             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01327             T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01328             R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01329             
01330             jprev0     =  pairlist[k];
01331             jprev1     =  pairlist[k+1];
01332             
01333             PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01334             PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01335             PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01336 
01337             __align(16) double r2_01[2];
01338             _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01339             
01340             if (r2_01[0] <= plcutoff2) {
01341               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01342                 *(pli++) = j0;
01343               else 
01344                 *(plin++) = j0;
01345             }
01346             atom2_0 = pExt_1[jprev0].id;
01347             
01348             if (r2_01[1] <= plcutoff2) {
01349               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01350                 *(pli++) = j1;
01351               else 
01352                 *(plin++) = j1;
01353             }
01354             atom2_1 = pExt_1[jprev1].id;            
01355           }
01356           k-=2;
01357         }       
01358 #elif defined(A2_QPX)
01359         if ( ku - k  >  6 ) {      
01360           register  int jprev0 = pairlist [k];
01361           register  int jprev1 = pairlist [k + 1];
01362           
01363           register  int j0; 
01364           register  int j1; 
01365           vector4double    pj_v_0, pj_v_1; 
01366           vector4double    v_0, v_1;
01367           BigReal          r2_0, r2_1;
01368 
01369           pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01370           pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01371           
01372           int atom2_0 = pExt_1[jprev0].id;
01373           int atom2_1 = pExt_1[jprev1].id;
01374           
01375           k += 2;
01376           for ( ; k < ku - 2; k +=2 ) {
01377             // compute 1d distance, 2-way parallel       
01378             j0     =  jprev0;
01379             j1     =  jprev1;
01380           
01381             v_0 = vec_sub (p_i_v, pj_v_0);
01382             v_1 = vec_sub (p_i_v, pj_v_1);
01383             v_0 = vec_mul (v_0, v_0);
01384             v_1 = vec_mul (v_1, v_1);
01385 
01386             r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
01387             r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
01388 
01389             jprev0     =  pairlist[k];
01390             jprev1     =  pairlist[k+1];
01391             
01392             pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
01393             pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);  
01394                     
01395             if (r2_0 <= plcutoff2) {
01396               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01397                 *(pli++) = j0;
01398               else 
01399                 *(plin++) = j0;
01400             }
01401             atom2_0 = pExt_1[jprev0].id;
01402             
01403             if (r2_1 <= plcutoff2) {
01404               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01405                 *(pli++) = j1;
01406               else 
01407                 *(plin++) = j1;
01408             }
01409             atom2_1 = pExt_1[jprev1].id;            
01410           }
01411           k-=2;
01412         }      
01413 #else
01414         if ( ku - k  >  6 ) {      
01415           register  int jprev0 = pairlist [k];
01416           register  int jprev1 = pairlist [k + 1];
01417           
01418           register  int j0; 
01419           register  int j1; 
01420           
01421           register  BigReal pj_x_0, pj_x_1; 
01422           register  BigReal pj_y_0, pj_y_1; 
01423           register  BigReal pj_z_0, pj_z_1; 
01424           register  BigReal t_0, t_1, r2_0, r2_1;
01425           
01426           pj_x_0 = p_1[jprev0].position.x;
01427           pj_x_1 = p_1[jprev1].position.x;  
01428           
01429           pj_y_0 = p_1[jprev0].position.y; 
01430           pj_y_1 = p_1[jprev1].position.y;  
01431           
01432           pj_z_0 = p_1[jprev0].position.z; 
01433           pj_z_1 = p_1[jprev1].position.z;
01434           
01435           int atom2_0 = pExt_1[jprev0].id;
01436           int atom2_1 = pExt_1[jprev1].id;
01437           
01438           k += 2;
01439           for ( ; k < ku - 2; k +=2 ) {
01440             // compute 1d distance, 2-way parallel       
01441             j0     =  jprev0;
01442             j1     =  jprev1;
01443             
01444             t_0    =  p_i_x - pj_x_0;
01445             t_1    =  p_i_x - pj_x_1;
01446             r2_0   =  t_0 * t_0;
01447             r2_1   =  t_1 * t_1;
01448             
01449             t_0    =  p_i_y - pj_y_0;
01450             t_1    =  p_i_y - pj_y_1;
01451             r2_0  +=  t_0 * t_0;
01452             r2_1  +=  t_1 * t_1;
01453             
01454             t_0    =  p_i_z - pj_z_0;
01455             t_1    =  p_i_z - pj_z_1;
01456             r2_0  +=  t_0 * t_0;
01457             r2_1  +=  t_1 * t_1;
01458             
01459             jprev0     =  pairlist[k];
01460             jprev1     =  pairlist[k+1];
01461             
01462             pj_x_0     =  p_1[jprev0].position.x;
01463             pj_x_1     =  p_1[jprev1].position.x;
01464             pj_y_0     =  p_1[jprev0].position.y; 
01465             pj_y_1     =  p_1[jprev1].position.y;
01466             pj_z_0     =  p_1[jprev0].position.z; 
01467             pj_z_1     =  p_1[jprev1].position.z;
01468             
01469             if (r2_0 <= plcutoff2) {
01470               if ( atom2_0 >= excl_min && atom2_0 <= excl_max ) 
01471                 *(pli++) = j0;
01472               else 
01473                 *(plin++) = j0;
01474             }
01475             atom2_0 = pExt_1[jprev0].id;
01476             
01477             if (r2_1 <= plcutoff2) {
01478               if ( atom2_1 >= excl_min && atom2_1 <= excl_max ) 
01479                 *(pli++) = j1;
01480               else 
01481                 *(plin++) = j1;
01482              }
01483             atom2_1 = pExt_1[jprev1].id;            
01484           }
01485           k-=2;
01486         }       
01487 #endif
01488 
01489         for (; k < ku; k++) {
01490           int j = pairlist[k];
01491           int atom2 = pExt_1[j].id;
01492           
01493           BigReal p_j_x = p_1[j].position.x;
01494           BigReal p_j_y = p_1[j].position.y;
01495           BigReal p_j_z = p_1[j].position.z;
01496           
01497           BigReal r2 = p_i_x - p_j_x;
01498           r2 *= r2;
01499           BigReal t2 = p_i_y - p_j_y;
01500           r2 += t2 * t2;
01501           t2 = p_i_z - p_j_z;
01502           r2 += t2 * t2;
01503           
01504           if (r2 <= plcutoff2) {
01505             if ( atom2 >= excl_min && atom2 <= excl_max ) 
01506               *(pli++) = j;
01507             else 
01508               *(plin++) = j;
01509           }
01510         }
01511       }
01512     }
01513 
01514     PAIR(
01515     if ( plin == pairlistn && pli == pairlist2 ) {
01516       ++i;
01517       continue;
01518     }
01519     pairlists.setIndexValue(i); 
01520     )
01521 
01522     plint *plix = pairlistx;
01523     plint *plim = pairlistm;
01524     ALCH(
01525     plint *plinA1 = pairlistnA1;
01526     plint *plixA1 = pairlistxA1;
01527     plint *plimA1 = pairlistmA1;
01528     plint *plinA2 = pairlistnA2;
01529     plint *plixA2 = pairlistxA2;
01530     plint *plimA2 = pairlistmA2;
01531     )
01532 
01533     int k;
01534 
01535 #if 0 ALCH(+1)
01536     int unsortedNpairn = plin - pairlistn;
01537     plin = pairlistn;
01538     for ( k=0; k<unsortedNpairn; ++k ) {
01539       int j = pairlistn[k];
01540       switch(pswitchTable[p_i_partition + 3*(p_1[j].partition)]) {
01541         case 0:  *(plin++) = j; break;
01542         case 1:  *(plinA1++) = j; break;
01543         case 2:  *(plinA2++) = j; break;
01544       }
01545     }
01546 #endif
01547 
01548     int npair2 = pli - pairlist2;
01549     // if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
01550     // removed code for implicit exclusions within hydrogen groups -JCP
01551     for (k=0; k < npair2; ++k ) {
01552       int j = pairlist2[k];
01553       int atom2 = pExt_1[j].id;
01554       int excl_flag = excl_flags[atom2];
01555       ALCH(int pswitch = pswitchTable[p_i_partition + 3*(p_1[j].partition)];)
01556       switch ( excl_flag ALCH( + 3 * pswitch)) {
01557       case 0:  *(plin++) = j;  break;
01558       case 1:  *(plix++) = j;  break;
01559       case 2:  *(plim++) = j;  break;
01560       ALCH(
01561       case 3:  *(plinA1++) = j; break;
01562       case 6:  *(plinA2++) = j; break;
01563       case 5:  *(plimA1++) = j; break;
01564       case 8:  *(plimA2++) = j; break;
01565       case 4:  *(plixA1++) = j; break;
01566       case 7:  *(plixA2++) = j; break;
01567       )
01568       }
01569     }
01570 
01571     npairn = plin - pairlistn;
01572     pairlistn_save = pairlistn;
01573     pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
01574     pairlists.newsize(npairn + 1);
01575 
01576     npairx = plix - pairlistx;
01577     pairlistx_save = pairlists.newlist();
01578     for ( k=0; k<npairx; ++k ) {
01579       pairlistx_save[k] = pairlistx[k];
01580     }
01581     pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
01582     pairlists.newsize(npairx + 1);
01583 
01584     npairm = plim - pairlistm;
01585     pairlistm_save = pairlists.newlist();
01586     for ( k=0; k<npairm; ++k ) {
01587       pairlistm_save[k] = pairlistm[k];
01588     }
01589     pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
01590     pairlists.newsize(npairm + 1);
01591 
01592 
01593 #if 0 ALCH(+1)
01594 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
01595 { \
01596   (NPAIRS) = (PL2) - (PL1); \
01597   (PLSAVE) = pairlists.newlist(); \
01598   for ( k=0; k<(NPAIRS); ++k ) { \
01599     (PLSAVE)[k] = (PL1)[k]; \
01600   } \
01601   (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
01602   pairlists.newsize((NPAIRS) + 1); \
01603 }
01604   
01605     PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
01606     PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
01607     PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
01608     PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
01609     PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
01610     PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
01611 #undef PAIRLISTFROMARRAY
01612 
01613 #endif
01614     
01615     if ( ! savePairlists ) pairlists.reset();  // limit space usage
01616     PAIR( pairlists.addIndex(); )
01617     
01618         // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
01619   } else { // if ( savePairlists || ! usePairlists )
01620         // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
01621 
01622     pairlists.nextlist(&pairlistn_save,&npairn);  --npairn;
01623     pairlists.nextlist(&pairlistx_save,&npairx);  --npairx;
01624     pairlists.nextlist(&pairlistm_save,&npairm);  --npairm;
01625     ALCH(
01626     pairlists.nextlist(&pairlistnA1_save,&npairnA1);  --npairnA1;
01627     pairlists.nextlist(&pairlistxA1_save,&npairxA1);  --npairxA1;
01628     pairlists.nextlist(&pairlistmA1_save,&npairmA1);  --npairmA1;
01629     pairlists.nextlist(&pairlistnA2_save,&npairnA2);  --npairnA2;
01630     pairlists.nextlist(&pairlistxA2_save,&npairxA2);  --npairxA2;
01631     pairlists.nextlist(&pairlistmA2_save,&npairmA2);  --npairmA2;
01632     )
01633 
01634   } // if ( savePairlists || ! usePairlists )
01635 
01636     LES( BigReal *lambda_table_i =
01637                         lambda_table + (lesFactor+1) * p_i.partition; )
01638 
01639     INT(
01640     const BigReal force_sign = (
01641       ( pairInteractionOn && ! pairInteractionSelf ) ?
01642         ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
01643     );
01644       
01645     )
01646 
01647     const BigReal kq_i = COULOMB * p_i.charge * scaling * dielectric_1;
01648     const LJTable::TableEntry * const lj_row =
01649                 ljTable->table_row(p_i.vdwType);
01650 
01651 #ifdef A2_QPX
01652     vector4double kq_iv = (vector4double)(0.0);
01653     vector4double f_i_v = (vector4double)(0.0);
01654     vector4double fullf_i_v = (vector4double)(0.0);
01655     vector4double full_cnst = (vector4double)(0.);
01656 #define f_i_x       vec_extract(f_i_v, 0)
01657 #define f_i_y       vec_extract(f_i_v, 1)
01658 #define f_i_z       vec_extract(f_i_v, 2)
01659 #define fullf_i_x   vec_extract(fullf_i_v, 0)
01660 #define fullf_i_y   vec_extract(fullf_i_v, 1)
01661 #define fullf_i_z   vec_extract(fullf_i_v, 2)
01662 #else
01663     SHORT( FAST( BigReal f_i_x = 0.; ) )
01664     SHORT( FAST( BigReal f_i_y = 0.; ) )
01665     SHORT( FAST( BigReal f_i_z = 0.; ) )
01666     FULL( BigReal fullf_i_x = 0.; )
01667     FULL( BigReal fullf_i_y = 0.; )
01668     FULL( BigReal fullf_i_z = 0.; )
01669 #endif
01670 
01671     int npairi;
01672     int k;
01673 
01674     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01675         p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
01676         r2_delta, r2list);
01677 
01678 // BEGIN NBTHOLE OF DRUDE MODEL
01679 #if (FAST(1+)0)
01680     if (drudeNbthole && p_i.hydrogenGroupSize > 1) {
01681   
01682       Parameters *parameters = Node::Object()->parameters;
01683       const NbtholePairValue * const nbthole_array = parameters->nbthole_array;
01684       const int NumNbtholePairParams = parameters->NumNbtholePairParams;
01685       BigReal drudeNbtholeCut = simParams -> drudeNbtholeCut;
01686       BigReal drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut) + r2_delta;
01687       BigReal CC = COULOMB * scaling * dielectric_1;
01688       int kk;
01689 
01690       for (k = 0; k < npairi; k++) {
01691             if (r2list[k] > drudeNbtholeCut2) { continue; }
01692   
01693             const int j = pairlisti[k];
01694             const CompAtom& p_j = p_1[j];
01695 
01696             if ( p_j.hydrogenGroupSize < 2 ) { continue; }
01697   
01698         for (kk = 0;kk < NumNbtholePairParams; kk++) {
01699 
01700                if (((nbthole_array[kk].ind1 == p_i.vdwType) &&
01701                   (nbthole_array[kk].ind2 == p_j.vdwType)) ||
01702                   ((nbthole_array[kk].ind2 == p_i.vdwType) &&
01703                    (nbthole_array[kk].ind1 == p_j.vdwType))) {
01704                  break;
01705                }
01706         }
01707         if ( kk < NumNbtholePairParams ) {
01708     
01709                 BigReal aprod = mol->GetAtomAlpha(i) * mol->GetAtomAlpha(j);
01710                 const BigReal aa = nbthole_array[kk].tholeij * powf(aprod, -1.f/6);
01711                 const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
01712                 const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
01713                 const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
01714                 const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;
01715 
01716                 Vector raa = (p_0[i].position + params->offset) - p_1[j].position;
01717                 Vector rad = (p_0[i].position + params->offset) - p_1[j+1].position;
01718                 Vector rda = (p_0[i+1].position + params->offset) - p_1[j].position;
01719                 Vector rdd = (p_0[i+1].position + params->offset) - p_1[j+1].position;
01720 
01721                 BigReal raa_invlen = raa.rlength();  // reciprocal of length
01722                 BigReal rad_invlen = rad.rlength();
01723                 BigReal rda_invlen = rda.rlength();
01724                 BigReal rdd_invlen = rdd.rlength();
01725 
01726                 BigReal auaa = aa / raa_invlen;
01727                 BigReal auad = aa / rad_invlen;
01728                 BigReal auda = aa / rda_invlen;
01729                 BigReal audd = aa / rdd_invlen;
01730 
01731                 BigReal expauaa = exp(-auaa);
01732                 BigReal expauad = exp(-auad);
01733                 BigReal expauda = exp(-auda);
01734                 BigReal expaudd = exp(-audd);
01735 
01736                 BigReal polyauaa = 1 + 0.5*auaa;
01737                 BigReal polyauad = 1 + 0.5*auad;
01738                 BigReal polyauda = 1 + 0.5*auda;
01739                 BigReal polyaudd = 1 + 0.5*audd;
01740 
01741                 BigReal ethole = 0;
01742                 ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
01743                 ethole += qqad * rad_invlen * (- polyauad * expauad);
01744                 ethole += qqda * rda_invlen * (- polyauda * expauda);
01745                 ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);
01746 
01747                 polyauaa = 1 + auaa*polyauaa;
01748                 polyauad = 1 + auad*polyauad;
01749                 polyauda = 1 + auda*polyauda;
01750                 polyaudd = 1 + audd*polyaudd;
01751 
01752                 BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
01753                 BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
01754                 BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
01755                 BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;
01756 
01757                 BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
01758                 BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
01759                 BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
01760                 BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);
01761 
01762                 Vector faa = -dfaa * raa;
01763                 Vector fad = -dfad * rad;
01764                 Vector fda = -dfda * rda;
01765                 Vector fdd = -dfdd * rdd;
01766 
01767                 params->ff[0][i] += faa + fad;
01768                 params->ff[0][i+1] += fda + fdd;
01769                 params->ff[1][j] -= faa + fda;
01770                 params->ff[1][j+1] -= fad + fdd;
01771 
01772                 reduction[virialIndex_XX] += faa.x * raa.x + fad.x * rad.x
01773                 + fda.x * rda.x + fdd.x * rdd.x;
01774                 reduction[virialIndex_XY] += faa.x * raa.y + fad.x * rad.y
01775                 + fda.x * rda.y + fdd.x * rdd.y;
01776                 reduction[virialIndex_XZ] += faa.x * raa.z + fad.x * rad.z
01777                 + fda.x * rda.z + fdd.x * rdd.z;
01778                 reduction[virialIndex_YX] += faa.y * raa.x + fad.y * rad.x
01779                 + fda.y * rda.x + fdd.y * rdd.x;
01780                 reduction[virialIndex_YY] += faa.y * raa.y + fad.y * rad.y
01781                 + fda.y * rda.y + fdd.y * rdd.y;
01782                 reduction[virialIndex_YZ] += faa.y * raa.z + fad.y * rad.z
01783                 + fda.y * rda.z + fdd.y * rdd.z;
01784                 reduction[virialIndex_ZX] += faa.z * raa.x + fad.z * rad.x
01785                 + fda.z * rda.x + fdd.z * rdd.x;
01786                 reduction[virialIndex_ZY] += faa.z * raa.y + fad.z * rad.y
01787                 + fda.z * rda.y + fdd.z * rdd.y;
01788                 reduction[virialIndex_ZZ] += faa.z * raa.z + fad.z * rad.z
01789                 + fda.z * rda.z + fdd.z * rdd.z;
01790                 reduction[electEnergyIndex] += ethole;
01791           }
01792        }
01793     }
01794 #endif
01795 // END NBTHOLE OF DRUDE MODEL
01796     
01797     // BEGIN LA
01798 #if (FAST(1+)0)
01799     if (doLoweAndersen && p_i.hydrogenGroupSize) {
01800         BigReal loweAndersenCutoff = simParams->loweAndersenCutoff;
01801         BigReal loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff) + r2_delta;
01802         BigReal loweAndersenProb = simParams->loweAndersenRate * (simParams->dt * simParams->nonbondedFrequency) * 0.001; // loweAndersenRate is in 1/ps
01803         const bool loweAndersenUseCOMvelocity = (simParams->rigidBonds != RIGID_NONE);
01804         const BigReal kbT = BOLTZMANN * (simParams->loweAndersenTemp);
01805         const BigReal dt_inv = TIMEFACTOR / (simParams->dt * simParams->nonbondedFrequency);
01806         //const BigReal dt_inv = 1.0 / simParams->dt;
01807         //BigReal kbT = 8.3145e-7 * (simParams->loweAndersenTemp); // in A^2/fs^2 * K
01808         
01809         const CompAtom* v_0 = params->v[0];
01810         const CompAtom* v_1 = params->v[1];
01811         const CompAtom& v_i = v_0[i];
01812         Mass mass_i = v_i.charge;
01813         Velocity vel_i = v_i.position;
01814         Position pos_i = p_i.position;
01815         int atom_i = pExt_0[i].id;
01816         
01817         if (loweAndersenUseCOMvelocity) {
01818             Mass mass = 0;
01819             Velocity vel = 0;
01820             Position pos = 0;
01821             for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
01822                 vel += v_0[i+l].charge * v_0[i+l].position;
01823                 pos += v_0[i+l].charge * p_0[i+l].position;
01824                 mass += v_0[i+l].charge;
01825             }
01826             vel_i = vel / mass;
01827             pos_i = pos / mass;
01828             mass_i = mass;
01829         }
01830         
01831         // Note v[0].charge is actually mass
01832         //Random rand(CkMyPe()); // ?? OK ?? NO!!!!
01833         Random *rand = Node::Object()->rand;
01834         for (k = 0; k < npairi; k++) {
01835             if (r2list[k] > loweAndersenCutoff2) { continue; }
01836                 
01837             const int j = pairlisti[k];
01838             const CompAtom& v_j = v_1[j];
01839             const CompAtom& p_j = p_1[j];
01840                 
01841             if (!p_j.hydrogenGroupSize) { continue; }
01842             if (rand->uniform() > loweAndersenProb) { continue; }
01843                 
01844             Mass mass_j = v_j.charge;
01845             Velocity vel_j = v_j.position;
01846             Position pos_j = p_j.position;
01847             int atom_j = pExt_1[j].id;
01848                 
01849             if (loweAndersenUseCOMvelocity) {
01850                 Mass mass = 0;
01851                 Velocity vel = 0;
01852                 Position pos = 0;
01853                 for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
01854                     vel += v_1[j+l].charge * v_1[j+l].position;
01855                     pos += v_1[j+l].charge * p_1[j+l].position;
01856                     mass += v_1[j+l].charge;
01857                 }
01858                 vel_j = vel / mass;
01859                 pos_j = pos / mass;
01860                 mass_j = mass;
01861             }
01862                 
01863             //Velocity deltaV = v_i.position - v_j.position;
01864             Velocity deltaV = vel_i - vel_j;
01865             Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
01866             //Vector sep = (p_i.position + params->offset) - p_j.position;
01867             Vector sep = (pos_i + params->offset) - pos_j;
01868             Vector sigma_ij = sep.unit();
01869             BigReal lambda = rand->gaussian() * sqrt(kbT / mu_ij);
01870             Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;
01871                 
01872             //DebugM(1, "atom1 atom2 = " << atom1 << " " << atom2 << " lambda = " << lambda << " force = " << force << " deltaP = " << sep.length() << " sqrt(r2) = " << sqrt(r2list[k]) << "\n");
01873             //DebugM(1, "atom1 atom2 = " << atom_i << " " << atom_j << " mass1 mass2 = " << mass_i << " " << mass_j << " hgrp1 hgrp2 " << p_i.hydrogenGroupSize << " " << p_j.hydrogenGroupSize << "\n");
01874                 
01875             if (loweAndersenUseCOMvelocity) {
01876                 BigReal inv_mass_i = 1.0 / mass_i;
01877                 BigReal inv_mass_j = 1.0 / mass_j;
01878                 for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
01879                     params->ff[0][i+l] += (v_0[i+l].charge * inv_mass_i) * force;
01880                 }
01881                 for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
01882                     params->ff[1][j+l] -= (v_1[j+l].charge * inv_mass_j) * force;
01883                 }
01884             } else {
01885                 params->ff[0][i] += force;
01886                 params->ff[1][j] -= force;
01887             }
01888                 
01889             reduction[virialIndex_XX] += force.x * sep.x;
01890             reduction[virialIndex_XY] += force.x * sep.y;
01891             reduction[virialIndex_XZ] += force.x * sep.z;
01892             reduction[virialIndex_YX] += force.y * sep.x;
01893             reduction[virialIndex_YY] += force.y * sep.y;
01894             reduction[virialIndex_YZ] += force.y * sep.z;
01895             reduction[virialIndex_ZX] += force.z * sep.x;
01896             reduction[virialIndex_ZY] += force.z * sep.y;
01897             reduction[virialIndex_ZZ] += force.z * sep.z;
01898         }
01899     }
01900 #endif
01901     // END LA
01902 
01903 #define NORMAL(X) X
01904 #define EXCLUDED(X)
01905 #define MODIFIED(X)
01906 #include  "ComputeNonbondedBase2.h"
01907 #undef NORMAL
01908 #undef EXCLUDED
01909 #undef MODIFIED
01910 
01911     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01912         p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
01913         r2_delta, r2list);
01914     exclChecksum += npairi;
01915 
01916 #define NORMAL(X)
01917 #define EXCLUDED(X)
01918 #define MODIFIED(X) X
01919 #include  "ComputeNonbondedBase2.h"
01920 #undef NORMAL
01921 #undef EXCLUDED
01922 #undef MODIFIED
01923 
01924 #ifdef FULLELECT
01925     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01926         p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
01927         r2_delta, r2list);
01928     exclChecksum += npairi;
01929 
01930 #undef FAST
01931 #define FAST(X)
01932 #define NORMAL(X)
01933 #define EXCLUDED(X) X
01934 #define MODIFIED(X)
01935 #include  "ComputeNonbondedBase2.h"
01936 #undef FAST
01937 #ifdef SLOWONLY
01938   #define FAST(X)
01939 #else
01940   #define FAST(X) X
01941 #endif
01942 #undef NORMAL
01943 #undef EXCLUDED
01944 #undef MODIFIED
01945 #else
01946     exclChecksum += npairx;
01947 #endif
01948 
01949 #if 0 ALCH(+1)   // nonbondedbase2 for alchemical-separeted pairlists
01950 
01951     #undef ALCHPAIR
01952     #define ALCHPAIR(X) X
01953     #undef NOT_ALCHPAIR
01954     #define NOT_ALCHPAIR(X)
01955     BigReal myLambda; FEP(BigReal myLambda2;)
01956     BigReal myElecLambda;  FEP(BigReal myElecLambda2;)
01957     BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
01958     BigReal myVdwShift; FEP(BigReal myVdwShift2;)
01959     BigReal alch_vdw_energy; BigReal alch_vdw_force;
01960     FEP(BigReal alch_vdw_energy_2; BigReal alch_vdw_energy_2_Left;) TI(BigReal alch_vdw_dUdl;)
01961     BigReal shiftedElec; BigReal shiftedElecForce;
01962     
01963     /********************************************************************/
01964     /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
01965     /********************************************************************/
01966     #define NORMAL(X) X
01967     #define EXCLUDED(X)
01968     #define MODIFIED(X)
01969 
01970     #define ALCH1(X) X
01971     #define ALCH2(X)
01972     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01973             p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
01974             r2_delta, r2list);
01975     #include  "ComputeNonbondedBase2.h" // normal, direction 'up'
01976     #undef ALCH1
01977     #undef ALCH2
01978 
01979     #define ALCH1(X)
01980     #define ALCH2(X) X
01981     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01982             p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
01983             r2_delta, r2list);
01984     #include  "ComputeNonbondedBase2.h" // normal, direction 'down'
01985     #undef ALCH1
01986     #undef ALCH2
01987 
01988     #undef NORMAL
01989     #undef EXCLUDED
01990     #undef MODIFIED
01991     
01992     /********************************************************************/
01993     /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
01994     /********************************************************************/
01995     #define NORMAL(X)
01996     #define EXCLUDED(X)
01997     #define MODIFIED(X) X
01998 
01999     #define ALCH1(X) X
02000     #define ALCH2(X)
02001     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02002             p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
02003             r2_delta, r2list);
02004         exclChecksum += npairi;
02005     #include  "ComputeNonbondedBase2.h" // modified, direction 'up'
02006     #undef ALCH1
02007     #undef ALCH2
02008 
02009     #define ALCH1(X)
02010     #define ALCH2(X) X
02011     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02012             p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
02013             r2_delta, r2list);
02014         exclChecksum += npairi;
02015     #include  "ComputeNonbondedBase2.h" // modified, direction 'down'
02016     #undef ALCH1
02017     #undef ALCH2
02018 
02019     #undef NORMAL
02020     #undef EXCLUDED
02021     #undef MODIFIED
02022     
02023     /********************************************************************/
02024     /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
02025     /********************************************************************/
02026     #ifdef FULLELECT
02027     #undef FAST
02028     #define FAST(X)
02029     #define NORMAL(X)
02030     #define EXCLUDED(X) X
02031     #define MODIFIED(X)
02032 
02033     #define ALCH1(X) X
02034     #define ALCH2(X)
02035     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02036             p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
02037             r2_delta, r2list);
02038         exclChecksum += npairi;
02039     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'up'
02040     #undef ALCH1
02041     #undef ALCH2
02042 
02043     #define ALCH1(X)
02044     #define ALCH2(X) X
02045     npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
02046             p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
02047             r2_delta, r2list);
02048         exclChecksum += npairi;
02049     #include  "ComputeNonbondedBase2.h"  //excluded, direction 'down'
02050     #undef ALCH1
02051     #undef ALCH2
02052 
02053     #undef FAST
02054     #ifdef SLOWONLY
02055       #define FAST(X)
02056     #else
02057       #define FAST(X) X
02058     #endif
02059     #undef NORMAL
02060     #undef EXCLUDED
02061     #undef MODIFIED
02062     #else
02063         exclChecksum += npairxA1 + npairxA2;
02064     #endif
02065 
02066     #undef ALCHPAIR
02067     #define ALCHPAIR(X)
02068     #undef NOT_ALCHPAIR
02069     #define NOT_ALCHPAIR(X) X
02070 
02071 #endif // end nonbondedbase2.h includes for alchemical pairlists
02072 
02073 #ifdef NETWORK_PROGRESS
02074     CkNetworkProgress();
02075 #endif
02076 
02077 #ifdef ARCH_POWERPC
02078     //data cache block touch the position structure
02079     __dcbt ((void *) &(p_0[i+1]));
02080     //__prefetch_by_load ((void *)&(groupCount));
02081 #endif
02082 
02083 #ifndef NAMD_CUDA
02084     SHORT( FAST( f_0[i].x += f_i_x; ) )
02085     SHORT( FAST( f_0[i].y += f_i_y; ) )
02086     SHORT( FAST( f_0[i].z += f_i_z; ) )
02087     FULL( fullf_0[i].x += fullf_i_x; )
02088     FULL( fullf_0[i].y += fullf_i_y; )
02089     FULL( fullf_0[i].z += fullf_i_z; )
02090 #endif
02091 PAIR(
02092     if ( savePairlists || ! usePairlists ) {
02093       i++;
02094     } else {
02095       i = pairlists.getIndexValue();
02096     }
02097 )
02098 
02099         // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
02100   } // for i
02101 
02102   // PAIR(iout << "++++++++\n" << endi;)
02103   PAIR( if ( savePairlists ) { pairlists.setIndexValue(i); } )
02104 
02105 #ifdef A2_QPX
02106     BigReal  virial_xx   =  vec_extract (virial_v0, 0);
02107     BigReal  virial_xy   =  vec_extract (virial_v0, 1);
02108     BigReal  virial_xz   =  vec_extract (virial_v0, 2);
02109     BigReal  virial_yy   =  vec_extract (virial_v1, 1);
02110     BigReal  virial_yz   =  vec_extract (virial_v1, 2);
02111     BigReal  virial_zz   =  vec_extract (virial_v2, 2);
02112 #endif
02113 #ifdef f_1
02114 #undef f_1
02115 #endif
02116 #if ( SHORT( FAST( 1+ ) ) 0 )
02117 #if ( SELF( 1+ ) 0 )
02118   {
02119     Force *patch_f_0 = params->ff[0];
02120 #ifndef NAMD_CUDA
02121 #ifndef ARCH_POWERPC 
02122 #pragma ivdep
02123 #endif
02124     for ( int i = 0; i < i_upper; ++i ) {
02125       patch_f_0[i].x += f_0[i].x;
02126       patch_f_0[i].y += f_0[i].y;
02127       patch_f_0[i].z += f_0[i].z;
02128       virial_xx += f_0[i].x * p_0[i].position.x;
02129       virial_xy += f_0[i].x * p_0[i].position.y;
02130       virial_xz += f_0[i].x * p_0[i].position.z;
02131       virial_yy += f_0[i].y * p_0[i].position.y;
02132       virial_yz += f_0[i].y * p_0[i].position.z;
02133       virial_zz += f_0[i].z * p_0[i].position.z;
02134     }
02135 #endif
02136   }
02137 #endif
02138 #endif
02139 
02140 #ifdef A2_QPX
02141     BigReal  fullElectVirial_xx  =  vec_extract(fullvirial_v0, 0);
02142     BigReal  fullElectVirial_xy  =  vec_extract(fullvirial_v0, 1);
02143     BigReal  fullElectVirial_xz  =  vec_extract(fullvirial_v0, 2);    
02144     BigReal  fullElectVirial_yy  =  vec_extract(fullvirial_v1, 1);
02145     BigReal  fullElectVirial_yz  =  vec_extract(fullvirial_v1, 2);
02146     BigReal  fullElectVirial_zz  =  vec_extract(fullvirial_v2, 2);
02147 #endif
02148 #ifdef fullf_1
02149 #undef fullf_1
02150 #endif
02151 #if ( FULL( 1+ ) 0 )
02152 #if ( SELF( 1+ ) 0 )
02153   {
02154     Force *patch_fullf_0 = params->fullf[0];
02155 #ifndef NAMD_CUDA
02156 #ifndef ARCH_POWERPC 
02157 #pragma ivdep
02158 #endif
02159     for ( int i = 0; i < i_upper; ++i ) {
02160       patch_fullf_0[i].x += fullf_0[i].x;
02161       patch_fullf_0[i].y += fullf_0[i].y;
02162       patch_fullf_0[i].z += fullf_0[i].z;
02163       fullElectVirial_xx += fullf_0[i].x * p_0[i].position.x;
02164       fullElectVirial_xy += fullf_0[i].x * p_0[i].position.y;
02165       fullElectVirial_xz += fullf_0[i].x * p_0[i].position.z;
02166       fullElectVirial_yy += fullf_0[i].y * p_0[i].position.y;
02167       fullElectVirial_yz += fullf_0[i].y * p_0[i].position.z;
02168       fullElectVirial_zz += fullf_0[i].z * p_0[i].position.z;
02169     }
02170 #endif
02171   }
02172 #endif
02173 #endif
02174 
02175 #ifndef NAMD_CUDA
02176   reduction[exclChecksumIndex] += exclChecksum;
02177 #endif
02178   FAST
02179   (
02180    // JLai
02181   ENERGY( reduction[vdwEnergyIndex] += vdwEnergy;
02182           GO( reduction[groLJEnergyIndex] += groLJEnergy;
02183               reduction[groGaussEnergyIndex] += groGaussEnergy;
02184               reduction[goNativeEnergyIndex] += goEnergyNative;
02185               reduction[goNonnativeEnergyIndex] += goEnergyNonnative; ) )
02186   SHORT
02187   (
02188   ENERGY( reduction[electEnergyIndex] += electEnergy; )
02189   )
02190   ALCH
02191   (
02192     TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;) 
02193     TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;) 
02194     FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
02195     FEP( reduction[vdwEnergyIndex_s_Left] += vdwEnergy_s_Left; )
02196   SHORT
02197   (
02198     FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
02199     TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;) 
02200     TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;) 
02201   )
02202   )
02203   SHORT
02204   (
02205   reduction[virialIndex_XX] += virial_xx;
02206   reduction[virialIndex_XY] += virial_xy;
02207   reduction[virialIndex_XZ] += virial_xz;
02208   reduction[virialIndex_YX] += virial_xy;
02209   reduction[virialIndex_YY] += virial_yy;
02210   reduction[virialIndex_YZ] += virial_yz;
02211   reduction[virialIndex_ZX] += virial_xz;
02212   reduction[virialIndex_ZY] += virial_yz;
02213   reduction[virialIndex_ZZ] += virial_zz;
02214   )
02215   )
02216   FULL
02217   (
02218   ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
02219   ALCH
02220   (
02221     FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
02222     TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;) 
02223     TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;) 
02224   )
02225   reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
02226   reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
02227   reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
02228   reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
02229   reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
02230   reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
02231   reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
02232   reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
02233   reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
02234   )
02235 
02236   delete [] excl_flags_buff;
02237 
02238 }
02239 

Generated on Tue Jun 18 04:07:45 2013 for NAMD by  doxygen 1.3.9.1