// NAMD
// ComputeNonbondedBase.h
// (doxygen-extracted listing of this file)
1 
7 // Several special cases are defined:
8 // NBTYPE: exclusion method (NBPAIR, NBSELF -- mutually exclusive)
9 // FULLELECT full electrostatics calculation?
10 
11 // XXX Remove support for dead architectures and unused features:
12 //
13 // 1. SLOWONLY to generate _slow kernels is used only by Molly
14 // (mollified impulse method) so removal of Molly allows removal
15 // of this supporting machinery.
16 // 2. LES (locally enhanced sampling) is no longer in use;
17 // feature overlap with FEP.
18 
19 #ifdef ARCH_POWERPC
20 #include <builtins.h>
21 #endif
22 
// Portable stack-alignment attribute used by the SSE2 code paths below.
// Only defined when SSE2 intrinsics are actually in play.
23 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
24 #include <emmintrin.h> // We're using SSE2 intrinsics
25 #if defined(__INTEL_COMPILER)
26 #define __align(X) __declspec(align(X) )
27 #elif defined(__GNUC__) || defined(__PGI)
28 #define __align(X) __attribute__((aligned(X) ))
29 #else
// NOTE(review): this fallback uses MSVC __declspec syntax for any other
// compiler that defines __SSE2__ — verify it is reachable only on MSVC.
30 #define __align(X) __declspec(align(X) )
31 #endif
32 #endif
33 
// Branch-prediction hint: UNLIKELY(X) marks X as expected-false on GCC-
// compatible compilers; elsewhere it expands to X unchanged.
34 #if defined(__GNUC__)
35 #define UNLIKELY(X) __builtin_expect((X), 0)
36 #else
37 #define UNLIKELY(X) (X)
38 #endif
39 
40 #ifdef DEFINITION // (
41  #include "LJTable.h"
42  #include "Molecule.h"
43  #include "ComputeNonbondedUtil.h"
44 #endif // )
45  #include "Parameters.h"
46 #if NAMD_ComputeNonbonded_SortAtoms != 0
47  #include "PatchMap.h"
48 #endif
49 
// Kernel-name construction: NAME expands through a chain of *NAME macros
// (CLASSNAME -> ENERGYNAME -> SLOWONLYNAME -> ... -> FEPNAME -> LAST),
// each layer appending a suffix when its feature flag is active. The same
// flags also turn the feature macros (PAIR, SELF, ENERGY, ...) into
// identity or empty expansions, selecting code inside the shared kernel body.
50 // determining class name
51 #undef NAME
52 #undef CLASS
53 #undef CLASSNAME
54 #define NAME CLASSNAME(calc)
55 
56 // nonbonded interactions between a pair of patches
57 #undef PAIR
58 #if NBTYPE == NBPAIR
59  #define PAIR(X) X
60  #define CLASS ComputeNonbondedPair
61  #define CLASSNAME(X) ENERGYNAME( X ## _pair )
62 #else
63  #define PAIR(X)
64 #endif
65 
66 // nonbonded interactions within a patch
67 #undef SELF
68 #if NBTYPE == NBSELF
69  #define SELF(X) X
70  #define CLASS ComputeNonbondedSelf
71  #define CLASSNAME(X) ENERGYNAME( X ## _self )
72 #else
73  #define SELF(X)
74 #endif
75 
// CALCENERGY layer: when set, ENERGY(X) emits X (energy accumulation code)
// and "_energy" is appended to the kernel name; NOENERGY is its complement.
76 // calculate energy in addition to forces
77 #undef ENERGYNAME
78 #undef ENERGY
79 #undef NOENERGY
80 #ifdef CALCENERGY
81  #define ENERGY(X) X
82  #define NOENERGY(X)
83  #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
84 #else
85  #define ENERGY(X)
86  #define NOENERGY(X) X
87  #define ENERGYNAME(X) SLOWONLYNAME( X )
88 #endif
89 
// SLOWONLY layer: disables the FAST (van der Waals) terms and appends
// "_slow"; per the header note this exists only to support Molly.
90 // SLOWONLY used just by Molly
91 // FAST is van der Waals
92 #undef SLOWONLYNAME
93 #undef FAST
94 #undef NOFAST
95 #ifdef SLOWONLY
96  #define FAST(X)
97  #define NOFAST(X) X
98  #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
99 #else
100  #define FAST(X) X
101  #define NOFAST(X)
102  #define SLOWONLYNAME(X) MERGEELECTNAME( X )
103 #endif
104 
105 // MERGEELECT optimizes force buffer use for single timestepping
106 // SHORT is electrostatics
107 #undef MERGEELECTNAME
108 #undef SHORT
109 #undef NOSHORT
110 #ifdef MERGEELECT
111  #define SHORT(X)
112  #define NOSHORT(X) X
113  #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
114 #else
115  #define SHORT(X) X
116  #define NOSHORT(X)
117  #define MERGEELECTNAME(X) FULLELECTNAME( X )
118 #endif
119 
120 // FULL when on a full electrostatics (e.g. PME) step
121 #undef FULLELECTNAME
122 #undef FULL
123 #undef NOFULL
124 #ifdef FULLELECT
125  #define FULLELECTNAME(X) FULLDISPNAME( X ## _fullelect )
126  #define FULL(X) X
127  #define NOFULL(X)
128 #else
129  #define FULLELECTNAME(X) FULLDISPNAME( X )
130  #define FULL(X)
131  #define NOFULL(X) X
132 #endif
133 
134 // DISP when on a full dispersion (LJ-PME) step
135 #undef FULLDISPNAME
136 #undef DISP
137 #undef NODISP
138 #ifdef FULLDISP
139  #define FULLDISPNAME(X) TABENERGYNAME( X ## _fulldisp )
140  #define DISP(X) X
141  #define NODISP(X)
142 #else
143  #define FULLDISPNAME(X) TABENERGYNAME( X )
144  #define DISP(X)
145  #define NODISP(X) X
146 #endif
147 
148 // TABENERGY when using tabulated energy
149 #undef TABENERGYNAME
150 #undef TABENERGY
151 #undef NOTABENERGY
152 #ifdef TABENERGYFLAG
153  #define TABENERGYNAME(X) FEPNAME( X ## _tabener )
154  #define TABENERGY(X) X
155  #define NOTABENERGY(X)
156 #else
157  #define TABENERGYNAME(X) FEPNAME( X )
158  #define TABENERGY(X)
159  #define NOTABENERGY(X) X
160 #endif
161 
162 // Here are what these macros stand for:
163 // FEP/NOT_FEP: FEP free energy perturbation is active/inactive
164 // (does NOT use LAM)
165 // LES: locally-enhanced sampling is active
166 // LAM: scale the potential by a factor lambda (except FEP)
167 // INT: measure interaction energies
168 // PPROF: pressure profiling
169 
// Final naming layer: all flag macros default to empty, then exactly one
// *FLAG section below re-enables its macros and appends the matching suffix.
// ALCH is shared by FEP and TI (both are alchemical methods).
170 #undef FEPNAME
171 #undef FEP
172 #undef LES
173 #undef INT
174 #undef PPROF
175 #undef LAM
176 #undef ALCH
177 #undef TI
178 #undef GO
179 #define FEPNAME(X) LAST( X )
180 #define FEP(X)
181 #define ALCHPAIR(X)
182 #define NOT_ALCHPAIR(X) X
183 #define LES(X)
184 #define INT(X)
185 #define PPROF(X)
186 #define LAM(X)
187 #define ALCH(X)
188 #define TI(X)
189 #define GO(X)
190 #ifdef FEPFLAG
191  #undef FEPNAME
192  #undef FEP
193  #undef ALCH
194  #define FEPNAME(X) LAST( X ## _fep )
195  #define FEP(X) X
196  #define ALCH(X) X
197 #endif
198 #ifdef TIFLAG
199  #undef FEPNAME
200  #undef TI
201  #undef ALCH
202  #define FEPNAME(X) LAST( X ## _ti )
203  #define TI(X) X
204  #define ALCH(X) X
205 #endif
206 #ifdef LESFLAG
207  #undef FEPNAME
208  #undef LES
209  #undef LAM
210  #define FEPNAME(X) LAST( X ## _les )
211  #define LES(X) X
212  #define LAM(X) X
213 #endif
214 #ifdef INTFLAG
215  #undef FEPNAME
216  #undef INT
217  #define FEPNAME(X) LAST( X ## _int )
218  #define INT(X) X
219 #endif
220 #ifdef PPROFFLAG
221  #undef FEPNAME
222  #undef INT
223  #undef PPROF
224  #define FEPNAME(X) LAST( X ## _pprof )
225  #define INT(X) X
226  #define PPROF(X) X
227 #endif
228 #ifdef GOFORCES
229  #undef FEPNAME
230  #undef GO
231  #define FEPNAME(X) LAST( X ## _go )
232  #define GO(X) X
233 #endif
234 
// LAST terminates the *NAME expansion chain, yielding the final identifier.
235 #define LAST(X) X
236 
237 // see if things are really messed up
// Compile-time consistency checks: each line pairs macros that must be
// mutually exclusive. If two conflicting flags are both active, both macros
// expand to identity and the bare tokens "foo bar" survive, forcing a
// compile error; in any valid configuration every line expands to nothing.
238 SELF( PAIR( foo bar ) )
239 LES( FEP( foo bar ) )
240 LES( INT( foo bar ) )
241 FEP( INT( foo bar ) )
242 LAM( INT( foo bar ) )
243 FEP( NOENERGY( foo bar ) )
244 ENERGY( NOENERGY( foo bar ) )
245 TABENERGY(NOTABENERGY( foo bar ) )
246 
// NOTE(review): the doxygen dump skips an original line (248) between the
// define and undef below — presumably a KNL-related #include; confirm
// against the repository source.
247 #define KNL_MAKE_DEPENDS_INCLUDE
249 #undef KNL_MAKE_DEPENDS_INCLUDE
250 
// KNL (Xeon Phi) kernels exist only for the plain configuration: each
// active feature macro contributes "1+" to the sum below, so any feature
// (tabulated energy, alchemy, LES, Go, pressure profiling, slow-only)
// makes the condition nonzero and disables the KNL path.
251 #undef KNL
252 #undef NOKNL
253 #ifdef NAMD_KNL
254  #if ( TABENERGY(1+) FEP(1+) TI(1+) INT(1+) LES(1+) GO(1+) PPROF(1+) NOFAST(1+) 0 )
255  #define KNL(X)
256  #define NOKNL(X) X
257  #else
258  #define KNL(X) X
259  #define NOKNL(X)
260  #endif
261 #else
262  #define KNL(X)
263  #define NOKNL(X) X
264 #endif
265 
// Dot product of two coordinate triples stored as A_x/A_y/A_z scalars;
// used only by the atom-sort projection code in the pair kernel.
266 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
267  #define COMPONENT_DOTPRODUCT(A,B) ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))
268 #endif
269 
270 
271 // ************************************************************
272 // function header
274  ( nonbonded *params )
275 
276 // function body
277 {
278  // int NAME; // to track errors via unused variable warnings
279 
280  if ( ComputeNonbondedUtil::commOnly ) return;
281 
282  // speedup variables
283  BigReal *reduction = params->reduction;
285 
286  PPROF(
287  BigReal *pressureProfileReduction = params->pressureProfileReduction;
288  const BigReal invThickness = 1.0 / pressureProfileThickness;
289  )
290 
291  Pairlists &pairlists = *(params->pairlists);
292  int savePairlists = params->savePairlists;
293  int usePairlists = params->usePairlists;
294  pairlists.reset();
295  // PAIR(iout << "--------\n" << endi;)
296 
297  // BEGIN LA
298  const int doLoweAndersen = params->doLoweAndersen;
299  // END LA
300 
301  // local variables
302  int exclChecksum = 0;
303  FAST
304  (
305  // JLai
306  ENERGY( BigReal vdwEnergy = 0;
307  GO( BigReal groLJEnergy = 0;
308  BigReal groGaussEnergy = 0;
309  BigReal goEnergyNative = 0;
310  BigReal goEnergyNonnative = 0; ) )
311  SHORT
312  (
313  ENERGY( BigReal electEnergy = 0; )
314  Vector f_net = 0.;
315  )
316 
317  FEP
318  (
319  ENERGY( BigReal vdwEnergy_s = 0; )
320  SHORT
321  (
322  ENERGY( BigReal electEnergy_s = 0; )
323  )
324  )
325  )
326 
327  FULL
328  (
329  ENERGY(
330  BigReal fullElectEnergy = 0;
331  DISP( BigReal fullVdwEnergy = 0; )
332  )
333  Vector fullf_net = 0.;
334  DISP( Vector fulldispf_net = 0.; )
335  FEP
336  (
337  ENERGY( BigReal fullElectEnergy_s = 0; )
338  )
339  )
340 
341  // Bringing stuff into local namespace for speed.
342  const BigReal offset_x = params->offset.x;
343  const BigReal offset_y = params->offset.y;
344  const BigReal offset_z = params->offset.z;
345 
346  // Note that offset_f assumes positions are relative to patch centers
347  const float offset_x_f = params->offset_f.x;
348  const float offset_y_f = params->offset_f.y;
349  const float offset_z_f = params->offset_f.z;
350 
351  register const BigReal plcutoff2 = \
352  params->plcutoff * params->plcutoff;
353  register const BigReal groupplcutoff2 = \
354  params->groupplcutoff * params->groupplcutoff;
357  LJTable::TableEntry ljNull; ljNull.A = 0; ljNull.B = 0;
358  const LJTable::TableEntry* const lj_null_pars = &ljNull;
359  const Molecule* const mol = ComputeNonbondedUtil:: mol;
360  SHORT
361  (
362  const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
363  )
364  FULL
365  (
366  SHORT
367  (
369  )
370  NOSHORT
371  (
372 //#if 1 ALCH(-1)
373  const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
374 //#else // have to switch this for ALCH
375 // BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
376 //#endif
377  )
378  )
380  const float scaling_f = scaling;
381  const BigReal modf_mod = 1.0 - scale14;
382  FAST
383  (
385  const float switchOn2_f = switchOn2;
389  const float c1_f = c1;
390  const float c3_f = c3;
391  )
394  // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
395  const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
396 
397  DISP(
398  const BigReal alphaLJ = simParams->LJPMEEwaldCoefficient;
400  const BigReal rcut2inv = 1 / rcut2;
401  const BigReal rcut6inv = rcut2inv * rcut2inv * rcut2inv;
402  const BigReal rcut12inv = rcut6inv * rcut6inv;
403  const BigReal aRc = alphaLJ * ComputeNonbondedUtil::cutoff;
404  const BigReal aRc2 = aRc * aRc;
405  const BigReal screen_rc = (1 - (1 + aRc2 + aRc2*aRc2/2)*exp(-aRc2));
406  )
407 
408  ALCH(
409  const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
411  //JM: switchfactor is set to inf if the switching flag is off
412  const BigReal diff = cutoff2 - switchdist2;
413  const BigReal switchfactor = 1./(diff*diff*diff);
417 
418  /* BKR - Enable dynamic lambda by passing the timestep to simParams.
419  The SimParameters object now has all knowledge of the lambda schedule
420  and all operations should ask _it_ for lambda, rather than recomputing
421  it.
422  */
423  const BigReal alchLambda = simParams->getCurrentLambda(params->step);
424 
425  /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
426  BigReal lambdaUp = alchLambda; // needed in ComputeNonbondedBase2.h!
427  BigReal elecLambdaUp = simParams->getElecLambda(lambdaUp);
428  BigReal vdwLambdaUp = simParams->getVdwLambda(lambdaUp);
429  BigReal repLambdaUp = simParams->getRepLambda(lambdaUp);
430  BigReal vdwShiftUp = alchVdwShiftCoeff*(1 - vdwLambdaUp);
431  FEP(BigReal lambda2Up = simParams->getCurrentLambda2(params->step);)
432  FEP(BigReal elecLambda2Up = simParams->getElecLambda(lambda2Up);)
433  FEP(BigReal vdwLambda2Up = simParams->getVdwLambda(lambda2Up);)
434  FEP(BigReal repLambda2Up = simParams->getRepLambda(lambda2Up);)
435  FEP(BigReal vdwShift2Up = alchVdwShiftCoeff*(1 - vdwLambda2Up);)
436 
437  /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
438  BigReal lambdaDown = 1 - alchLambda; // needed in ComputeNonbondedBase2.h!
439  BigReal elecLambdaDown = simParams->getElecLambda(lambdaDown);
440  BigReal vdwLambdaDown = simParams->getVdwLambda(lambdaDown);
441  BigReal repLambdaDown = simParams->getRepLambda(lambdaDown);
442  BigReal vdwShiftDown = alchVdwShiftCoeff*(1 - vdwLambdaDown);
443  FEP(BigReal lambda2Down = 1 - simParams->getCurrentLambda2(params->step);)
444  FEP(BigReal elecLambda2Down = simParams->getElecLambda(lambda2Down);)
445  FEP(BigReal vdwLambda2Down = simParams->getVdwLambda(lambda2Down);)
446  FEP(BigReal repLambda2Down = simParams->getRepLambda(lambda2Down);)
447  FEP(BigReal vdwShift2Down = alchVdwShiftCoeff*(1 - vdwLambda2Down);)
448 
449  // Thermodynamic Integration Notes:
450  // Separation of atom pairs into different pairlist according to lambda
451  // coupling, for alchemical free energy calculations. Normal pairlists
452  // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
453  // (pairlist[nxm][12]_save are created for atoms switched up or down with
454  // lambda respectively.
455  // This makes TI coding far easier and more readable, since it's necessary
456  // to store dU/dlambda in different variables depending on whether atoms are
457  // being switched up or down. Further, it allows the prior explicit coding of
458  // the separation-shifted vdW potential to stay in place without a big
459  // performance hit, since it's no longer necessary to calculate vdW potentials
460  // explicity for the bulk (non-alchemical) interactions - so that part of the
461  // free energy code stays readable and easy to modify. Finally there should
462  // be some performance gain over the old FEP implementation as we no
463  // longer have to check the partitions of each atom pair and switch
464  // parameters accordingly for every single NonbondedBase2.h loop - this is
465  // done at the pairlist level
466 
467  // XXX pswitchTable is repeatedly calculated each time
468  // since its value is fixed, it should be calculated once up front
469  int pswitchTable[5*5];
470  // determines which pairlist alchemical pairings get sent to
471  // 0: non-alchemical pairs, partition 0 <-> partition 0
472  // 1 and 3: atoms scaled up as lambda increases, p0<->p1, p0<->p3
473  // 2 and 4: atoms scaled down as lambda increases, p0<->p2, p0<->p4
474  // all p1<->p2, p1<->p4, p3<->p2, p3<->p4 interactions to be dropped (99)
475  // in general, 'up' refers to 1 and 3, 'down' refers to 2 and 4
476  for (int ip=0; ip<5; ++ip) {
477  for (int jp=0; jp<5; ++jp ) {
478  pswitchTable[ip+5*jp] = 0;
479  if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+5*jp] = 1;
480  if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+5*jp] = 2;
481  if ((ip==3 && jp==0) || (ip==0 && jp==3)) pswitchTable[ip+5*jp] = 3;
482  if ((ip==4 && jp==0) || (ip==0 && jp==4)) pswitchTable[ip+5*jp] = 4;
483 
484  if (ip && jp && (abs(ip - jp) != 2)) pswitchTable[ip+5*jp] = 99; // no interaction
486  // no decoupling: interactions within an alchemical moiety are treated like
487  // normal alchemical pairs
488  if ((ip == 1 && jp == 1) || (ip == 1 && jp == 3) || (ip == 3 && jp == 1)) pswitchTable[ip+5*jp] = 1;
489  if ((ip == 2 && jp == 2) || (ip == 2 && jp == 4) || (ip == 4 && jp == 2)) pswitchTable[ip+5*jp] = 2;
490  if (ip == 3 && jp == 3) pswitchTable[ip+5*jp] = 3;
491  if (ip == 4 && jp == 4) pswitchTable[ip+5*jp] = 4;
492  }
494  // decoupling: PME calculates extra grids so that while PME
495  // interaction with the full system is switched off, a new PME grid
496  // containing only alchemical atoms is switched on. Full interactions
497  // between alchemical atoms are maintained; potentials within one
498  // partition need not be scaled here.
499  if (ip == 1 && jp == 1) pswitchTable[ip+5*jp] = 0; //Single topology only supports alchDecouple off!
500  if (ip == 2 && jp == 2) pswitchTable[ip+5*jp] = 0; //So 'alchDecouple on' only works for dual topology!
501  }
502  }
503  }
504 
505  // dU/dlambda variables for thermodynamic integration
506  TI(
507  BigReal vdwEnergy_ti_1 = 0;
508  BigReal vdwEnergy_ti_2 = 0;
509  SHORT(BigReal electEnergy_ti_1 = 0;
510  BigReal electEnergy_ti_2 = 0;)
511  FULL(BigReal fullElectEnergy_ti_1 = 0;
512  BigReal fullElectEnergy_ti_2 = 0;)
513  )
514  )
515 
516 
517  const int i_upper = params->numAtoms[0];
518  register const int j_upper = params->numAtoms[1];
519  register int j;
520  register int i;
521  const CompAtom *p_0 = params->p[0];
522  const CompAtom *p_1 = params->p[1];
523  KNL( const CompAtomFlt *pFlt_0 = params->pFlt[0]; )
524  KNL( const CompAtomFlt *pFlt_1 = params->pFlt[1]; )
525  const CompAtomExt *pExt_0 = params->pExt[0];
526  const CompAtomExt *pExt_1 = params->pExt[1];
527 
528  char * excl_flags_buff = 0;
529  const int32 * full_excl = 0;
530  const int32 * mod_excl = 0;
531 
532  plint *pairlistn_save; int npairn;
533  plint *pairlistx_save; int npairx;
534  plint *pairlistm_save; int npairm;
535 
536  ALCH(
537  // n = normal, x = excluded, m = modified
538  // A[1-4] = alchemical partition 1-4
539  plint *pairlistnA1_save; int npairnA1;
540  plint *pairlistxA1_save; int npairxA1;
541  plint *pairlistmA1_save; int npairmA1;
542  plint *pairlistnA2_save; int npairnA2;
543  plint *pairlistxA2_save; int npairxA2;
544  plint *pairlistmA2_save; int npairmA2;
545  plint *pairlistnA3_save; int npairnA3;
546  plint *pairlistxA3_save; int npairxA3;
547  plint *pairlistmA3_save; int npairmA3;
548  plint *pairlistnA4_save; int npairnA4;
549  plint *pairlistxA4_save; int npairxA4;
550  plint *pairlistmA4_save; int npairmA4;
551  )
552 
553  NBWORKARRAYSINIT(params->workArrays);
554 
555  int arraysize = j_upper+5;
556 
557  NBWORKARRAY(int,pairlisti,arraysize)
558  NBWORKARRAY(BigReal,r2list,arraysize)
559  KNL( NBWORKARRAY(float,r2list_f,arraysize) )
560  KNL( NBWORKARRAY(float,xlist,arraysize) )
561  KNL( NBWORKARRAY(float,ylist,arraysize) )
562  KNL( NBWORKARRAY(float,zlist,arraysize) )
563 
564  union { double f; int32 i[2]; } byte_order_test;
565  byte_order_test.f = 1.0; // should occupy high-order bits only
566  int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
567 
568  if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
569 
570 
571  // DMK - Atom Sort
572  //
573  // Basic Idea: Determine the line between the center of masses of
574  // the two patches. Project and then sort the lists of atoms
575  // along this line. Then, as the pairlists are being generated
576  // for the atoms in the first atom list, use the sorted
577  // list to only add atoms from the second list that are between
578  // +/- ~cutoff from the atoms position on the line.
579  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
580 
581  // NOTE: For the first atom list, just the sort values themselves need to be
582  // calculated (a BigReal vs. SortEntry data structure). However, a second
583  // SortEntry array is needed to perform the merge sort on the second list of
584  // atoms. Use the atomSort_[0/1]_sortValues__ arrays to sort the second
585  // list of atoms and then use the left over array to make a version of the
586  // list that only includes non-fixed groups (fixg).
587 
588  NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
589  NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
590  NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
591 
592  register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
593  register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
594 
595  int p_0_sortValues_len = 0;
596  int p_1_sortValues_len = 0;
597  int p_1_sortValues_fixg_len = 0;
598 
599  // Calculate the distance between to projected points on the line that
600  // represents the cutoff distance.
601  BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
602 
603  if (savePairlists || !usePairlists) {
604 
605  register const BigReal projLineVec_x = params->projLineVec.x;
606  register const BigReal projLineVec_y = params->projLineVec.y;
607  register const BigReal projLineVec_z = params->projLineVec.z;
608 
609  // Calculate the sort values for the atoms in patch 1
610  {
611  register int nbgs = p_1->nonbondedGroupSize;
612  register BigReal p_x = p_1->position.x;
613  register BigReal p_y = p_1->position.y;
614  register BigReal p_z = p_1->position.z;
615  register int index = 0;
616 
617  for (register int j = nbgs; j < j_upper; j += nbgs) {
618 
619  // Set p_j_next to point to the atom for the next iteration and begin
620  // loading the 'nbgs' value for that atom.
621  register const CompAtom* p_j_next = p_1 + j;
622  nbgs = p_j_next->nonbondedGroupSize;
623 
624  // Calculate the distance along the projection vector
625  // NOTE: If the vector from the origin to the point is 'A' and the vector
626  // between the patches is 'B' then to project 'A' onto 'B' we take the dot
627  // product of the two vectors 'A dot B' divided by the length of 'B'.
628  // So... projection of A onto B = (A dot B) / length(B), however, note
629  // that length(B) == 1, therefore the sort value is simply (A dot B)
630  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
631 
632  // Start loading the next iteration's atom's position
633  p_x = p_j_next->position.x;
634  p_y = p_j_next->position.y;
635  p_z = p_j_next->position.z;
636 
637  // Store the caclulated sort value into the array of sort values
638  register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
639  p_1_sortValStorePtr->index = index;
640  p_1_sortValStorePtr->sortValue = sortVal;
641  p_1_sortValues_len++;
642 
643  // Update index for the next iteration
644  index = j;
645 
646  } // end while (j < j_upper)
647 
648  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
649 
650  register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
651  p_1_sortValStorePtr->index = index;
652  p_1_sortValStorePtr->sortValue = sortVal;
653  p_1_sortValues_len++;
654  }
655 
656  // NOTE: This list and another version of it with only non-fixed
657  // atoms will be used in place of grouplist and fixglist.
658  #if 0 // Selection Sort
659  sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
660  #elif 0 // Bubble Sort
661  sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
662  #else // Merge Sort
663  #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
664  sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
665  #else
666  sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
667  #endif
668  #endif
669 
670  // Calculate the sort values for the atoms in patch 0
671  {
672  register int nbgs = p_0->nonbondedGroupSize;
673  register BigReal p_x = p_0->position.x + offset_x;
674  register BigReal p_y = p_0->position.y + offset_y;
675  register BigReal p_z = p_0->position.z + offset_z;
676  register int index = 0;
677 
678  for (register int i = nbgs; i < i_upper; i += nbgs) {
679 
680  // Set p_i_next to point to the atom for the next iteration and begin
681  // loading the 'nbgs' value for that atom.
682  register const CompAtom* p_i_next = p_0 + i;
683  nbgs = p_i_next->nonbondedGroupSize;
684 
685  // Calculate the distance along the projection vector
686  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
687 
688  // Start loading the next iteration's atom's position
689  p_x = p_i_next->position.x + offset_x;
690  p_y = p_i_next->position.y + offset_y;
691  p_z = p_i_next->position.z + offset_z;
692 
693  // Store the calculated sort value into the array of sort values
694  register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
695  *p_0_sortValStorePtr = sortVal;
696 
697  // Update index for the next iteration
698  index = i;
699  }
700 
701  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
702 
703  register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
704  *p_0_sortValStorePtr = sortVal;
705 
706  p_0_sortValues_len = i_upper;
707  }
708 
709  } // end if (savePairlists || !usePairlists)
710 
711  #endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
712 
713  // Atom Sort : The grouplist and fixglist arrays are not needed when the
714  // the atom sorting code is in use.
715  #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
716  NBWORKARRAY(int,grouplist,arraysize);
717  NBWORKARRAY(int,fixglist,arraysize);
718  #endif
719 
720  NBWORKARRAY(int,goodglist,arraysize);
721  NBWORKARRAY(plint,pairlistx,arraysize);
722  NBWORKARRAY(plint,pairlistm,arraysize);
723  NBWORKARRAY(int,pairlist,arraysize);
724  NBWORKARRAY(int,pairlist2,arraysize);
725  ALCH(
726  NBWORKARRAY(plint,pairlistnA1,arraysize);
727  NBWORKARRAY(plint,pairlistxA1,arraysize);
728  NBWORKARRAY(plint,pairlistmA1,arraysize);
729  NBWORKARRAY(plint,pairlistnA2,arraysize);
730  NBWORKARRAY(plint,pairlistxA2,arraysize);
731  NBWORKARRAY(plint,pairlistmA2,arraysize);
732  NBWORKARRAY(plint,pairlistnA3,arraysize);
733  NBWORKARRAY(plint,pairlistxA3,arraysize);
734  NBWORKARRAY(plint,pairlistmA3,arraysize);
735  NBWORKARRAY(plint,pairlistnA4,arraysize);
736  NBWORKARRAY(plint,pairlistxA4,arraysize);
737  NBWORKARRAY(plint,pairlistmA4,arraysize);
738  )
739 
740  int fixg_upper = 0;
741  int g_upper = 0;
742 
743  if ( savePairlists || ! usePairlists ) {
744 
745  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
746 
747  // Create a sorted list of non-fixed groups
748  register int fixg = 0;
749  for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
750  register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
751  register int p_1_index = p_1_sortEntry->index;
752  if (!pExt_1[p_1_index].groupFixed) {
753  register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
754  p_1_sortEntry_fixg->index = p_1_sortEntry->index;
755  p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
756  p_1_sortValues_fixg_len++;
757  }
758  }
759 
760  #else
761 
762  register int g = 0;
763  for ( j = 0; j < j_upper; ++j ) {
764  if ( p_1[j].nonbondedGroupSize ) {
765  grouplist[g++] = j;
766  }
767  }
768  g_upper = g;
769  if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
770  int fixg = 0;
771 
772  if ( fixedAtomsOn ) {
773  for ( g = 0; g < g_upper; ++g ) {
774  j = grouplist[g];
775  if ( ! pExt_1[j].groupFixed ) {
776  fixglist[fixg++] = j;
777  }
778  }
779  }
780 
781  fixg_upper = fixg;
782  if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
783 
784  #endif // NAMD_ComputeNonbonded_SortAtoms != 0
785 
786  pairlists.addIndex();
787  pairlists.setIndexValue(i_upper);
788 
789  } else { // if ( savePairlists || ! usePairlists )
790 
791  if ( pairlists.getIndexValue() != i_upper )
792  NAMD_bug("pairlist i_upper mismatch!");
793 
794  } // if ( savePairlists || ! usePairlists )
795 
796  SELF(
797  int j_hgroup = 0;
798  int g_lower = 0;
799  int fixg_lower = 0;
800  )
801  int pairlistindex=0;
802  int pairlistoffset=0;
803 
804 
805 
806 #if ( SHORT( FAST( 1+ ) ) 0 )
807  Force *f_0 = params->ff[0];
808 #if ( PAIR( 1+ ) 0 )
809  Force *f_1 = params->ff[1];
810 #else
811 #define f_1 f_0
812 #endif
813 #endif
814 #if ( FULL( 1+ ) 0 )
815  Force *fullf_0 = params->fullf[0];
816 #if ( PAIR( 1+ ) 0 )
817  Force *fullf_1 = params->fullf[1];
818 #else
819 #define fullf_1 fullf_0
820 #endif
821 #endif
822 
823 
824  int maxPart = params->numParts - 1;
825  int groupCount = params->minPart;
826  PAIR(
827  if ( savePairlists || ! usePairlists ) {
828  i = 0;
829  pairlists.addIndex();
830  } else {
831  i = pairlists.getIndexValue();
832  }
833  )
834  PAIR(for ( ; i < (i_upper);)) SELF(for ( i=0; i < (i_upper- 1);i++))
835  {
836  const CompAtom &p_i = p_0[i];
837  KNL( const CompAtomFlt &pFlt_i = pFlt_0[i]; )
838  const CompAtomExt &pExt_i = pExt_0[i];
839 
840  PAIR(if (savePairlists || ! usePairlists){)
841  if ( p_i.hydrogenGroupSize ) {
842  if ( groupCount ) { // skip this group
843  --groupCount;
844  i += p_i.hydrogenGroupSize;
845 #ifdef ARCH_POWERPC
846  __dcbt((void *) &(p_0[i]));
847 #endif
848  SELF(--i;)
849  continue;
850  } else { // compute this group
851  groupCount = maxPart;
852  }
853  }
854  PAIR(})
855 
856  register const BigReal p_i_x = p_i.position.x + offset_x;
857  register const BigReal p_i_y = p_i.position.y + offset_y;
858  register const BigReal p_i_z = p_i.position.z + offset_z;
859 #if KNL(1)+0
860  register const BigReal p_i_x_f = pFlt_i.position.x + offset_x_f;
861  register const BigReal p_i_y_f = pFlt_i.position.y + offset_y_f;
862  register const BigReal p_i_z_f = pFlt_i.position.z + offset_z_f;
863 #endif
864 
865  ALCH(const int p_i_partition = p_i.partition;)
866 
867  PPROF(
868  const int p_i_partition = p_i.partition;
869  int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
871  )
872 
873  SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
874 
875  if ( savePairlists || ! usePairlists ) {
876 
877  #ifdef MEM_OPT_VERSION
878  const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);
879  const int excl_min = pExt_i.id + exclcheck->min;
880  const int excl_max = pExt_i.id + exclcheck->max;
881  #else
882  const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
883  const int excl_min = exclcheck->min;
884  const int excl_max = exclcheck->max;
885  #endif
886  const char * excl_flags_var;
887  if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
888  else { // need to build list on the fly
889 
890  //TODO: Should change later!!!!!!!!!! --Chao Mei
891  //Now just for the sake of passing compilation
892  #ifndef MEM_OPT_VERSION
893 #define REZERO_EXCL_FLAGS_BUFF if ( excl_flags_buff ) { \
894  int nl,l; \
895  nl = full_excl[0] + 1; \
896  for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0; \
897  nl = mod_excl[0] + 1; \
898  for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0; \
899  }
901  ResizeArray<char> &wa = computeNonbondedWorkArrays->excl_flags_buff;
902  if ( wa.size() < mol->numAtoms ) {
903  int oldsize = wa.size();
904  wa.resize(mol->numAtoms);
905  memset( (void*) &wa[oldsize], 0, wa.size() - oldsize);
906  }
907  excl_flags_buff = &wa[0];
908  }
909  int nl,l;
910  full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
911  nl = full_excl[0] + 1;
912  for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
913  mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
914  nl = mod_excl[0] + 1;
915  for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
916  excl_flags_var = excl_flags_buff;
917  #endif
918 
919  }
920  const char * const excl_flags = excl_flags_var;
921 
922  if ( p_i.nonbondedGroupSize ) {
923 
924  pairlistindex = 0; // initialize with 0 elements
925  pairlistoffset=0;
926  const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
927 
928  // If patch divisions are not made by hydrogen groups, then
929  // hydrogenGroupSize is set to 1 for all atoms. Thus we can
930  // carry on as if we did have groups - only less efficiently.
931  // An optimization in this case is to not rebuild the temporary
932  // pairlist but to include every atom in it. This should be a
933  // a very minor expense.
934 
935  SELF
936  (
937  if ( p_i.hydrogenGroupSize ) {
938  // exclude child hydrogens of i
939  // j_hgroup = i + p_i.hydrogenGroupSize; (moved above)
940  while ( g_lower < g_upper &&
941  grouplist[g_lower] < j_hgroup ) ++g_lower;
942  while ( fixg_lower < fixg_upper &&
943  fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
944  }
945  // add all child or sister hydrogens of i
946  for ( j = i + 1; j < j_hgroup; ++j ) {
947  pairlist[pairlistindex++] = j;
948  }
949  )
950 
951  // add remaining atoms to pairlist via hydrogen groups
952  register int *pli = pairlist + pairlistindex;
953 
954  {
955  // Atom Sort : Modify the values of g and gu based on the added information
956  // of the linear projections (sort values) information.
957  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
958 
959  register int g = 0;
960  register BigReal p_i_sortValue = p_0_sortValues[i];
961  const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
962  register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
963 
964  // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
965  register int lower = 0;
966  register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
967  while ((upper - lower) > 1) {
968  register int j = ((lower + upper) >> 1);
969  register BigReal jSortVal = sortValues[j].sortValue;
970  if (jSortVal < p_i_sortValue_plus_windowRadius) {
971  lower = j;
972  } else {
973  upper = j;
974  }
975  }
976  const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
977 
978  #else
979 
980  register int *gli = goodglist;
981  const int *glist = ( groupfixed ? fixglist : grouplist );
982  SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
983  const int gu = ( groupfixed ? fixg_upper : g_upper );
984  register int g = PAIR(0) SELF(gl);
985 
986  #endif
987 
988 
989  if ( g < gu ) {
990  int hu = 0;
991 #ifndef NAMD_KNL
992 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
993  if ( gu - g > 6 ) {
994 
995  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
996  register SortEntry* sortEntry0 = sortValues + g;
997  register SortEntry* sortEntry1 = sortValues + g + 1;
998  register int jprev0 = sortEntry0->index;
999  register int jprev1 = sortEntry1->index;
1000  #else
1001  register int jprev0 = glist[g ];
1002  register int jprev1 = glist[g + 1];
1003  #endif
1004 
1005  register int j0;
1006  register int j1;
1007 
1008  __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1009  __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1010  __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1011 
1012  // these don't change here, so we could move them into outer scope
1013  const __m128d P_I_X = _mm_set1_pd(p_i_x);
1014  const __m128d P_I_Y = _mm_set1_pd(p_i_y);
1015  const __m128d P_I_Z = _mm_set1_pd(p_i_z);
1016 
1017  g += 2;
1018  for ( ; g < gu - 2; g +=2 ) {
1019  // compute 1d distance, 2-way parallel
1020  j0 = jprev0;
1021  j1 = jprev1;
1022 
1023  __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
1024  __m128d R2_01 = _mm_mul_pd(T_01, T_01);
1025  T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
1026  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1027  T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
1028  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1029 
1030  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1031  sortEntry0 = sortValues + g;
1032  sortEntry1 = sortValues + g + 1;
1033  jprev0 = sortEntry0->index;
1034  jprev1 = sortEntry1->index;
1035  #else
1036  jprev0 = glist[g ];
1037  jprev1 = glist[g+1];
1038  #endif
1039 
1040  PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1041  PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1042  PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1043 
1044  __align(16) double r2_01[2];
1045  _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
1046 
1047  // XXX these could possibly benefit from SSE-based conditionals
1048  bool test0 = ( r2_01[0] < groupplcutoff2 );
1049  bool test1 = ( r2_01[1] < groupplcutoff2 );
1050 
1051  //removing ifs benefits on many architectures
1052  //as the extra stores will only warm the cache up
1053  goodglist [ hu ] = j0;
1054  goodglist [ hu + test0 ] = j1;
1055 
1056  hu += test0 + test1;
1057  }
1058  g-=2;
1059  }
1060 #else
1061  if ( gu - g > 6 ) {
1062 
1063  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1064  register SortEntry* sortEntry0 = sortValues + g;
1065  register SortEntry* sortEntry1 = sortValues + g + 1;
1066  register int jprev0 = sortEntry0->index;
1067  register int jprev1 = sortEntry1->index;
1068  #else
1069  register int jprev0 = glist[g ];
1070  register int jprev1 = glist[g + 1];
1071  #endif
1072 
1073  register int j0;
1074  register int j1;
1075 
1076  register BigReal pj_x_0, pj_x_1;
1077  register BigReal pj_y_0, pj_y_1;
1078  register BigReal pj_z_0, pj_z_1;
1079  register BigReal t_0, t_1, r2_0, r2_1;
1080 
1081  pj_x_0 = p_1[jprev0].position.x;
1082  pj_x_1 = p_1[jprev1].position.x;
1083 
1084  pj_y_0 = p_1[jprev0].position.y;
1085  pj_y_1 = p_1[jprev1].position.y;
1086 
1087  pj_z_0 = p_1[jprev0].position.z;
1088  pj_z_1 = p_1[jprev1].position.z;
1089 
1090  g += 2;
1091  for ( ; g < gu - 2; g +=2 ) {
1092  // compute 1d distance, 2-way parallel
1093  j0 = jprev0;
1094  j1 = jprev1;
1095 
1096  t_0 = p_i_x - pj_x_0;
1097  t_1 = p_i_x - pj_x_1;
1098  r2_0 = t_0 * t_0;
1099  r2_1 = t_1 * t_1;
1100 
1101  t_0 = p_i_y - pj_y_0;
1102  t_1 = p_i_y - pj_y_1;
1103  r2_0 += t_0 * t_0;
1104  r2_1 += t_1 * t_1;
1105 
1106  t_0 = p_i_z - pj_z_0;
1107  t_1 = p_i_z - pj_z_1;
1108  r2_0 += t_0 * t_0;
1109  r2_1 += t_1 * t_1;
1110 
1111  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1112  sortEntry0 = sortValues + g;
1113  sortEntry1 = sortValues + g + 1;
1114  jprev0 = sortEntry0->index;
1115  jprev1 = sortEntry1->index;
1116  #else
1117  jprev0 = glist[g ];
1118  jprev1 = glist[g+1];
1119  #endif
1120 
1121  pj_x_0 = p_1[jprev0].position.x;
1122  pj_x_1 = p_1[jprev1].position.x;
1123  pj_y_0 = p_1[jprev0].position.y;
1124  pj_y_1 = p_1[jprev1].position.y;
1125  pj_z_0 = p_1[jprev0].position.z;
1126  pj_z_1 = p_1[jprev1].position.z;
1127 
1128  bool test0 = ( r2_0 < groupplcutoff2 );
1129  bool test1 = ( r2_1 < groupplcutoff2 );
1130 
1131  //removing ifs benefits on many architectures
1132  //as the extra stores will only warm the cache up
1133  goodglist [ hu ] = j0;
1134  goodglist [ hu + test0 ] = j1;
1135 
1136  hu += test0 + test1;
1137  }
1138  g-=2;
1139  }
1140 #endif
1141 #endif // NAMD_KNL
1142 
1143  const int gp = g;
1144 #pragma omp simd simdlen(16)
1145  for (g = gp; g < gu; g++) {
1146 
1147  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1148  register SortEntry* sortEntry = sortValues + g;
1149  register int j = sortEntry->index;
1150  #else
1151  int j = glist[g];
1152  #endif
1153 
1154  BigReal p_j_x = p_1[j].position.x;
1155  BigReal p_j_y = p_1[j].position.y;
1156  BigReal p_j_z = p_1[j].position.z;
1157 
1158  BigReal r2 = p_i_x - p_j_x;
1159  r2 *= r2;
1160  BigReal t2 = p_i_y - p_j_y;
1161  r2 += t2 * t2;
1162  t2 = p_i_z - p_j_z;
1163  r2 += t2 * t2;
1164 
1165 #pragma omp ordered simd monotonic(hu:1)
1166  if ( r2 <= groupplcutoff2 )
1167  goodglist[hu ++] = j;
1168  }
1169 
1170  for ( int h=0; h<hu; ++h ) {
1171  int j = goodglist[h];
1172  int nbgs = p_1[j].nonbondedGroupSize;
1173  pli[0] = j; // copy over the next three in any case
1174  pli[1] = j+1;
1175  pli[2] = j+2;
1176  if ( nbgs & 4 ) { // if nbgs > 3, assume nbgs <= 5
1177  pli[3] = j+3;
1178  pli[4] = j+4;
1179  }
1180  pli += nbgs;
1181  }
1182  }
1183  }
1184 
1185  pairlistindex = pli - pairlist;
1186  // make sure padded element on pairlist points to real data
1187  if ( pairlistindex ) {
1188  pairlist[pairlistindex] = pairlist[pairlistindex-1];
1189  } PAIR( else { // skip empty loops if no pairs were found
1190  i += p_i.nonbondedGroupSize;
1191  continue;
1192  } )
1193  } // if i is nonbonded group parent
1194  SELF
1195  (
 1196  // self-comparisons require list to be incremented
 1197  // pair-comparisons use entire list (pairlistoffset is 0)
1198  else pairlistoffset++;
1199  )
1200 
1201  const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
1202 
1203  register int *pli = pairlist2;
1204 
1205  plint *pairlistn = pairlists.newlist(j_upper + 5 + 5 ALCH( + 20 ));
1206  register plint *plin = pairlistn;
1207 
1208 
1209  if ( qmForcesOn ) {
1210 
1211  Real qmGroup_i = mol->get_qmAtomGroup(pExt_i.id);
1212 
1213  for (int k=pairlistoffset; k<pairlistindex; k++) {
1214  j = pairlist[k];
1215 
1216  Real qmGroup_j = mol->get_qmAtomGroup(pExt_1[j].id);
1217 
1218  // There can be no non-bonded interaction between an QM-QM pair of the same QM group
1219  if (qmGroup_i > 0) {
1220  if (qmGroup_i == qmGroup_j) {
1221  continue;
1222  } else {
1223  *(pli++) = j;
1224  }
1225  } else {
1226  *(pli++) = j;
1227  }
1228  }
1229 
1230  int npair2_int = pli - pairlist2;
1231  pli = pairlist2;
1232  for (int k=0; k<npair2_int; k++) {
1233 
1234  j = pairlist2[k];
1235 
1236  BigReal p_j_x = p_1[j].position.x;
1237  BigReal r2 = p_i_x - p_j_x;
1238  r2 *= r2;
1239  BigReal p_j_y = p_1[j].position.y;
1240  BigReal t2 = p_i_y - p_j_y;
1241  r2 += t2 * t2;
1242  BigReal p_j_z = p_1[j].position.z;
1243  t2 = p_i_z - p_j_z;
1244  r2 += t2 * t2;
1245 
1246  if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1247  int atom2 = pExt_1[j].id;
1248  if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1249  else *(plin++) = j;
1250  }
1251  }
1252  } else
1253 
1254 
1255 
1256  INT(
1257  if ( pairInteractionOn ) {
1258  const int ifep_type = p_i.partition;
1259  if (pairInteractionSelf) {
1260  if (ifep_type != 1) { PAIR(++i;) continue; }
1261  for (int k=pairlistoffset; k<pairlistindex; k++) {
1262  j = pairlist[k];
1263  const int jfep_type = p_1[j].partition;
1264  // for pair-self, both atoms must be in group 1.
1265  if (jfep_type == 1) {
1266  *(pli++) = j;
1267  }
1268  }
1269  } else {
1270  if (ifep_type != 1 && ifep_type != 2) { PAIR(++i;) continue; }
1271  for (int k=pairlistoffset; k<pairlistindex; k++) {
1272  j = pairlist[k];
1273  const int jfep_type = p_1[j].partition;
1274  // for pair, must have one from each group.
1275  if (ifep_type + jfep_type == 3) {
1276  *(pli++) = j;
1277  }
1278  }
1279  }
1280  int npair2_int = pli - pairlist2;
1281  pli = pairlist2;
1282  for (int k=0; k<npair2_int; k++) {
1283  j = pairlist2[k];
1284  BigReal p_j_x = p_1[j].position.x;
1285  BigReal r2 = p_i_x - p_j_x;
1286  r2 *= r2;
1287  BigReal p_j_y = p_1[j].position.y;
1288  BigReal t2 = p_i_y - p_j_y;
1289  r2 += t2 * t2;
1290  BigReal p_j_z = p_1[j].position.z;
1291  t2 = p_i_z - p_j_z;
1292  r2 += t2 * t2;
1293  if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1294  int atom2 = pExt_1[j].id;
1295  if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1296  else *(plin++) = j;
1297  }
1298  }
1299  } else
1300  )
1301  if ( atomfixed ) {
1302  for (int k=pairlistoffset; k<pairlistindex; k++) {
1303  j = pairlist[k];
1304  BigReal p_j_x = p_1[j].position.x;
1305  BigReal r2 = p_i_x - p_j_x;
1306  r2 *= r2;
1307  BigReal p_j_y = p_1[j].position.y;
1308  BigReal t2 = p_i_y - p_j_y;
1309  r2 += t2 * t2;
1310  BigReal p_j_z = p_1[j].position.z;
1311  t2 = p_i_z - p_j_z;
1312  r2 += t2 * t2;
1313  if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
1314  int atom2 = pExt_1[j].id;
1315  if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1316  else *(plin++) = j;
1317  }
1318  }
1319  } else {
1320  int k = pairlistoffset;
1321  int ku = pairlistindex;
1322  if ( k < ku ) {
1323 #ifndef NAMD_KNL
1324 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
1325  if ( ku - k > 6 ) {
1326  register int jprev0 = pairlist [k ];
1327  register int jprev1 = pairlist [k + 1];
1328 
1329  register int j0;
1330  register int j1;
1331 
1332  __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1333  __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1334  __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1335 
1336  // these don't change here, so we could move them into outer scope
1337  const __m128d P_I_X = _mm_set1_pd(p_i_x);
1338  const __m128d P_I_Y = _mm_set1_pd(p_i_y);
1339  const __m128d P_I_Z = _mm_set1_pd(p_i_z);
1340 
1341  int atom2_0 = pExt_1[jprev0].id;
1342  int atom2_1 = pExt_1[jprev1].id;
1343 
1344  k += 2;
1345  for ( ; k < ku - 2; k +=2 ) {
1346  // compute 1d distance, 2-way parallel
1347  j0 = jprev0;
1348  j1 = jprev1;
1349 
1350  __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
1351  __m128d R2_01 = _mm_mul_pd(T_01, T_01);
1352  T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
1353  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1354  T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
1355  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1356 
1357  jprev0 = pairlist[k];
1358  jprev1 = pairlist[k+1];
1359 
1360  PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1361  PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1362  PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1363 
1364  __align(16) double r2_01[2];
1365  _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
1366 
1367  if (r2_01[0] <= plcutoff2) {
1368  if (UNLIKELY( atom2_0 >= excl_min && atom2_0 <= excl_max ))
1369  *(pli++) = j0;
1370  else
1371  *(plin++) = j0;
1372  }
1373  atom2_0 = pExt_1[jprev0].id;
1374 
1375  if (r2_01[1] <= plcutoff2) {
1376  if (UNLIKELY( atom2_1 >= excl_min && atom2_1 <= excl_max ))
1377  *(pli++) = j1;
1378  else
1379  *(plin++) = j1;
1380  }
1381  atom2_1 = pExt_1[jprev1].id;
1382  }
1383  k-=2;
1384  }
1385 #else
1386  if ( ku - k > 6 ) {
1387  register int jprev0 = pairlist [k];
1388  register int jprev1 = pairlist [k + 1];
1389 
1390  register int j0;
1391  register int j1;
1392 
1393  register BigReal pj_x_0, pj_x_1;
1394  register BigReal pj_y_0, pj_y_1;
1395  register BigReal pj_z_0, pj_z_1;
1396  register BigReal t_0, t_1, r2_0, r2_1;
1397 
1398  pj_x_0 = p_1[jprev0].position.x;
1399  pj_x_1 = p_1[jprev1].position.x;
1400 
1401  pj_y_0 = p_1[jprev0].position.y;
1402  pj_y_1 = p_1[jprev1].position.y;
1403 
1404  pj_z_0 = p_1[jprev0].position.z;
1405  pj_z_1 = p_1[jprev1].position.z;
1406 
1407  int atom2_0 = pExt_1[jprev0].id;
1408  int atom2_1 = pExt_1[jprev1].id;
1409 
1410  k += 2;
1411  for ( ; k < ku - 2; k +=2 ) {
1412  // compute 1d distance, 2-way parallel
1413  j0 = jprev0;
1414  j1 = jprev1;
1415 
1416  t_0 = p_i_x - pj_x_0;
1417  t_1 = p_i_x - pj_x_1;
1418  r2_0 = t_0 * t_0;
1419  r2_1 = t_1 * t_1;
1420 
1421  t_0 = p_i_y - pj_y_0;
1422  t_1 = p_i_y - pj_y_1;
1423  r2_0 += t_0 * t_0;
1424  r2_1 += t_1 * t_1;
1425 
1426  t_0 = p_i_z - pj_z_0;
1427  t_1 = p_i_z - pj_z_1;
1428  r2_0 += t_0 * t_0;
1429  r2_1 += t_1 * t_1;
1430 
1431  jprev0 = pairlist[k];
1432  jprev1 = pairlist[k+1];
1433 
1434  pj_x_0 = p_1[jprev0].position.x;
1435  pj_x_1 = p_1[jprev1].position.x;
1436  pj_y_0 = p_1[jprev0].position.y;
1437  pj_y_1 = p_1[jprev1].position.y;
1438  pj_z_0 = p_1[jprev0].position.z;
1439  pj_z_1 = p_1[jprev1].position.z;
1440 
1441  if (r2_0 <= plcutoff2) {
1442  if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
1443  *(pli++) = j0;
1444  else
1445  *(plin++) = j0;
1446  }
1447  atom2_0 = pExt_1[jprev0].id;
1448 
1449  if (r2_1 <= plcutoff2) {
1450  if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
1451  *(pli++) = j1;
1452  else
1453  *(plin++) = j1;
1454  }
1455  atom2_1 = pExt_1[jprev1].id;
1456  }
1457  k-=2;
1458  }
1459 #endif
1460 #endif // NAMD_KNL
1461 
1462  const int kp = k;
1463 #pragma omp simd simdlen(16)
1464  for (k = kp; k < ku; k++) {
1465  int j = pairlist[k];
1466  int atom2 = pExt_1[j].id;
1467 
1468  BigReal p_j_x = p_1[j].position.x;
1469  BigReal p_j_y = p_1[j].position.y;
1470  BigReal p_j_z = p_1[j].position.z;
1471 
1472  BigReal r2 = p_i_x - p_j_x;
1473  r2 *= r2;
1474  BigReal t2 = p_i_y - p_j_y;
1475  r2 += t2 * t2;
1476  t2 = p_i_z - p_j_z;
1477  r2 += t2 * t2;
1478 
1479 #pragma omp ordered simd monotonic(plin:1, pli:1)
1480  if (r2 <= plcutoff2) {
1481  if ( atom2 >= excl_min && atom2 <= excl_max )
1482  *(pli++) = j;
1483  else
1484  *(plin++) = j;
1485  }
1486  }
1487  }
1488  }
1489 
1490  PAIR(
1491  if ( plin == pairlistn && pli == pairlist2 ) {
1492  ++i;
1493  continue;
1494  }
1495  pairlists.setIndexValue(i);
1496  )
1497 
1498  plint *plix = pairlistx;
1499  plint *plim = pairlistm;
1500  ALCH(
1501  plint *plinA1 = pairlistnA1;
1502  plint *plixA1 = pairlistxA1;
1503  plint *plimA1 = pairlistmA1;
1504  plint *plinA2 = pairlistnA2;
1505  plint *plixA2 = pairlistxA2;
1506  plint *plimA2 = pairlistmA2;
1507  plint *plinA3 = pairlistnA3;
1508  plint *plixA3 = pairlistxA3;
1509  plint *plimA3 = pairlistmA3;
1510  plint *plinA4 = pairlistnA4;
1511  plint *plixA4 = pairlistxA4;
1512  plint *plimA4 = pairlistmA4;
1513  )
1514 
1515  int k;
1516 
1517 #if 0 ALCH(+1)
1518  int unsortedNpairn = plin - pairlistn;
1519  plin = pairlistn;
1520  for ( k=0; k<unsortedNpairn; ++k ) {
1521  int j = pairlistn[k];
1522  switch(pswitchTable[p_i_partition + 5*(p_1[j].partition)]) {
1523  case 0: *(plin++) = j; break;
1524  case 1: *(plinA1++) = j; break;
1525  case 2: *(plinA2++) = j; break;
1526  case 3: *(plinA3++) = j; break;
1527  case 4: *(plinA4++) = j; break;
1528  }
1529  }
1530 #endif
1531 
1532  int npair2 = pli - pairlist2;
1533  // if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
1534  // removed code for implicit exclusions within hydrogen groups -JCP
1535  for (k=0; k < npair2; ++k ) {
1536  int j = pairlist2[k];
1537  int atom2 = pExt_1[j].id;
1538  int excl_flag = excl_flags[atom2];
1539  ALCH(int pswitch = pswitchTable[p_i_partition + 5*(p_1[j].partition)];)
1540  switch ( excl_flag ALCH( + 3 * pswitch)) {
1541  case 0: *(plin++) = j; break;
1542  case 1: *(plix++) = j; break;
1543  case 2: *(plim++) = j; break;
1544  ALCH(
1545  case 3: *(plinA1++) = j; break;
1546  case 6: *(plinA2++) = j; break;
1547  case 9: *(plinA3++) = j; break;
1548  case 12: *(plinA4++) = j; break;
1549  case 5: *(plimA1++) = j; break;
1550  case 8: *(plimA2++) = j; break;
1551  case 11: *(plimA3++) = j; break;
1552  case 14: *(plimA4++) = j; break;
1553  case 4: *(plixA1++) = j; break;
1554  case 7: *(plixA2++) = j; break;
1555  case 10: *(plixA3++) = j; break;
1556  case 13: *(plixA4++) = j; break;
1557  )
1558  }
1559  }
1560 
1561  npairn = plin - pairlistn;
1562  pairlistn_save = pairlistn;
1563  pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
1564  pairlists.newsize(npairn + 1);
1565 
1566  npairx = plix - pairlistx;
1567  pairlistx_save = pairlists.newlist();
1568  for ( k=0; k<npairx; ++k ) {
1569  pairlistx_save[k] = pairlistx[k];
1570  }
1571  pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
1572  pairlists.newsize(npairx + 1);
1573 
1574  npairm = plim - pairlistm;
1575  pairlistm_save = pairlists.newlist();
1576  for ( k=0; k<npairm; ++k ) {
1577  pairlistm_save[k] = pairlistm[k];
1578  }
1579  pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
1580  pairlists.newsize(npairm + 1);
1581 
1582 
1583 #if 0 ALCH(+1)
1584 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
1585 { \
1586  (NPAIRS) = (PL2) - (PL1); \
1587  (PLSAVE) = pairlists.newlist(); \
1588  for ( k=0; k<(NPAIRS); ++k ) { \
1589  (PLSAVE)[k] = (PL1)[k]; \
1590  } \
1591  (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
1592  pairlists.newsize((NPAIRS) + 1); \
1593 }
1594 
1595  PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
1596  PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
1597  PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
1598  PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
1599  PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
1600  PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
1601  PAIRLISTFROMARRAY(npairnA3,pairlistnA3,plinA3,pairlistnA3_save);
1602  PAIRLISTFROMARRAY(npairxA3,pairlistxA3,plixA3,pairlistxA3_save);
1603  PAIRLISTFROMARRAY(npairmA3,pairlistmA3,plimA3,pairlistmA3_save);
1604  PAIRLISTFROMARRAY(npairnA4,pairlistnA4,plinA4,pairlistnA4_save);
1605  PAIRLISTFROMARRAY(npairxA4,pairlistxA4,plixA4,pairlistxA4_save);
1606  PAIRLISTFROMARRAY(npairmA4,pairlistmA4,plimA4,pairlistmA4_save);
1607 #undef PAIRLISTFROMARRAY
1608 
1609 #endif
1610 
1611  if ( ! savePairlists ) pairlists.reset(); // limit space usage
1612  PAIR( pairlists.addIndex(); )
1613 
1614  // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
1615  } else { // if ( savePairlists || ! usePairlists )
1616  // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
1617 
1618  pairlists.nextlist(&pairlistn_save,&npairn); --npairn;
1619  pairlists.nextlist(&pairlistx_save,&npairx); --npairx;
1620  pairlists.nextlist(&pairlistm_save,&npairm); --npairm;
1621  ALCH(
1622  pairlists.nextlist(&pairlistnA1_save,&npairnA1); --npairnA1;
1623  pairlists.nextlist(&pairlistxA1_save,&npairxA1); --npairxA1;
1624  pairlists.nextlist(&pairlistmA1_save,&npairmA1); --npairmA1;
1625  pairlists.nextlist(&pairlistnA2_save,&npairnA2); --npairnA2;
1626  pairlists.nextlist(&pairlistxA2_save,&npairxA2); --npairxA2;
1627  pairlists.nextlist(&pairlistmA2_save,&npairmA2); --npairmA2;
1628  pairlists.nextlist(&pairlistnA3_save,&npairnA3); --npairnA3;
1629  pairlists.nextlist(&pairlistxA3_save,&npairxA3); --npairxA3;
1630  pairlists.nextlist(&pairlistmA3_save,&npairmA3); --npairmA3;
1631  pairlists.nextlist(&pairlistnA4_save,&npairnA4); --npairnA4;
1632  pairlists.nextlist(&pairlistxA4_save,&npairxA4); --npairxA4;
1633  pairlists.nextlist(&pairlistmA4_save,&npairmA4); --npairmA4;
1634  )
1635 
1636  } // if ( savePairlists || ! usePairlists )
1637 
1638  LES( BigReal *lambda_table_i =
1639  lambda_table + (lesFactor+1) * p_i.partition; )
1640 
1641  INT(
1642  const BigReal force_sign = (
1644  ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
1645  );
1646 
1647  )
1648 
1649  const BigReal kq_i = COULOMB * p_i.charge * scaling * dielectric_1;
1650  DISP( FULL( const BigReal kd_i = pExt_i.dispcoef; ) )
1651  const LJTable::TableEntry * const lj_row =
1652  ljTable->table_row(p_i.vdwType);
1653 
1654  SHORT( FAST( BigReal f_i_x = 0.; ) )
1655  SHORT( FAST( BigReal f_i_y = 0.; ) )
1656  SHORT( FAST( BigReal f_i_z = 0.; ) )
1657  FULL( BigReal fullf_i_x = 0.; )
1658  FULL( BigReal fullf_i_y = 0.; )
1659  FULL( BigReal fullf_i_z = 0.; )
1660 
1661  int npairi;
1662  int k;
1663 
1664 #if KNL(1)+0
1665  const float kq_i_f = kq_i;
1666 
1667  npairi = pairlist_from_pairlist_knl(ComputeNonbondedUtil::cutoff2_f,
1668  p_i_x_f, p_i_y_f, p_i_z_f, pFlt_1, pairlistn_save, npairn, pairlisti,
1669  r2list_f, xlist, ylist, zlist);
1670 #else
1672  p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
1673  r2_delta, r2list);
1674 #endif
1675 
1676  if ( npairi ) {
1677 
1678 // BEGIN NBTHOLE OF DRUDE MODEL
1679 #if (FAST(1+)0)
1680  if (drudeNbthole && p_i.hydrogenGroupSize > 1) {
1681 
1682  Parameters *parameters = params->parameters;
1683  const NbtholePairValue * const nbthole_array = parameters->nbthole_array;
1684  const int NumNbtholePairParams = parameters->NumNbtholePairParams;
1685  BigReal drudeNbtholeCut = simParams -> drudeNbtholeCut;
1686  NOKNL( BigReal drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut) + r2_delta; )
1687  KNL( float drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut); )
1689  int kk;
1690 
1691  for (k = 0; k < npairi; k++) {
1692  NOKNL( if (r2list[k] > drudeNbtholeCut2) { continue; } )
1693  KNL( if (r2list_f[k] > drudeNbtholeCut2) { continue; } )
1694 
1695  const int j = pairlisti[k];
1696  const CompAtom& p_j = p_1[j];
1697 
1698  if ( p_j.hydrogenGroupSize < 2 ) { continue; }
1699 
1700  for (kk = 0;kk < NumNbtholePairParams; kk++) {
1701 
1702  if (((nbthole_array[kk].ind1 == p_i.vdwType) &&
1703  (nbthole_array[kk].ind2 == p_j.vdwType)) ||
1704  ((nbthole_array[kk].ind2 == p_i.vdwType) &&
1705  (nbthole_array[kk].ind1 == p_j.vdwType))) {
1706  break;
1707  }
1708  }
1709  if ( kk < NumNbtholePairParams ) {
1710 
1711  BigReal aprod = mol->GetAtomAlpha(pExt_0[i].id) * mol->GetAtomAlpha(pExt_1[j].id);
1712  const BigReal aa = nbthole_array[kk].tholeij * powf(aprod, -1.f/6);
1713  const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
1714  const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
1715  const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
1716  const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;
1717 
1718  Vector raa = (p_0[i].position + params->offset) - p_1[j].position;
1719  Vector rad = (p_0[i].position + params->offset) - p_1[j+1].position;
1720  Vector rda = (p_0[i+1].position + params->offset) - p_1[j].position;
1721  Vector rdd = (p_0[i+1].position + params->offset) - p_1[j+1].position;
1722 
1723  BigReal raa_invlen = raa.rlength(); // reciprocal of length
1724  BigReal rad_invlen = rad.rlength();
1725  BigReal rda_invlen = rda.rlength();
1726  BigReal rdd_invlen = rdd.rlength();
1727 
1728  BigReal auaa = aa / raa_invlen;
1729  BigReal auad = aa / rad_invlen;
1730  BigReal auda = aa / rda_invlen;
1731  BigReal audd = aa / rdd_invlen;
1732 
1733  BigReal expauaa = exp(-auaa);
1734  BigReal expauad = exp(-auad);
1735  BigReal expauda = exp(-auda);
1736  BigReal expaudd = exp(-audd);
1737 
1738  BigReal polyauaa = 1 + 0.5*auaa;
1739  BigReal polyauad = 1 + 0.5*auad;
1740  BigReal polyauda = 1 + 0.5*auda;
1741  BigReal polyaudd = 1 + 0.5*audd;
1742 
1743  BigReal ethole = 0;
1744  ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
1745  ethole += qqad * rad_invlen * (- polyauad * expauad);
1746  ethole += qqda * rda_invlen * (- polyauda * expauda);
1747  ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);
1748 
1749  polyauaa = 1 + auaa*polyauaa;
1750  polyauad = 1 + auad*polyauad;
1751  polyauda = 1 + auda*polyauda;
1752  polyaudd = 1 + audd*polyaudd;
1753 
1754  BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
1755  BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
1756  BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
1757  BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;
1758 
1759  BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
1760  BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
1761  BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
1762  BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);
1763 
1764  Vector faa = -dfaa * raa;
1765  Vector fad = -dfad * rad;
1766  Vector fda = -dfda * rda;
1767  Vector fdd = -dfdd * rdd;
1768 
1769  SHORT(f_net) NOSHORT(fullf_net) += (faa + fad) + (fda + fdd);
1770  params->ff[0][i] += faa + fad;
1771  params->ff[0][i+1] += fda + fdd;
1772  params->ff[1][j] -= faa + fda;
1773  params->ff[1][j+1] -= fad + fdd;
1774 
1775  reduction[electEnergyIndex] += ethole;
1776  }
1777  }
1778  }
1779 #endif
1780 // END NBTHOLE OF DRUDE MODEL
1781 
1782  // BEGIN LA
1783 #if (FAST(1+)0)
1784  if (doLoweAndersen && p_i.hydrogenGroupSize) {
1785  BigReal loweAndersenCutoff = simParams->loweAndersenCutoff;
1786  NOKNL( BigReal loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff) + r2_delta; )
1787  KNL( float loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff); )
1788  BigReal loweAndersenProb = simParams->loweAndersenRate * (simParams->dt * simParams->nonbondedFrequency) * 0.001; // loweAndersenRate is in 1/ps
1789  const bool loweAndersenUseCOMvelocity = (simParams->rigidBonds != RIGID_NONE);
1790  const BigReal kbT = BOLTZMANN * (simParams->loweAndersenTemp);
1791  const BigReal dt_inv = TIMEFACTOR / (simParams->dt * simParams->nonbondedFrequency);
1792  //const BigReal dt_inv = 1.0 / simParams->dt;
1793  //BigReal kbT = 8.3145e-7 * (simParams->loweAndersenTemp); // in A^2/fs^2 * K
1794 
1795  const CompAtom* v_0 = params->v[0];
1796  const CompAtom* v_1 = params->v[1];
1797  const CompAtom& v_i = v_0[i];
1798  Mass mass_i = v_i.charge;
1799  Velocity vel_i = v_i.position;
1800  Position pos_i = p_i.position;
1801  int atom_i = pExt_0[i].id;
1802 
1803  if (loweAndersenUseCOMvelocity) {
1804  Mass mass = 0;
1805  Velocity vel = 0;
1806  Position pos = 0;
1807  for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
1808  vel += v_0[i+l].charge * v_0[i+l].position;
1809  pos += v_0[i+l].charge * p_0[i+l].position;
1810  mass += v_0[i+l].charge;
1811  }
1812  vel_i = vel / mass;
1813  pos_i = pos / mass;
1814  mass_i = mass;
1815  }
1816 
1817  // Note v[0].charge is actually mass
1818  //Random rand(CkMyPe()); // ?? OK ?? NO!!!!
1819  Random *rand = params->random;
1820  for (k = 0; k < npairi; k++) {
1821  NOKNL( if (r2list[k] > loweAndersenCutoff2) { continue; } )
1822  KNL( if (r2list_f[k] > loweAndersenCutoff2) { continue; } )
1823 
1824  const int j = pairlisti[k];
1825  const CompAtom& v_j = v_1[j];
1826  const CompAtom& p_j = p_1[j];
1827 
1828  if (!p_j.hydrogenGroupSize) { continue; }
1829  if (rand->uniform() > loweAndersenProb) { continue; }
1830 
1831  Mass mass_j = v_j.charge;
1832  Velocity vel_j = v_j.position;
1833  Position pos_j = p_j.position;
1834  int atom_j = pExt_1[j].id;
1835 
1836  if (loweAndersenUseCOMvelocity) {
1837  Mass mass = 0;
1838  Velocity vel = 0;
1839  Position pos = 0;
1840  for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
1841  vel += v_1[j+l].charge * v_1[j+l].position;
1842  pos += v_1[j+l].charge * p_1[j+l].position;
1843  mass += v_1[j+l].charge;
1844  }
1845  vel_j = vel / mass;
1846  pos_j = pos / mass;
1847  mass_j = mass;
1848  }
1849 
1850  //Velocity deltaV = v_i.position - v_j.position;
1851  Velocity deltaV = vel_i - vel_j;
1852  Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
1853  //Vector sep = (p_i.position + params->offset) - p_j.position;
1854  Vector sep = (pos_i + params->offset) - pos_j;
1855  Vector sigma_ij = sep.unit();
1856  BigReal lambda = rand->gaussian() * sqrt(kbT / mu_ij);
1857  Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;
1858 
1859  //DebugM(1, "atom1 atom2 = " << atom1 << " " << atom2 << " lambda = " << lambda << " force = " << force << " deltaP = " << sep.length() << " sqrt(r2) = " << sqrt(r2list[k]) << "\n");
1860  //DebugM(1, "atom1 atom2 = " << atom_i << " " << atom_j << " mass1 mass2 = " << mass_i << " " << mass_j << " hgrp1 hgrp2 " << p_i.hydrogenGroupSize << " " << p_j.hydrogenGroupSize << "\n");
1861 
1862  if (loweAndersenUseCOMvelocity) {
1863  BigReal inv_mass_i = 1.0 / mass_i;
1864  BigReal inv_mass_j = 1.0 / mass_j;
1865  for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
1866  params->ff[0][i+l] += (v_0[i+l].charge * inv_mass_i) * force;
1867  }
1868  for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
1869  params->ff[1][j+l] -= (v_1[j+l].charge * inv_mass_j) * force;
1870  }
1871  } else {
1872  params->ff[0][i] += force;
1873  params->ff[1][j] -= force;
1874  }
1875  SHORT(f_net) NOSHORT(fullf_net) += force;
1876  }
1877  }
1878 #endif
1879  // END LA
1880 
1881 // DJH expand inner loop over "normal" interactions
1882 #define NORMAL(X) X
1883 #define EXCLUDED(X)
1884 #define MODIFIED(X)
1885 #define PRAGMA_SIMD
1886 #if KNL(1+)0
1887  switch ( vdw_switch_mode ) {
1888 
1889 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_ENERGY
1890  case VDW_SWITCH_MODE:
1891 #include "ComputeNonbondedBase2KNL.h"
1892  break;
1893 #undef VDW_SWITCH_MODE
1894 
1895 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_MARTINI
1896  case VDW_SWITCH_MODE:
1897 #include "ComputeNonbondedBase2KNL.h"
1898  break;
1899 #undef VDW_SWITCH_MODE
1900 
1901 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_FORCE
1902  case VDW_SWITCH_MODE:
1903 #include "ComputeNonbondedBase2KNL.h"
1904  break;
1905 #undef VDW_SWITCH_MODE
1906 
1907  }
1908 #else
1909 #include "ComputeNonbondedBase2.h"
1910 #endif
1911 #undef PRAGMA_SIMD
1912 #undef NORMAL
1913 #undef EXCLUDED
1914 #undef MODIFIED
1915 
1916  } // if ( npairi )
1917 
1919  p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
1920  r2_delta, r2list);
1921  exclChecksum += npairi;
1922 
1923 // DJH expand inner loop over "modified (1-4)" interactions
1924 #define NORMAL(X)
1925 #define EXCLUDED(X)
1926 #define MODIFIED(X) X
1927 #include "ComputeNonbondedBase2.h"
1928 #undef NORMAL
1929 #undef EXCLUDED
1930 #undef MODIFIED
1931 
1932 #ifdef FULLELECT
1934  p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
1935  r2_delta, r2list);
1936  exclChecksum += npairi;
1937 
1938 // DJH expand inner loop over "excluded" interactions (have to subtract long-range contribution)
1939 #undef FAST
1940 #define FAST(X)
1941 #define NORMAL(X)
1942 #define EXCLUDED(X) X
1943 #define MODIFIED(X)
1944 #include "ComputeNonbondedBase2.h"
1945 #undef FAST
1946 #ifdef SLOWONLY
1947  #define FAST(X)
1948 #else
1949  #define FAST(X) X
1950 #endif
1951 #undef NORMAL
1952 #undef EXCLUDED
1953 #undef MODIFIED
1954 #else
1955  exclChecksum += npairx;
1956 #endif
1957 
 1958 #if 0 ALCH(+1) // nonbondedbase2 for alchemical-separated pairlists
1959 
1960  #undef ALCHPAIR
1961  #define ALCHPAIR(X) X
1962  #undef NOT_ALCHPAIR
1963  #define NOT_ALCHPAIR(X)
1964  BigReal myLambda; FEP(BigReal myLambda2;)
1965  BigReal myElecLambda; FEP(BigReal myElecLambda2;)
1966  BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
1967  BigReal myRepLambda; FEP(BigReal myRepLambda2;)
1968  BigReal myVdwShift; FEP(BigReal myVdwShift2;)
1969  BigReal alch_vdw_energy; BigReal alch_vdw_force;
1970  FEP(BigReal alch_vdw_energy_2; ) TI(BigReal alch_vdw_dUdl;)
1971  BigReal shiftedElec; BigReal shiftedElecForce;
1972 
1973  /********************************************************************/
1974  /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
1975  /********************************************************************/
1976  #define NORMAL(X) X
1977  #define EXCLUDED(X)
1978  #define MODIFIED(X)
1979 
1980  #define ALCH1(X) X
1981  #define ALCH2(X)
1982  #define ALCH3(X)
1983  #define ALCH4(X)
1985  p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
1986  r2_delta, r2list);
1987  #include "ComputeNonbondedBase2.h" // normal, direction 'up'
1988  #undef ALCH1
1989  #undef ALCH2
1990  #undef ALCH3
1991  #undef ALCH4
1992 
1993  #define ALCH1(X)
1994  #define ALCH2(X) X
1995  #define ALCH3(X)
1996  #define ALCH4(X)
1998  p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
1999  r2_delta, r2list);
2000  #include "ComputeNonbondedBase2.h" // normal, direction 'down'
2001  #undef ALCH1
2002  #undef ALCH2
2003  #undef ALCH3
2004  #undef ALCH4
2005 
2006  #define ALCH3(X) X
2007  #define ALCH1(X)
2008  #define ALCH2(X)
2009  #define ALCH4(X)
2011  p_i_x, p_i_y, p_i_z, p_1, pairlistnA3_save, npairnA3, pairlisti,
2012  r2_delta, r2list);
2013  #include "ComputeNonbondedBase2.h" // normal, direction 'up'
2014  #undef ALCH1
2015  #undef ALCH2
2016  #undef ALCH3
2017  #undef ALCH4
2018 
2019  #define ALCH4(X) X
2020  #define ALCH1(X)
2021  #define ALCH2(X)
2022  #define ALCH3(X)
2024  p_i_x, p_i_y, p_i_z, p_1, pairlistnA4_save, npairnA4, pairlisti,
2025  r2_delta, r2list);
2026  #include "ComputeNonbondedBase2.h" // normal, direction 'down'
2027  #undef ALCH1
2028  #undef ALCH2
2029  #undef ALCH3
2030  #undef ALCH4
2031 
2032  #undef NORMAL
2033  #undef EXCLUDED
2034  #undef MODIFIED
2035  /********************************************************************/
2036  /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
2037  /********************************************************************/
2038  #define NORMAL(X)
2039  #define EXCLUDED(X)
2040  #define MODIFIED(X) X
2041 
2042  #define ALCH1(X) X
2043  #define ALCH2(X)
2044  #define ALCH3(X)
2045  #define ALCH4(X)
2047  p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
2048  r2_delta, r2list);
2049  exclChecksum += npairi;
2050  #include "ComputeNonbondedBase2.h" // modified, direction 'up'
2051  #undef ALCH1
2052  #undef ALCH2
2053  #undef ALCH3
2054  #undef ALCH4
2055 
2056  #define ALCH1(X)
2057  #define ALCH2(X) X
2058  #define ALCH3(X)
2059  #define ALCH4(X)
2061  p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
2062  r2_delta, r2list);
2063  exclChecksum += npairi;
2064  #include "ComputeNonbondedBase2.h" // modified, direction 'down'
2065  #undef ALCH1
2066  #undef ALCH2
2067  #undef ALCH3
2068  #undef ALCH4
2069 
2070  #define ALCH1(X)
2071  #define ALCH2(X)
2072  #define ALCH3(X) X
2073  #define ALCH4(X)
2075  p_i_x, p_i_y, p_i_z, p_1, pairlistmA3_save, npairmA3, pairlisti,
2076  r2_delta, r2list);
2077  exclChecksum += npairi;
2078  #include "ComputeNonbondedBase2.h" // modified, direction 'up'
2079  #undef ALCH1
2080  #undef ALCH2
2081  #undef ALCH3
2082  #undef ALCH4
2083 
2084  #define ALCH1(X)
2085  #define ALCH2(X)
2086  #define ALCH3(X)
2087  #define ALCH4(X) X
2089  p_i_x, p_i_y, p_i_z, p_1, pairlistmA4_save, npairmA4, pairlisti,
2090  r2_delta, r2list);
2091  exclChecksum += npairi;
2092  #include "ComputeNonbondedBase2.h" // modified, direction 'down'
2093  #undef ALCH1
2094  #undef ALCH2
2095  #undef ALCH3
2096  #undef ALCH4
2097 
2098  #undef NORMAL
2099  #undef EXCLUDED
2100  #undef MODIFIED
2101 
2102  /********************************************************************/
2103  /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
2104  /********************************************************************/
2105  #ifdef FULLELECT
2106  #undef FAST
2107  #define FAST(X)
2108  #define NORMAL(X)
2109  #define EXCLUDED(X) X
2110  #define MODIFIED(X)
2111 
2112  #define ALCH1(X) X
2113  #define ALCH2(X)
2114  #define ALCH3(X)
2115  #define ALCH4(X)
2117  p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
2118  r2_delta, r2list);
2119  exclChecksum += npairi;
2120  #include "ComputeNonbondedBase2.h" //excluded, direction 'up'
2121  #undef ALCH1
2122  #undef ALCH2
2123  #undef ALCH3
2124  #undef ALCH4
2125 
2126  #define ALCH1(X)
2127  #define ALCH2(X) X
2128  #define ALCH3(X)
2129  #define ALCH4(X)
2131  p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
2132  r2_delta, r2list);
2133  exclChecksum += npairi;
2134  #include "ComputeNonbondedBase2.h" //excluded, direction 'down'
2135  #undef ALCH1
2136  #undef ALCH2
2137  #undef ALCH3
2138  #undef ALCH4
2139 
2140  #define ALCH1(X)
2141  #define ALCH2(X)
2142  #define ALCH3(X) X
2143  #define ALCH4(X)
2145  p_i_x, p_i_y, p_i_z, p_1, pairlistxA3_save, npairxA3, pairlisti,
2146  r2_delta, r2list);
2147  exclChecksum += npairi;
2148  #include "ComputeNonbondedBase2.h"
2149  #undef ALCH1
2150  #undef ALCH2
2151  #undef ALCH3
2152  #undef ALCH4
2153 
2154  #define ALCH1(X)
2155  #define ALCH2(X)
2156  #define ALCH3(X)
2157  #define ALCH4(X) X
2159  p_i_x, p_i_y, p_i_z, p_1, pairlistxA4_save, npairxA4, pairlisti,
2160  r2_delta, r2list);
2161  exclChecksum += npairi;
2162  #include "ComputeNonbondedBase2.h"
2163  #undef ALCH1
2164  #undef ALCH2
2165  #undef ALCH3
2166  #undef ALCH4
2167 
2168  #undef FAST
2169  #ifdef SLOWONLY
2170  #define FAST(X)
2171  #else
2172  #define FAST(X) X
2173  #endif
2174  #undef NORMAL
2175  #undef EXCLUDED
2176  #undef MODIFIED
2177  #else
2178  exclChecksum += npairxA1 + npairxA2 + npairxA3 + npairxA4;
2179  #endif
2180 
2181  #undef ALCHPAIR
2182  #define ALCHPAIR(X)
2183  #undef NOT_ALCHPAIR
2184  #define NOT_ALCHPAIR(X) X
2185 
2186 #endif // end nonbondedbase2.h includes for alchemical pairlists
2187 
2188 #ifdef NETWORK_PROGRESS
2189  CkNetworkProgress();
2190 #endif
2191 
2192 #ifdef ARCH_POWERPC
2193  //data cache block touch the position structure
2194  __dcbt ((void *) &(p_0[i+1]));
2195  //__prefetch_by_load ((void *)&(groupCount));
2196 #endif
2197 
2198  SHORT( FAST( f_net.x += f_i_x; ) )
2199  SHORT( FAST( f_net.y += f_i_y; ) )
2200  SHORT( FAST( f_net.z += f_i_z; ) )
2201  FULL( fullf_net.x += fullf_i_x; )
2202  FULL( fullf_net.y += fullf_i_y; )
2203  FULL( fullf_net.z += fullf_i_z; )
2204  SHORT( FAST( f_0[i].x += f_i_x; ) )
2205  SHORT( FAST( f_0[i].y += f_i_y; ) )
2206  SHORT( FAST( f_0[i].z += f_i_z; ) )
2207  FULL( fullf_0[i].x += fullf_i_x; )
2208  FULL( fullf_0[i].y += fullf_i_y; )
2209  FULL( fullf_0[i].z += fullf_i_z; )
2210 PAIR(
2211  if ( savePairlists || ! usePairlists ) {
2212  i++;
2213  } else {
2214  i = pairlists.getIndexValue();
2215  }
2216 )
2217 
2218  // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
2219  } // for i
2220 
2221  // PAIR(iout << "++++++++\n" << endi;)
2222  PAIR( if ( savePairlists ) { pairlists.setIndexValue(i); } )
2223 
2224 #ifdef f_1
2225 #undef f_1
2226 #endif
2227 #if ( SHORT( FAST( 1+ ) ) 0 )
2228 #if ( PAIR( 1+ ) 0 )
2229  {
2230  BigReal virial_xx = f_net.x * params->offset_f.x;
2231  BigReal virial_xy = f_net.x * params->offset_f.y;
2232  BigReal virial_xz = f_net.x * params->offset_f.z;
2233  BigReal virial_yy = f_net.y * params->offset_f.y;
2234  BigReal virial_yz = f_net.y * params->offset_f.z;
2235  BigReal virial_zz = f_net.z * params->offset_f.z;
2236 
2237  reduction[virialIndex_XX] += virial_xx;
2238  reduction[virialIndex_XY] += virial_xy;
2239  reduction[virialIndex_XZ] += virial_xz;
2240  reduction[virialIndex_YX] += virial_xy;
2241  reduction[virialIndex_YY] += virial_yy;
2242  reduction[virialIndex_YZ] += virial_yz;
2243  reduction[virialIndex_ZX] += virial_xz;
2244  reduction[virialIndex_ZY] += virial_yz;
2245  reduction[virialIndex_ZZ] += virial_zz;
2246  }
2247 #endif
2248 #endif
2249 
2250 #ifdef fullf_1
2251 #undef fullf_1
2252 #endif
2253 #if ( FULL( 1+ ) 0 )
2254 #if ( PAIR( 1+ ) 0 )
2255  {
2256  BigReal fullElectVirial_xx = fullf_net.x * params->offset_f.x;
2257  BigReal fullElectVirial_xy = fullf_net.x * params->offset_f.y;
2258  BigReal fullElectVirial_xz = fullf_net.x * params->offset_f.z;
2259  BigReal fullElectVirial_yy = fullf_net.y * params->offset_f.y;
2260  BigReal fullElectVirial_yz = fullf_net.y * params->offset_f.z;
2261  BigReal fullElectVirial_zz = fullf_net.z * params->offset_f.z;
2262 
2263  reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
2264  reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
2265  reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
2266  reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
2267  reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
2268  reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
2269  reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
2270  reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
2271  reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
2272  }
2273 #endif
2274 #endif
2275 
2276  reduction[exclChecksumIndex] += exclChecksum;
2277  FAST
2278  (
2279  // JLai
2280  ENERGY( reduction[vdwEnergyIndex] += vdwEnergy;
2281  GO( reduction[groLJEnergyIndex] += groLJEnergy;
2282  reduction[groGaussEnergyIndex] += groGaussEnergy;
2283  reduction[goNativeEnergyIndex] += goEnergyNative;
2284  reduction[goNonnativeEnergyIndex] += goEnergyNonnative; ) )
2285  SHORT
2286  (
2287  ENERGY( reduction[electEnergyIndex] += electEnergy; )
2288  )
2289  ALCH
2290  (
2291  TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;)
2292  TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;)
2293  FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
2294  SHORT
2295  (
2296  FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
2297  TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;)
2298  TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;)
2299  )
2300  )
2301  )
2302  FULL
2303  (
2304  ENERGY(
2305  reduction[fullElectEnergyIndex] += fullElectEnergy;
2306  DISP(
2307  reduction[fullVdwEnergyIndex] += fullVdwEnergy;
2308  )
2309  )
2310  ALCH
2311  (
2312  FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
2313  TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;)
2314  TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;)
2315  )
2316  )
2317 
2318 #ifndef MEM_OPT_VERSION
2321 #endif
2322 
2323 }
2324 
register BigReal virial_xy
Pairlists * pairlists
void sortEntries_mergeSort_v1(SortEntry *&se, SortEntry *&buf, int seLen)
register BigReal virial_xz
const TableEntry * table_row(unsigned int i) const
Definition: LJTable.h:31
#define DISP(X)
int size(void) const
Definition: ResizeArray.h:131
#define PAIR(X)
register BigReal virial_yz
BigReal uniform(void)
Definition: Random.h:109
Parameters * parameters
#define BOLTZMANN
Definition: common.h:54
void sortEntries_selectionSort(SortEntry *const se, const int seLen)
#define LAM(X)
BigReal gaussian(void)
Definition: Random.h:116
#define NOKNL(X)
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
#define PPROF(X)
CompAtom * p[2]
Definition: Vector.h:72
register BigReal electEnergy
static const Molecule * mol
Definition: NamdTypes.h:315
char * flags
Definition: Molecule.h:71
float Real
Definition: common.h:118
#define COULOMB
Definition: common.h:53
int32_t int32
Definition: common.h:38
CompAtom * v[2]
int pairlist_from_pairlist(BigReal cutoff2, BigReal p_i_x, BigReal p_i_y, BigReal p_i_z, const CompAtom *p_j, const plint *list, int list_size, int *newlist, BigReal r2_delta, BigReal *r2list)
BigReal z
Definition: Vector.h:74
#define NBWORKARRAY(TYPE, NAME, SIZE)
Position position
Definition: NamdTypes.h:78
#define NOSHORT(X)
BigReal * reduction
static BigReal pressureProfileThickness
const int32 * get_full_exclusions_for_atom(int anum) const
Definition: Molecule.h:1232
BigReal GetAtomAlpha(int i) const
Definition: Molecule.h:512
#define p_j
const int32 * get_mod_exclusions_for_atom(int anum) const
Definition: Molecule.h:1234
SimParameters * simParameters
const ExclusionCheck * get_excl_check_for_atom(int anum) const
Definition: Molecule.h:1248
#define EXCHCK_MOD
Definition: Molecule.h:86
Molecule stores the structural information for the system.
Definition: Molecule.h:174
void sortEntries_bubbleSort(SortEntry *const se, const int seLen)
#define TI(X)
void resize(int i)
Definition: ResizeArray.h:84
uint32 id
Definition: NamdTypes.h:160
int32 index
Definition: NamdTypes.h:316
#define INT(X)
Charge charge
Definition: NamdTypes.h:79
#define NBWORKARRAYSINIT(ARRAYS)
#define NOTABENERGY(X)
register BigReal virial_yy
unsigned short plint
void pp_clamp(int &n, int nslabs)
Definition: Random.h:37
plint getIndexValue()
void nextlist(plint **list, int *list_size)
int NumNbtholePairParams
Definition: Parameters.h:339
static BigReal * table_noshort
void NAMD_bug(const char *err_msg)
Definition: common.C:195
static BigReal pressureProfileMin
#define UNLIKELY(X)
#define NAME
#define EXCHCK_FULL
Definition: Molecule.h:85
int16 vdwType
Definition: NamdTypes.h:80
#define SHORT(X)
BigReal sortValue
Definition: NamdTypes.h:317
int Bool
Definition: common.h:142
#define SELF(X)
#define ALCH(X)
CompAtomExt * pExt[2]
uint8 partition
Definition: NamdTypes.h:81
register BigReal virial_zz
BigReal x
Definition: Vector.h:74
uint8 hydrogenGroupSize
Definition: NamdTypes.h:89
const Real * get_qmAtomGroup() const
Definition: Molecule.h:859
int numAtoms
Definition: Molecule.h:586
#define FAST(X)
NbtholePairValue * nbthole_array
Definition: Parameters.h:317
#define FEP(X)
static BigReal * lambda_table
static BigReal * slow_table
uint8 nonbondedGroupSize
Definition: NamdTypes.h:82
#define REZERO_EXCL_FLAGS_BUFF
#define simParams
Definition: Output.C:131
#define KNL(X)
BigReal y
Definition: Vector.h:74
static const LJTable * ljTable
#define ENERGY(X)
register BigReal virial_xx
#define FULL(X)
void newsize(int list_size)
static BigReal * table_short
#define TIMEFACTOR
Definition: common.h:55
void setIndexValue(plint i)
float Mass
Definition: ComputeGBIS.inl:20
#define COMPONENT_DOTPRODUCT(A, B)
#define LES(X)
Force * fullf[2]
static BigReal alchVdwShiftCoeff
NAMD_HOST_DEVICE BigReal rlength(void) const
Definition: Vector.h:210
BigReal * pressureProfileReduction
plint * newlist(int max_size)
#define NOENERGY(X)
#define RIGID_NONE
Definition: SimParameters.h:80
NAMD_HOST_DEVICE Vector unit(void) const
Definition: Vector.h:215
void sortEntries_mergeSort_v2(SortEntry *&se, SortEntry *&buf, int seLen)
#define GO(X)
ComputeNonbondedWorkArrays * workArrays
double BigReal
Definition: common.h:123
for(int i=0;i< n1;++i)
#define TABENERGY(X)