NAMD
ComputeNonbondedBase2.h
Go to the documentation of this file.
1 
7 EXCLUDED( FAST( foo bar ) )
8 EXCLUDED( MODIFIED( foo bar ) )
9 EXCLUDED( NORMAL( foo bar ) )
10 NORMAL( MODIFIED( foo bar ) )
11 ALCHPAIR( NOT_ALCHPAIR( foo bar ) )
12 
13 ALCHPAIR(
14  // get alchemical nonbonded scaling parameters (once per pairlist)
15  myLambda = ALCH1(lambdaUp) ALCH2(lambdaDown) ALCH3(lambdaUp) ALCH4(lambdaDown);
16  FEP(myLambda2 = ALCH1(lambda2Up) ALCH2(lambda2Down) ALCH3(lambda2Up) ALCH4(lambda2Down);)
17  myElecLambda = ALCH1(elecLambdaUp) ALCH2(elecLambdaDown) ALCH3(elecLambdaUp) ALCH4(elecLambdaDown);
18  FEP(myElecLambda2 = ALCH1(elecLambda2Up) ALCH2(elecLambda2Down) ALCH3(elecLambda2Up) ALCH4(elecLambda2Down);)
19  myVdwLambda = ALCH1(vdwLambdaUp) ALCH2(vdwLambdaDown) ALCH3(vdwLambdaUp) ALCH4(vdwLambdaDown);
20  FEP(myVdwLambda2 = ALCH1(vdwLambda2Up) ALCH2(vdwLambda2Down) ALCH3(vdwLambda2Up) ALCH4(vdwLambda2Down);)
21  ALCH1(myRepLambda = repLambdaUp) ALCH2(myRepLambda = repLambdaDown);
22  FEP(ALCH1(myRepLambda2 = repLambda2Up) ALCH2(myRepLambda2 = repLambda2Down);)
23  ALCH1(myVdwShift = vdwShiftUp) ALCH2(myVdwShift = vdwShiftDown);
24  FEP(ALCH1(myVdwShift2 = vdwShift2Up) ALCH2(myVdwShift2 = vdwShift2Down);)
25 )
26 
27 #ifdef A2_QPX
28 #if ( SHORT(1+) 0 )
29 NORMAL(kq_iv = vec_splats(kq_i); )
30 MODIFIED(kq_iv = vec_splats((1.0-modf_mod) *kq_i); )
31 #endif
32 
33 #if ( FULL( 1+ ) 0 )
34 EXCLUDED(
35  SHORT(
36  full_cnst = (vector4double)(6., 4., 2., 1.);
37  )
38  NOSHORT(
39  full_cnst = (vector4double)(1., 1., 1., 1.);
40  )
41  )
42 MODIFIED(
43  SHORT(
44  full_cnst = (vector4double)(6., 4., 2., 1.);
45  full_cnst = vec_mul (full_cnst, vec_splats(modf_mod));
46  )
47  NOSHORT(
48  full_cnst = vec_splats(modf_mod);
49  )
50  )
51 #endif
52 #endif
53 
54 #ifdef ARCH_POWERPC
55  __alignx(64, table_four);
56  __alignx(32, p_1);
57 #pragma unroll(1)
58 #pragma ibm independent_loop
59 #endif
60 
61 #ifndef ARCH_POWERPC
62 #pragma ivdep
63 #endif
64 
65 
66 
67 #if ( FULL( EXCLUDED( SHORT( 1+ ) ) ) 0 )
68 // avoid bug in Intel 15.0 compiler
69 #pragma novector
70 #else
71 #ifdef PRAGMA_SIMD
72 #ifndef TABENERGYFLAG
73 #ifndef GOFORCES
74 #pragma omp simd SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z)) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \
75  FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) ENERGY(reduction(+:fullElectEnergy)))
76 #endif
77 #endif
78 #pragma loop_count avg=100
79 #else // PRAGMA_SIMD
80 #pragma loop_count avg=4
81 #endif // PRAGMA_SIMD
82 #endif
83  for (k=0; k<npairi; ++k) {
84  TABENERGY(
85  const int numtypes = simParams->tableNumTypes;
86  const float table_spacing = simParams->tableSpacing;
87  const int npertype = (int) (namdnearbyint(simParams->tableMaxDist / simParams->tableSpacing) + 1);
88  )
89 
90  int table_i = (r2iilist[2*k] >> 14) + r2_delta_expc; // table_i >= 0
91  const int j = pairlisti[k];
92  //register const CompAtom *p_j = p_1 + j;
93 #define p_j (p_1+j)
94 #ifdef A2_QPX
95  register double *p_j_d = (double *) p_j;
96 #endif
97  // const CompAtomExt *pExt_j = pExt_1 + j;
98 
99  BigReal diffa = r2list[k] - r2_table[table_i];
100  //const BigReal* const table_four_i = table_four + 16*table_i;
101 #define table_four_i (table_four + 16*table_i)
102 
103 #if ( FAST( 1 + ) TABENERGY( 1 + ) 0 ) // FAST or TABENERGY
104  //const LJTable::TableEntry * lj_pars =
105  // lj_row + 2 * p_j->vdwType MODIFIED(+ 1);
106  const int lj_index = 2 * p_j->vdwType MODIFIED(+ 1);
107 #define lj_pars (lj_row+lj_index)
108 #ifdef A2_QPX
109  double *lj_pars_d = (double *) lj_pars;
110 #endif
111 #endif
112 
113  TABENERGY(
114  register const int tabtype = -1 - ( lj_pars->A < 0 ? lj_pars->A : 0 );
115  )
116 
117 #if ( SHORT( FAST( 1+ ) ) 0 )
118  //Force *f_j = f_1 + j;
119 #define f_j (f_1+j)
120 #endif
121 
122 #if ( FULL( 1+ ) 0 )
123  //Force *fullf_j = fullf_1 + j;
124 #define fullf_j (fullf_1+j)
125 #endif
126 
127  //Power PC aliasing and alignment constraints
128 #ifdef ARCH_POWERPC
129 #if ( FULL( 1+ ) 0 )
130 #pragma disjoint (*table_four, *fullf_1)
131 #pragma disjoint (*p_1, *fullf_1)
132 #ifdef A2_QPX
133 #pragma disjoint (*p_j_d, *fullf_1)
134 #endif
135 #pragma disjoint (*r2_table, *fullf_1)
136 #pragma disjoint (*r2list, *fullf_1)
137 #if ( SHORT( FAST( 1+ ) ) 0 )
138 #pragma disjoint (*f_1 , *fullf_1)
139 #pragma disjoint (*fullf_1, *f_1)
140 #endif //Short + fast
141 #endif //Full
142 
143 #if ( SHORT( FAST( 1+ ) ) 0 )
144 #pragma disjoint (*table_four, *f_1)
145 #pragma disjoint (*p_1, *f_1)
146 #pragma disjoint (*r2_table, *f_1)
147 #pragma disjoint (*r2list, *f_1)
148 #pragma disjoint (*lj_row, *f_1)
149 #ifdef A2_QPX
150 #pragma disjoint (*p_j_d, *f_1)
151 #endif
152 #endif //Short + Fast
153 
154  __alignx(64, table_four_i);
155  FAST (
156  __alignx(32, lj_pars);
157  )
158  __alignx(32, p_j);
159 #endif //ARCH_POWERPC
160 
161  /*
162  BigReal modf = 0.0;
163  int atom2 = p_j->id;
164  register char excl_flag = ( (atom2 >= excl_min && atom2 <= excl_max) ?
165  excl_flags[atom2-excl_min] : 0 );
166  if ( excl_flag ) { ++exclChecksum; }
167  SELF( if ( j < j_hgroup ) { excl_flag = EXCHCK_FULL; } )
168  if ( excl_flag ) {
169  if ( excl_flag == EXCHCK_FULL ) {
170  lj_pars = lj_null_pars;
171  modf = 1.0;
172  } else {
173  ++lj_pars;
174  modf = modf_mod;
175  }
176  }
177  */
178 
179  BigReal kqq = kq_i * p_j->charge;
180 
181 
182 #ifdef A2_QPX
183  float * cg = (float *)&p_j->charge;
184 #if ( FULL( 1+ ) 0 )
185 #pragma disjoint (*cg, *fullf_1)
186 #endif //Full
187 
188 #if ( SHORT( FAST( 1+ ) ) 0 )
189 #pragma disjoint (*cg, *f_1)
190 #endif //Short + fast
191 #endif
192 
193  LES( BigReal lambda_pair = lambda_table_i[p_j->partition]; )
194 
195 #ifndef A2_QPX
196  register const BigReal p_ij_x = p_i_x - p_j->position.x;
197  register const BigReal p_ij_y = p_i_y - p_j->position.y;
198  register const BigReal p_ij_z = p_i_z - p_j->position.z;
199 #else
200  vector4double charge_v = vec_lds(0, cg);
201  vector4double kqqv = vec_mul(kq_iv, charge_v );
202  vector4double p_ij_v = vec_sub(p_i_v, vec_ld (0, p_j_d));
203 #define p_ij_x vec_extract(p_i_v, 0)
204 #define p_ij_y vec_extract(p_i_v, 1)
205 #define p_ij_z vec_extract(p_i_v, 2)
206 #endif
207 
208 #if ( FAST(1+) 0 )
209  const BigReal A = scaling * lj_pars->A;
210  const BigReal B = scaling * lj_pars->B;
211 #ifndef A2_QPX
213  BigReal vdw_c = A * table_four_i[1] - B * table_four_i[5];
214  BigReal vdw_b = A * table_four_i[2] - B * table_four_i[6];
215  BigReal vdw_a = A * table_four_i[3] - B * table_four_i[7];
216 #else
217  const vector4double Av = vec_mul(scalingv, vec_lds(0, lj_pars_d));
218  const vector4double Bv = vec_mul(scalingv, vec_lds(8, lj_pars_d));
219  vector4double vdw_v = vec_msub( Av, vec_ld(0, (BigReal*)table_four_i), vec_mul(Bv, vec_ld(4*sizeof(BigReal), (BigReal*)table_four_i)) );
220 #define vdw_d vec_extract(vdw_v, 0)
221 #define vdw_c vec_extract(vdw_v, 1)
222 #define vdw_b vec_extract(vdw_v, 2)
223 #define vdw_a vec_extract(vdw_v, 3)
224 #endif
225 
227  // Alchemical free energy calculation
228  // Pairlist 1 and 2 are for softcore atoms, while 3 and 4 are single topology atoms.
229  // Pairlists are separated so that lambda-coupled pairs are handled
230  // independently from normal nonbonded (inside ALCHPAIR macro).
231  // The separation-shifted van der Waals potential and a shifted
232  // electrostatics potential for decoupling are calculated explicitly.
233  // Would be faster with lookup tables but because only a small minority
234  // of nonbonded pairs are lambda-coupled the impact is minimal.
235  // Explicit calculation also makes things easier to modify.
236  // These are now inline functions (in ComputeNonbondedFep.C) to
237  // tidy the code
238 
239  const BigReal r2 = r2list[k] - r2_delta;
240 
241  // These are now inline functions (in ComputeNonbondedFep.C) to
242  // tidy the code
243 
244  FEP(
245  ALCH1 ( // Don't merge/recombine the ALCH 1, 2, 3 ,4. Their functions might be modified for future algorithm changes.
246  fep_vdw_forceandenergies(A, B, r2, myVdwShift, myVdwShift2,
247  switchdist2, cutoff2, switchfactor, vdwForceSwitching, myVdwLambda,
248  myVdwLambda2, alchWCAOn, myRepLambda, myRepLambda2, &alch_vdw_energy,
249  &alch_vdw_force, &alch_vdw_energy_2);)
250  ALCH2 (
251  fep_vdw_forceandenergies(A, B, r2, myVdwShift, myVdwShift2,
252  switchdist2, cutoff2, switchfactor, vdwForceSwitching, myVdwLambda,
253  myVdwLambda2, alchWCAOn, myRepLambda, myRepLambda2, &alch_vdw_energy,
254  &alch_vdw_force, &alch_vdw_energy_2);)
255  ALCH3 ( // In single topology region ALCH3 & 4, all atoms are paired so softcore potential is unnecessary.
256  ENERGY(alch_vdw_energy = -myVdwLambda * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);)
257  alch_vdw_energy_2 = -myVdwLambda2 * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);
258  alch_vdw_force = myVdwLambda * ((diffa * vdw_d + vdw_c) * diffa + vdw_b);)
259  ALCH4 (
260  ENERGY(alch_vdw_energy = -myVdwLambda * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);)
261  alch_vdw_energy_2 = -myVdwLambda2 * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);
262  alch_vdw_force = myVdwLambda * ((diffa * vdw_d + vdw_c) * diffa + vdw_b);)
263  )
264  TI(ti_vdw_force_energy_dUdl(A, B, r2, myVdwShift, switchdist2, cutoff2,
265  switchfactor, vdwForceSwitching, myVdwLambda, alchVdwShiftCoeff,
266  alchWCAOn, myRepLambda, &alch_vdw_energy, &alch_vdw_force,
267  &alch_vdw_dUdl);)
268  )
269 
270  //NOT_ALCHPAIR(
271  //TABENERGY(
272 #if (NOT_ALCHPAIR(1+) 0)
273 #if (TABENERGY(1+) 0)
274  if (tabtype >= 0) {
275  register BigReal r1;
276  r1 = sqrt(p_ij_x*p_ij_x + p_ij_y*p_ij_y + p_ij_z*p_ij_z);
277 
278  //CkPrintf("%i %i %f %f %i\n", npertype, tabtype, r1, table_spacing, (int) (namdnearbyint(r1 / table_spacing)));
279  register int eneraddress;
280  eneraddress = 2 * ((npertype * tabtype) + ((int) namdnearbyint(r1 / table_spacing)));
281  //CkPrintf("Using distance bin %i for distance %f\n", eneraddress, r1);
282 #ifndef A2_QPX
283  vdw_d = 0.;
284  vdw_c = 0.;
285  vdw_b = table_ener[eneraddress + 1] / r1;
286  vdw_a = (-1/2.) * diffa * vdw_b;
287 #else
288  vec_insert(0., vdw_v, 0);
289  vec_insert(0., vdw_v, 1);
290  vec_insert(table_ener[eneraddress + 1] / r1, vdw_v, 2);
291  vec_insert((-1/2.) * diffa * vdw_b, vdw_v, 3);
292 #endif
293  ENERGY(
294  register BigReal vdw_val = table_ener[eneraddress];
295  //CkPrintf("Found vdw energy of %f\n", vdw_val);
296  vdwEnergy += LAM(lambda_pair *) vdw_val;
297  FEP( vdwEnergy_s += d_lambda_pair * vdw_val; )
298  )
299  } else {
300  //)
301 #endif
302  ENERGY(
303  register BigReal vdw_val =
304  ( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a;
305 
306  vdwEnergy -= LAM(lambda_pair *) vdw_val;
307 
308  FEP(vdwEnergy_s -= vdw_val;)
309  )
310  //TABENERGY( } ) /* endif (tabtype >= 0) */
311 #if (TABENERGY (1+) 0)
312  }
313 #endif
314  //) // NOT_ALCHPAIR
315 #endif
316 
317  ALCHPAIR(
318  ENERGY(vdwEnergy += alch_vdw_energy;)
319  FEP(vdwEnergy_s += alch_vdw_energy_2;)
320  TI(ALCH1(vdwEnergy_ti_1 += alch_vdw_dUdl;) ALCH2(vdwEnergy_ti_2 += alch_vdw_dUdl;))
321  ) // ALCHPAIR
322 
323 #endif // FAST
324 
325 #if ( FAST(1+) 0 )
326  INT(
327  register BigReal vdw_dir;
328  vdw_dir = ( diffa * vdw_d + vdw_c ) * diffa + vdw_b;
329  //BigReal force_r = LAM(lambda_pair *) vdw_dir;
330  reduction[pairVDWForceIndex_X] += force_sign * vdw_dir * p_ij_x;
331  reduction[pairVDWForceIndex_Y] += force_sign * vdw_dir * p_ij_y;
332  reduction[pairVDWForceIndex_Z] += force_sign * vdw_dir * p_ij_z;
333  )
334 
335 #if ( SHORT(1+) 0 ) // Short-range electrostatics
336 
337 #ifndef A2_QPX
338  NORMAL(
339  BigReal fast_d = kqq * table_four_i[8];
340  BigReal fast_c = kqq * table_four_i[9];
341  BigReal fast_b = kqq * table_four_i[10];
342  BigReal fast_a = kqq * table_four_i[11];
343  )
344  MODIFIED(
345  BigReal modfckqq = (1.0-modf_mod) * kqq;
346  BigReal fast_d = modfckqq * table_four_i[8];
347  BigReal fast_c = modfckqq * table_four_i[9];
348  BigReal fast_b = modfckqq * table_four_i[10];
349  BigReal fast_a = modfckqq * table_four_i[11];
350  )
351 #else
352  vector4double fastv = vec_mul(kqqv, vec_ld(8 * sizeof(BigReal), (BigReal*)table_four_i));
353 #define fast_d vec_extract(fastv, 0)
354 #define fast_c vec_extract(fastv, 1)
355 #define fast_b vec_extract(fastv, 2)
356 #define fast_a vec_extract(fastv, 3)
357 #endif
358 
359  {
360  ENERGY(
361  register BigReal fast_val =
362  ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;
363  NOT_ALCHPAIR (
364  electEnergy -= LAM(lambda_pair *) fast_val;
365  FEP(electEnergy_s -= fast_val;)
366  )
367  ) //ENERGY
368  ALCHPAIR(
369  ENERGY(electEnergy -= myElecLambda * fast_val;)
370  FEP(electEnergy_s -= myElecLambda2 * fast_val;)
371  TI(
372  NOENERGY(register BigReal fast_val =
373  ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;)
374  ALCH1(electEnergy_ti_1 -= fast_val;) ALCH2(electEnergy_ti_2 -= fast_val;)
375  )
376  )
377 
378  INT(
379  register BigReal fast_dir =
380  ( diffa * fast_d + fast_c ) * diffa + fast_b;
381  // force_r -= -1.0 * LAM(lambda_pair *) fast_dir;
382  reduction[pairElectForceIndex_X] += force_sign * fast_dir * p_ij_x;
383  reduction[pairElectForceIndex_Y] += force_sign * fast_dir * p_ij_y;
384  reduction[pairElectForceIndex_Z] += force_sign * fast_dir * p_ij_z;
385  )
386  }
387 
388 
389  /***** JE - Go *****/
390  // Now Go energy should appear in VDW place -- put vdw_b back into place
391 #if ( NORMAL (1+) 0)
392 #if ( GO (1+) 0)
393 
394 
395 // JLai
396 #ifndef CODE_REDUNDANT
397 #define CODE_REDUNDANT 0
398 #endif
399 #if CODE_REDUNDANT
401  // Explicit goGroPair calculation; only calculates goGroPair if goGroPair is turned on
402  //
403  // get_gro_force has an internal checklist that sees if atom_i and atom_j are
404  // in the explicit pairlist. This is done because there is no guarantee that a
405  // processor will have atom_i and atom_j so we cannot loop over the explict atom pairs.
406  // We can only loop over all pairs.
407  //
408  // NOTE: It does not look like fast_b is not normalized by the r vector.
409  //
410  // JLai
411  BigReal groLJe = 0.0;
412  BigReal groGausse = 0.0;
413  const CompAtomExt *pExt_z = pExt_1 + j;
414  BigReal groForce = mol->get_gro_force2(p_ij_x, p_ij_y, p_ij_z,pExt_i.id,pExt_z->id,&groLJe,&groGausse);
415  NAMD_die("Failsafe. This line should never be reached\n");
416 #ifndef A2_QPX
417  fast_b += groForce;
418 #else
419  vec_insert(fast_b + groForce, fastv, 2);
420 #endif
421  ENERGY(
422  NOT_ALCHPAIR (
423  // JLai
424  groLJEnergy += groLJe;
425  groGaussEnergy += groGausse;
426  )
427  ) //ENERGY
428  }
429 #endif
430  BigReal goNative = 0;
433  register const CompAtomExt *pExt_j = pExt_1 + j;
435  goForce = mol->get_go_force2(p_ij_x, p_ij_y, p_ij_z, pExt_i.id, pExt_j->id,&goNative,&goNonnative);
436  } else {
437  // Ported by JLai -- JE - added (
438  const BigReal r2go = square(p_ij_x, p_ij_y, p_ij_z);
439  const BigReal rgo = sqrt(r2go);
440 
442  goForce = mol->get_go_force(rgo, pExt_i.id, pExt_j->id, &goNative, &goNonnative);
443  } else if (ComputeNonbondedUtil::goMethod == 3) {
444  goForce = mol->get_go_force_new(rgo, pExt_i.id, pExt_j->id, &goNative, &goNonnative);
445  } else {
446  NAMD_die("I SHOULDN'T BE HERE. DYING MELODRAMATICALLY.\n");
447  }
448  }
449 
450 #ifndef A2_QPX
451  fast_b += goForce;
452 #else
453  vec_insert(fast_b + goForce, fastv, 2);
454 #endif
455  {
456  ENERGY(
457  NOT_ALCHPAIR (
458  // JLai
459  goEnergyNative += goNative;
460  goEnergyNonnative += goNonnative;
461  )
462  ) //ENERGY
463  INT(
464  reduction[pairVDWForceIndex_X] += force_sign * goForce * p_ij_x;
465  reduction[pairVDWForceIndex_Y] += force_sign * goForce * p_ij_y;
466  reduction[pairVDWForceIndex_Z] += force_sign * goForce * p_ij_z;
467  )
468  }
469  // End of INT
470 
471  //DebugM(3,"rgo:" << rgo << ", pExt_i.id:" << pExt_i.id << ", pExt_j->id:" << pExt_j->id << \
472  // ", goForce:" << goForce << ", fast_b:" << fast_b << std::endl);
473 #endif // ) // End of GO macro
474  /***** JE - End Go *****/
475  // End of port JL
476 #endif //) // End of Normal MACRO
477 
478  // Combined short-range electrostatics and VdW force:
479 #if ( NOT_ALCHPAIR(1+) 0)
480 #ifndef A2_QPX
481  fast_d += vdw_d;
482  fast_c += vdw_c;
483  fast_b += vdw_b;
484  fast_a += vdw_a; // not used!
485 #else
486  fastv = vec_add(fastv, vdw_v);
487 #endif
488 #endif
489 
490  register BigReal fast_dir =
491  (diffa * fast_d + fast_c) * diffa + fast_b;
492 
493  BigReal force_r = LAM(lambda_pair *) fast_dir;
494  ALCHPAIR(
495  force_r *= myElecLambda;
496  force_r += alch_vdw_force;
497  // special ALCH forces already multiplied by relevant lambda
498  )
499 
500 #if !(defined(NAMD_CUDA) || defined(NAMD_HIP))
501 #ifndef A2_QPX
502  register BigReal tmp_x = force_r * p_ij_x;
503  f_i_x += tmp_x;
504  f_j->x -= tmp_x;
505 
506  register BigReal tmp_y = force_r * p_ij_y;
507  f_i_y += tmp_y;
508  f_j->y -= tmp_y;
509 
510  register BigReal tmp_z = force_r * p_ij_z;
511  f_i_z += tmp_z;
512  f_j->z -= tmp_z;
513 #else
514  vector4double force_rv = vec_splats (force_r);
515  vector4double tmp_v = vec_mul(force_rv, p_ij_v);
516  f_i_v = vec_add(f_i_v, tmp_v);
517 
518 #define tmp_x vec_extract(tmp_v, 0)
519 #define tmp_y vec_extract(tmp_v, 1)
520 #define tmp_z vec_extract(tmp_v, 2)
521 
522  f_j->x -= tmp_x;
523  f_j->y -= tmp_y;
524  f_j->z -= tmp_z;
525 #endif
526 
527  PPROF(
528  const BigReal p_j_z = p_j->position.z;
529  int n2 = (int)floor((p_j_z-pressureProfileMin)*invThickness);
530  pp_clamp(n2, pressureProfileSlabs);
531  int p_j_partition = p_j->partition;
532 
533  pp_reduction(pressureProfileSlabs, n1, n2,
534  p_i_partition, p_j_partition, pressureProfileAtomTypes,
535  tmp_x*p_ij_x, tmp_y * p_ij_y, tmp_z*p_ij_z,
536  pressureProfileReduction);
537  )
538 #endif
539 
540 #endif // SHORT
541 #endif // FAST
542 
543 #if ( FULL (EXCLUDED( SHORT ( 1+ ) ) ) 0 )
544  //const BigReal* const slow_i = slow_table + 4*table_i;
545 #define slow_i (slow_table + 4*table_i)
546 
547 #ifdef ARCH_POWERPC //Alignment and aliasing constraints
548  __alignx (32, slow_table);
549 #if ( SHORT( FAST( 1+ ) ) 0 )
550 #pragma disjoint (*slow_table, *f_1)
551 #endif
552 #pragma disjoint (*slow_table, *fullf_1)
553 #endif //ARCH_POWERPC
554 
555 #endif //FULL
556 
557 
558 #if ( FULL (MODIFIED( SHORT ( 1+ ) ) ) 0 )
559  //const BigReal* const slow_i = slow_table + 4*table_i;
560 #define slow_i (slow_table + 4*table_i)
561 
562 #ifdef ARCH_POWERPC //Alignment and aliasing constraints
563  __alignx (32, slow_table);
564 #if ( SHORT( FAST( 1+ ) ) 0 )
565 #pragma disjoint (*slow_table, *f_1)
566 #endif
567 #pragma disjoint (*slow_table, *fullf_1)
568 #endif //ARCH_POWERPC
569 
570 #endif //FULL
571 
572 #if ( FULL( 1+ ) 0 )
573 #ifndef A2_QPX
574  BigReal slow_d = table_four_i[8 SHORT(+ 4)];
575  BigReal slow_c = table_four_i[9 SHORT(+ 4)];
576  BigReal slow_b = table_four_i[10 SHORT(+ 4)];
577  BigReal slow_a = table_four_i[11 SHORT(+ 4)];
578  EXCLUDED(
579  SHORT(
580  slow_a += slow_i[3];
581  slow_b += 2.*slow_i[2];
582  slow_c += 4.*slow_i[1];
583  slow_d += 6.*slow_i[0];
584  )
585  NOSHORT(
586  slow_d -= table_four_i[12];
587  slow_c -= table_four_i[13];
588  slow_b -= table_four_i[14];
589  slow_a -= table_four_i[15];
590  )
591  )
592  MODIFIED(
593  SHORT(
594  slow_a += modf_mod * slow_i[3];
595  slow_b += 2.*modf_mod * slow_i[2];
596  slow_c += 4.*modf_mod * slow_i[1];
597  slow_d += 6.*modf_mod * slow_i[0];
598  )
599  NOSHORT(
600  slow_d -= modf_mod * table_four_i[12];
601  slow_c -= modf_mod * table_four_i[13];
602  slow_b -= modf_mod * table_four_i[14];
603  slow_a -= modf_mod * table_four_i[15];
604  )
605  )
606  slow_d *= kqq;
607  slow_c *= kqq;
608  slow_b *= kqq;
609  slow_a *= kqq;
610 #else
611  vector4double slow_v = vec_ld((8 SHORT(+ 4)) * sizeof(BigReal), (BigReal*)table_four_i);
612  EXCLUDED(
613  SHORT(
614  slow_v = vec_madd(full_cnst, vec_ld(0, (BigReal*)slow_i), slow_v);
615  )
616  NOSHORT(
617  slow_v = vec_sub(slow_v, vec_ld(12*sizeof(BigReal), (BigReal*)table_four_i));
618  )
619  );
620  MODIFIED(
621  SHORT(
622  slow_v = vec_madd(full_cnst, vec_ld(0, (BigReal*)slow_i), slow_v);
623  )
624  NOSHORT(
625  slow_v = vec_nmsub(full_cnst, vec_ld(12*sizeof(BigReal), (BigReal*)table_four_i), slow_v);
626  )
627  );
628  slow_v = vec_mul (slow_v, vec_splats(kqq));
629 
630 #define slow_d vec_extract(slow_v, 0)
631 #define slow_c vec_extract(slow_v, 1)
632 #define slow_b vec_extract(slow_v, 2)
633 #define slow_a vec_extract(slow_v, 3)
634 
635 #endif
636 
637  ENERGY(
638  register BigReal slow_val =
639  ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;
640 
641  NOT_ALCHPAIR (
642  fullElectEnergy -= LAM(lambda_pair *) slow_val;
643  FEP(fullElectEnergy_s -= slow_val;)
644  )
645  ) // ENERGY
646 
647  ALCHPAIR(
648  ENERGY(fullElectEnergy -= myElecLambda * slow_val;)
649  FEP(fullElectEnergy_s -= myElecLambda2 * slow_val;)
650  TI(
651  NOENERGY(register BigReal slow_val =
652  ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;)
653  ALCH1(fullElectEnergy_ti_1 -= slow_val;) ALCH2(fullElectEnergy_ti_2 -= slow_val;)
654  )
655  )
656 
657  INT( {
658  register BigReal slow_dir =
659  ( diffa * slow_d + slow_c ) * diffa + slow_b;
660  reduction[pairElectForceIndex_X] += force_sign * slow_dir * p_ij_x;
661  reduction[pairElectForceIndex_Y] += force_sign * slow_dir * p_ij_y;
662  reduction[pairElectForceIndex_Z] += force_sign * slow_dir * p_ij_z;
663  } )
664 
665 
666 #if (NOT_ALCHPAIR (1+) 0)
667 #if (FAST(1+) 0)
668 #if (NOSHORT(1+) 0)
669 #ifndef A2_QPX
670  slow_d += vdw_d;
671  slow_c += vdw_c;
672  slow_b += vdw_b;
673  slow_a += vdw_a; // unused!
674 #else
675  slow_v = vec_add (slow_v, vdw_v);
676 #endif
677 #endif
678 #endif
679 #endif
680 
681  register BigReal slow_dir = (diffa * slow_d + slow_c) * diffa + slow_b;
682  BigReal fullforce_r = slow_dir LAM(* lambda_pair);
683  ALCHPAIR (
684  fullforce_r *= myElecLambda;
685  FAST( NOSHORT(
686  fullforce_r += alch_vdw_force;
687  ))
688  )
689 
690 #if !(defined(NAMD_CUDA) || defined(NAMD_HIP))
691  {
692 #ifndef A2_QPX
693  register BigReal ftmp_x = fullforce_r * p_ij_x;
694  fullf_i_x += ftmp_x;
695  fullf_j->x -= ftmp_x;
696  register BigReal ftmp_y = fullforce_r * p_ij_y;
697  fullf_i_y += ftmp_y;
698  fullf_j->y -= ftmp_y;
699  register BigReal ftmp_z = fullforce_r * p_ij_z;
700  fullf_i_z += ftmp_z;
701  fullf_j->z -= ftmp_z;
702 #else
703  vector4double fforce_rv = vec_splats (fullforce_r);
704  vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v);
705  fullf_i_v = vec_add(fullf_i_v, ftmp_v);
706 
707 #define ftmp_x vec_extract(ftmp_v, 0)
708 #define ftmp_y vec_extract(ftmp_v, 1)
709 #define ftmp_z vec_extract(ftmp_v, 2)
710 
711  fullf_j->x -= ftmp_x;
712  fullf_j->y -= ftmp_y;
713  fullf_j->z -= ftmp_z;
714 
715 #endif
716 
717  PPROF(
718  const BigReal p_j_z = p_j->position.z;
719  int n2 = (int)floor((p_j_z-pressureProfileMin)*invThickness);
720  pp_clamp(n2, pressureProfileSlabs);
721  int p_j_partition = p_j->partition;
722 
723  pp_reduction(pressureProfileSlabs, n1, n2,
724  p_i_partition, p_j_partition, pressureProfileAtomTypes,
725  ftmp_x*p_ij_x, ftmp_y * p_ij_y, ftmp_z*p_ij_z,
726  pressureProfileReduction);
727 
728  )
729  }
730 #endif
731 #endif //FULL
732 
733  } // for pairlist
734 
735 #undef p_j
736 #undef lj_pars
737 #undef table_four_i
738 #undef slow_i
739 #undef f_j
740 #undef fullf_j
741 
#define NORMAL(X)
#define namdnearbyint(x)
Definition: common.h:76
register BigReal fast_dir
BigReal force_r
#define LAM(X)
#define PPROF(X)
void pp_reduction(int nslabs, int n1, int n2, int atype1, int atype2, int numtypes, BigReal vxx, BigReal vyy, BigReal vzz, BigReal *reduction)
const BigReal A
register BigReal electEnergy
#define CODE_REDUNDANT
register const CompAtomExt * pExt_j
const BigReal rgo
#define NOSHORT(X)
if(ComputeNonbondedUtil::goMethod==2)
BigReal vdw_b
#define lj_pars
#define ALCHPAIR(X)
#define f_j
#define TI(X)
BigReal vdw_c
register const BigReal p_ij_z
#define INT(X)
void pp_clamp(int &n, int nslabs)
void fep_vdw_forceandenergies(BigReal A, BigReal B, BigReal r2, BigReal myVdwShift, BigReal myVdwShift2, BigReal switchdist2, BigReal cutoff2, BigReal switchfactor, Bool vdwForceSwitching, BigReal myVdwLambda, BigReal myVdwLambda2, Bool alchWCAOn, BigReal myRepLambda, BigReal myRepLambda2, BigReal *alch_vdw_energy, BigReal *alch_vdw_force, BigReal *alch_vdw_energy_2)
#define SHORT(X)
#define NOT_ALCHPAIR(X)
#define p_j
void NAMD_die(const char *err_msg)
Definition: common.C:85
BigReal goNonnative
#define table_four_i
#define FAST(X)
void ti_vdw_force_energy_dUdl(BigReal A, BigReal B, BigReal r2, BigReal myVdwShift, BigReal switchdist2, BigReal cutoff2, BigReal switchfactor, Bool vdwForceSwitching, BigReal myVdwLambda, BigReal alchVdwShiftCoeff, Bool alchWCAOn, BigReal myRepLambda, BigReal *alch_vdw_energy, BigReal *alch_vdw_force, BigReal *alch_vdw_dUdl)
#define FEP(X)
#define MODIFIED(X)
register const BigReal p_ij_x
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
#define simParams
Definition: Output.C:127
BigReal vdw_d
BigReal goForce
#define ENERGY(X)
const BigReal B
#define FULL(X)
BigReal vdw_a
#define LES(X)
#define NOENERGY(X)
register const BigReal p_ij_y
#define GO(X)
double BigReal
Definition: common.h:114
#define EXCLUDED(X)
#define TABENERGY(X)