15 myLambda = ALCH1(lambdaUp) ALCH2(lambdaDown) ALCH3(lambdaUp) ALCH4(lambdaDown);
16 FEP(myLambda2 = ALCH1(lambda2Up) ALCH2(lambda2Down) ALCH3(lambda2Up) ALCH4(lambda2Down);)
17 myElecLambda = ALCH1(elecLambdaUp) ALCH2(elecLambdaDown) ALCH3(elecLambdaUp) ALCH4(elecLambdaDown);
18 FEP(myElecLambda2 = ALCH1(elecLambda2Up) ALCH2(elecLambda2Down) ALCH3(elecLambda2Up) ALCH4(elecLambda2Down);)
19 myVdwLambda = ALCH1(vdwLambdaUp) ALCH2(vdwLambdaDown) ALCH3(vdwLambdaUp) ALCH4(vdwLambdaDown);
20 FEP(myVdwLambda2 = ALCH1(vdwLambda2Up) ALCH2(vdwLambda2Down) ALCH3(vdwLambda2Up) ALCH4(vdwLambda2Down);)
21 ALCH1(myRepLambda = repLambdaUp) ALCH2(myRepLambda = repLambdaDown);
22 FEP(ALCH1(myRepLambda2 = repLambda2Up) ALCH2(myRepLambda2 = repLambda2Down);)
23 ALCH1(myVdwShift = vdwShiftUp) ALCH2(myVdwShift = vdwShiftDown);
24 FEP(ALCH1(myVdwShift2 = vdwShift2Up) ALCH2(myVdwShift2 = vdwShift2Down);)
29 NORMAL(kq_iv = vec_splats(kq_i); )
30 MODIFIED(kq_iv = vec_splats((1.0-modf_mod) *kq_i); )
36 full_cnst = (vector4double)(6., 4., 2., 1.);
39 full_cnst = (vector4double)(1., 1., 1., 1.);
44 full_cnst = (vector4double)(6., 4., 2., 1.);
45 full_cnst = vec_mul (full_cnst, vec_splats(modf_mod));
48 full_cnst = vec_splats(modf_mod);
55 __alignx(64, table_four);
58 #pragma ibm independent_loop
67 #if ( FULL( EXCLUDED( SHORT( 1+ ) ) ) 0 )
74 #pragma omp simd SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z)) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \
75 FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) ENERGY(reduction(+:fullElectEnergy)))
78 #pragma loop_count avg=100
80 #pragma loop_count avg=4
83 for (k=0; k<npairi; ++k) {
85 const int numtypes =
simParams->tableNumTypes;
86 const float table_spacing =
simParams->tableSpacing;
90 int table_i = (r2iilist[2*k] >> 14) + r2_delta_expc;
91 const
int j = pairlisti[k];
95 register double *p_j_d = (
double *)
p_j;
99 BigReal diffa = r2list[k] - r2_table[table_i];
101 #define table_four_i (table_four + 16*table_i)
103 #if ( FAST( 1 + ) TABENERGY( 1 + ) 0 ) // FAST or TABENERGY
106 const int lj_index = 2 *
p_j->vdwType
MODIFIED(+ 1);
107 #define lj_pars (lj_row+lj_index)
109 double *lj_pars_d = (
double *)
lj_pars;
114 register const int tabtype = -1 - (
lj_pars->A < 0 ?
lj_pars->A : 0 );
117 #if ( SHORT( FAST( 1+ ) ) 0 )
124 #define fullf_j (fullf_1+j)
130 #pragma disjoint (*table_four, *fullf_1)
131 #pragma disjoint (*p_1, *fullf_1)
133 #pragma disjoint (*p_j_d, *fullf_1)
135 #pragma disjoint (*r2_table, *fullf_1)
136 #pragma disjoint (*r2list, *fullf_1)
137 #if ( SHORT( FAST( 1+ ) ) 0 )
138 #pragma disjoint (*f_1 , *fullf_1)
139 #pragma disjoint (*fullf_1, *f_1)
140 #endif //Short + fast
143 #if ( SHORT( FAST( 1+ ) ) 0 )
144 #pragma disjoint (*table_four, *f_1)
145 #pragma disjoint (*p_1, *f_1)
146 #pragma disjoint (*r2_table, *f_1)
147 #pragma disjoint (*r2list, *f_1)
148 #pragma disjoint (*lj_row, *f_1)
150 #pragma disjoint (*p_j_d, *f_1)
152 #endif //Short + Fast
159 #endif //ARCH_POWERPC
183 float * cg = (
float *)&
p_j->charge;
185 #pragma disjoint (*cg, *fullf_1)
188 #if ( SHORT( FAST( 1+ ) ) 0 )
189 #pragma disjoint (*cg, *f_1)
190 #endif //Short + fast
200 vector4double charge_v = vec_lds(0, cg);
201 vector4double kqqv = vec_mul(kq_iv, charge_v );
202 vector4double p_ij_v = vec_sub(p_i_v, vec_ld (0, p_j_d));
203 #define p_ij_x vec_extract(p_i_v, 0)
204 #define p_ij_y vec_extract(p_i_v, 1)
205 #define p_ij_z vec_extract(p_i_v, 2)
217 const vector4double Av = vec_mul(scalingv, vec_lds(0, lj_pars_d));
218 const vector4double Bv = vec_mul(scalingv, vec_lds(8, lj_pars_d));
219 vector4double vdw_v = vec_msub( Av, vec_ld(0, (
BigReal*)table_four_i), vec_mul(Bv, vec_ld(4*
sizeof(
BigReal), (
BigReal*)table_four_i)) );
220 #define vdw_d vec_extract(vdw_v, 0)
221 #define vdw_c vec_extract(vdw_v, 1)
222 #define vdw_b vec_extract(vdw_v, 2)
223 #define vdw_a vec_extract(vdw_v, 3)
239 const BigReal r2 = r2list[k] - r2_delta;
247 switchdist2,
cutoff2, switchfactor, vdwForceSwitching, myVdwLambda,
248 myVdwLambda2, alchWCAOn, myRepLambda, myRepLambda2, &alch_vdw_energy,
249 &alch_vdw_force, &alch_vdw_energy_2);)
252 switchdist2,
cutoff2, switchfactor, vdwForceSwitching, myVdwLambda,
253 myVdwLambda2, alchWCAOn, myRepLambda, myRepLambda2, &alch_vdw_energy,
254 &alch_vdw_force, &alch_vdw_energy_2);)
256 ENERGY(alch_vdw_energy = -myVdwLambda * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);)
257 alch_vdw_energy_2 = -myVdwLambda2 * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);
258 alch_vdw_force = myVdwLambda * ((diffa * vdw_d +
vdw_c) * diffa + vdw_b);)
260 ENERGY(alch_vdw_energy = -myVdwLambda * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);)
261 alch_vdw_energy_2 = -myVdwLambda2 * (( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a);
262 alch_vdw_force = myVdwLambda * ((diffa * vdw_d +
vdw_c) * diffa + vdw_b);)
265 switchfactor, vdwForceSwitching, myVdwLambda, alchVdwShiftCoeff,
266 alchWCAOn, myRepLambda, &alch_vdw_energy, &alch_vdw_force,
272 #if (NOT_ALCHPAIR(1+) 0)
273 #if (TABENERGY(1+) 0)
276 r1 = sqrt(p_ij_x*p_ij_x + p_ij_y*p_ij_y + p_ij_z*p_ij_z);
279 register int eneraddress;
280 eneraddress = 2 * ((npertype * tabtype) + ((
int)
namdnearbyint(r1 / table_spacing)));
285 vdw_b = table_ener[eneraddress + 1] / r1;
286 vdw_a = (-1/2.) * diffa *
vdw_b;
288 vec_insert(0., vdw_v, 0);
289 vec_insert(0., vdw_v, 1);
290 vec_insert(table_ener[eneraddress + 1] / r1, vdw_v, 2);
291 vec_insert((-1/2.) * diffa * vdw_b, vdw_v, 3);
297 FEP( vdwEnergy_s += d_lambda_pair *
vdw_val; )
304 ( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a;
306 vdwEnergy -=
LAM(lambda_pair *) vdw_val;
308 FEP(vdwEnergy_s -= vdw_val;)
311 #if (TABENERGY (1+) 0)
318 ENERGY(vdwEnergy += alch_vdw_energy;)
319 FEP(vdwEnergy_s += alch_vdw_energy_2;)
320 TI(ALCH1(vdwEnergy_ti_1 += alch_vdw_dUdl;) ALCH2(vdwEnergy_ti_2 += alch_vdw_dUdl;))
328 vdw_dir = ( diffa * vdw_d +
vdw_c ) * diffa + vdw_b;
330 reduction[pairVDWForceIndex_X] += force_sign * vdw_dir *
p_ij_x;
331 reduction[pairVDWForceIndex_Y] += force_sign * vdw_dir *
p_ij_y;
332 reduction[pairVDWForceIndex_Z] += force_sign * vdw_dir *
p_ij_z;
339 BigReal fast_d = kqq * table_four_i[8];
345 BigReal modfckqq = (1.0-modf_mod) * kqq;
346 BigReal fast_d = modfckqq * table_four_i[8];
347 BigReal fast_c = modfckqq * table_four_i[9];
348 BigReal fast_b = modfckqq * table_four_i[10];
349 BigReal fast_a = modfckqq * table_four_i[11];
352 vector4double fastv = vec_mul(kqqv, vec_ld(8 *
sizeof(
BigReal), (
BigReal*)table_four_i));
353 #define fast_d vec_extract(fastv, 0)
354 #define fast_c vec_extract(fastv, 1)
355 #define fast_b vec_extract(fastv, 2)
356 #define fast_a vec_extract(fastv, 3)
362 ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;
365 FEP(electEnergy_s -= fast_val;)
370 FEP(electEnergy_s -= myElecLambda2 * fast_val;)
373 ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;)
374 ALCH1(electEnergy_ti_1 -= fast_val;) ALCH2(electEnergy_ti_2 -= fast_val;)
380 ( diffa * fast_d + fast_c ) * diffa +
fast_b;
382 reduction[pairElectForceIndex_X] += force_sign *
fast_dir *
p_ij_x;
383 reduction[pairElectForceIndex_Y] += force_sign *
fast_dir *
p_ij_y;
384 reduction[pairElectForceIndex_Z] += force_sign *
fast_dir *
p_ij_z;
414 BigReal groForce = mol->get_gro_force2(p_ij_x, p_ij_y, p_ij_z,pExt_i.id,pExt_z->
id,&groLJe,&groGausse);
415 NAMD_die(
"Failsafe. This line should never be reached\n");
419 vec_insert(fast_b + groForce, fastv, 2);
424 groLJEnergy += groLJe;
425 groGaussEnergy += groGausse;
435 goForce = mol->get_go_force2(p_ij_x, p_ij_y, p_ij_z, pExt_i.id, pExt_j->
id,&goNative,&goNonnative);
438 const BigReal r2go = square(p_ij_x, p_ij_y, p_ij_z);
442 goForce = mol->get_go_force(rgo, pExt_i.id, pExt_j->
id, &goNative, &goNonnative);
444 goForce = mol->get_go_force_new(rgo, pExt_i.id, pExt_j->
id, &goNative, &goNonnative);
446 NAMD_die(
"I SHOULDN'T BE HERE. DYING MELODRAMATICALLY.\n");
453 vec_insert(fast_b + goForce, fastv, 2);
459 goEnergyNative += goNative;
464 reduction[pairVDWForceIndex_X] += force_sign * goForce * p_ij_x;
465 reduction[pairVDWForceIndex_Y] += force_sign * goForce * p_ij_y;
466 reduction[pairVDWForceIndex_Z] += force_sign * goForce * p_ij_z;
473 #endif // ) // End of GO macro
476 #endif //) // End of Normal MACRO
479 #if ( NOT_ALCHPAIR(1+) 0)
486 fastv = vec_add(fastv, vdw_v);
491 (diffa * fast_d +
fast_c) * diffa + fast_b;
495 force_r *= myElecLambda;
496 force_r += alch_vdw_force;
500 #if !(defined(NAMD_CUDA) || defined(NAMD_HIP))
514 vector4double force_rv = vec_splats (force_r);
515 vector4double tmp_v = vec_mul(force_rv, p_ij_v);
516 f_i_v = vec_add(f_i_v, tmp_v);
518 #define tmp_x vec_extract(tmp_v, 0)
519 #define tmp_y vec_extract(tmp_v, 1)
520 #define tmp_z vec_extract(tmp_v, 2)
529 int n2 = (
int)floor((p_j_z-pressureProfileMin)*invThickness);
531 int p_j_partition =
p_j->partition;
534 p_i_partition, p_j_partition, pressureProfileAtomTypes,
535 tmp_x*p_ij_x, tmp_y * p_ij_y, tmp_z*p_ij_z,
536 pressureProfileReduction);
545 #define slow_i (slow_table + 4*table_i)
547 #ifdef ARCH_POWERPC //Alignment and aliasing constraints
548 __alignx (32, slow_table);
549 #if ( SHORT( FAST( 1+ ) ) 0 )
550 #pragma disjoint (*slow_table, *f_1)
552 #pragma disjoint (*slow_table, *fullf_1)
553 #endif //ARCH_POWERPC
558 #if ( FULL (MODIFIED( SHORT ( 1+ ) ) ) 0 )
560 #define slow_i (slow_table + 4*table_i)
562 #ifdef ARCH_POWERPC //Alignment and aliasing constraints
563 __alignx (32, slow_table);
564 #if ( SHORT( FAST( 1+ ) ) 0 )
565 #pragma disjoint (*slow_table, *f_1)
567 #pragma disjoint (*slow_table, *fullf_1)
568 #endif //ARCH_POWERPC
581 slow_b += 2.*slow_i[2];
582 slow_c += 4.*slow_i[1];
583 slow_d += 6.*slow_i[0];
586 slow_d -= table_four_i[12];
587 slow_c -= table_four_i[13];
588 slow_b -= table_four_i[14];
589 slow_a -= table_four_i[15];
594 slow_a += modf_mod * slow_i[3];
595 slow_b += 2.*modf_mod * slow_i[2];
596 slow_c += 4.*modf_mod * slow_i[1];
597 slow_d += 6.*modf_mod * slow_i[0];
600 slow_d -= modf_mod * table_four_i[12];
601 slow_c -= modf_mod * table_four_i[13];
602 slow_b -= modf_mod * table_four_i[14];
603 slow_a -= modf_mod * table_four_i[15];
614 slow_v = vec_madd(full_cnst, vec_ld(0, (
BigReal*)slow_i), slow_v);
617 slow_v = vec_sub(slow_v, vec_ld(12*
sizeof(
BigReal), (
BigReal*)table_four_i));
622 slow_v = vec_madd(full_cnst, vec_ld(0, (
BigReal*)slow_i), slow_v);
625 slow_v = vec_nmsub(full_cnst, vec_ld(12*
sizeof(
BigReal), (
BigReal*)table_four_i), slow_v);
628 slow_v = vec_mul (slow_v, vec_splats(kqq));
630 #define slow_d vec_extract(slow_v, 0)
631 #define slow_c vec_extract(slow_v, 1)
632 #define slow_b vec_extract(slow_v, 2)
633 #define slow_a vec_extract(slow_v, 3)
639 ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;
642 fullElectEnergy -=
LAM(lambda_pair *) slow_val;
643 FEP(fullElectEnergy_s -= slow_val;)
648 ENERGY(fullElectEnergy -= myElecLambda * slow_val;)
649 FEP(fullElectEnergy_s -= myElecLambda2 * slow_val;)
652 ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;)
653 ALCH1(fullElectEnergy_ti_1 -= slow_val;) ALCH2(fullElectEnergy_ti_2 -= slow_val;)
659 ( diffa * slow_d + slow_c ) * diffa + slow_b;
660 reduction[pairElectForceIndex_X] += force_sign * slow_dir *
p_ij_x;
661 reduction[pairElectForceIndex_Y] += force_sign * slow_dir *
p_ij_y;
662 reduction[pairElectForceIndex_Z] += force_sign * slow_dir *
p_ij_z;
675 slow_v = vec_add (slow_v, vdw_v);
681 register BigReal slow_dir = (diffa * slow_d + slow_c) * diffa + slow_b;
682 BigReal fullforce_r = slow_dir
LAM(* lambda_pair);
684 fullforce_r *= myElecLambda;
686 fullforce_r += alch_vdw_force;
690 #
if !(defined(NAMD_CUDA) || defined(NAMD_HIP))
695 fullf_j->x -= ftmp_x;
698 fullf_j->y -= ftmp_y;
701 fullf_j->z -= ftmp_z;
703 vector4double fforce_rv = vec_splats (fullforce_r);
704 vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v);
705 fullf_i_v = vec_add(fullf_i_v, ftmp_v);
707 #define ftmp_x vec_extract(ftmp_v, 0)
708 #define ftmp_y vec_extract(ftmp_v, 1)
709 #define ftmp_z vec_extract(ftmp_v, 2)
711 fullf_j->x -= ftmp_x;
712 fullf_j->y -= ftmp_y;
713 fullf_j->z -= ftmp_z;
719 int n2 = (
int)floor((p_j_z-pressureProfileMin)*invThickness);
721 int p_j_partition =
p_j->partition;
724 p_i_partition, p_j_partition, pressureProfileAtomTypes,
725 ftmp_x*p_ij_x, ftmp_y * p_ij_y, ftmp_z*p_ij_z,
726 pressureProfileReduction);
register BigReal fast_dir
void pp_reduction(int nslabs, int n1, int n2, int atype1, int atype2, int numtypes, BigReal vxx, BigReal vyy, BigReal vzz, BigReal *reduction)
register BigReal electEnergy
register const CompAtomExt * pExt_j
if(ComputeNonbondedUtil::goMethod==2)
register const BigReal p_ij_z
void pp_clamp(int &n, int nslabs)
void fep_vdw_forceandenergies(BigReal A, BigReal B, BigReal r2, BigReal myVdwShift, BigReal myVdwShift2, BigReal switchdist2, BigReal cutoff2, BigReal switchfactor, Bool vdwForceSwitching, BigReal myVdwLambda, BigReal myVdwLambda2, Bool alchWCAOn, BigReal myRepLambda, BigReal myRepLambda2, BigReal *alch_vdw_energy, BigReal *alch_vdw_force, BigReal *alch_vdw_energy_2)
void NAMD_die(const char *err_msg)
void ti_vdw_force_energy_dUdl(BigReal A, BigReal B, BigReal r2, BigReal myVdwShift, BigReal switchdist2, BigReal cutoff2, BigReal switchfactor, Bool vdwForceSwitching, BigReal myVdwLambda, BigReal alchVdwShiftCoeff, Bool alchWCAOn, BigReal myRepLambda, BigReal *alch_vdw_energy, BigReal *alch_vdw_force, BigReal *alch_vdw_dUdl)
register const BigReal p_ij_x
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
register const BigReal p_ij_y