NAMD
ComputeGBIS.C
Go to the documentation of this file.
1 
8 //#define BENCHMARK
9 #define CHECK_PRIORITIES 0
10 //#define PRINT_COMP
11 
12 //approximate flop equivalence of transcendental functs
13 #ifdef BENCHMARK
14 #define SQRT_FLOPS 15
15 #define LOG_FLOPS 20
16 #define EXP_FLOPS 20
17 #define DIV_FLOPS 1
18 #endif
19 
20 #include "ComputeNonbondedUtil.h"
21 #include "ComputeGBIS.inl"
22 #include "time.h"
23 
24 /*******************************************************************************
25  When GBIS is active, the method calcGBIS is called from a nonbonded
26  compute object ::doForce() three times each timestep with
27  gbisPhase = 1, 2, 3.
28 
29  At the beginning of the first phase, the pairlists are generated which are
30  used for all three phases; these pairlists are re-generated every timestep.
31  4 mutually exclusive pairlists are used, in an effort to accelerate the
32  iteration over pairs.
33  pairlist 0: atoms fall within interaction domain 2 (largest group)
34  pairlist 1: atoms fall within interaction domain 1 (next largest)
35  pairlist 2: atoms are within the phase 1,3 cutoff (all the rest)
36  pairlist 3: atoms are within the phase 2 cutoff (generally longer than previous cutoff)
37 
38  These calculations generate nonbonded forces (and associated energies) which
39  are calculated in addition to Coulomb and van der Waals forces.
40 *******************************************************************************/
41 
42 /*Searches all pair of atoms to create
43 largest pairlist lasting all cycle*/
44 inline void pairlistFromAll(
45  nonbonded *params,
46  GBISParamStruct *gbisParams,
47  int minIg,
48  int strideIg,
49  int maxI
50 ) {
51  const BigReal offset_x = params->offset.x;
52  const BigReal offset_y = params->offset.y;
53  const BigReal offset_z = params->offset.z;
54 
55  int unique = (gbisParams->numPatches == 1) ? 1 : 0;
56 
57  int maxPairs = params->numAtoms[1];
58  int seq = gbisParams->sequence;
59  int cid = gbisParams->cid;
60  int pe = CkMyPe();
61  int gbisPhase = gbisParams->gbisPhase;
62 
63  //determine long and short cutoff
64  float fsMax = FS_MAX; // gbisParams->fsMax;
65  float a_cut = gbisParams->a_cut - fsMax;
66  float a_cut_ps2 = (a_cut+fsMax)*(a_cut+fsMax);
67  float r_cut = gbisParams->cutoff;
68  float a_cut2 = a_cut*a_cut;
69  float r_cut2 = r_cut*r_cut;
70  int s = 0;// 0 is short cutoff list
71  int l = 1;// 1 is long-short cutoff list
72 #ifdef BENCHMARK
73  double t1 = 1.0*clock()/CLOCKS_PER_SEC;
74  int nops = 0;
75 #endif
76  Position ri, rj;
77  Position ngri, ngrj;
78  float r, dr, r2;
79  float rhoi0, rhois, rhoj0, rhojs, rhois2, rhojs2, rhois2_16;
80  float ami2, amj2, api2, apj2;
81  int numGBISPairlists = 4;
82 
83  float max_gcut2 = r_cut;
84  if (a_cut+fsMax > r_cut)
85  max_gcut2 = a_cut+fsMax;
86  max_gcut2 += 2.0*gbisParams->maxGroupRadius;
87  //CkPrintf("max_cut = %f, %f\n",max_gcut2,gbisParams->maxGroupRadius);
88  max_gcut2 *= max_gcut2;
89 
90  int maxGroupPairs = params->numAtoms[1];
91  short *groupPairs = new short[maxGroupPairs];/*delete*/
92 
93  //foreach nonbonded group i
94  for (int ngi = minIg; ngi < maxI; /*ngi updated at loop bottom*/ ) {
95  int numGroupPairs = 0;
96  ngri = params->p[0][ngi].position;
97  ngri.x += offset_x;
98  ngri.y += offset_y;
99  ngri.z += offset_z;
100 
101  //find close j-groups; include i-group
102  for (int ngj = unique*(ngi+params->p[0][ngi].nonbondedGroupSize); ngj < params->numAtoms[1]; ngj+=params->p[1][ngj].nonbondedGroupSize) {
103 
104  ngrj = params->p[1][ngj].position;
105  dr = ngri.x - ngrj.x;
106  r2 = dr*dr;
107  dr = ngri.y - ngrj.y;
108  r2 += dr*dr;
109  dr = ngri.z - ngrj.z;
110  r2 += dr*dr;
111  if (r2 < max_gcut2) {
112  groupPairs[numGroupPairs++] = ngj;
113  }
114  }
115 
116  //for all i in i-group
117  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
118  for (int i=ngi; i<ngi+iGroupSize; i++) {
119  //CkPrintf("\tFORALL i=%05i\n",params->pExt[0][i].id);
120  //extend pairlists
121  int *size = new int[numGBISPairlists];
122  plint **pairs = new plint*[numGBISPairlists];
123  for (int k = 0; k < numGBISPairlists; k++) {
124  size[k] = 0;
125  pairs[k] = gbisParams->gbisStepPairlists[k]->
126  newlist(maxPairs);
127  }
128 
129  //load i values
130  ri = params->p[0][i].position;
131  ri.x += offset_x;
132  ri.y += offset_y;
133  ri.z += offset_z;
134  rhois = gbisParams->intRad[0][2*i+1];
135  api2 = a_cut + rhois;
136  api2 *= api2;
137  ami2 = a_cut - rhois;
138  ami2 *= ami2;
139  rhois2 = rhois*rhois;
140  rhois2_16 = 16.0*rhois2;
141 
142  //for all other i's in i-group
143  if (unique)
144  for (int j = i+1; j < ngi+iGroupSize; j++) {
145 
146 #ifdef BENCHMARK
147  nops ++;
148 #endif
149  rj = params->p[1][j].position;
150  dr = ri.x - rj.x;
151  r2 = dr*dr;
152  dr = ri.y - rj.y;
153  r2 += dr*dr;
154  dr = ri.z - rj.z;
155  r2 += dr*dr;
156 
157  rhojs = gbisParams->intRad[1][2*j+1];
158  rhojs2 = rhojs*rhojs;
159  //r = sqrt(r2);
160  amj2 = a_cut - rhojs;
161  amj2 *= amj2;
162 
163  apj2 = a_cut + rhojs;
164  apj2 *= apj2;
165  //CkPrintf("IPAIR %5i %5i %5i\n",0*gbisParams->cid,params->pExt[0][i].id,params->pExt[1][j].id);
166 
167  if ( r2 < ami2 && r2 < amj2 &&
168  r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
169  //belongs to 22
170  pairs[0][size[0]++] = j;
171  } else if ( r2 < api2 && r2 < apj2 &&
172  r2 > ami2 && r2 > amj2 ) {
173  //belongs to 11
174  pairs[1][size[1]++] = j;
175  } else if ( r2 < a_cut_ps2 ) {
176  //belongs to other a_cut
177  pairs[2][size[2]++] = j;
178  } else if ( r2 < r_cut2 ) {
179  //belongs to r_cut and (r_cut > a_cut)
180  pairs[3][size[3]++] = j;
181  }
182  }
183 
184  //foreach j-group
185  for (int g = 0; g < numGroupPairs; g++) {
186  int ngj = groupPairs[g];
187  int jGroupSize = params->p[1][ngj].nonbondedGroupSize;
188  //for each j in j-group
189  int maxJ = ngj+jGroupSize;
190  for (int j = ngj; j < maxJ; j++) {
191 
192  //CkPrintf("for j-atom%i\n",j);
193 #ifdef BENCHMARK
194  nops ++;
195 #endif
196  rj = params->p[1][j].position;
197  dr = ri.x - rj.x;
198  r2 = dr*dr;
199  dr = ri.y - rj.y;
200  r2 += dr*dr;
201  dr = ri.z - rj.z;
202  r2 += dr*dr;
203  //CkPrintf("PAIR %5i %5i %5i\n",0*gbisParams->cid,params->pExt[0][i].id,params->pExt[1][j].id);
204 
205  rhojs = gbisParams->intRad[1][2*j+1];
206  rhojs2 = rhojs*rhojs;
207  //r = sqrt(r2);
208  amj2 = a_cut - rhojs;
209  amj2 *= amj2;
210 
211  apj2 = a_cut + rhojs;
212  apj2 *= apj2;
213 
214  if ( r2 < ami2 && r2 < amj2 &&
215  r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
216  //belongs to 22
217  //CkPrintf("Pair22 %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
218  pairs[0][size[0]++] = j;
219  } else if ( r2 < api2 && r2 < apj2 &&
220  r2 > ami2 && r2 > amj2 ) {
221  //CkPrintf("Pair11 %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
222  //belongs to 11
223  pairs[1][size[1]++] = j;
224  } else if ( r2 < a_cut_ps2 ) {
225  //CkPrintf("PairAA %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
226  //belongs to other a_cut
227  pairs[2][size[2]++] = j;
228  } else if ( r2 < r_cut2 ) {
229  //CkPrintf("PairRR %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
230  //belongs to r_cut and (r_cut > a_cut)
231  pairs[3][size[3]++] = j;
232  }
233  }//end j inner loop
234  }//end j-group loop
235  for (int k = 0; k < numGBISPairlists; k++) {
236  gbisParams->gbisStepPairlists[k]->newsize(size[k]);
237  }
238  delete[] size;
239  delete[] pairs;
240  }//end i all atom loop
241 
242  //jump to next nbg for round-robin
243  //same iteration patter followed in all three gbisPhases below
244  for (int s = 0; s < strideIg; s++) {
245  ngi+=params->p[0][ngi].nonbondedGroupSize;
246  }
247  }//end i-group loop
248  delete[] groupPairs;
249  for (int k = 0; k < numGBISPairlists; k++)
250  gbisParams->gbisStepPairlists[k]->reset();
251 
252 #ifdef BENCHMARK
253  double t2 = 1.0*clock()/CLOCKS_PER_SEC;
254  CkPrintf("PHASELST: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
255 #endif
256 }//end pairlist method
257 
258 /*******************************************************************************
259  * Calculate GBIS 3 Phases
260 *******************************************************************************/
262 //CkPrintf("SEQ%03i, P%i, CID%05i(%02i,%02i) ENTER\n",gbisParams->sequence,gbisParams->gbisPhase,gbisParams->cid,gbisParams->patchID[0],gbisParams->patchID[1]);
263 #if CHECK_PRIORITIES
264 CkPrintf("PE%i, S%09i, P%i\n",CkMyPe(),gbisParams->sequence,gbisParams->gbisPhase);
265 #endif
266  if (params->numAtoms[0] == 0 || params->numAtoms[1] == 0) return;
267 
268  const BigReal offset_x = params->offset.x;
269  const BigReal offset_y = params->offset.y;
270  const BigReal offset_z = params->offset.z;
271 
272  int partSize = params->numAtoms[0] / params->numParts;
273  int minIg = 0;
274  for (int s = 0; s < params->minPart; s++) {
275  minIg+=params->p[0][minIg].nonbondedGroupSize;
276  }
277  int maxI = params->numAtoms[0];
278  int strideIg = params->numParts;
279 
280  int unique = (gbisParams->numPatches == 1) ? 1 : 0;//should inner loop be unique from ourter loop
281  int numGBISPairlists = 4;
282  float r_cut = gbisParams->cutoff;
283  float fsMax = FS_MAX; // gbisParams->fsMax;//FS_MAX;
284  float a_cut = gbisParams->a_cut-fsMax;
285  float a_cut2 = a_cut*a_cut;
286  float a_cut_ps = a_cut + fsMax;//max screen radius
287  float r_cut2 = r_cut*r_cut;
288  float a_cut_ps2 = a_cut_ps*a_cut_ps;
289  //put PL pointer back to beginning of lists
290  for (int k = 0; k < numGBISPairlists; k++)
291  gbisParams->gbisStepPairlists[k]->reset();
292 
293 
294 /***********************************************************
295 * GBIS Phase 1
296 ***********************************************************/
297 if (gbisParams->gbisPhase == 1) {
298 //CkPrintf("SEQ%03i, P%i, CID%05i(%02i,%02i):[%03i,%03i]\n",gbisParams->sequence,gbisParams->gbisPhase,gbisParams->cid,gbisParams->patchID[0],gbisParams->patchID[1],minI,maxI);
299 
300  pairlistFromAll(params,gbisParams,minIg,strideIg,maxI);
301 
302 #ifdef BENCHMARK
303  int nops = 0;
304  int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
305  int numDom = 7;
306  double t1 = 1.0*clock()/CLOCKS_PER_SEC;
307 #endif
308  register GBReal psiI;
309 
310  register float dr;
311  register float r2;
312  register float r, r_i, r2_i;
313  float rhoi0, rhois, rhojs, rhoj0;
314  Position ri, rj;
315  register int j;
316  int numPairs;
317  register float ta = TA;
318  register float tb = TB;
319  register float tc = TC;
320  register float td = TD;
321  register float te = TE;
322 
323  register float hij,hji;
324  int dij,dji;
325  float k;
326  float rhois2, rhojs2;
327 
328  //calculate piecewise-22 Pairs
329  int c = 0;
330  for (int ngi = minIg; ngi < maxI; ) {
331  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
332  for (int i = ngi; i < ngi+iGroupSize; i++) {
333  ri = params->p[0][i].position;
334  ri.x += offset_x;
335  ri.y += offset_y;
336  ri.z += offset_z;
337  rhoi0 = gbisParams->intRad[0][2*i+0];
338  rhois = gbisParams->intRad[0][2*i+1];
339  rhois2 = rhois*rhois;
340  psiI = ZERO;
341  plint *pairs;
342  gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
343  for (register int jj = 0; jj < numPairs; jj++) {
344 #ifdef BENCHMARK
345  nops++;
346 #endif
347  j = pairs[jj];
348  rj = params->p[1][j].position;
349 
350  dr = (ri.x - rj.x);
351  r2 = dr*dr;
352  dr = (ri.y - rj.y);
353  r2 += dr*dr;
354  dr = (ri.z - rj.z);
355  r2 += dr*dr;
356  r2_i = 1.0/r2;
357 
358  rhoj0 = gbisParams->intRad[1][2*j+0];
359  rhojs = gbisParams->intRad[1][2*j+1];
360  rhojs2 = rhojs*rhojs;
361 
362  k = rhojs2*r2_i;//k=(rs/r)^2
363  hij = rhojs*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
364 
365  k = rhois2*r2_i;//k=(rs/r)^2
366  hji = rhois*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
367 
368 //#if 1
369 #ifdef PRINT_COMP
370  int id1 = params->pExt[0][i].id;
371  int id2 = params->pExt[1][j].id;
372  float h1 = hij;
373  float h2 = hji;
374  float r10 = rhoi0;
375  float r1s = rhojs;
376  float r20 = rhoj0;
377  float r2s = rhois;
378 /*printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
379 id1,id2,sqrt(r2),
380 rhoi0, rhojs,
381 2,hij);
382 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
383 id2,id1,sqrt(r2),
384 rhoj0, rhois,
385 2,hji);
386 */
387  if (id1 > id2) {
388  int tmp = id1;
389  id1 = id2;
390  id2 = tmp;
391  h1 = hji;
392  h2 = hij;
393  r20 = rhoi0;
394  r2s = rhojs;
395  r10 = rhoj0;
396  r1s = rhois;
397  }
398 // CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),2,h1,r10,r1s);
399 // CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),2,h2,r20,r2s);
400  //CkPrintf("PPSI(%04i)[%04i,%04i] = 22\n",gbisParams->cid,id1,id2);
401 #endif
402 
403  psiI += hij;
404  gbisParams->psiSum[1][j] += hji;
405  }//end inner j
406  gbisParams->psiSum[0][i] += psiI;
407  }//end outer i
408  for (int s = 0; s < strideIg; s++) {
409  ngi+=params->p[0][ngi].nonbondedGroupSize;
410  }
411  }
412 #ifdef BENCHMARK
413  double t2 = 1.0*clock()/CLOCKS_PER_SEC;
414  //nops *= (9 + 2*DIV_FLOPS+SQRT_FLOPS) + (14 + 0*DIV_FLOPS + 0*LOG_FLOPS);
415  //double flops = 1.0 * nops / (t2 - t1);
416  //double gflops = flops / 1000000000.0;
417  CkPrintf("PHASE1.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
418 nops = 0;
419  t1 = 1.0*clock()/CLOCKS_PER_SEC;
420  nops = 0;
421 #endif
422 
423  float rmris, rmrjs;
424  float rmrsi;
425  float rmrs2;
426  float rs2;
427  float logri, logrj;
428  float rci2;
429  float a_cut_i = 1.0 / a_cut;
430  float a_cut_i2 = a_cut_i*a_cut_i;
431 
432  //calculate piecewise-11 pairs
433  c = 1;
434  for (int ngi = minIg; ngi < maxI; ) {
435  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
436  for (int i = ngi; i < ngi+iGroupSize; i++) {
437  ri = params->p[0][i].position;
438  ri.x += offset_x;
439  ri.y += offset_y;
440  ri.z += offset_z;
441  rhoi0 = gbisParams->intRad[0][2*i+0];
442  rhois = gbisParams->intRad[0][2*i+1];
443  psiI = ZERO;
444  plint *pairs;
445  gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
446  for (register int jj = 0; jj < numPairs; jj++) {
447  j = pairs[jj];
448  rj = params->p[1][j].position;
449 
450  dr = (ri.x - rj.x);
451  r2 = dr*dr;
452  dr = (ri.y - rj.y);
453  r2 += dr*dr;
454  dr = (ri.z - rj.z);
455  r2 += dr*dr;
456  r_i = 1.0/sqrt(r2);
457  r = r2*r_i;
458 
459  rhoj0 = gbisParams->intRad[1][2*j+0];
460  rhojs = gbisParams->intRad[1][2*j+1];
461 
462  float tmp1 = 0.125*r_i;
463  float tmp2 = r2 - 4.0*a_cut*r;
464  float rr = 2.0*r;
465 
466 
467  rmrjs = r-rhojs;
468  rmris = r-rhois;
469  logri = log(rmris*a_cut_i);
470  logrj = log(rmrjs*a_cut_i);
471 
472  rmrsi = 1.0/rmrjs;
473  //rmrs2 = rmrjs*rmrjs;
474  rs2 = rhojs*rhojs;
475  hij = /*0.125*r_i*/tmp1*(1 + rr*rmrsi +
476  a_cut_i2*(/*r2 - 4.0*a_cut*r*/tmp2 - rs2) + logrj+logrj);
477 
478 
479  rmrsi = 1.0/rmris;
480  //rmrs2 = rmris*rmris;
481  rs2 = rhois*rhois;
482  hji = /*0.125*r_i*/tmp1*(1 + rr*rmrsi +
483  a_cut_i2*(/*r2 - 4.0*a_cut*r*/tmp2 - rs2) + 2.0* logri);
484 //#if 1
485 #ifdef PRINT_COMP
486  int id1 = params->pExt[0][i].id;
487  int id2 = params->pExt[1][j].id;
488  float h1 = hij;
489  float h2 = hji;
490  float r10 = rhoi0;
491  float r1s = rhojs;
492  float r20 = rhoj0;
493  float r2s = rhois;
494 /*printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
495 id1,id2,sqrt(r2),
496 rhoi0, rhojs,
497 1,hij);
498 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
499 id2,id1,sqrt(r2),
500 rhoj0, rhois,
501 1,hji);*/
502  if (id1 > id2) {
503  int tmp = id1;
504  id1 = id2;
505  id2 = tmp;
506  h1 = hji;
507  h2 = hij;
508  r20 = rhoi0;
509  r2s = rhojs;
510  r10 = rhoj0;
511  r1s = rhois;
512  }
513 // CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),1,h1,r10,r1s);
514 // CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),1,h2,r20,r2s);
515  //CkPrintf("PSI(%04i)[%04i,%04i] = 11 % .4e % .4e\n",gbisParams->sequence,id1,id2,h1, h2);
516  //CkPrintf("PPSI(%04i)[%04i,%04i] = 11\n",gbisParams->cid,id1,id2);
517 #endif
518 
519 #ifdef BENCHMARK
520  nops++;
521 #endif
522 
523  psiI += hij;
524  gbisParams->psiSum[1][j] += hji;
525  }//end inner j
526  gbisParams->psiSum[0][i] += psiI;
527  }//end outer i
528  for (int s = 0; s < strideIg; s++) {
529  ngi+=params->p[0][ngi].nonbondedGroupSize;
530  }
531  }
532 #ifdef BENCHMARK
533  t2 = 1.0*clock()/CLOCKS_PER_SEC;
534  CkPrintf("PHASE1.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
535 nops = 0;
536  t1 = 1.0*clock()/CLOCKS_PER_SEC;
537  nops = 0;
538 #endif
539 
540  //calculate all other piecewise pairs
541  c = 2;
542  for (int ngi = minIg; ngi < maxI; ) {
543  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
544  for (int i = ngi; i < ngi+iGroupSize; i++) {
545  ri = params->p[0][i].position;
546  ri.x += offset_x;
547  ri.y += offset_y;
548  ri.z += offset_z;
549  rhoi0 = gbisParams->intRad[0][2*i+0];
550  rhois = gbisParams->intRad[0][2*i+1];
551  psiI = ZERO;
552  plint *pairs;
553  gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
554  for (register int jj = 0; jj < numPairs; jj++) {
555  j = pairs[jj];
556  rj = params->p[1][j].position;
557 
558  dr = (ri.x - rj.x);
559  r2 = dr*dr;
560  dr = (ri.y - rj.y);
561  r2 += dr*dr;
562  dr = (ri.z - rj.z);
563  r2 += dr*dr;
564  rhojs = gbisParams->intRad[1][2*j+1];
565  rhoj0 = gbisParams->intRad[1][2*j+0];
566  r_i = 1.0/sqrt(r2);
567  r = r2 * r_i;
568 
569  CalcHPair(r,r2,r_i,a_cut,
570  rhoi0, rhojs,
571  rhoj0, rhois,
572  dij,dji,hij,hji);
573 //#if 1
574 #ifdef PRINT_COMP
575  int id1 = params->pExt[0][i].id;
576  int id2 = params->pExt[1][j].id;
577  float h1 = hij;
578  float h2 = hji;
579  int d1 = dij;
580  int d2 = dji;
581  float r10 = rhoi0;
582  float r1s = rhojs;
583  float r20 = rhoj0;
584  float r2s = rhois;
585 /* if (dij > 0 ) {
586 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
587 id1,id2,sqrt(r2),
588 rhoi0, rhojs,
589 dij,hij);
590  }
591  if (dji > 0 ) {
592 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
593 id2,id1,sqrt(r2),
594 rhoj0, rhois,
595 dji,hji);
596  }*/
597 // CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),d1,h1,r10,r1s);
598  if (id1 > id2) {
599  int tmp = id1;
600  id1 = id2;
601  id2 = tmp;
602  h1 = hji;
603  h2 = hij;
604  d1 = dji;
605  d2 = dij;
606  r20 = rhoi0;
607  r2s = rhojs;
608  r10 = rhoj0;
609  r1s = rhois;
610  }
611 // CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),d2,h2,r20,r2s);
612  //CkPrintf("PSI(%04i)[%04i,%04i] = %i%i % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1, h2);
613  //CkPrintf("PPSI(%04i)[%04i,%04i] = %i%i\n",gbisParams->cid,id1,id2,d1,d2);
614 #endif
615 
616 #ifdef BENCHMARK
617  nops++;
618 #endif
619  psiI += hij;
620  gbisParams->psiSum[1][j] += hji;
621  }//end inner j
622  gbisParams->psiSum[0][i] += psiI;
623  }//end outer i
624  for (int s = 0; s < strideIg; s++) {
625  ngi+=params->p[0][ngi].nonbondedGroupSize;
626  }
627  }
628 
629 #ifdef BENCHMARK
630  t2 = 1.0*clock()/CLOCKS_PER_SEC;
631  CkPrintf("PHASE1.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
632 #endif
633 
634 /***********************************************************
635 * GBIS Phase 2
636 ***********************************************************/
637 } else if (gbisParams->gbisPhase == 2) {
638 
639  float epsilon_s = gbisParams->epsilon_s;
640  float epsilon_p = gbisParams->epsilon_p;
641  float epsilon_s_i = 1/epsilon_s;
642  float epsilon_p_i = 1/epsilon_p;
643  float kappa = gbisParams->kappa;
644 
645  //values used in loop
646  float r_cut_2 = 1.0 / r_cut2;
647  float r_cut_4 = 4.0*r_cut_2*r_cut_2;
648  float coulEij=0,ddrCoulEij=0,gbEij=0,ddrGbEij=0;
649  float dEdai=0,dEdaj=0, qiqj=0;
650  float scale=0, ddrScale=0;
651  float rnx=0,rny=0,rnz=0;
652  float fx=0,fy=0,fz=0,forceCoul=0, forcedEdr=0;
653 
654  int nops = 0;
655  double t1 = 1.0*clock()/CLOCKS_PER_SEC;
656  float r2;
657  float dr;
658  BigReal dx, dy, dz;
659  float r, r_i, ratio;
660  float fIx, fIy, fIz;
661  GBReal dEdaSumI;
662  float bornRadI, bornRadJ;
663  float qi;
664  Position ri, rj;
665  float aiaj,expr2aiaj4,fij,f_i,expkappa,Dij;
666  float aiaj4,ddrDij,ddrf_i,ddrfij,tmp_dEda;
667 
668  for (int c = 0; c < 4/*dEdrPLs*/; c++) {
669  for (int ngi = minIg; ngi < maxI; ) {
670  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
671  for (int i = ngi; i < ngi+iGroupSize; i++) {
672  ri = params->p[0][i].position;
673  ri.x += offset_x;
674  ri.y += offset_y;
675  ri.z += offset_z;
676  qi = - COULOMB * params->p[0][i].charge * scaling;
677  //printf("ATOM(%05i) %.3e %.3e %.3e\n", params->pExt[0][i].id, params->p[0][i].charge, COULOMB, scaling);
678  int numPairs;
679  plint *pairs;
680  gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
681  fIx = fIy = fIz = 0.0;
682  dEdaSumI = 0.0;
683  bornRadI = gbisParams->bornRad[0][i];
684  for (int jj = 0; jj < numPairs; jj++) {
685  int j = pairs[jj];
686  rj = params->p[1][j].position;
687 
688  //distance
689  dx = (ri.x - rj.x);
690  dy = (ri.y - rj.y);
691  dz = (ri.z - rj.z);
692  r2 = dx*dx + dy*dy + dz*dz;
693  if (r2 > r_cut2) continue;
694  qiqj = qi*params->p[1][j].charge;
695  bornRadJ = gbisParams->bornRad[1][j];
696  r_i = 1.0/sqrt(r2);
697  r = r2 * r_i;
698 
699  //pairwise calculation
700 /* Phase2_Pair(r, r2, r_i, qiqj,
701  bornRadI,
702  bornRadJ,
703  epsilon_p_i, epsilon_s_i,
704  kappa, gbisParams->doFullElectrostatics,
705  gbEij, ddrGbEij, dEdai, dEdaj);
706 */
707 
708  //calculate GB energy
709  aiaj = bornRadI*bornRadJ;
710  aiaj4 = 4*aiaj;
711  expr2aiaj4 = exp(-r2/aiaj4);
712  fij = sqrt(r2+aiaj*expr2aiaj4);
713  f_i = 1/fij;
714  expkappa = (kappa > 0) ? exp(-kappa*fij) : 1.0;
715  Dij = epsilon_p_i - expkappa*epsilon_s_i;
716  gbEij = qiqj*Dij*f_i;
717 
718  //calculate energy derivatives
719  ddrfij = r*f_i*(1 - 0.25*expr2aiaj4);
720  ddrf_i = -ddrfij*f_i*f_i;
721  ddrDij = kappa*expkappa*ddrfij*epsilon_s_i;
722  ddrGbEij = qiqj*(ddrDij*f_i+Dij*ddrf_i);
723 
724  //calc dEda
725  //NAMD smoothing function
726  scale = 1;
727  ddrScale = 0;
728  if (gbisParams->doSmoothing) {
729  scale = r2 * r_cut_2 - 1;
730  scale *= scale;
731  ddrScale = r*(r2-r_cut2)*r_cut_4;
732  //CkPrintf("SCALE %f %f\n",scale,ddrScale);
733  gbisParams->gbInterEnergy += gbEij * scale;
734  forcedEdr = -(ddrGbEij)*scale-(gbEij)*ddrScale;
735  } else {
736  gbisParams->gbInterEnergy += gbEij;
737  forcedEdr = -ddrGbEij;
738  }
739 
740  //add dEda
741  if (gbisParams->doFullElectrostatics) {
742  //gbisParams->dEdaSum[0][i] += dEdai*scale;
743  tmp_dEda = 0.5*qiqj*f_i*f_i
744  *(kappa*epsilon_s_i*expkappa-Dij*f_i)
745  *(aiaj+0.25*r2)*expr2aiaj4;//0
746  dEdai = tmp_dEda/bornRadI;
747  dEdaj = tmp_dEda/bornRadJ;
748  dEdaSumI += dEdai*scale;
749  gbisParams->dEdaSum[1][j] += dEdaj*scale;
750  }
751 
752 #if 1
753 //#ifdef PRINT_COMP
754  int id1 = params->pExt[0][i].id;
755  int id2 = params->pExt[1][j].id;
756  float deda1 = dEdai;
757  float deda2 = dEdaj;
758  float bR1 = bornRadI;
759  float bR2 = bornRadJ;
760  if (id1 > id2) {
761  int tmp = id1;
762  id1 = id2;
763  id2 = tmp;
764  deda1 = dEdaj;
765  deda2 = dEdai;
766  bR1 = bornRadJ;
767  bR2 = bornRadI;
768  }
769  //CkPrintf("DEDR(%04i)[%04i,%04i] = % .4e\n",gbisParams->sequence,id1,id2,forcedEdr);
770  //CkPrintf("DASM(%04i)[%04i,%04i] = % .4e % .4e\n",gbisParams->sequence,id1, id2, deda1,deda2);
771  //CkPrintf("P2RM(%04i)[%04i,%04i] = % .4e % .4e % .4e % .4e % .4e\n",gbisParams->sequence,id1, id2, r, bR1,bR2,epsilon_p_i,epsilon_s_i,kappa);
772 /*CkPrintf("P2PAIR %05i %05i%9.5f%6.3f%6.3f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e\n",
773 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
774 bornRadI, bornRadJ, forcedEdr, dEdai, qiqj, Dij, scale
775 );
776 CkPrintf("P2PAIR %05i %05i%9.5f%6.3f%6.3f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e\n",
777 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
778 bornRadJ, bornRadI, forcedEdr, dEdaj, qiqj, Dij, scale
779 );*/
780 #endif
781 
782  forcedEdr *= r_i;
783  fx = dx*forcedEdr;
784  fy = dy*forcedEdr;
785  fz = dz*forcedEdr;
786 
787  params->ff[1][j].x -= fx;
788  params->ff[1][j].y -= fy;
789  params->ff[1][j].z -= fz;
790 
791  fIx += fx;
792  fIy += fy;
793  fIz += fz;
794 
795 #ifdef BENCHMARK
796  nops ++;//= (59 + 4*DIV_FLOPS + 2*EXP_FLOPS+SQRT_FLOPS);//56 w/o nops
797 #endif
798 
799  }//end inner j
800  gbisParams->dEdaSum[0][i] += dEdaSumI;
801  params->ff[0][i].x += fIx;
802  params->ff[0][i].y += fIy;
803  params->ff[0][i].z += fIz;
804 
805  //self energy of each atom
806  if (c == 0 && gbisParams->doEnergy && gbisParams->numPatches == 1) {
807  float fij = bornRadI;//inf
808  float expkappa = exp(-kappa*fij);//0
809  float Dij = epsilon_p_i - expkappa*epsilon_s_i;
810  float gbEij = qi*params->p[0][i].charge*Dij/fij;
811  gbisParams->gbSelfEnergy += 0.5*gbEij;//self energy
812  }
813  }// end outer i
814  for (int s = 0; s < strideIg; s++) {
815  ngi+=params->p[0][ngi].nonbondedGroupSize;
816  }//end i
817  }//end ig
818  }//end c
819 #ifdef BENCHMARK
820  double t2 = 1.0*clock()/CLOCKS_PER_SEC;
821  //double flops = 1.0 * nops / (t2 - t1);
822  //double gflops = flops / 1000000000.0;
823 
824  CkPrintf("PHASE2.0: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
825 #endif
826 
827 /***********************************************************
828 * GBIS Phase 3
829 ***********************************************************/
830 } else if (gbisParams->gbisPhase == 3 && gbisParams->doFullElectrostatics) {
831 
832 #ifdef BENCHMARK
833  //int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
834  double t1 = 1.0*clock()/CLOCKS_PER_SEC;
835  double t2;
836  int nops = 0;
837 #endif
838  //CkPrintf("GBIS(%3i)[%2i]::P3 %3i(%3i) %3i(%3i)\n",gbisParams->sequence,gbisParams->cid, gbisParams->patchID[0],params->numAtoms[0],gbisParams->patchID[1],params->numAtoms[1]);
839 
840  register BigReal dx, dy, dz;
841  register float r2;
842  register float r, r_i;
843  register float rhoi0, rhois;
844  float rhojs, rhoj0;
845  float fx, fy, fz;
846  float forceAlpha;
847  register float fIx, fIy, fIz;
848 
849  float dhij;
850  float dhji;
851  int dij;
852  int dji;
853  register float dHdrPrefixI;
854  float dHdrPrefixJ;
855  register Position ri;
856  register Position rj;
857  register int c, numPairs, jj, j;
858  register float k;
859  register float da = DA;
860  register float db = DB;
861  register float dc = DC;
862  register float dd = DD;
863  register float de = DE;
864  float r_i3;
865 
866  //piecewise 22
867  c = 0;
868  for (int ngi = minIg; ngi < maxI; ) {
869  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
870  for (int i = ngi; i < ngi+iGroupSize; i++) {
871  ri = params->p[0][i].position;
872  ri.x += offset_x;
873  ri.y += offset_y;
874  ri.z += offset_z;
875  rhois = gbisParams->intRad[0][2*i+1];
876  plint *pairs;
877  gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
878  fIx = fIy = fIz = 0.0;
879  dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
880  for (jj = 0; jj < numPairs; jj++) {
881  j = pairs[jj];
882  rj = params->p[1][j].position;
883 
884  dx = (ri.x - rj.x);
885  dy = (ri.y - rj.y);
886  dz = (ri.z - rj.z);
887  r2 = dx*dx + dy*dy + dz*dz;
888  dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
889 
890  r_i = 1.0/sqrt(r2);//rptI takes 50% of loop time
891  r_i3 = r_i*r_i*r_i;
892 
893  rhojs = gbisParams->intRad[1][2*j+1];
894 
895  k = rhojs*r_i; k*=k;//k=(rs/r)^2
896  dhij = -rhojs*r_i3*k*
897  (da+k*(db+k*(dc+k*(dd+k*de))));
898 
899  k = rhois*r_i; k*=k;//k=(rs/r)^2
900  dhji = -rhois*r_i3*k*
901  (da+k*(db+k*(dc+k*(dd+k*de))));
902 
903  //add dEda*dadr force
904  forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
905  fx = dx * forceAlpha;
906  fy = dy * forceAlpha;
907  fz = dz * forceAlpha;
908 
909  params->fullf[1][j].x -= fx;
910  params->fullf[1][j].y -= fy;
911  params->fullf[1][j].z -= fz;
912 
913  fIx += fx;
914  fIy += fy;
915  fIz += fz;
916 
917 #if 1
918 //#ifdef PRINT_COMP
919  int id1 = params->pExt[0][i].id;
920  int id2 = params->pExt[1][j].id;
921  float h1 = dhij;
922  float h2 = dhji;
923  if (id1 > id2) {
924  int tmp = id1;
925  id1 = id2;
926  id2 = tmp;
927  h1 = dhji;
928  h2 = dhij;
929  }
930 /*CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
931 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
932 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, 2
933 );
934 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
935 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
936 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, 2
937 );*/
938  //CkPrintf("DEDA(%04i)[%04i,%04i] = 22 % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,h1,h2, forceAlpha);
939 #endif
940 
941 #ifdef BENCHMARK
942  nops++;//= (24 + 2*DIV_FLOPS+SQRT_FLOPS);// 8 w/o nops
943 #endif
944 
945  }//end inner j
946  params->fullf[0][i].x += fIx;
947  params->fullf[0][i].y += fIy;
948  params->fullf[0][i].z += fIz;
949  }//end outer i
950  for (int s = 0; s < strideIg; s++) {
951  ngi+=params->p[0][ngi].nonbondedGroupSize;
952  }
953  }
954 
955 #ifdef BENCHMARK
956  t2 = 1.0*clock()/CLOCKS_PER_SEC;
957  CkPrintf("PHASE3.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
958 nops = 0;
959  t1 = 1.0*clock()/CLOCKS_PER_SEC;
960  nops = 0;
961 #endif
962 
963  float a_cut_i = 1.0/a_cut;
964  float a_cut_i2 = a_cut_i*a_cut_i;
965  float a_cut2 = a_cut*a_cut;
966  float rmrs;
967  float rmrsi;
968  float rmrs2;
969  float rhois2, rhojs2;
970  float logri, logrj;
971  float r_i2;
972 
973  //piecewise 11
974  c = 1;
975  for (int ngi = minIg; ngi < maxI; ) {
976  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
977  for (int i = ngi; i < ngi+iGroupSize; i++) {
978  ri = params->p[0][i].position;
979  ri.x += offset_x;
980  ri.y += offset_y;
981  ri.z += offset_z;
982  rhois = gbisParams->intRad[0][2*i+1];
983  rhois2 = rhois*rhois;
984  plint *pairs;
985  gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
986  fIx = fIy = fIz = 0.0;
987  dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
988  for (jj = 0; jj < numPairs; jj++) {
989  j = pairs[jj];
990  rj = params->p[1][j].position;
991  dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
992 
993  dx = (ri.x - rj.x);
994  dy = (ri.y - rj.y);
995  dz = (ri.z - rj.z);
996  r2 = dx*dx + dy*dy + dz*dz;
997  r_i = 1.0/sqrt(r2);//rptI
998  r = r2* r_i;
999  r_i2 = r_i*r_i;
1000 
1001  rhojs = gbisParams->intRad[1][2*j+1];
1002  rhojs2 = rhojs*rhojs;
1003 
1004 
1005  rmrs = r-rhojs;// 4 times
1006  rmrsi = 1.0/rmrs;
1007  rmrs2 = rmrs*rmrs;
1008  logrj = log(rmrs*a_cut_i);
1009  dhij = r_i2*(-0.25*logrj - (a_cut2 - rmrs2)*(rhojs2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
1010 
1011 
1012  rmrs = r-rhois;// 4 times
1013  rmrsi = 1.0/rmrs;
1014  rmrs2 = rmrs*rmrs;
1015  logri = log(rmrs*a_cut_i);
1016  dhji = r_i2*(-0.25*logri - (a_cut2 - rmrs2)*(rhois2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
1017 
1018  //add dEda*dadr force
1019  forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
1020  fx = dx * forceAlpha;
1021  fy = dy * forceAlpha;
1022  fz = dz * forceAlpha;
1023 
1024  params->fullf[1][j].x -= fx;
1025  params->fullf[1][j].y -= fy;
1026  params->fullf[1][j].z -= fz;
1027 
1028  fIx += fx;
1029  fIy += fy;
1030  fIz += fz;
1031 
1032 #if 1
1033 //#ifdef PRINT_COMP
1034  int id1 = params->pExt[0][i].id;
1035  int id2 = params->pExt[1][j].id;
1036  float h1 = dhij;
1037  float h2 = dhji;
1038  if (id1 > id2) {
1039  int tmp = id1;
1040  id1 = id2;
1041  id2 = tmp;
1042  h1 = dhji;
1043  h2 = dhij;
1044  }
1045  //CkPrintf("DEDA(%04i)[%04i,%04i] = 11 % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,h1,h2, forceAlpha);
1046 /*CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
1047 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
1048 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, 1
1049 );
1050 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
1051 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
1052 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, 1
1053 );*/
1054 #endif
1055 
1056 #ifdef BENCHMARK
1057  nops++;
1058 #endif
1059 
1060  }//end inner j
1061  params->fullf[0][i].x += fIx;
1062  params->fullf[0][i].y += fIy;
1063  params->fullf[0][i].z += fIz;
1064  }//end outer i
1065  for (int s = 0; s < strideIg; s++) {
1066  ngi+=params->p[0][ngi].nonbondedGroupSize;
1067  }
1068  }
1069 
1070 #ifdef BENCHMARK
1071  t2 = 1.0*clock()/CLOCKS_PER_SEC;
1072  CkPrintf("PHASE3.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
1073 nops = 0;
1074  t1 = 1.0*clock()/CLOCKS_PER_SEC;
1075  nops = 0;
1076 #endif
1077 
1078  //piecewise all others
1079  c = 2;
1080  for (int ngi = minIg; ngi < maxI; ) {
1081  int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
1082  for (int i = ngi; i < ngi+iGroupSize; i++) {
1083  ri = params->p[0][i].position;
1084  ri.x += offset_x;
1085  ri.y += offset_y;
1086  ri.z += offset_z;
1087  rhoi0 = gbisParams->intRad[0][2*i+0];
1088  rhois = gbisParams->intRad[0][2*i+1];
1089  plint *pairs;
1090  gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
1091  fIx = fIy = fIz = 0.0;
1092  dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
1093  for (jj = 0; jj < numPairs; jj++) {
1094  j = pairs[jj];
1095  rj = params->p[1][j].position;
1096  dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
1097 
1098  dx = (ri.x - rj.x);
1099  dy = (ri.y - rj.y);
1100  dz = (ri.z - rj.z);
1101  r2 = dx*dx + dy*dy + dz*dz;
1102 
1103  r_i = 1.0/sqrt(r2);//rptI
1104  r = r2* r_i;
1105 
1106  rhojs = gbisParams->intRad[1][2*j+1];
1107  rhoj0 = gbisParams->intRad[1][2*j+0];
1108 
1109  CalcDHPair(r,r2,r_i,a_cut,
1110  rhoi0,rhojs,
1111  rhoj0,rhois,
1112  dij,dji,dhij,dhji);
1113 
1114  //add dEda*dadr force
1115  forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji); // *scaling ?
1116  fx = dx * forceAlpha;
1117  fy = dy * forceAlpha;
1118  fz = dz * forceAlpha;
1119 
1120  fIx += fx;
1121  fIy += fy;
1122  fIz += fz;
1123 
1124  params->fullf[1][j].x -= fx;
1125  params->fullf[1][j].y -= fy;
1126  params->fullf[1][j].z -= fz;
1127 
1128 #if 1
1129 //#ifdef PRINT_COMP
1130  int id1 = params->pExt[0][i].id;
1131  int id2 = params->pExt[1][j].id;
1132  int d1 = dij;
1133  int d2 = dji;
1134  float h1 = dhij;
1135  float h2 = dhji;
1136  if (id1 > id2) {
1137  int tmp = id1;
1138  id1 = id2;
1139  id2 = tmp;
1140  d1 = dji;
1141  d2 = dij;
1142  h1 = dhji;
1143  h2 = dhij;
1144  }
1145 // CkPrintf("DEDA(%04i)[%04i,%04i] = %i%i % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1,h2, forceAlpha);
1146 // CkPrintf("DEDA(%04i)[%04i,%04i] = %i%i % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1,h2, forceAlpha);
1147 /*if ( dij > 0 ) {
1148 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
1149 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
1150 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, dij
1151 );
1152 }
1153 if ( dji > 0 ) {
1154 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
1155 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
1156 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, dji
1157 );
1158 }*/
1159 #endif
1160 
1161 #ifdef BENCHMARK
1162  nops++;
1163 #endif
1164 
1165  }//end inner j
1166  params->fullf[0][i].x += fIx;
1167  params->fullf[0][i].y += fIy;
1168  params->fullf[0][i].z += fIz;
1169  }//end outer i
1170  for (int s = 0; s < strideIg; s++) {
1171  ngi+=params->p[0][ngi].nonbondedGroupSize;
1172  }
1173  }
1174 
1175 
1176 #ifdef BENCHMARK
1177  t2 = 1.0*clock()/CLOCKS_PER_SEC;
1178  CkPrintf("PHASE3.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
1179 #endif
1180 
1181 }//end if gbisPhase
1182 
1183 }//end calcGBIS
#define TE
Definition: ComputeGBIS.inl:36
#define TD
Definition: ComputeGBIS.inl:35
#define TA
Definition: ComputeGBIS.inl:32
CompAtom * p[2]
void calcGBIS(nonbonded *params, GBISParamStruct *gbisParams)
Definition: ComputeGBIS.C:261
Definition: Vector.h:64
#define COULOMB
Definition: common.h:46
BigReal z
Definition: Vector.h:66
Position position
Definition: NamdTypes.h:53
static void h2(float r, float r2, float ri, float rc, float r0, float rs, float &h)
#define TC
Definition: ComputeGBIS.inl:34
#define DA
Definition: ComputeGBIS.inl:37
#define DD
Definition: ComputeGBIS.inl:40
Charge charge
Definition: NamdTypes.h:54
static void CalcDHPair(float r, float r2, float ri, float rc, float ri0, float rjs, float rj0, float ris, int &dij, int &dji, float &dhij, float &dhji)
unsigned short plint
void nextlist(plint **list, int *list_size)
static void h1(float r, float r2, float ri, float rc, float r0, float rs, float &h)
Pairlists * gbisStepPairlists[4]
#define DB
Definition: ComputeGBIS.inl:38
#define DC
Definition: ComputeGBIS.inl:39
CompAtomExt * pExt[2]
#define ZERO
Definition: common.h:95
BigReal x
Definition: Vector.h:66
#define FS_MAX
Definition: ComputeGBIS.inl:24
static void CalcHPair(float r, float r2, float ri, float rc, float ri0, float rjs, float rj0, float ris, int &dij, int &dji, float &hij, float &hji)
BigReal y
Definition: Vector.h:66
void newsize(int list_size)
unsigned int nonbondedGroupSize
Definition: NamdTypes.h:57
#define TB
Definition: ComputeGBIS.inl:33
Force * fullf[2]
#define DE
Definition: ComputeGBIS.inl:41
double BigReal
Definition: common.h:114
float GBReal
Definition: ComputeGBIS.inl:17
void pairlistFromAll(nonbonded *params, GBISParamStruct *gbisParams, int minIg, int strideIg, int maxI)
Definition: ComputeGBIS.C:44