00001
00008
00009 #define CHECK_PRIORITIES 0
00010
00011
00012
00013 #ifdef BENCHMARK
00014 #define SQRT_FLOPS 15
00015 #define LOG_FLOPS 20
00016 #define EXP_FLOPS 20
00017 #define DIV_FLOPS 1
00018 #endif
00019
00020 #include "ComputeNonbondedUtil.h"
00021 #include "ComputeGBIS.inl"
00022 #include "time.h"
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 inline void pairlistFromAll(
00045 nonbonded *params,
00046 GBISParamStruct *gbisParams,
00047 int minIg,
00048 int strideIg,
00049 int maxI
00050 ) {
00051 const BigReal offset_x = params->offset.x;
00052 const BigReal offset_y = params->offset.y;
00053 const BigReal offset_z = params->offset.z;
00054
00055 int unique = (gbisParams->numPatches == 1) ? 1 : 0;
00056
00057 int maxPairs = params->numAtoms[1];
00058 int seq = gbisParams->sequence;
00059 int cid = gbisParams->cid;
00060 int pe = CkMyPe();
00061 int gbisPhase = gbisParams->gbisPhase;
00062
00063
00064 float fsMax = FS_MAX;
00065 float a_cut = gbisParams->a_cut - fsMax;
00066 float a_cut_ps2 = (a_cut+fsMax)*(a_cut+fsMax);
00067 float r_cut = gbisParams->cutoff;
00068 float a_cut2 = a_cut*a_cut;
00069 float r_cut2 = r_cut*r_cut;
00070 int s = 0;
00071 int l = 1;
00072 #ifdef BENCHMARK
00073 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00074 int nops = 0;
00075 #endif
00076 Position ri, rj;
00077 Position ngri, ngrj;
00078 float r, dr, r2;
00079 float rhoi0, rhois, rhoj0, rhojs, rhois2, rhojs2, rhois2_16;
00080 float ami2, amj2, api2, apj2;
00081 int numGBISPairlists = 4;
00082
00083 float max_gcut2 = r_cut;
00084 if (a_cut+fsMax > r_cut)
00085 max_gcut2 = a_cut+fsMax;
00086 max_gcut2 += 2.0*gbisParams->maxGroupRadius;
00087
00088 max_gcut2 *= max_gcut2;
00089
00090 int maxGroupPairs = params->numAtoms[1];
00091 short *groupPairs = new short[maxGroupPairs];
00092
00093
00094 for (int ngi = minIg; ngi < maxI; ) {
00095 int numGroupPairs = 0;
00096 ngri = params->p[0][ngi].position;
00097 ngri.x += offset_x;
00098 ngri.y += offset_y;
00099 ngri.z += offset_z;
00100
00101
00102 for (int ngj = unique*(ngi+params->p[0][ngi].nonbondedGroupSize); ngj < params->numAtoms[1]; ngj+=params->p[1][ngj].nonbondedGroupSize) {
00103
00104 ngrj = params->p[1][ngj].position;
00105 dr = ngri.x - ngrj.x;
00106 r2 = dr*dr;
00107 dr = ngri.y - ngrj.y;
00108 r2 += dr*dr;
00109 dr = ngri.z - ngrj.z;
00110 r2 += dr*dr;
00111 if (r2 < max_gcut2) {
00112 groupPairs[numGroupPairs++] = ngj;
00113 }
00114 }
00115
00116
00117 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00118 for (int i=ngi; i<ngi+iGroupSize; i++) {
00119
00120
00121 int *size = new int[numGBISPairlists];
00122 plint **pairs = new plint*[numGBISPairlists];
00123 for (int k = 0; k < numGBISPairlists; k++) {
00124 size[k] = 0;
00125 pairs[k] = gbisParams->gbisStepPairlists[k]->
00126 newlist(maxPairs);
00127 }
00128
00129
00130 ri = params->p[0][i].position;
00131 ri.x += offset_x;
00132 ri.y += offset_y;
00133 ri.z += offset_z;
00134 rhois = gbisParams->intRad[0][2*i+1];
00135 api2 = a_cut + rhois;
00136 api2 *= api2;
00137 ami2 = a_cut - rhois;
00138 ami2 *= ami2;
00139 rhois2 = rhois*rhois;
00140 rhois2_16 = 16.0*rhois2;
00141
00142
00143 if (unique)
00144 for (int j = i+1; j < ngi+iGroupSize; j++) {
00145
00146 #ifdef BENCHMARK
00147 nops ++;
00148 #endif
00149 rj = params->p[1][j].position;
00150 dr = ri.x - rj.x;
00151 r2 = dr*dr;
00152 dr = ri.y - rj.y;
00153 r2 += dr*dr;
00154 dr = ri.z - rj.z;
00155 r2 += dr*dr;
00156
00157 rhojs = gbisParams->intRad[1][2*j+1];
00158 rhojs2 = rhojs*rhojs;
00159
00160 amj2 = a_cut - rhojs;
00161 amj2 *= amj2;
00162
00163 apj2 = a_cut + rhojs;
00164 apj2 *= apj2;
00165
00166
00167 if ( r2 < ami2 && r2 < amj2 &&
00168 r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
00169
00170 pairs[0][size[0]++] = j;
00171 } else if ( r2 < api2 && r2 < apj2 &&
00172 r2 > ami2 && r2 > amj2 ) {
00173
00174 pairs[1][size[1]++] = j;
00175 } else if ( r2 < a_cut_ps2 ) {
00176
00177 pairs[2][size[2]++] = j;
00178 } else if ( r2 < r_cut2 ) {
00179
00180 pairs[3][size[3]++] = j;
00181 }
00182 }
00183
00184
00185 for (int g = 0; g < numGroupPairs; g++) {
00186 int ngj = groupPairs[g];
00187 int jGroupSize = params->p[1][ngj].nonbondedGroupSize;
00188
00189 int maxJ = ngj+jGroupSize;
00190 for (int j = ngj; j < maxJ; j++) {
00191
00192
00193 #ifdef BENCHMARK
00194 nops ++;
00195 #endif
00196 rj = params->p[1][j].position;
00197 dr = ri.x - rj.x;
00198 r2 = dr*dr;
00199 dr = ri.y - rj.y;
00200 r2 += dr*dr;
00201 dr = ri.z - rj.z;
00202 r2 += dr*dr;
00203
00204
00205 rhojs = gbisParams->intRad[1][2*j+1];
00206 rhojs2 = rhojs*rhojs;
00207
00208 amj2 = a_cut - rhojs;
00209 amj2 *= amj2;
00210
00211 apj2 = a_cut + rhojs;
00212 apj2 *= apj2;
00213
00214 if ( r2 < ami2 && r2 < amj2 &&
00215 r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
00216
00217
00218 pairs[0][size[0]++] = j;
00219 } else if ( r2 < api2 && r2 < apj2 &&
00220 r2 > ami2 && r2 > amj2 ) {
00221
00222
00223 pairs[1][size[1]++] = j;
00224 } else if ( r2 < a_cut_ps2 ) {
00225
00226
00227 pairs[2][size[2]++] = j;
00228 } else if ( r2 < r_cut2 ) {
00229
00230
00231 pairs[3][size[3]++] = j;
00232 }
00233 }
00234 }
00235 for (int k = 0; k < numGBISPairlists; k++) {
00236 gbisParams->gbisStepPairlists[k]->newsize(size[k]);
00237 }
00238 delete[] size;
00239 delete[] pairs;
00240 }
00241
00242
00243
00244 for (int s = 0; s < strideIg; s++) {
00245 ngi+=params->p[0][ngi].nonbondedGroupSize;
00246 }
00247 }
00248 delete[] groupPairs;
00249 for (int k = 0; k < numGBISPairlists; k++)
00250 gbisParams->gbisStepPairlists[k]->reset();
00251
00252 #ifdef BENCHMARK
00253 double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00254 CkPrintf("PHASELST: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00255 #endif
00256 }
00257
00258
00259
00260
00261 void ComputeNonbondedUtil::calcGBIS(nonbonded *params, GBISParamStruct *gbisParams) {
00262
00263 #if CHECK_PRIORITIES
00264 CkPrintf("PE%i, S%09i, P%i\n",CkMyPe(),gbisParams->sequence,gbisParams->gbisPhase);
00265 #endif
00266 if (params->numAtoms[0] == 0 || params->numAtoms[1] == 0) return;
00267
00268 const BigReal offset_x = params->offset.x;
00269 const BigReal offset_y = params->offset.y;
00270 const BigReal offset_z = params->offset.z;
00271
00272 int partSize = params->numAtoms[0] / params->numParts;
00273 int minIg = 0;
00274 for (int s = 0; s < params->minPart; s++) {
00275 minIg+=params->p[0][minIg].nonbondedGroupSize;
00276 }
00277 int maxI = params->numAtoms[0];
00278 int strideIg = params->numParts;
00279
00280 int unique = (gbisParams->numPatches == 1) ? 1 : 0;
00281 int numGBISPairlists = 4;
00282 float r_cut = gbisParams->cutoff;
00283 float fsMax = FS_MAX;
00284 float a_cut = gbisParams->a_cut-fsMax;
00285 float a_cut2 = a_cut*a_cut;
00286 float a_cut_ps = a_cut + fsMax;
00287 float r_cut2 = r_cut*r_cut;
00288 float a_cut_ps2 = a_cut_ps*a_cut_ps;
00289
00290 for (int k = 0; k < numGBISPairlists; k++)
00291 gbisParams->gbisStepPairlists[k]->reset();
00292
00293
00294
00295
00296
00297 if (gbisParams->gbisPhase == 1) {
00298
00299
00300 pairlistFromAll(params,gbisParams,minIg,strideIg,maxI);
00301
00302 #ifdef BENCHMARK
00303 int nops = 0;
00304 int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
00305 int numDom = 7;
00306 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00307 #endif
00308 register GBReal psiI;
00309
00310 register float dr;
00311 register float r2;
00312 register float r, r_i, r2_i;
00313 float rhoi0, rhois, rhojs, rhoj0;
00314 Position ri, rj;
00315 register int j;
00316 int numPairs;
00317 register float ta = TA;
00318 register float tb = TB;
00319 register float tc = TC;
00320 register float td = TD;
00321 register float te = TE;
00322
00323 register float hij,hji;
00324 int dij,dji;
00325 float k;
00326 float rhois2, rhojs2;
00327
00328
00329 int c = 0;
00330 for (int ngi = minIg; ngi < maxI; ) {
00331 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00332 for (int i = ngi; i < ngi+iGroupSize; i++) {
00333 ri = params->p[0][i].position;
00334 ri.x += offset_x;
00335 ri.y += offset_y;
00336 ri.z += offset_z;
00337 rhoi0 = gbisParams->intRad[0][2*i+0];
00338 rhois = gbisParams->intRad[0][2*i+1];
00339 rhois2 = rhois*rhois;
00340 psiI = ZERO;
00341 numPairs;
00342 plint *pairs;
00343 gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00344 for (register int jj = 0; jj < numPairs; jj++) {
00345 #ifdef BENCHMARK
00346 nops++;
00347 #endif
00348 j = pairs[jj];
00349 rj = params->p[1][j].position;
00350
00351 dr = (ri.x - rj.x);
00352 r2 = dr*dr;
00353 dr = (ri.y - rj.y);
00354 r2 += dr*dr;
00355 dr = (ri.z - rj.z);
00356 r2 += dr*dr;
00357 r2_i = 1.0/r2;
00358
00359 rhoj0 = gbisParams->intRad[1][2*j+0];
00360 rhojs = gbisParams->intRad[1][2*j+1];
00361 rhojs2 = rhojs*rhojs;
00362
00363 k = rhojs2*r2_i;
00364 hij = rhojs*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
00365
00366 k = rhois2*r2_i;
00367 hji = rhois*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
00368
00369
00370 #ifdef PRINT_COMP
00371 int id1 = params->pExt[0][i].id;
00372 int id2 = params->pExt[1][j].id;
00373 float h1 = hij;
00374 float h2 = hji;
00375 float r10 = rhoi0;
00376 float r1s = rhojs;
00377 float r20 = rhoj0;
00378 float r2s = rhois;
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388 if (id1 > id2) {
00389 int tmp = id1;
00390 id1 = id2;
00391 id2 = tmp;
00392 h1 = hji;
00393 h2 = hij;
00394 r20 = rhoi0;
00395 r2s = rhojs;
00396 r10 = rhoj0;
00397 r1s = rhois;
00398 }
00399
00400
00401
00402 #endif
00403
00404 psiI += hij;
00405 gbisParams->psiSum[1][j] += hji;
00406 }
00407 gbisParams->psiSum[0][i] += psiI;
00408 }
00409 for (int s = 0; s < strideIg; s++) {
00410 ngi+=params->p[0][ngi].nonbondedGroupSize;
00411 }
00412 }
00413 #ifdef BENCHMARK
00414 double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00415
00416
00417
00418 CkPrintf("PHASE1.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00419 nops = 0;
00420 t1 = 1.0*clock()/CLOCKS_PER_SEC;
00421 nops = 0;
00422 #endif
00423
00424 float rmris, rmrjs;
00425 float rmrsi;
00426 float rmrs2;
00427 float rs2;
00428 float logri, logrj;
00429 float rci2;
00430 float a_cut_i = 1.0 / a_cut;
00431 float a_cut_i2 = a_cut_i*a_cut_i;
00432
00433
00434 c = 1;
00435 for (int ngi = minIg; ngi < maxI; ) {
00436 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00437 for (int i = ngi; i < ngi+iGroupSize; i++) {
00438 ri = params->p[0][i].position;
00439 ri.x += offset_x;
00440 ri.y += offset_y;
00441 ri.z += offset_z;
00442 rhoi0 = gbisParams->intRad[0][2*i+0];
00443 rhois = gbisParams->intRad[0][2*i+1];
00444 psiI = ZERO;
00445 numPairs;
00446 plint *pairs;
00447 gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00448 for (register int jj = 0; jj < numPairs; jj++) {
00449 j = pairs[jj];
00450 rj = params->p[1][j].position;
00451
00452 dr = (ri.x - rj.x);
00453 r2 = dr*dr;
00454 dr = (ri.y - rj.y);
00455 r2 += dr*dr;
00456 dr = (ri.z - rj.z);
00457 r2 += dr*dr;
00458 r_i = 1.0/sqrt(r2);
00459 r = r2*r_i;
00460
00461 rhoj0 = gbisParams->intRad[1][2*j+0];
00462 rhojs = gbisParams->intRad[1][2*j+1];
00463
00464 float tmp1 = 0.125*r_i;
00465 float tmp2 = r2 - 4.0*a_cut*r;
00466 float rr = 2.0*r;
00467
00468
00469 rmrjs = r-rhojs;
00470 rmris = r-rhois;
00471 logri = log(rmris*a_cut_i);
00472 logrj = log(rmrjs*a_cut_i);
00473
00474 rmrsi = 1.0/rmrjs;
00475
00476 rs2 = rhojs*rhojs;
00477 hij = tmp1*(1 + rr*rmrsi +
00478 a_cut_i2*(tmp2 - rs2) + logrj+logrj);
00479
00480
00481 rmrsi = 1.0/rmris;
00482
00483 rs2 = rhois*rhois;
00484 hji = tmp1*(1 + rr*rmrsi +
00485 a_cut_i2*(tmp2 - rs2) + 2.0* logri);
00486
00487 #ifdef PRINT_COMP
00488 int id1 = params->pExt[0][i].id;
00489 int id2 = params->pExt[1][j].id;
00490 float h1 = hij;
00491 float h2 = hji;
00492 float r10 = rhoi0;
00493 float r1s = rhojs;
00494 float r20 = rhoj0;
00495 float r2s = rhois;
00496
00497
00498
00499
00500
00501
00502
00503
00504 if (id1 > id2) {
00505 int tmp = id1;
00506 id1 = id2;
00507 id2 = tmp;
00508 h1 = hji;
00509 h2 = hij;
00510 r20 = rhoi0;
00511 r2s = rhojs;
00512 r10 = rhoj0;
00513 r1s = rhois;
00514 }
00515
00516
00517
00518
00519 #endif
00520
00521 #ifdef BENCHMARK
00522 nops++;
00523 #endif
00524
00525 psiI += hij;
00526 gbisParams->psiSum[1][j] += hji;
00527 }
00528 gbisParams->psiSum[0][i] += psiI;
00529 }
00530 for (int s = 0; s < strideIg; s++) {
00531 ngi+=params->p[0][ngi].nonbondedGroupSize;
00532 }
00533 }
00534 #ifdef BENCHMARK
00535 t2 = 1.0*clock()/CLOCKS_PER_SEC;
00536 CkPrintf("PHASE1.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00537 nops = 0;
00538 t1 = 1.0*clock()/CLOCKS_PER_SEC;
00539 nops = 0;
00540 #endif
00541
00542
00543 c = 2;
00544 for (int ngi = minIg; ngi < maxI; ) {
00545 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00546 for (int i = ngi; i < ngi+iGroupSize; i++) {
00547 ri = params->p[0][i].position;
00548 ri.x += offset_x;
00549 ri.y += offset_y;
00550 ri.z += offset_z;
00551 rhoi0 = gbisParams->intRad[0][2*i+0];
00552 rhois = gbisParams->intRad[0][2*i+1];
00553 psiI = ZERO;
00554 numPairs;
00555 plint *pairs;
00556 gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00557 for (register int jj = 0; jj < numPairs; jj++) {
00558 j = pairs[jj];
00559 rj = params->p[1][j].position;
00560
00561 dr = (ri.x - rj.x);
00562 r2 = dr*dr;
00563 dr = (ri.y - rj.y);
00564 r2 += dr*dr;
00565 dr = (ri.z - rj.z);
00566 r2 += dr*dr;
00567 rhojs = gbisParams->intRad[1][2*j+1];
00568 rhoj0 = gbisParams->intRad[1][2*j+0];
00569 r_i = 1.0/sqrt(r2);
00570 r = r2 * r_i;
00571
00572 CalcHPair(r,r2,r_i,a_cut,
00573 rhoi0, rhojs,
00574 rhoj0, rhois,
00575 dij,dji,hij,hji);
00576
00577 #ifdef PRINT_COMP
00578 int id1 = params->pExt[0][i].id;
00579 int id2 = params->pExt[1][j].id;
00580 float h1 = hij;
00581 float h2 = hji;
00582 int d1 = dij;
00583 int d2 = dji;
00584 float r10 = rhoi0;
00585 float r1s = rhojs;
00586 float r20 = rhoj0;
00587 float r2s = rhois;
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601 if (id1 > id2) {
00602 int tmp = id1;
00603 id1 = id2;
00604 id2 = tmp;
00605 h1 = hji;
00606 h2 = hij;
00607 d1 = dji;
00608 d2 = dij;
00609 r20 = rhoi0;
00610 r2s = rhojs;
00611 r10 = rhoj0;
00612 r1s = rhois;
00613 }
00614
00615
00616
00617 #endif
00618
00619 #ifdef BENCHMARK
00620 nops++;
00621 #endif
00622 psiI += hij;
00623 gbisParams->psiSum[1][j] += hji;
00624 }
00625 gbisParams->psiSum[0][i] += psiI;
00626 }
00627 for (int s = 0; s < strideIg; s++) {
00628 ngi+=params->p[0][ngi].nonbondedGroupSize;
00629 }
00630 }
00631
00632 #ifdef BENCHMARK
00633 t2 = 1.0*clock()/CLOCKS_PER_SEC;
00634 CkPrintf("PHASE1.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00635 #endif
00636
00637
00638
00639
00640 } else if (gbisParams->gbisPhase == 2) {
00641
00642 float epsilon_s = gbisParams->epsilon_s;
00643 float epsilon_p = gbisParams->epsilon_p;
00644 float epsilon_s_i = 1/epsilon_s;
00645 float epsilon_p_i = 1/epsilon_p;
00646 float kappa = gbisParams->kappa;
00647
00648
00649 float r_cut_2 = 1.0 / r_cut2;
00650 float r_cut_4 = 4.0*r_cut_2*r_cut_2;
00651 float coulEij=0,ddrCoulEij=0,gbEij=0,ddrGbEij=0;
00652 float dEdai=0,dEdaj=0, qiqj=0;
00653 float scale=0, ddrScale=0;
00654 float rnx=0,rny=0,rnz=0;
00655 float fx=0,fy=0,fz=0,forceCoul=0, forcedEdr=0;
00656
00657 int nops = 0;
00658 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00659 float r2;
00660 float dr;
00661 BigReal dx, dy, dz;
00662 float r, r_i, ratio;
00663 float fIx, fIy, fIz;
00664 GBReal dEdaSumI;
00665 float bornRadI, bornRadJ;
00666 float qi;
00667 Position ri, rj;
00668 float aiaj,expr2aiaj4,fij,f_i,expkappa,Dij;
00669 float aiaj4,ddrDij,ddrf_i,ddrfij,tmp_dEda;
00670
00671 for (int c = 0; c < 4; c++) {
00672 for (int ngi = minIg; ngi < maxI; ) {
00673 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00674 for (int i = ngi; i < ngi+iGroupSize; i++) {
00675 ri = params->p[0][i].position;
00676 ri.x += offset_x;
00677 ri.y += offset_y;
00678 ri.z += offset_z;
00679 qi = - COULOMB * params->p[0][i].charge * scaling;
00680
00681 int numPairs;
00682 plint *pairs;
00683 gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00684 fIx = fIy = fIz = 0.0;
00685 dEdaSumI = 0.0;
00686 bornRadI = gbisParams->bornRad[0][i];
00687 for (int jj = 0; jj < numPairs; jj++) {
00688 int j = pairs[jj];
00689 rj = params->p[1][j].position;
00690
00691
00692 dx = (ri.x - rj.x);
00693 dy = (ri.y - rj.y);
00694 dz = (ri.z - rj.z);
00695 r2 = dx*dx + dy*dy + dz*dz;
00696 if (r2 > r_cut2) continue;
00697 qiqj = qi*params->p[1][j].charge;
00698 bornRadJ = gbisParams->bornRad[1][j];
00699 r_i = 1.0/sqrt(r2);
00700 r = r2 * r_i;
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712 aiaj = bornRadI*bornRadJ;
00713 aiaj4 = 4*aiaj;
00714 expr2aiaj4 = exp(-r2/aiaj4);
00715 fij = sqrt(r2+aiaj*expr2aiaj4);
00716 f_i = 1/fij;
00717 expkappa = (kappa > 0) ? exp(-kappa*fij) : 1.0;
00718 Dij = epsilon_p_i - expkappa*epsilon_s_i;
00719 gbEij = qiqj*Dij*f_i;
00720
00721
00722 ddrfij = r*f_i*(1 - 0.25*expr2aiaj4);
00723 ddrf_i = -ddrfij*f_i*f_i;
00724 ddrDij = kappa*expkappa*ddrfij*epsilon_s_i;
00725 ddrGbEij = qiqj*(ddrDij*f_i+Dij*ddrf_i);
00726
00727
00728
00729 scale = 1;
00730 ddrScale = 0;
00731 if (gbisParams->doSmoothing) {
00732 scale = r2 * r_cut_2 - 1;
00733 scale *= scale;
00734 ddrScale = r*(r2-r_cut2)*r_cut_4;
00735
00736 gbisParams->gbInterEnergy += gbEij * scale;
00737 forcedEdr = -(ddrGbEij)*scale-(gbEij)*ddrScale;
00738 } else {
00739 gbisParams->gbInterEnergy += gbEij;
00740 forcedEdr = -ddrGbEij;
00741 }
00742
00743
00744 if (gbisParams->doFullElectrostatics) {
00745
00746 tmp_dEda = 0.5*qiqj*f_i*f_i
00747 *(kappa*epsilon_s_i*expkappa-Dij*f_i)
00748 *(aiaj+0.25*r2)*expr2aiaj4;
00749 dEdai = tmp_dEda/bornRadI;
00750 dEdaj = tmp_dEda/bornRadJ;
00751 dEdaSumI += dEdai*scale;
00752 gbisParams->dEdaSum[1][j] += dEdaj*scale;
00753 }
00754
00755 #if 1
00756
00757 int id1 = params->pExt[0][i].id;
00758 int id2 = params->pExt[1][j].id;
00759 float deda1 = dEdai;
00760 float deda2 = dEdaj;
00761 float bR1 = bornRadI;
00762 float bR2 = bornRadJ;
00763 if (id1 > id2) {
00764 int tmp = id1;
00765 id1 = id2;
00766 id2 = tmp;
00767 deda1 = dEdaj;
00768 deda2 = dEdai;
00769 bR1 = bornRadJ;
00770 bR2 = bornRadI;
00771 }
00772
00773
00774
00775
00776
00777
00778
00779
00780
00781
00782
00783 #endif
00784
00785 forcedEdr *= r_i;
00786 fx = dx*forcedEdr;
00787 fy = dy*forcedEdr;
00788 fz = dz*forcedEdr;
00789
00790 params->ff[1][j].x -= fx;
00791 params->ff[1][j].y -= fy;
00792 params->ff[1][j].z -= fz;
00793
00794 fIx += fx;
00795 fIy += fy;
00796 fIz += fz;
00797
00798 #ifdef BENCHMARK
00799 nops ++;
00800 #endif
00801
00802 }
00803 gbisParams->dEdaSum[0][i] += dEdaSumI;
00804 params->ff[0][i].x += fIx;
00805 params->ff[0][i].y += fIy;
00806 params->ff[0][i].z += fIz;
00807
00808
00809 if (c == 0 && gbisParams->doEnergy && gbisParams->numPatches == 1) {
00810 float fij = bornRadI;
00811 float expkappa = exp(-kappa*fij);
00812 float Dij = epsilon_p_i - expkappa*epsilon_s_i;
00813 float gbEij = qi*params->p[0][i].charge*Dij/fij;
00814 gbisParams->gbSelfEnergy += 0.5*gbEij;
00815 }
00816 }
00817 for (int s = 0; s < strideIg; s++) {
00818 ngi+=params->p[0][ngi].nonbondedGroupSize;
00819 }
00820 }
00821 }
00822 #ifdef BENCHMARK
00823 double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00824
00825
00826
00827 CkPrintf("PHASE2.0: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00828 #endif
00829
00830
00831
00832
00833 } else if (gbisParams->gbisPhase == 3 && gbisParams->doFullElectrostatics) {
00834
00835 #ifdef BENCHMARK
00836
00837 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00838 double t2;
00839 int nops = 0;
00840 #endif
00841
00842
00843 register BigReal dx, dy, dz;
00844 register float r2;
00845 register float r, r_i;
00846 register float rhoi0, rhois;
00847 float rhojs, rhoj0;
00848 float fx, fy, fz;
00849 float forceAlpha;
00850 register float fIx, fIy, fIz;
00851
00852 float dhij;
00853 float dhji;
00854 int dij;
00855 int dji;
00856 register float dHdrPrefixI;
00857 float dHdrPrefixJ;
00858 register Position ri;
00859 register Position rj;
00860 register int c, numPairs, jj, j;
00861 register float k;
00862 register float da = DA;
00863 register float db = DB;
00864 register float dc = DC;
00865 register float dd = DD;
00866 register float de = DE;
00867 float r_i3;
00868
00869
00870 c = 0;
00871 for (int ngi = minIg; ngi < maxI; ) {
00872 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00873 for (int i = ngi; i < ngi+iGroupSize; i++) {
00874 ri = params->p[0][i].position;
00875 ri.x += offset_x;
00876 ri.y += offset_y;
00877 ri.z += offset_z;
00878 rhois = gbisParams->intRad[0][2*i+1];
00879 plint *pairs;
00880 gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00881 fIx = fIy = fIz = 0.0;
00882 dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
00883 for (jj = 0; jj < numPairs; jj++) {
00884 j = pairs[jj];
00885 rj = params->p[1][j].position;
00886
00887 dx = (ri.x - rj.x);
00888 dy = (ri.y - rj.y);
00889 dz = (ri.z - rj.z);
00890 r2 = dx*dx + dy*dy + dz*dz;
00891 dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
00892
00893 r_i = 1.0/sqrt(r2);
00894 r_i3 = r_i*r_i*r_i;
00895
00896 rhojs = gbisParams->intRad[1][2*j+1];
00897
00898 k = rhojs*r_i; k*=k;
00899 dhij = -rhojs*r_i3*k*
00900 (da+k*(db+k*(dc+k*(dd+k*de))));
00901
00902 k = rhois*r_i; k*=k;
00903 dhji = -rhois*r_i3*k*
00904 (da+k*(db+k*(dc+k*(dd+k*de))));
00905
00906
00907 forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
00908 fx = dx * forceAlpha;
00909 fy = dy * forceAlpha;
00910 fz = dz * forceAlpha;
00911
00912 params->fullf[1][j].x -= fx;
00913 params->fullf[1][j].y -= fy;
00914 params->fullf[1][j].z -= fz;
00915
00916 fIx += fx;
00917 fIy += fy;
00918 fIz += fz;
00919
00920 #if 1
00921
00922 int id1 = params->pExt[0][i].id;
00923 int id2 = params->pExt[1][j].id;
00924 float h1 = dhij;
00925 float h2 = dhji;
00926 if (id1 > id2) {
00927 int tmp = id1;
00928 id1 = id2;
00929 id2 = tmp;
00930 h1 = dhji;
00931 h2 = dhij;
00932 }
00933
00934
00935
00936
00937
00938
00939
00940
00941
00942 #endif
00943
00944 #ifdef BENCHMARK
00945 nops++;
00946 #endif
00947
00948 }
00949 params->fullf[0][i].x += fIx;
00950 params->fullf[0][i].y += fIy;
00951 params->fullf[0][i].z += fIz;
00952 }
00953 for (int s = 0; s < strideIg; s++) {
00954 ngi+=params->p[0][ngi].nonbondedGroupSize;
00955 }
00956 }
00957
00958 #ifdef BENCHMARK
00959 t2 = 1.0*clock()/CLOCKS_PER_SEC;
00960 CkPrintf("PHASE3.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00961 nops = 0;
00962 t1 = 1.0*clock()/CLOCKS_PER_SEC;
00963 nops = 0;
00964 #endif
00965
00966 float a_cut_i = 1.0/a_cut;
00967 float a_cut_i2 = a_cut_i*a_cut_i;
00968 float a_cut2 = a_cut*a_cut;
00969 float rmrs;
00970 float rmrsi;
00971 float rmrs2;
00972 float rhois2, rhojs2;
00973 float logri, logrj;
00974 float r_i2;
00975
00976
00977 c = 1;
00978 for (int ngi = minIg; ngi < maxI; ) {
00979 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00980 for (int i = ngi; i < ngi+iGroupSize; i++) {
00981 ri = params->p[0][i].position;
00982 ri.x += offset_x;
00983 ri.y += offset_y;
00984 ri.z += offset_z;
00985 rhois = gbisParams->intRad[0][2*i+1];
00986 rhois2 = rhois*rhois;
00987 plint *pairs;
00988 gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00989 fIx = fIy = fIz = 0.0;
00990 dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
00991 for (jj = 0; jj < numPairs; jj++) {
00992 j = pairs[jj];
00993 rj = params->p[1][j].position;
00994 dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
00995
00996 dx = (ri.x - rj.x);
00997 dy = (ri.y - rj.y);
00998 dz = (ri.z - rj.z);
00999 r2 = dx*dx + dy*dy + dz*dz;
01000 r_i = 1.0/sqrt(r2);
01001 r = r2* r_i;
01002 r_i2 = r_i*r_i;
01003
01004 rhojs = gbisParams->intRad[1][2*j+1];
01005 rhojs2 = rhojs*rhojs;
01006
01007
01008 rmrs = r-rhojs;
01009 rmrsi = 1.0/rmrs;
01010 rmrs2 = rmrs*rmrs;
01011 logrj = log(rmrs*a_cut_i);
01012 dhij = r_i2*(-0.25*logrj - (a_cut2 - rmrs2)*(rhojs2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
01013
01014
01015 rmrs = r-rhois;
01016 rmrsi = 1.0/rmrs;
01017 rmrs2 = rmrs*rmrs;
01018 logri = log(rmrs*a_cut_i);
01019 dhji = r_i2*(-0.25*logri - (a_cut2 - rmrs2)*(rhois2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
01020
01021
01022 forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
01023 fx = dx * forceAlpha;
01024 fy = dy * forceAlpha;
01025 fz = dz * forceAlpha;
01026
01027 params->fullf[1][j].x -= fx;
01028 params->fullf[1][j].y -= fy;
01029 params->fullf[1][j].z -= fz;
01030
01031 fIx += fx;
01032 fIy += fy;
01033 fIz += fz;
01034
01035 #if 1
01036
01037 int id1 = params->pExt[0][i].id;
01038 int id2 = params->pExt[1][j].id;
01039 float h1 = dhij;
01040 float h2 = dhji;
01041 if (id1 > id2) {
01042 int tmp = id1;
01043 id1 = id2;
01044 id2 = tmp;
01045 h1 = dhji;
01046 h2 = dhij;
01047 }
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057 #endif
01058
01059 #ifdef BENCHMARK
01060 nops++;
01061 #endif
01062
01063 }
01064 params->fullf[0][i].x += fIx;
01065 params->fullf[0][i].y += fIy;
01066 params->fullf[0][i].z += fIz;
01067 }
01068 for (int s = 0; s < strideIg; s++) {
01069 ngi+=params->p[0][ngi].nonbondedGroupSize;
01070 }
01071 }
01072
01073 #ifdef BENCHMARK
01074 t2 = 1.0*clock()/CLOCKS_PER_SEC;
01075 CkPrintf("PHASE3.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
01076 nops = 0;
01077 t1 = 1.0*clock()/CLOCKS_PER_SEC;
01078 nops = 0;
01079 #endif
01080
01081
01082 c = 2;
01083 for (int ngi = minIg; ngi < maxI; ) {
01084 int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
01085 for (int i = ngi; i < ngi+iGroupSize; i++) {
01086 ri = params->p[0][i].position;
01087 ri.x += offset_x;
01088 ri.y += offset_y;
01089 ri.z += offset_z;
01090 rhoi0 = gbisParams->intRad[0][2*i+0];
01091 rhois = gbisParams->intRad[0][2*i+1];
01092 plint *pairs;
01093 gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
01094 fIx = fIy = fIz = 0.0;
01095 dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
01096 for (jj = 0; jj < numPairs; jj++) {
01097 j = pairs[jj];
01098 rj = params->p[1][j].position;
01099 dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
01100
01101 dx = (ri.x - rj.x);
01102 dy = (ri.y - rj.y);
01103 dz = (ri.z - rj.z);
01104 r2 = dx*dx + dy*dy + dz*dz;
01105
01106 r_i = 1.0/sqrt(r2);
01107 r = r2* r_i;
01108
01109 rhojs = gbisParams->intRad[1][2*j+1];
01110 rhoj0 = gbisParams->intRad[1][2*j+0];
01111
01112 CalcDHPair(r,r2,r_i,a_cut,
01113 rhoi0,rhojs,
01114 rhoj0,rhois,
01115 dij,dji,dhij,dhji);
01116
01117
01118 forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
01119 fx = dx * forceAlpha;
01120 fy = dy * forceAlpha;
01121 fz = dz * forceAlpha;
01122
01123 fIx += fx;
01124 fIy += fy;
01125 fIz += fz;
01126
01127 params->fullf[1][j].x -= fx;
01128 params->fullf[1][j].y -= fy;
01129 params->fullf[1][j].z -= fz;
01130
01131 #if 1
01132
01133 int id1 = params->pExt[0][i].id;
01134 int id2 = params->pExt[1][j].id;
01135 int d1 = dij;
01136 int d2 = dji;
01137 float h1 = dhij;
01138 float h2 = dhji;
01139 if (id1 > id2) {
01140 int tmp = id1;
01141 id1 = id2;
01142 id2 = tmp;
01143 d1 = dji;
01144 d2 = dij;
01145 h1 = dhji;
01146 h2 = dhij;
01147 }
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162 #endif
01163
01164 #ifdef BENCHMARK
01165 nops++;
01166 #endif
01167
01168 }
01169 params->fullf[0][i].x += fIx;
01170 params->fullf[0][i].y += fIy;
01171 params->fullf[0][i].z += fIz;
01172 }
01173 for (int s = 0; s < strideIg; s++) {
01174 ngi+=params->p[0][ngi].nonbondedGroupSize;
01175 }
01176 }
01177
01178
01179 #ifdef BENCHMARK
01180 t2 = 1.0*clock()/CLOCKS_PER_SEC;
01181 CkPrintf("PHASE3.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
01182 #endif
01183
01184 }
01185
01186 }