ComputeGBIS.C

Go to the documentation of this file.
00001 
00008 //#define BENCHMARK
00009 #define CHECK_PRIORITIES 0
00010 //#define PRINT_COMP
00011 
00012 //approximate flop equivalence of transcendental functs
00013 #ifdef BENCHMARK
00014 #define SQRT_FLOPS 15
00015 #define LOG_FLOPS 20
00016 #define EXP_FLOPS 20
00017 #define DIV_FLOPS 1
00018 #endif
00019 
00020 #include "ComputeNonbondedUtil.h"
00021 #include "ComputeGBIS.inl"
00022 #include "time.h"
00023 
00024 /*******************************************************************************
00025   When GBIS is active, the method calcGBIS is called from a nonbonded
00026   compute object ::doForce() three times each timestep with
00027   gbisPhase = 1, 2, 3.
00028 
00029   At the beginning of the first phase, the pairlists are generated which are
00030   used for all three phases; these pairlists are re-generated every timestep.
00031   4 mutually exclusive pairlists are used, in an effort to accelerate the
00032   iteration over pairs.
00033   pairlist 0: atoms fall within interaction domain 2 (largest group)
00034   pairlist 1: atoms fall within interaction domain 1 (next largest)
00035   pairlist 2: atoms are within the phase 1,3 cutoff (all the rest)
00036   pairlist 3: atoms are within the phase 2 cutoff (generally longer than previous cutoff)
00037 
00038   These calculations generate nonbonded forces (and associated energies) which
00039   are calculated in addition to Coulomb and van der Waals forces.
00040 *******************************************************************************/
00041 
00042 /*Searches all pair of atoms to create
00043 largest pairlist lasting all cycle*/
00044 inline void pairlistFromAll(
00045   nonbonded *params,
00046   GBISParamStruct *gbisParams,
00047   int minIg,
00048   int strideIg,
00049   int maxI
00050 ) {
00051   const BigReal offset_x = params->offset.x;
00052   const BigReal offset_y = params->offset.y;
00053   const BigReal offset_z = params->offset.z;
00054 
00055   int unique = (gbisParams->numPatches == 1) ? 1 : 0;
00056 
00057   int maxPairs = params->numAtoms[1];
00058   int seq = gbisParams->sequence;
00059   int cid = gbisParams->cid;
00060   int pe = CkMyPe();
00061   int gbisPhase = gbisParams->gbisPhase;
00062 
00063   //determine long and short cutoff
00064   float fsMax = FS_MAX; // gbisParams->fsMax;
00065   float a_cut = gbisParams->a_cut - fsMax;
00066   float a_cut_ps2 = (a_cut+fsMax)*(a_cut+fsMax);
00067   float r_cut = gbisParams->cutoff;
00068   float a_cut2 = a_cut*a_cut;
00069   float r_cut2 = r_cut*r_cut;
00070   int s = 0;// 0 is short cutoff list
00071   int l = 1;// 1 is long-short cutoff list
00072 #ifdef BENCHMARK
00073   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00074   int nops = 0;
00075 #endif
00076   Position ri, rj;
00077   Position ngri, ngrj;
00078   float r, dr, r2;
00079   float rhoi0, rhois, rhoj0, rhojs, rhois2, rhojs2, rhois2_16;
00080   float ami2, amj2, api2, apj2;
00081   int numGBISPairlists = 4;
00082 
00083   float max_gcut2  = r_cut;
00084   if (a_cut+fsMax > r_cut)
00085     max_gcut2 = a_cut+fsMax;
00086   max_gcut2 += 2.0*gbisParams->maxGroupRadius;
00087   //CkPrintf("max_cut = %f, %f\n",max_gcut2,gbisParams->maxGroupRadius);
00088   max_gcut2 *= max_gcut2;
00089 
00090   int maxGroupPairs = params->numAtoms[1];
00091   short *groupPairs = new short[maxGroupPairs];/*delete*/
00092 
00093   //foreach nonbonded group i
00094   for (int ngi = minIg; ngi < maxI; /*ngi updated at loop bottom*/ ) {
00095     int numGroupPairs = 0;
00096     ngri = params->p[0][ngi].position;
00097     ngri.x += offset_x;
00098     ngri.y += offset_y;
00099     ngri.z += offset_z;
00100 
00101     //find close j-groups; include i-group
00102     for (int ngj = unique*(ngi+params->p[0][ngi].nonbondedGroupSize); ngj < params->numAtoms[1]; ngj+=params->p[1][ngj].nonbondedGroupSize) {
00103       
00104       ngrj = params->p[1][ngj].position;
00105       dr = ngri.x - ngrj.x;
00106       r2 = dr*dr;
00107       dr = ngri.y - ngrj.y;
00108       r2 += dr*dr;
00109       dr = ngri.z - ngrj.z;
00110       r2 += dr*dr;
00111       if (r2 < max_gcut2) {
00112         groupPairs[numGroupPairs++] = ngj;
00113       }
00114     }
00115 
00116     //for all i in i-group
00117     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00118     for (int i=ngi; i<ngi+iGroupSize; i++) {
00119       //CkPrintf("\tFORALL i=%05i\n",params->pExt[0][i].id);
00120       //extend pairlists
00121       int *size = new int[numGBISPairlists];
00122       plint **pairs = new plint*[numGBISPairlists];
00123       for (int k = 0; k < numGBISPairlists; k++) {
00124         size[k] = 0;
00125         pairs[k] = gbisParams->gbisStepPairlists[k]->
00126                              newlist(maxPairs);
00127       }
00128 
00129       //load i values
00130       ri = params->p[0][i].position;
00131       ri.x += offset_x;
00132       ri.y += offset_y;
00133       ri.z += offset_z;
00134       rhois = gbisParams->intRad[0][2*i+1];
00135       api2 = a_cut + rhois;
00136       api2 *= api2;
00137       ami2 = a_cut - rhois;
00138       ami2 *= ami2;
00139       rhois2 = rhois*rhois;
00140       rhois2_16 = 16.0*rhois2;
00141 
00142       //for all other i's in i-group
00143       if (unique)
00144       for (int j = i+1; j < ngi+iGroupSize; j++) {
00145 
00146 #ifdef BENCHMARK
00147           nops ++;
00148 #endif
00149           rj = params->p[1][j].position;
00150           dr = ri.x - rj.x;
00151           r2 = dr*dr;
00152           dr = ri.y - rj.y;
00153           r2 += dr*dr;
00154           dr = ri.z - rj.z;
00155           r2 += dr*dr;
00156 
00157           rhojs = gbisParams->intRad[1][2*j+1];
00158           rhojs2 = rhojs*rhojs;
00159           //r = sqrt(r2);
00160           amj2 = a_cut - rhojs;
00161           amj2 *= amj2;
00162 
00163           apj2 = a_cut + rhojs;
00164           apj2 *= apj2;
00165           //CkPrintf("IPAIR %5i %5i %5i\n",0*gbisParams->cid,params->pExt[0][i].id,params->pExt[1][j].id);
00166 
00167           if (  r2 < ami2 && r2 < amj2 &&
00168             r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
00169             //belongs to 22
00170             pairs[0][size[0]++] = j;
00171           } else if ( r2 < api2 && r2 < apj2 &&
00172             r2 > ami2 && r2 > amj2 ) {
00173             //belongs to 11
00174             pairs[1][size[1]++] = j;
00175           } else if ( r2 < a_cut_ps2 ) {
00176             //belongs to other a_cut
00177             pairs[2][size[2]++] = j;
00178           } else if ( r2 < r_cut2 ) {
00179             //belongs to r_cut and (r_cut > a_cut)
00180             pairs[3][size[3]++] = j;
00181           }
00182       }
00183 
00184       //foreach j-group
00185       for (int g = 0; g < numGroupPairs; g++) {
00186         int ngj = groupPairs[g];
00187         int jGroupSize = params->p[1][ngj].nonbondedGroupSize;
00188         //for each j in j-group
00189         int maxJ = ngj+jGroupSize;
00190         for (int j = ngj; j < maxJ; j++) {
00191 
00192           //CkPrintf("for j-atom%i\n",j);
00193 #ifdef BENCHMARK
00194           nops ++;
00195 #endif
00196           rj = params->p[1][j].position;
00197           dr = ri.x - rj.x;
00198           r2 = dr*dr;
00199           dr = ri.y - rj.y;
00200           r2 += dr*dr;
00201           dr = ri.z - rj.z;
00202           r2 += dr*dr;
00203           //CkPrintf("PAIR %5i %5i %5i\n",0*gbisParams->cid,params->pExt[0][i].id,params->pExt[1][j].id);
00204 
00205           rhojs = gbisParams->intRad[1][2*j+1];
00206           rhojs2 = rhojs*rhojs;
00207           //r = sqrt(r2);
00208           amj2 = a_cut - rhojs;
00209           amj2 *= amj2;
00210 
00211           apj2 = a_cut + rhojs;
00212           apj2 *= apj2;
00213 
00214           if (  r2 < ami2 && r2 < amj2 &&
00215             r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
00216             //belongs to 22
00217             //CkPrintf("Pair22 %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00218             pairs[0][size[0]++] = j;
00219           } else if ( r2 < api2 && r2 < apj2 &&
00220             r2 > ami2 && r2 > amj2 ) {
00221             //CkPrintf("Pair11 %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00222             //belongs to 11
00223             pairs[1][size[1]++] = j;
00224           } else if ( r2 < a_cut_ps2 ) {
00225             //CkPrintf("PairAA %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00226             //belongs to other a_cut
00227             pairs[2][size[2]++] = j;
00228           } else if ( r2 < r_cut2 ) {
00229             //CkPrintf("PairRR %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00230             //belongs to r_cut and (r_cut > a_cut)
00231             pairs[3][size[3]++] = j;
00232           }
00233         }//end j inner loop
00234       }//end j-group loop
00235     for (int k = 0; k < numGBISPairlists; k++) {
00236       gbisParams->gbisStepPairlists[k]->newsize(size[k]);
00237     }
00238     delete[] size;
00239     delete[] pairs;
00240     }//end i all atom loop
00241 
00242     //jump to next nbg for round-robin
00243     //same iteration patter followed in all three gbisPhases below
00244     for (int s = 0; s < strideIg; s++) {
00245       ngi+=params->p[0][ngi].nonbondedGroupSize;
00246     }
00247   }//end i-group loop
00248     delete[] groupPairs;
00249   for (int k = 0; k < numGBISPairlists; k++)
00250     gbisParams->gbisStepPairlists[k]->reset();
00251 
00252 #ifdef BENCHMARK
00253   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00254   CkPrintf("PHASELST: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00255 #endif
00256 }//end pairlist method
00257 
00258 /*******************************************************************************
00259  * Calculate GBIS 3 Phases
00260 *******************************************************************************/
00261 void ComputeNonbondedUtil::calcGBIS(nonbonded *params, GBISParamStruct *gbisParams) {
00262 //CkPrintf("SEQ%03i, P%i, CID%05i(%02i,%02i) ENTER\n",gbisParams->sequence,gbisParams->gbisPhase,gbisParams->cid,gbisParams->patchID[0],gbisParams->patchID[1]);
00263 #if CHECK_PRIORITIES
00264 CkPrintf("PE%i, S%09i, P%i\n",CkMyPe(),gbisParams->sequence,gbisParams->gbisPhase);
00265 #endif
00266   if (params->numAtoms[0] == 0 || params->numAtoms[1] == 0) return;
00267 
00268   const BigReal offset_x = params->offset.x;
00269   const BigReal offset_y = params->offset.y;
00270   const BigReal offset_z = params->offset.z;
00271 
00272   int partSize = params->numAtoms[0] / params->numParts;
00273   int minIg = 0;
00274   for (int s = 0; s < params->minPart; s++) {
00275     minIg+=params->p[0][minIg].nonbondedGroupSize;
00276   }
00277   int maxI = params->numAtoms[0];
00278   int strideIg = params->numParts;
00279 
00280   int unique = (gbisParams->numPatches == 1) ? 1 : 0;//should inner loop be unique from ourter loop
00281   int numGBISPairlists = 4;
00282   float r_cut = gbisParams->cutoff;
00283   float fsMax = FS_MAX; // gbisParams->fsMax;//FS_MAX;
00284   float a_cut = gbisParams->a_cut-fsMax;
00285   float a_cut2 = a_cut*a_cut;
00286   float a_cut_ps = a_cut + fsMax;//max screen radius
00287   float r_cut2 = r_cut*r_cut;
00288   float a_cut_ps2 = a_cut_ps*a_cut_ps;
00289   //put PL pointer back to beginning of lists
00290   for (int k = 0; k < numGBISPairlists; k++)
00291     gbisParams->gbisStepPairlists[k]->reset();
00292       
00293 
00294 /***********************************************************
00295 * GBIS Phase 1
00296 ***********************************************************/
00297 if (gbisParams->gbisPhase == 1) {
00298 //CkPrintf("SEQ%03i, P%i, CID%05i(%02i,%02i):[%03i,%03i]\n",gbisParams->sequence,gbisParams->gbisPhase,gbisParams->cid,gbisParams->patchID[0],gbisParams->patchID[1],minI,maxI);
00299 
00300   pairlistFromAll(params,gbisParams,minIg,strideIg,maxI);
00301 
00302 #ifdef BENCHMARK
00303   int nops = 0;
00304   int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
00305   int numDom = 7;
00306   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00307 #endif
00308   register GBReal psiI;
00309 
00310   register float dr;
00311   register float r2;
00312   register float r, r_i, r2_i;
00313   float rhoi0, rhois, rhojs, rhoj0;
00314   Position ri, rj;
00315   register int j;
00316   int numPairs;
00317   register float ta = TA;
00318   register float tb = TB;
00319   register float tc = TC;
00320   register float td = TD;
00321   register float te = TE;
00322 
00323   register float hij,hji;
00324   int dij,dji;
00325   float k;
00326   float rhois2, rhojs2;
00327 
00328   //calculate piecewise-22 Pairs
00329   int c = 0;
00330   for (int ngi = minIg; ngi < maxI;  ) {
00331     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00332   for (int i = ngi; i < ngi+iGroupSize; i++) {
00333     ri = params->p[0][i].position;
00334     ri.x += offset_x;
00335     ri.y += offset_y;
00336     ri.z += offset_z;
00337     rhoi0 = gbisParams->intRad[0][2*i+0];
00338     rhois = gbisParams->intRad[0][2*i+1];
00339     rhois2 = rhois*rhois;
00340     psiI = ZERO;
00341     plint *pairs;
00342     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00343     for (register int jj = 0; jj < numPairs; jj++) {
00344 #ifdef BENCHMARK
00345       nops++;
00346 #endif
00347       j = pairs[jj];
00348       rj = params->p[1][j].position;
00349 
00350       dr = (ri.x - rj.x);
00351       r2 = dr*dr;
00352       dr = (ri.y - rj.y);
00353       r2 += dr*dr;
00354       dr = (ri.z - rj.z);
00355       r2 += dr*dr;
00356       r2_i = 1.0/r2;
00357 
00358       rhoj0 = gbisParams->intRad[1][2*j+0];
00359       rhojs = gbisParams->intRad[1][2*j+1];
00360       rhojs2 = rhojs*rhojs;
00361 
00362       k = rhojs2*r2_i;//k=(rs/r)^2
00363       hij = rhojs*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
00364 
00365       k = rhois2*r2_i;//k=(rs/r)^2
00366       hji = rhois*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
00367 
00368 //#if 1
00369 #ifdef PRINT_COMP
00370       int id1 = params->pExt[0][i].id;
00371       int id2 = params->pExt[1][j].id;
00372       float h1 = hij;
00373       float h2 = hji;
00374       float r10 = rhoi0;
00375       float r1s = rhojs;
00376       float r20 = rhoj0;
00377       float r2s = rhois;
00378 /*printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00379 id1,id2,sqrt(r2),
00380 rhoi0, rhojs,
00381 2,hij);
00382 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00383 id2,id1,sqrt(r2),
00384 rhoj0, rhois,
00385 2,hji);
00386 */
00387       if (id1 > id2) {
00388         int tmp = id1;
00389         id1 = id2;
00390         id2 = tmp;
00391         h1 = hji;
00392         h2 = hij;
00393         r20 = rhoi0;
00394         r2s = rhojs;
00395         r10 = rhoj0;
00396         r1s = rhois;
00397       }
00398 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),2,h1,r10,r1s);
00399 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),2,h2,r20,r2s);
00400       //CkPrintf("PPSI(%04i)[%04i,%04i] = 22\n",gbisParams->cid,id1,id2);
00401 #endif
00402 
00403       psiI += hij;
00404       gbisParams->psiSum[1][j] += hji;
00405     }//end inner j
00406     gbisParams->psiSum[0][i] += psiI;
00407   }//end outer i
00408   for (int s = 0; s < strideIg; s++) {
00409     ngi+=params->p[0][ngi].nonbondedGroupSize;
00410   }
00411   }
00412 #ifdef BENCHMARK
00413   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00414   //nops *= (9 + 2*DIV_FLOPS+SQRT_FLOPS) + (14 + 0*DIV_FLOPS + 0*LOG_FLOPS);
00415   //double flops = 1.0 * nops / (t2 - t1);
00416   //double gflops = flops / 1000000000.0;
00417   CkPrintf("PHASE1.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00418 nops = 0;
00419   t1 = 1.0*clock()/CLOCKS_PER_SEC;
00420   nops = 0;
00421 #endif
00422 
00423   float rmris, rmrjs;
00424   float rmrsi;
00425   float rmrs2;
00426   float rs2;
00427   float logri, logrj;
00428   float rci2;
00429   float a_cut_i = 1.0 / a_cut;
00430   float a_cut_i2 = a_cut_i*a_cut_i;
00431 
00432   //calculate piecewise-11 pairs
00433   c = 1;
00434   for (int ngi = minIg; ngi < maxI;  ) {
00435     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00436   for (int i = ngi; i < ngi+iGroupSize; i++) {
00437     ri = params->p[0][i].position;
00438     ri.x += offset_x;
00439     ri.y += offset_y;
00440     ri.z += offset_z;
00441     rhoi0 = gbisParams->intRad[0][2*i+0];
00442     rhois = gbisParams->intRad[0][2*i+1];
00443     psiI = ZERO;
00444     plint *pairs;
00445     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00446     for (register int jj = 0; jj < numPairs; jj++) {
00447       j = pairs[jj];
00448       rj = params->p[1][j].position;
00449 
00450       dr = (ri.x - rj.x);
00451       r2 = dr*dr;
00452       dr = (ri.y - rj.y);
00453       r2 += dr*dr;
00454       dr = (ri.z - rj.z);
00455       r2 += dr*dr;
00456       r_i = 1.0/sqrt(r2);
00457       r = r2*r_i;
00458 
00459       rhoj0 = gbisParams->intRad[1][2*j+0];
00460       rhojs = gbisParams->intRad[1][2*j+1];
00461 
00462       float tmp1 = 0.125*r_i;
00463       float tmp2 = r2 - 4.0*a_cut*r;
00464       float rr = 2.0*r;
00465 
00466 
00467       rmrjs = r-rhojs;
00468       rmris = r-rhois;
00469       logri = log(rmris*a_cut_i);
00470       logrj = log(rmrjs*a_cut_i);
00471 
00472       rmrsi = 1.0/rmrjs;
00473       //rmrs2 = rmrjs*rmrjs;
00474       rs2 = rhojs*rhojs;
00475       hij = /*0.125*r_i*/tmp1*(1 + rr*rmrsi +
00476         a_cut_i2*(/*r2 - 4.0*a_cut*r*/tmp2 - rs2) + logrj+logrj);
00477 
00478 
00479       rmrsi = 1.0/rmris;
00480       //rmrs2 = rmris*rmris;
00481       rs2 = rhois*rhois;
00482       hji = /*0.125*r_i*/tmp1*(1 + rr*rmrsi +
00483         a_cut_i2*(/*r2 - 4.0*a_cut*r*/tmp2 - rs2) + 2.0* logri);
00484 //#if 1
00485 #ifdef PRINT_COMP
00486       int id1 = params->pExt[0][i].id;
00487       int id2 = params->pExt[1][j].id;
00488       float h1 = hij;
00489       float h2 = hji;
00490       float r10 = rhoi0;
00491       float r1s = rhojs;
00492       float r20 = rhoj0;
00493       float r2s = rhois;
00494 /*printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00495 id1,id2,sqrt(r2),
00496 rhoi0, rhojs,
00497 1,hij);
00498 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00499 id2,id1,sqrt(r2),
00500 rhoj0, rhois,
00501 1,hji);*/
00502       if (id1 > id2) {
00503         int tmp = id1;
00504         id1 = id2;
00505         id2 = tmp;
00506         h1 = hji;
00507         h2 = hij;
00508         r20 = rhoi0;
00509         r2s = rhojs;
00510         r10 = rhoj0;
00511         r1s = rhois;
00512       }
00513 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),1,h1,r10,r1s);
00514 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),1,h2,r20,r2s);
00515       //CkPrintf("PSI(%04i)[%04i,%04i] = 11 % .4e % .4e\n",gbisParams->sequence,id1,id2,h1, h2);
00516       //CkPrintf("PPSI(%04i)[%04i,%04i] = 11\n",gbisParams->cid,id1,id2);
00517 #endif
00518 
00519 #ifdef BENCHMARK
00520       nops++;
00521 #endif
00522           
00523       psiI += hij;
00524       gbisParams->psiSum[1][j] += hji;
00525     }//end inner j
00526     gbisParams->psiSum[0][i] += psiI;
00527   }//end outer i
00528   for (int s = 0; s < strideIg; s++) {
00529     ngi+=params->p[0][ngi].nonbondedGroupSize;
00530   }
00531   }
00532 #ifdef BENCHMARK
00533   t2 = 1.0*clock()/CLOCKS_PER_SEC;
00534   CkPrintf("PHASE1.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00535 nops = 0;
00536   t1 = 1.0*clock()/CLOCKS_PER_SEC;
00537   nops = 0;
00538 #endif
00539 
00540   //calculate all other piecewise pairs
00541   c = 2;
00542   for (int ngi = minIg; ngi < maxI;  ) {
00543     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00544   for (int i = ngi; i < ngi+iGroupSize; i++) {
00545     ri = params->p[0][i].position;
00546     ri.x += offset_x;
00547     ri.y += offset_y;
00548     ri.z += offset_z;
00549     rhoi0 = gbisParams->intRad[0][2*i+0];
00550     rhois = gbisParams->intRad[0][2*i+1];
00551     psiI = ZERO;
00552     plint *pairs;
00553     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00554     for (register int jj = 0; jj < numPairs; jj++) {
00555       j = pairs[jj];
00556       rj = params->p[1][j].position;
00557 
00558       dr = (ri.x - rj.x);
00559       r2 = dr*dr;
00560       dr = (ri.y - rj.y);
00561       r2 += dr*dr;
00562       dr = (ri.z - rj.z);
00563       r2 += dr*dr;
00564       rhojs = gbisParams->intRad[1][2*j+1];
00565       rhoj0 = gbisParams->intRad[1][2*j+0];
00566       r_i = 1.0/sqrt(r2);
00567       r = r2 * r_i;
00568 
00569       CalcHPair(r,r2,r_i,a_cut,
00570           rhoi0, rhojs,
00571           rhoj0, rhois,
00572           dij,dji,hij,hji);
00573 //#if 1
00574 #ifdef PRINT_COMP
00575       int id1 = params->pExt[0][i].id;
00576       int id2 = params->pExt[1][j].id;
00577       float h1 = hij;
00578       float h2 = hji;
00579       int d1 = dij;
00580       int d2 = dji;
00581       float r10 = rhoi0;
00582       float r1s = rhojs;
00583       float r20 = rhoj0;
00584       float r2s = rhois;
00585 /*  if (dij > 0 ) {
00586 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00587 id1,id2,sqrt(r2),
00588 rhoi0, rhojs,
00589 dij,hij);
00590   }
00591   if (dji > 0 ) {
00592 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00593 id2,id1,sqrt(r2),
00594 rhoj0, rhois,
00595 dji,hji);
00596   }*/
00597 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),d1,h1,r10,r1s);
00598       if (id1 > id2) {
00599         int tmp = id1;
00600         id1 = id2;
00601         id2 = tmp;
00602         h1 = hji;
00603         h2 = hij;
00604         d1 = dji;
00605         d2 = dij;
00606         r20 = rhoi0;
00607         r2s = rhojs;
00608         r10 = rhoj0;
00609         r1s = rhois;
00610       }
00611 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),d2,h2,r20,r2s);
00612       //CkPrintf("PSI(%04i)[%04i,%04i] = %i%i % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1, h2);
00613       //CkPrintf("PPSI(%04i)[%04i,%04i] = %i%i\n",gbisParams->cid,id1,id2,d1,d2);
00614 #endif
00615 
00616 #ifdef BENCHMARK
00617       nops++;
00618 #endif
00619       psiI += hij;
00620       gbisParams->psiSum[1][j] += hji;
00621     }//end inner j
00622     gbisParams->psiSum[0][i] += psiI;
00623   }//end outer i
00624   for (int s = 0; s < strideIg; s++) {
00625     ngi+=params->p[0][ngi].nonbondedGroupSize;
00626   }
00627   }
00628 
00629 #ifdef BENCHMARK
00630   t2 = 1.0*clock()/CLOCKS_PER_SEC;
00631   CkPrintf("PHASE1.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00632 #endif
00633 
00634 /***********************************************************
00635 * GBIS Phase 2
00636 ***********************************************************/
00637 } else if (gbisParams->gbisPhase == 2) {
00638 
00639   float epsilon_s = gbisParams->epsilon_s;
00640   float epsilon_p = gbisParams->epsilon_p;
00641   float epsilon_s_i = 1/epsilon_s;
00642   float epsilon_p_i = 1/epsilon_p;
00643   float kappa = gbisParams->kappa;
00644 
00645   //values used in loop
00646   float r_cut_2 = 1.0 / r_cut2;
00647   float r_cut_4 = 4.0*r_cut_2*r_cut_2;
00648   float coulEij=0,ddrCoulEij=0,gbEij=0,ddrGbEij=0;
00649   float dEdai=0,dEdaj=0, qiqj=0;
00650   float scale=0, ddrScale=0;
00651   float rnx=0,rny=0,rnz=0;
00652   float fx=0,fy=0,fz=0,forceCoul=0, forcedEdr=0;
00653 
00654   int nops = 0;
00655   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00656   float r2;
00657   float dr;
00658   BigReal dx, dy, dz;
00659   float r, r_i, ratio;
00660   float fIx, fIy, fIz;
00661   GBReal dEdaSumI;
00662   float bornRadI, bornRadJ;
00663   float qi;
00664   Position ri, rj;
00665   float aiaj,expr2aiaj4,fij,f_i,expkappa,Dij;
00666   float aiaj4,ddrDij,ddrf_i,ddrfij,tmp_dEda;
00667 
00668   for (int c = 0; c < 4/*dEdrPLs*/; c++) {
00669   for (int ngi = minIg; ngi < maxI;  ) {
00670     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00671   for (int i = ngi; i < ngi+iGroupSize; i++) {
00672     ri = params->p[0][i].position;
00673     ri.x += offset_x;
00674     ri.y += offset_y;
00675     ri.z += offset_z;
00676     qi = - COULOMB * params->p[0][i].charge * scaling;
00677   //printf("ATOM(%05i) %.3e %.3e %.3e\n", params->pExt[0][i].id, params->p[0][i].charge, COULOMB, scaling);
00678     int numPairs;
00679     plint *pairs;
00680     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00681     fIx = fIy = fIz = 0.0;
00682     dEdaSumI = 0.0;
00683     bornRadI = gbisParams->bornRad[0][i];
00684     for (int jj = 0; jj < numPairs; jj++) {
00685       int j = pairs[jj];
00686       rj = params->p[1][j].position;
00687 
00688       //distance
00689       dx = (ri.x - rj.x);
00690       dy = (ri.y - rj.y);
00691       dz = (ri.z - rj.z);
00692       r2 = dx*dx + dy*dy + dz*dz;
00693       if (r2 > r_cut2) continue;
00694       qiqj = qi*params->p[1][j].charge;
00695       bornRadJ = gbisParams->bornRad[1][j];
00696       r_i = 1.0/sqrt(r2);
00697       r = r2 * r_i;
00698 
00699       //pairwise calculation
00700 /*      Phase2_Pair(r, r2, r_i, qiqj,
00701           bornRadI,
00702           bornRadJ,
00703           epsilon_p_i, epsilon_s_i,
00704           kappa, gbisParams->doFullElectrostatics,
00705           gbEij, ddrGbEij, dEdai, dEdaj);
00706 */
00707 
00708   //calculate GB energy
00709   aiaj = bornRadI*bornRadJ;
00710   aiaj4 = 4*aiaj;
00711   expr2aiaj4 = exp(-r2/aiaj4);
00712   fij = sqrt(r2+aiaj*expr2aiaj4);
00713   f_i = 1/fij;
00714   expkappa = (kappa > 0) ? exp(-kappa*fij) : 1.0;
00715   Dij = epsilon_p_i - expkappa*epsilon_s_i;
00716   gbEij = qiqj*Dij*f_i;
00717 
00718   //calculate energy derivatives
00719   ddrfij = r*f_i*(1 - 0.25*expr2aiaj4);
00720   ddrf_i = -ddrfij*f_i*f_i;
00721   ddrDij = kappa*expkappa*ddrfij*epsilon_s_i;
00722   ddrGbEij = qiqj*(ddrDij*f_i+Dij*ddrf_i);
00723 
00724   //calc dEda
00725       //NAMD smoothing function
00726       scale = 1;
00727       ddrScale = 0;
00728       if (gbisParams->doSmoothing) {
00729         scale = r2 * r_cut_2 - 1;
00730         scale *= scale;
00731         ddrScale = r*(r2-r_cut2)*r_cut_4;
00732         //CkPrintf("SCALE %f %f\n",scale,ddrScale);
00733         gbisParams->gbInterEnergy += gbEij * scale;
00734         forcedEdr = -(ddrGbEij)*scale-(gbEij)*ddrScale;
00735       } else {
00736         gbisParams->gbInterEnergy += gbEij;
00737         forcedEdr = -ddrGbEij;
00738       }
00739 
00740       //add dEda
00741       if (gbisParams->doFullElectrostatics) {
00742         //gbisParams->dEdaSum[0][i] += dEdai*scale;
00743         tmp_dEda = 0.5*qiqj*f_i*f_i
00744                       *(kappa*epsilon_s_i*expkappa-Dij*f_i)
00745                       *(aiaj+0.25*r2)*expr2aiaj4;//0
00746         dEdai = tmp_dEda/bornRadI;
00747         dEdaj = tmp_dEda/bornRadJ;
00748         dEdaSumI += dEdai*scale;
00749         gbisParams->dEdaSum[1][j] += dEdaj*scale;
00750       }
00751 
00752 #if 1
00753 //#ifdef PRINT_COMP
00754       int id1 = params->pExt[0][i].id;
00755       int id2 = params->pExt[1][j].id;
00756       float deda1 = dEdai;
00757       float deda2 = dEdaj;
00758       float bR1 = bornRadI;
00759       float bR2 = bornRadJ;
00760       if (id1 > id2) {
00761         int tmp = id1;
00762         id1 = id2;
00763         id2 = tmp;
00764         deda1 = dEdaj;
00765         deda2 = dEdai;
00766         bR1 = bornRadJ;
00767         bR2 = bornRadI;
00768       }
00769       //CkPrintf("DEDR(%04i)[%04i,%04i] = % .4e\n",gbisParams->sequence,id1,id2,forcedEdr);
00770       //CkPrintf("DASM(%04i)[%04i,%04i] = % .4e % .4e\n",gbisParams->sequence,id1, id2, deda1,deda2);
00771       //CkPrintf("P2RM(%04i)[%04i,%04i] = % .4e % .4e % .4e % .4e % .4e\n",gbisParams->sequence,id1, id2, r, bR1,bR2,epsilon_p_i,epsilon_s_i,kappa);
00772 /*CkPrintf("P2PAIR %05i %05i%9.5f%6.3f%6.3f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e\n",
00773 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
00774 bornRadI, bornRadJ, forcedEdr, dEdai, qiqj, Dij, scale
00775 );
00776 CkPrintf("P2PAIR %05i %05i%9.5f%6.3f%6.3f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e\n",
00777 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
00778 bornRadJ, bornRadI, forcedEdr, dEdaj, qiqj, Dij, scale
00779 );*/
00780 #endif
00781 
00782       forcedEdr *= r_i;
00783       fx = dx*forcedEdr;
00784       fy = dy*forcedEdr;
00785       fz = dz*forcedEdr;
00786 
00787       params->ff[1][j].x -= fx;
00788       params->ff[1][j].y -= fy;
00789       params->ff[1][j].z -= fz;
00790 
00791       fIx += fx;
00792       fIy += fy;
00793       fIz += fz;
00794 
00795 #ifdef BENCHMARK
00796       nops ++;//= (59 + 4*DIV_FLOPS + 2*EXP_FLOPS+SQRT_FLOPS);//56 w/o nops
00797 #endif
00798 
00799     }//end inner j
00800     gbisParams->dEdaSum[0][i] += dEdaSumI;
00801     params->ff[0][i].x += fIx;
00802     params->ff[0][i].y += fIy;
00803     params->ff[0][i].z += fIz;
00804 
00805     //self energy of each atom
00806     if (c == 0 && gbisParams->doEnergy && gbisParams->numPatches == 1) {
00807       float fij = bornRadI;//inf
00808       float expkappa = exp(-kappa*fij);//0
00809       float Dij = epsilon_p_i - expkappa*epsilon_s_i;
00810       float gbEij = qi*params->p[0][i].charge*Dij/fij;
00811       gbisParams->gbSelfEnergy += 0.5*gbEij;//self energy
00812     }
00813   }// end outer i
00814   for (int s = 0; s < strideIg; s++) {
00815     ngi+=params->p[0][ngi].nonbondedGroupSize;
00816   }//end i
00817   }//end ig
00818   }//end c
00819 #ifdef BENCHMARK
00820   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00821   //double flops = 1.0 * nops / (t2 - t1);
00822   //double gflops = flops / 1000000000.0;
00823   
00824   CkPrintf("PHASE2.0: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00825 #endif
00826 
00827 /***********************************************************
00828 * GBIS Phase 3
00829 ***********************************************************/
00830 } else if (gbisParams->gbisPhase == 3 && gbisParams->doFullElectrostatics) {
00831 
00832 #ifdef BENCHMARK
00833   //int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
00834   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00835   double t2;
00836   int nops = 0;
00837 #endif
00838   //CkPrintf("GBIS(%3i)[%2i]::P3 %3i(%3i) %3i(%3i)\n",gbisParams->sequence,gbisParams->cid, gbisParams->patchID[0],params->numAtoms[0],gbisParams->patchID[1],params->numAtoms[1]);
00839 
00840   register BigReal dx, dy, dz;
00841   register float  r2;
00842   register float r, r_i;
00843   register float rhoi0, rhois;
00844   float rhojs, rhoj0;
00845   float fx, fy, fz;
00846   float forceAlpha;
00847   register float fIx, fIy, fIz;
00848 
00849   float dhij;
00850   float dhji;
00851   int dij;
00852   int dji;
00853   register float dHdrPrefixI;
00854   float dHdrPrefixJ;
00855   register Position ri;
00856   register Position rj;
00857   register int c, numPairs, jj, j;
00858   register float k;
00859   register float da = DA;
00860   register float db = DB;
00861   register float dc = DC;
00862   register float dd = DD;
00863   register float de = DE;
00864   float r_i3;
00865 
00866   //piecewise 22
00867   c = 0;
00868   for (int ngi = minIg; ngi < maxI;  ) {
00869     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00870   for (int i = ngi; i < ngi+iGroupSize; i++) {
00871     ri = params->p[0][i].position;
00872     ri.x += offset_x;
00873     ri.y += offset_y;
00874     ri.z += offset_z;
00875     rhois = gbisParams->intRad[0][2*i+1];
00876     plint *pairs;
00877     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00878     fIx = fIy = fIz = 0.0;
00879     dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
00880     for (jj = 0; jj < numPairs; jj++) {
00881       j = pairs[jj];
00882       rj = params->p[1][j].position;
00883 
00884       dx = (ri.x - rj.x);
00885       dy = (ri.y - rj.y);
00886       dz = (ri.z - rj.z);
00887       r2 = dx*dx + dy*dy + dz*dz;
00888       dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
00889 
00890       r_i = 1.0/sqrt(r2);//rptI takes 50% of loop time
00891       r_i3 = r_i*r_i*r_i;
00892 
00893       rhojs = gbisParams->intRad[1][2*j+1];
00894 
00895       k = rhojs*r_i; k*=k;//k=(rs/r)^2
00896       dhij = -rhojs*r_i3*k*
00897               (da+k*(db+k*(dc+k*(dd+k*de))));
00898 
00899       k = rhois*r_i; k*=k;//k=(rs/r)^2
00900       dhji = -rhois*r_i3*k*
00901               (da+k*(db+k*(dc+k*(dd+k*de))));
00902 
00903       //add dEda*dadr force
00904       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
00905       fx = dx * forceAlpha;
00906       fy = dy * forceAlpha;
00907       fz = dz * forceAlpha;
00908 
00909       params->fullf[1][j].x -= fx;
00910       params->fullf[1][j].y -= fy;
00911       params->fullf[1][j].z -= fz;
00912 
00913       fIx += fx;
00914       fIy += fy;
00915       fIz += fz;
00916 
00917 #if 1
00918 //#ifdef PRINT_COMP
00919       int id1 = params->pExt[0][i].id;
00920       int id2 = params->pExt[1][j].id;
00921       float h1 = dhij;
00922       float h2 = dhji;
00923       if (id1 > id2) {
00924         int tmp = id1;
00925         id1 = id2;
00926         id2 = tmp;
00927         h1 = dhji;
00928         h2 = dhij;
00929       }
00930 /*CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
00931 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
00932 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, 2
00933 );
00934 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
00935 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
00936 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, 2
00937 );*/
00938       //CkPrintf("DEDA(%04i)[%04i,%04i] = 22 % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,h1,h2, forceAlpha);
00939 #endif
00940 
00941 #ifdef BENCHMARK
00942       nops++;//= (24 + 2*DIV_FLOPS+SQRT_FLOPS);// 8 w/o nops
00943 #endif
00944 
00945     }//end inner j
00946     params->fullf[0][i].x += fIx;
00947     params->fullf[0][i].y += fIy;
00948     params->fullf[0][i].z += fIz;
00949   }//end outer i
00950   for (int s = 0; s < strideIg; s++) {
00951     ngi+=params->p[0][ngi].nonbondedGroupSize;
00952   }
00953   }
00954 
00955 #ifdef BENCHMARK
00956   t2 = 1.0*clock()/CLOCKS_PER_SEC;
00957   CkPrintf("PHASE3.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00958 nops = 0;
00959   t1 = 1.0*clock()/CLOCKS_PER_SEC;
00960   nops = 0;
00961 #endif
00962 
00963   float a_cut_i = 1.0/a_cut;
00964   float a_cut_i2 = a_cut_i*a_cut_i;
00965   float a_cut2 = a_cut*a_cut;
00966   float rmrs;
00967   float rmrsi;
00968   float rmrs2;
00969   float rhois2, rhojs2;
00970   float logri, logrj;
00971   float r_i2;
00972 
00973   //piecewise 11
00974   c = 1;
00975   for (int ngi = minIg; ngi < maxI;  ) {
00976     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00977   for (int i = ngi; i < ngi+iGroupSize; i++) {
00978     ri = params->p[0][i].position;
00979     ri.x += offset_x;
00980     ri.y += offset_y;
00981     ri.z += offset_z;
00982     rhois = gbisParams->intRad[0][2*i+1];
00983     rhois2 = rhois*rhois;
00984     plint *pairs;
00985     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00986     fIx = fIy = fIz = 0.0;
00987     dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
00988     for (jj = 0; jj < numPairs; jj++) {
00989       j = pairs[jj];
00990       rj = params->p[1][j].position;
00991       dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
00992 
00993       dx = (ri.x - rj.x);
00994       dy = (ri.y - rj.y);
00995       dz = (ri.z - rj.z);
00996       r2 = dx*dx + dy*dy + dz*dz;
00997       r_i = 1.0/sqrt(r2);//rptI
00998       r = r2* r_i;
00999       r_i2 = r_i*r_i;
01000 
01001       rhojs = gbisParams->intRad[1][2*j+1];
01002       rhojs2 = rhojs*rhojs;
01003 
01004 
01005       rmrs = r-rhojs;// 4 times
01006       rmrsi = 1.0/rmrs;
01007       rmrs2 = rmrs*rmrs;
01008       logrj = log(rmrs*a_cut_i);
01009       dhij = r_i2*(-0.25*logrj - (a_cut2 - rmrs2)*(rhojs2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
01010 
01011 
01012       rmrs = r-rhois;// 4 times
01013       rmrsi = 1.0/rmrs;
01014       rmrs2 = rmrs*rmrs;
01015       logri = log(rmrs*a_cut_i);
01016       dhji = r_i2*(-0.25*logri - (a_cut2 - rmrs2)*(rhois2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
01017 
01018       //add dEda*dadr force
01019       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
01020       fx = dx * forceAlpha;
01021       fy = dy * forceAlpha;
01022       fz = dz * forceAlpha;
01023 
01024       params->fullf[1][j].x -= fx;
01025       params->fullf[1][j].y -= fy;
01026       params->fullf[1][j].z -= fz;
01027 
01028       fIx += fx;
01029       fIy += fy;
01030       fIz += fz;
01031 
01032 #if 1
01033 //#ifdef PRINT_COMP
01034       int id1 = params->pExt[0][i].id;
01035       int id2 = params->pExt[1][j].id;
01036       float h1 = dhij;
01037       float h2 = dhji;
01038       if (id1 > id2) {
01039         int tmp = id1;
01040         id1 = id2;
01041         id2 = tmp;
01042         h1 = dhji;
01043         h2 = dhij;
01044       }
01045       //CkPrintf("DEDA(%04i)[%04i,%04i] = 11 % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,h1,h2, forceAlpha);
01046 /*CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01047 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
01048 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, 1
01049 );
01050 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01051 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
01052 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, 1
01053 );*/
01054 #endif
01055 
01056 #ifdef BENCHMARK
01057       nops++;
01058 #endif
01059 
01060     }//end inner j
01061     params->fullf[0][i].x += fIx;
01062     params->fullf[0][i].y += fIy;
01063     params->fullf[0][i].z += fIz;
01064   }//end outer i
01065   for (int s = 0; s < strideIg; s++) {
01066     ngi+=params->p[0][ngi].nonbondedGroupSize;
01067   }
01068   }
01069 
01070 #ifdef BENCHMARK
01071   t2 = 1.0*clock()/CLOCKS_PER_SEC;
01072   CkPrintf("PHASE3.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
01073 nops = 0;
01074   t1 = 1.0*clock()/CLOCKS_PER_SEC;
01075   nops = 0;
01076 #endif
01077 
01078   //piecewise all others
01079   c = 2;
01080   for (int ngi = minIg; ngi < maxI;  ) {
01081     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
01082   for (int i = ngi; i < ngi+iGroupSize; i++) {
01083     ri = params->p[0][i].position;
01084     ri.x += offset_x;
01085     ri.y += offset_y;
01086     ri.z += offset_z;
01087     rhoi0 = gbisParams->intRad[0][2*i+0];
01088     rhois = gbisParams->intRad[0][2*i+1];
01089     plint *pairs;
01090     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
01091     fIx = fIy = fIz = 0.0;
01092     dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
01093     for (jj = 0; jj < numPairs; jj++) {
01094       j = pairs[jj];
01095       rj = params->p[1][j].position;
01096       dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
01097 
01098       dx = (ri.x - rj.x);
01099       dy = (ri.y - rj.y);
01100       dz = (ri.z - rj.z);
01101       r2 = dx*dx + dy*dy + dz*dz;
01102 
01103       r_i = 1.0/sqrt(r2);//rptI
01104       r = r2* r_i;
01105 
01106       rhojs = gbisParams->intRad[1][2*j+1];
01107       rhoj0 = gbisParams->intRad[1][2*j+0];
01108 
01109       CalcDHPair(r,r2,r_i,a_cut,
01110           rhoi0,rhojs,
01111           rhoj0,rhois,
01112           dij,dji,dhij,dhji);
01113 
01114       //add dEda*dadr force
01115       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji); // *scaling ?
01116       fx = dx * forceAlpha;
01117       fy = dy * forceAlpha;
01118       fz = dz * forceAlpha;
01119 
01120       fIx += fx;
01121       fIy += fy;
01122       fIz += fz;
01123 
01124       params->fullf[1][j].x -= fx;
01125       params->fullf[1][j].y -= fy;
01126       params->fullf[1][j].z -= fz;
01127 
01128 #if 1
01129 //#ifdef PRINT_COMP
01130       int id1 = params->pExt[0][i].id;
01131       int id2 = params->pExt[1][j].id;
01132       int d1 = dij;
01133       int d2 = dji;
01134       float h1 = dhij;
01135       float h2 = dhji;
01136       if (id1 > id2) {
01137         int tmp = id1;
01138         id1 = id2;
01139         id2 = tmp;
01140         d1 = dji;
01141         d2 = dij;
01142         h1 = dhji;
01143         h2 = dhij;
01144       }
01145 //      CkPrintf("DEDA(%04i)[%04i,%04i] = %i%i % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1,h2, forceAlpha);
01146 //      CkPrintf("DEDA(%04i)[%04i,%04i] = %i%i % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1,h2, forceAlpha);
01147 /*if ( dij > 0 ) {
01148 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01149 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
01150 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, dij
01151 );
01152 }
01153 if ( dji > 0 ) {
01154 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01155 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
01156 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, dji
01157 );
01158 }*/
01159 #endif
01160 
01161 #ifdef BENCHMARK
01162       nops++;
01163 #endif
01164 
01165     }//end inner j
01166     params->fullf[0][i].x += fIx;
01167     params->fullf[0][i].y += fIy;
01168     params->fullf[0][i].z += fIz;
01169   }//end outer i
01170   for (int s = 0; s < strideIg; s++) {
01171     ngi+=params->p[0][ngi].nonbondedGroupSize;
01172   }
01173   }
01174 
01175 
01176 #ifdef BENCHMARK
01177   t2 = 1.0*clock()/CLOCKS_PER_SEC;
01178   CkPrintf("PHASE3.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
01179 #endif
01180 
01181 }//end if gbisPhase
01182 
01183 }//end calcGBIS

Generated on Fri Sep 22 01:17:11 2017 for NAMD by  doxygen 1.4.7