Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeGBIS.C

Go to the documentation of this file.
00001 
00008 //#define BENCHMARK
00009 #define CHECK_PRIORITIES 0
00010 //#define PRINT_COMP
00011 
00012 //approximate flop equivalence of transcendental functs
00013 #ifdef BENCHMARK
00014 #define SQRT_FLOPS 15
00015 #define LOG_FLOPS 20
00016 #define EXP_FLOPS 20
00017 #define DIV_FLOPS 1
00018 #endif
00019 
00020 #include "ComputeNonbondedUtil.h"
00021 #include "ComputeGBIS.inl"
00022 #include "time.h"
00023 
00024 /*******************************************************************************
00025   When GBIS is active, the method calcGBIS is called from a nonbonded
00026   compute object ::doForce() three times each timestep with
00027   gbisPhase = 1, 2, 3.
00028 
00029   At the beginning of the first phase, the pairlists are generated which are
00030   used for all three phases; these pairlists are re-generated every timestep.
00031   4 mutually exclusive pairlists are used, in an effort to accelerate the
00032   iteration over pairs.
00033   pairlist 0: atoms fall within interaction domain 2 (largest group)
00034   pairlist 1: atoms fall within interaction domain 1 (next largest)
00035   pairlist 2: atoms are within the phase 1,3 cutoff (all the rest)
00036   pairlist 3: atoms are within the phase 2 cutoff (generally longer than previous cutoff)
00037 
00038   These calculations generate nonbonded forces (and associated energies) which
00039   are calculated in addition to Coulomb and van der Waals forces.
00040 *******************************************************************************/
00041 
00042 /*Searches all pair of atoms to create
00043 largest pairlist lasting all cycle*/
00044 inline void pairlistFromAll(
00045   nonbonded *params,
00046   GBISParamStruct *gbisParams,
00047   int minIg,
00048   int strideIg,
00049   int maxI
00050 ) {
00051   const BigReal offset_x = params->offset.x;
00052   const BigReal offset_y = params->offset.y;
00053   const BigReal offset_z = params->offset.z;
00054 
00055   int unique = (gbisParams->numPatches == 1) ? 1 : 0;
00056 
00057   int maxPairs = params->numAtoms[1];
00058   int seq = gbisParams->sequence;
00059   int cid = gbisParams->cid;
00060   int pe = CkMyPe();
00061   int gbisPhase = gbisParams->gbisPhase;
00062 
00063   //determine long and short cutoff
00064   float fsMax = FS_MAX; // gbisParams->fsMax;
00065   float a_cut = gbisParams->a_cut - fsMax;
00066   float a_cut_ps2 = (a_cut+fsMax)*(a_cut+fsMax);
00067   float r_cut = gbisParams->cutoff;
00068   float a_cut2 = a_cut*a_cut;
00069   float r_cut2 = r_cut*r_cut;
00070   int s = 0;// 0 is short cutoff list
00071   int l = 1;// 1 is long-short cutoff list
00072 #ifdef BENCHMARK
00073   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00074   int nops = 0;
00075 #endif
00076   Position ri, rj;
00077   Position ngri, ngrj;
00078   float r, dr, r2;
00079   float rhoi0, rhois, rhoj0, rhojs, rhois2, rhojs2, rhois2_16;
00080   float ami2, amj2, api2, apj2;
00081   int numGBISPairlists = 4;
00082 
00083   float max_gcut2  = r_cut;
00084   if (a_cut+fsMax > r_cut)
00085     max_gcut2 = a_cut+fsMax;
00086   max_gcut2 += 2.0*gbisParams->maxGroupRadius;
00087   //CkPrintf("max_cut = %f, %f\n",max_gcut2,gbisParams->maxGroupRadius);
00088   max_gcut2 *= max_gcut2;
00089 
00090   int maxGroupPairs = params->numAtoms[1];
00091   short *groupPairs = new short[maxGroupPairs];/*delete*/
00092 
00093   //foreach nonbonded group i
00094   for (int ngi = minIg; ngi < maxI; /*ngi updated at loop bottom*/ ) {
00095     int numGroupPairs = 0;
00096     ngri = params->p[0][ngi].position;
00097     ngri.x += offset_x;
00098     ngri.y += offset_y;
00099     ngri.z += offset_z;
00100 
00101     //find close j-groups; include i-group
00102     for (int ngj = unique*(ngi+params->p[0][ngi].nonbondedGroupSize); ngj < params->numAtoms[1]; ngj+=params->p[1][ngj].nonbondedGroupSize) {
00103       
00104       ngrj = params->p[1][ngj].position;
00105       dr = ngri.x - ngrj.x;
00106       r2 = dr*dr;
00107       dr = ngri.y - ngrj.y;
00108       r2 += dr*dr;
00109       dr = ngri.z - ngrj.z;
00110       r2 += dr*dr;
00111       if (r2 < max_gcut2) {
00112         groupPairs[numGroupPairs++] = ngj;
00113       }
00114     }
00115 
00116     //for all i in i-group
00117     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00118     for (int i=ngi; i<ngi+iGroupSize; i++) {
00119       //CkPrintf("\tFORALL i=%05i\n",params->pExt[0][i].id);
00120       //extend pairlists
00121       int *size = new int[numGBISPairlists];
00122       plint **pairs = new plint*[numGBISPairlists];
00123       for (int k = 0; k < numGBISPairlists; k++) {
00124         size[k] = 0;
00125         pairs[k] = gbisParams->gbisStepPairlists[k]->
00126                              newlist(maxPairs);
00127       }
00128 
00129       //load i values
00130       ri = params->p[0][i].position;
00131       ri.x += offset_x;
00132       ri.y += offset_y;
00133       ri.z += offset_z;
00134       rhois = gbisParams->intRad[0][2*i+1];
00135       api2 = a_cut + rhois;
00136       api2 *= api2;
00137       ami2 = a_cut - rhois;
00138       ami2 *= ami2;
00139       rhois2 = rhois*rhois;
00140       rhois2_16 = 16.0*rhois2;
00141 
00142       //for all other i's in i-group
00143       if (unique)
00144       for (int j = i+1; j < ngi+iGroupSize; j++) {
00145 
00146 #ifdef BENCHMARK
00147           nops ++;
00148 #endif
00149           rj = params->p[1][j].position;
00150           dr = ri.x - rj.x;
00151           r2 = dr*dr;
00152           dr = ri.y - rj.y;
00153           r2 += dr*dr;
00154           dr = ri.z - rj.z;
00155           r2 += dr*dr;
00156 
00157           rhojs = gbisParams->intRad[1][2*j+1];
00158           rhojs2 = rhojs*rhojs;
00159           //r = sqrt(r2);
00160           amj2 = a_cut - rhojs;
00161           amj2 *= amj2;
00162 
00163           apj2 = a_cut + rhojs;
00164           apj2 *= apj2;
00165           //CkPrintf("IPAIR %5i %5i %5i\n",0*gbisParams->cid,params->pExt[0][i].id,params->pExt[1][j].id);
00166 
00167           if (  r2 < ami2 && r2 < amj2 &&
00168             r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
00169             //belongs to 22
00170             pairs[0][size[0]++] = j;
00171           } else if ( r2 < api2 && r2 < apj2 &&
00172             r2 > ami2 && r2 > amj2 ) {
00173             //belongs to 11
00174             pairs[1][size[1]++] = j;
00175           } else if ( r2 < a_cut_ps2 ) {
00176             //belongs to other a_cut
00177             pairs[2][size[2]++] = j;
00178           } else if ( r2 < r_cut2 ) {
00179             //belongs to r_cut and (r_cut > a_cut)
00180             pairs[3][size[3]++] = j;
00181           }
00182       }
00183 
00184       //foreach j-group
00185       for (int g = 0; g < numGroupPairs; g++) {
00186         int ngj = groupPairs[g];
00187         int jGroupSize = params->p[1][ngj].nonbondedGroupSize;
00188         //for each j in j-group
00189         int maxJ = ngj+jGroupSize;
00190         for (int j = ngj; j < maxJ; j++) {
00191 
00192           //CkPrintf("for j-atom%i\n",j);
00193 #ifdef BENCHMARK
00194           nops ++;
00195 #endif
00196           rj = params->p[1][j].position;
00197           dr = ri.x - rj.x;
00198           r2 = dr*dr;
00199           dr = ri.y - rj.y;
00200           r2 += dr*dr;
00201           dr = ri.z - rj.z;
00202           r2 += dr*dr;
00203           //CkPrintf("PAIR %5i %5i %5i\n",0*gbisParams->cid,params->pExt[0][i].id,params->pExt[1][j].id);
00204 
00205           rhojs = gbisParams->intRad[1][2*j+1];
00206           rhojs2 = rhojs*rhojs;
00207           //r = sqrt(r2);
00208           amj2 = a_cut - rhojs;
00209           amj2 *= amj2;
00210 
00211           apj2 = a_cut + rhojs;
00212           apj2 *= apj2;
00213 
00214           if (  r2 < ami2 && r2 < amj2 &&
00215             r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
00216             //belongs to 22
00217             //CkPrintf("Pair22 %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00218             pairs[0][size[0]++] = j;
00219           } else if ( r2 < api2 && r2 < apj2 &&
00220             r2 > ami2 && r2 > amj2 ) {
00221             //CkPrintf("Pair11 %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00222             //belongs to 11
00223             pairs[1][size[1]++] = j;
00224           } else if ( r2 < a_cut_ps2 ) {
00225             //CkPrintf("PairAA %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00226             //belongs to other a_cut
00227             pairs[2][size[2]++] = j;
00228           } else if ( r2 < r_cut2 ) {
00229             //CkPrintf("PairRR %06i %06i\n",params->pExt[0][i].id,params->pExt[1][j].id);
00230             //belongs to r_cut and (r_cut > a_cut)
00231             pairs[3][size[3]++] = j;
00232           }
00233         }//end j inner loop
00234       }//end j-group loop
00235     for (int k = 0; k < numGBISPairlists; k++) {
00236       gbisParams->gbisStepPairlists[k]->newsize(size[k]);
00237     }
00238     delete[] size;
00239     delete[] pairs;
00240     }//end i all atom loop
00241 
00242     //jump to next nbg for round-robin
00243     //same iteration patter followed in all three gbisPhases below
00244     for (int s = 0; s < strideIg; s++) {
00245       ngi+=params->p[0][ngi].nonbondedGroupSize;
00246     }
00247   }//end i-group loop
00248     delete[] groupPairs;
00249   for (int k = 0; k < numGBISPairlists; k++)
00250     gbisParams->gbisStepPairlists[k]->reset();
00251 
00252 #ifdef BENCHMARK
00253   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00254   CkPrintf("PHASELST: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00255 #endif
00256 }//end pairlist method
00257 
00258 /*******************************************************************************
00259  * Calculate GBIS 3 Phases
00260 *******************************************************************************/
00261 void ComputeNonbondedUtil::calcGBIS(nonbonded *params, GBISParamStruct *gbisParams) {
00262 //CkPrintf("SEQ%03i, P%i, CID%05i(%02i,%02i) ENTER\n",gbisParams->sequence,gbisParams->gbisPhase,gbisParams->cid,gbisParams->patchID[0],gbisParams->patchID[1]);
00263 #if CHECK_PRIORITIES
00264 CkPrintf("PE%i, S%09i, P%i\n",CkMyPe(),gbisParams->sequence,gbisParams->gbisPhase);
00265 #endif
00266   if (params->numAtoms[0] == 0 || params->numAtoms[1] == 0) return;
00267 
00268   const BigReal offset_x = params->offset.x;
00269   const BigReal offset_y = params->offset.y;
00270   const BigReal offset_z = params->offset.z;
00271 
00272   int partSize = params->numAtoms[0] / params->numParts;
00273   int minIg = 0;
00274   for (int s = 0; s < params->minPart; s++) {
00275     minIg+=params->p[0][minIg].nonbondedGroupSize;
00276   }
00277   int maxI = params->numAtoms[0];
00278   int strideIg = params->numParts;
00279 
00280   int unique = (gbisParams->numPatches == 1) ? 1 : 0;//should inner loop be unique from ourter loop
00281   int numGBISPairlists = 4;
00282   float r_cut = gbisParams->cutoff;
00283   float fsMax = FS_MAX; // gbisParams->fsMax;//FS_MAX;
00284   float a_cut = gbisParams->a_cut-fsMax;
00285   float a_cut2 = a_cut*a_cut;
00286   float a_cut_ps = a_cut + fsMax;//max screen radius
00287   float r_cut2 = r_cut*r_cut;
00288   float a_cut_ps2 = a_cut_ps*a_cut_ps;
00289   //put PL pointer back to beginning of lists
00290   for (int k = 0; k < numGBISPairlists; k++)
00291     gbisParams->gbisStepPairlists[k]->reset();
00292       
00293 
00294 /***********************************************************
00295 * GBIS Phase 1
00296 ***********************************************************/
00297 if (gbisParams->gbisPhase == 1) {
00298 //CkPrintf("SEQ%03i, P%i, CID%05i(%02i,%02i):[%03i,%03i]\n",gbisParams->sequence,gbisParams->gbisPhase,gbisParams->cid,gbisParams->patchID[0],gbisParams->patchID[1],minI,maxI);
00299 
00300   pairlistFromAll(params,gbisParams,minIg,strideIg,maxI);
00301 
00302 #ifdef BENCHMARK
00303   int nops = 0;
00304   int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
00305   int numDom = 7;
00306   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00307 #endif
00308   register GBReal psiI;
00309 
00310   register float dr;
00311   register float r2;
00312   register float r, r_i, r2_i;
00313   float rhoi0, rhois, rhojs, rhoj0;
00314   Position ri, rj;
00315   register int j;
00316   int numPairs;
00317   register float ta = TA;
00318   register float tb = TB;
00319   register float tc = TC;
00320   register float td = TD;
00321   register float te = TE;
00322 
00323   register float hij,hji;
00324   int dij,dji;
00325   float k;
00326   float rhois2, rhojs2;
00327 
00328   //calculate piecewise-22 Pairs
00329   int c = 0;
00330   for (int ngi = minIg; ngi < maxI;  ) {
00331     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00332   for (int i = ngi; i < ngi+iGroupSize; i++) {
00333     ri = params->p[0][i].position;
00334     ri.x += offset_x;
00335     ri.y += offset_y;
00336     ri.z += offset_z;
00337     rhoi0 = gbisParams->intRad[0][2*i+0];
00338     rhois = gbisParams->intRad[0][2*i+1];
00339     rhois2 = rhois*rhois;
00340     psiI = ZERO;
00341     numPairs;
00342     plint *pairs;
00343     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00344     for (register int jj = 0; jj < numPairs; jj++) {
00345 #ifdef BENCHMARK
00346       nops++;
00347 #endif
00348       j = pairs[jj];
00349       rj = params->p[1][j].position;
00350 
00351       dr = (ri.x - rj.x);
00352       r2 = dr*dr;
00353       dr = (ri.y - rj.y);
00354       r2 += dr*dr;
00355       dr = (ri.z - rj.z);
00356       r2 += dr*dr;
00357       r2_i = 1.0/r2;
00358 
00359       rhoj0 = gbisParams->intRad[1][2*j+0];
00360       rhojs = gbisParams->intRad[1][2*j+1];
00361       rhojs2 = rhojs*rhojs;
00362 
00363       k = rhojs2*r2_i;//k=(rs/r)^2
00364       hij = rhojs*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
00365 
00366       k = rhois2*r2_i;//k=(rs/r)^2
00367       hji = rhois*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
00368 
00369 //#if 1
00370 #ifdef PRINT_COMP
00371       int id1 = params->pExt[0][i].id;
00372       int id2 = params->pExt[1][j].id;
00373       float h1 = hij;
00374       float h2 = hji;
00375       float r10 = rhoi0;
00376       float r1s = rhojs;
00377       float r20 = rhoj0;
00378       float r2s = rhois;
00379 /*printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00380 id1,id2,sqrt(r2),
00381 rhoi0, rhojs,
00382 2,hij);
00383 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00384 id2,id1,sqrt(r2),
00385 rhoj0, rhois,
00386 2,hji);
00387 */
00388       if (id1 > id2) {
00389         int tmp = id1;
00390         id1 = id2;
00391         id2 = tmp;
00392         h1 = hji;
00393         h2 = hij;
00394         r20 = rhoi0;
00395         r2s = rhojs;
00396         r10 = rhoj0;
00397         r1s = rhois;
00398       }
00399 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),2,h1,r10,r1s);
00400 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),2,h2,r20,r2s);
00401       //CkPrintf("PPSI(%04i)[%04i,%04i] = 22\n",gbisParams->cid,id1,id2);
00402 #endif
00403 
00404       psiI += hij;
00405       gbisParams->psiSum[1][j] += hji;
00406     }//end inner j
00407     gbisParams->psiSum[0][i] += psiI;
00408   }//end outer i
00409   for (int s = 0; s < strideIg; s++) {
00410     ngi+=params->p[0][ngi].nonbondedGroupSize;
00411   }
00412   }
00413 #ifdef BENCHMARK
00414   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00415   //nops *= (9 + 2*DIV_FLOPS+SQRT_FLOPS) + (14 + 0*DIV_FLOPS + 0*LOG_FLOPS);
00416   //double flops = 1.0 * nops / (t2 - t1);
00417   //double gflops = flops / 1000000000.0;
00418   CkPrintf("PHASE1.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00419 nops = 0;
00420   t1 = 1.0*clock()/CLOCKS_PER_SEC;
00421   nops = 0;
00422 #endif
00423 
00424   float rmris, rmrjs;
00425   float rmrsi;
00426   float rmrs2;
00427   float rs2;
00428   float logri, logrj;
00429   float rci2;
00430   float a_cut_i = 1.0 / a_cut;
00431   float a_cut_i2 = a_cut_i*a_cut_i;
00432 
00433   //calculate piecewise-11 pairs
00434   c = 1;
00435   for (int ngi = minIg; ngi < maxI;  ) {
00436     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00437   for (int i = ngi; i < ngi+iGroupSize; i++) {
00438     ri = params->p[0][i].position;
00439     ri.x += offset_x;
00440     ri.y += offset_y;
00441     ri.z += offset_z;
00442     rhoi0 = gbisParams->intRad[0][2*i+0];
00443     rhois = gbisParams->intRad[0][2*i+1];
00444     psiI = ZERO;
00445     numPairs;
00446     plint *pairs;
00447     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00448     for (register int jj = 0; jj < numPairs; jj++) {
00449       j = pairs[jj];
00450       rj = params->p[1][j].position;
00451 
00452       dr = (ri.x - rj.x);
00453       r2 = dr*dr;
00454       dr = (ri.y - rj.y);
00455       r2 += dr*dr;
00456       dr = (ri.z - rj.z);
00457       r2 += dr*dr;
00458       r_i = 1.0/sqrt(r2);
00459       r = r2*r_i;
00460 
00461       rhoj0 = gbisParams->intRad[1][2*j+0];
00462       rhojs = gbisParams->intRad[1][2*j+1];
00463 
00464       float tmp1 = 0.125*r_i;
00465       float tmp2 = r2 - 4.0*a_cut*r;
00466       float rr = 2.0*r;
00467 
00468 
00469       rmrjs = r-rhojs;
00470       rmris = r-rhois;
00471       logri = log(rmris*a_cut_i);
00472       logrj = log(rmrjs*a_cut_i);
00473 
00474       rmrsi = 1.0/rmrjs;
00475       //rmrs2 = rmrjs*rmrjs;
00476       rs2 = rhojs*rhojs;
00477       hij = /*0.125*r_i*/tmp1*(1 + rr*rmrsi +
00478         a_cut_i2*(/*r2 - 4.0*a_cut*r*/tmp2 - rs2) + logrj+logrj);
00479 
00480 
00481       rmrsi = 1.0/rmris;
00482       //rmrs2 = rmris*rmris;
00483       rs2 = rhois*rhois;
00484       hji = /*0.125*r_i*/tmp1*(1 + rr*rmrsi +
00485         a_cut_i2*(/*r2 - 4.0*a_cut*r*/tmp2 - rs2) + 2.0* logri);
00486 //#if 1
00487 #ifdef PRINT_COMP
00488       int id1 = params->pExt[0][i].id;
00489       int id2 = params->pExt[1][j].id;
00490       float h1 = hij;
00491       float h2 = hji;
00492       float r10 = rhoi0;
00493       float r1s = rhojs;
00494       float r20 = rhoj0;
00495       float r2s = rhois;
00496 /*printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00497 id1,id2,sqrt(r2),
00498 rhoi0, rhojs,
00499 1,hij);
00500 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00501 id2,id1,sqrt(r2),
00502 rhoj0, rhois,
00503 1,hji);*/
00504       if (id1 > id2) {
00505         int tmp = id1;
00506         id1 = id2;
00507         id2 = tmp;
00508         h1 = hji;
00509         h2 = hij;
00510         r20 = rhoi0;
00511         r2s = rhojs;
00512         r10 = rhoj0;
00513         r1s = rhois;
00514       }
00515 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),1,h1,r10,r1s);
00516 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),1,h2,r20,r2s);
00517       //CkPrintf("PSI(%04i)[%04i,%04i] = 11 % .4e % .4e\n",gbisParams->sequence,id1,id2,h1, h2);
00518       //CkPrintf("PPSI(%04i)[%04i,%04i] = 11\n",gbisParams->cid,id1,id2);
00519 #endif
00520 
00521 #ifdef BENCHMARK
00522       nops++;
00523 #endif
00524           
00525       psiI += hij;
00526       gbisParams->psiSum[1][j] += hji;
00527     }//end inner j
00528     gbisParams->psiSum[0][i] += psiI;
00529   }//end outer i
00530   for (int s = 0; s < strideIg; s++) {
00531     ngi+=params->p[0][ngi].nonbondedGroupSize;
00532   }
00533   }
00534 #ifdef BENCHMARK
00535   t2 = 1.0*clock()/CLOCKS_PER_SEC;
00536   CkPrintf("PHASE1.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
00537 nops = 0;
00538   t1 = 1.0*clock()/CLOCKS_PER_SEC;
00539   nops = 0;
00540 #endif
00541 
00542   //calculate all other piecewise pairs
00543   c = 2;
00544   for (int ngi = minIg; ngi < maxI;  ) {
00545     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00546   for (int i = ngi; i < ngi+iGroupSize; i++) {
00547     ri = params->p[0][i].position;
00548     ri.x += offset_x;
00549     ri.y += offset_y;
00550     ri.z += offset_z;
00551     rhoi0 = gbisParams->intRad[0][2*i+0];
00552     rhois = gbisParams->intRad[0][2*i+1];
00553     psiI = ZERO;
00554     numPairs;
00555     plint *pairs;
00556     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00557     for (register int jj = 0; jj < numPairs; jj++) {
00558       j = pairs[jj];
00559       rj = params->p[1][j].position;
00560 
00561       dr = (ri.x - rj.x);
00562       r2 = dr*dr;
00563       dr = (ri.y - rj.y);
00564       r2 += dr*dr;
00565       dr = (ri.z - rj.z);
00566       r2 += dr*dr;
00567       rhojs = gbisParams->intRad[1][2*j+1];
00568       rhoj0 = gbisParams->intRad[1][2*j+0];
00569       r_i = 1.0/sqrt(r2);
00570       r = r2 * r_i;
00571 
00572       CalcHPair(r,r2,r_i,a_cut,
00573           rhoi0, rhojs,
00574           rhoj0, rhois,
00575           dij,dji,hij,hji);
00576 //#if 1
00577 #ifdef PRINT_COMP
00578       int id1 = params->pExt[0][i].id;
00579       int id2 = params->pExt[1][j].id;
00580       float h1 = hij;
00581       float h2 = hji;
00582       int d1 = dij;
00583       int d2 = dji;
00584       float r10 = rhoi0;
00585       float r1s = rhojs;
00586       float r20 = rhoj0;
00587       float r2s = rhois;
00588 /*  if (dij > 0 ) {
00589 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00590 id1,id2,sqrt(r2),
00591 rhoi0, rhojs,
00592 dij,hij);
00593   }
00594   if (dji > 0 ) {
00595 printf("PSIPAIR %05i %05i%9.5f%6.3f%6.3f%2i% 13.5e\n",
00596 id2,id1,sqrt(r2),
00597 rhoj0, rhois,
00598 dji,hji);
00599   }*/
00600 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id1,id2,sqrt(r2),d1,h1,r10,r1s);
00601       if (id1 > id2) {
00602         int tmp = id1;
00603         id1 = id2;
00604         id2 = tmp;
00605         h1 = hji;
00606         h2 = hij;
00607         d1 = dji;
00608         d2 = dij;
00609         r20 = rhoi0;
00610         r2s = rhojs;
00611         r10 = rhoj0;
00612         r1s = rhois;
00613       }
00614 //      CkPrintf("PSIPAIR%5i%5i%10.5f%5i%14.8f%7.4f%7.4f\n",id2,id1,sqrt(r2),d2,h2,r20,r2s);
00615       //CkPrintf("PSI(%04i)[%04i,%04i] = %i%i % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1, h2);
00616       //CkPrintf("PPSI(%04i)[%04i,%04i] = %i%i\n",gbisParams->cid,id1,id2,d1,d2);
00617 #endif
00618 
00619 #ifdef BENCHMARK
00620       nops++;
00621 #endif
00622       psiI += hij;
00623       gbisParams->psiSum[1][j] += hji;
00624     }//end inner j
00625     gbisParams->psiSum[0][i] += psiI;
00626   }//end outer i
00627   for (int s = 0; s < strideIg; s++) {
00628     ngi+=params->p[0][ngi].nonbondedGroupSize;
00629   }
00630   }
00631 
00632 #ifdef BENCHMARK
00633   t2 = 1.0*clock()/CLOCKS_PER_SEC;
00634   CkPrintf("PHASE1.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00635 #endif
00636 
00637 /***********************************************************
00638 * GBIS Phase 2
00639 ***********************************************************/
00640 } else if (gbisParams->gbisPhase == 2) {
00641 
00642   float epsilon_s = gbisParams->epsilon_s;
00643   float epsilon_p = gbisParams->epsilon_p;
00644   float epsilon_s_i = 1/epsilon_s;
00645   float epsilon_p_i = 1/epsilon_p;
00646   float kappa = gbisParams->kappa;
00647 
00648   //values used in loop
00649   float r_cut_2 = 1.0 / r_cut2;
00650   float r_cut_4 = 4.0*r_cut_2*r_cut_2;
00651   float coulEij=0,ddrCoulEij=0,gbEij=0,ddrGbEij=0;
00652   float dEdai=0,dEdaj=0, qiqj=0;
00653   float scale=0, ddrScale=0;
00654   float rnx=0,rny=0,rnz=0;
00655   float fx=0,fy=0,fz=0,forceCoul=0, forcedEdr=0;
00656 
00657   int nops = 0;
00658   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00659   float r2;
00660   float dr;
00661   BigReal dx, dy, dz;
00662   float r, r_i, ratio;
00663   float fIx, fIy, fIz;
00664   GBReal dEdaSumI;
00665   float bornRadI, bornRadJ;
00666   float qi;
00667   Position ri, rj;
00668   float aiaj,expr2aiaj4,fij,f_i,expkappa,Dij;
00669   float aiaj4,ddrDij,ddrf_i,ddrfij,tmp_dEda;
00670 
00671   for (int c = 0; c < 4/*dEdrPLs*/; c++) {
00672   for (int ngi = minIg; ngi < maxI;  ) {
00673     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00674   for (int i = ngi; i < ngi+iGroupSize; i++) {
00675     ri = params->p[0][i].position;
00676     ri.x += offset_x;
00677     ri.y += offset_y;
00678     ri.z += offset_z;
00679     qi = - COULOMB * params->p[0][i].charge * scaling;
00680   //printf("ATOM(%05i) %.3e %.3e %.3e\n", params->pExt[0][i].id, params->p[0][i].charge, COULOMB, scaling);
00681     int numPairs;
00682     plint *pairs;
00683     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00684     fIx = fIy = fIz = 0.0;
00685     dEdaSumI = 0.0;
00686     bornRadI = gbisParams->bornRad[0][i];
00687     for (int jj = 0; jj < numPairs; jj++) {
00688       int j = pairs[jj];
00689       rj = params->p[1][j].position;
00690 
00691       //distance
00692       dx = (ri.x - rj.x);
00693       dy = (ri.y - rj.y);
00694       dz = (ri.z - rj.z);
00695       r2 = dx*dx + dy*dy + dz*dz;
00696       if (r2 > r_cut2) continue;
00697       qiqj = qi*params->p[1][j].charge;
00698       bornRadJ = gbisParams->bornRad[1][j];
00699       r_i = 1.0/sqrt(r2);
00700       r = r2 * r_i;
00701 
00702       //pairwise calculation
00703 /*      Phase2_Pair(r, r2, r_i, qiqj,
00704           bornRadI,
00705           bornRadJ,
00706           epsilon_p_i, epsilon_s_i,
00707           kappa, gbisParams->doFullElectrostatics,
00708           gbEij, ddrGbEij, dEdai, dEdaj);
00709 */
00710 
00711   //calculate GB energy
00712   aiaj = bornRadI*bornRadJ;
00713   aiaj4 = 4*aiaj;
00714   expr2aiaj4 = exp(-r2/aiaj4);
00715   fij = sqrt(r2+aiaj*expr2aiaj4);
00716   f_i = 1/fij;
00717   expkappa = (kappa > 0) ? exp(-kappa*fij) : 1.0;
00718   Dij = epsilon_p_i - expkappa*epsilon_s_i;
00719   gbEij = qiqj*Dij*f_i;
00720 
00721   //calculate energy derivatives
00722   ddrfij = r*f_i*(1 - 0.25*expr2aiaj4);
00723   ddrf_i = -ddrfij*f_i*f_i;
00724   ddrDij = kappa*expkappa*ddrfij*epsilon_s_i;
00725   ddrGbEij = qiqj*(ddrDij*f_i+Dij*ddrf_i);
00726 
00727   //calc dEda
00728       //NAMD smoothing function
00729       scale = 1;
00730       ddrScale = 0;
00731       if (gbisParams->doSmoothing) {
00732         scale = r2 * r_cut_2 - 1;
00733         scale *= scale;
00734         ddrScale = r*(r2-r_cut2)*r_cut_4;
00735         //CkPrintf("SCALE %f %f\n",scale,ddrScale);
00736         gbisParams->gbInterEnergy += gbEij * scale;
00737         forcedEdr = -(ddrGbEij)*scale-(gbEij)*ddrScale;
00738       } else {
00739         gbisParams->gbInterEnergy += gbEij;
00740         forcedEdr = -ddrGbEij;
00741       }
00742 
00743       //add dEda
00744       if (gbisParams->doFullElectrostatics) {
00745         //gbisParams->dEdaSum[0][i] += dEdai*scale;
00746         tmp_dEda = 0.5*qiqj*f_i*f_i
00747                       *(kappa*epsilon_s_i*expkappa-Dij*f_i)
00748                       *(aiaj+0.25*r2)*expr2aiaj4;//0
00749         dEdai = tmp_dEda/bornRadI;
00750         dEdaj = tmp_dEda/bornRadJ;
00751         dEdaSumI += dEdai*scale;
00752         gbisParams->dEdaSum[1][j] += dEdaj*scale;
00753       }
00754 
00755 #if 1
00756 //#ifdef PRINT_COMP
00757       int id1 = params->pExt[0][i].id;
00758       int id2 = params->pExt[1][j].id;
00759       float deda1 = dEdai;
00760       float deda2 = dEdaj;
00761       float bR1 = bornRadI;
00762       float bR2 = bornRadJ;
00763       if (id1 > id2) {
00764         int tmp = id1;
00765         id1 = id2;
00766         id2 = tmp;
00767         deda1 = dEdaj;
00768         deda2 = dEdai;
00769         bR1 = bornRadJ;
00770         bR2 = bornRadI;
00771       }
00772       //CkPrintf("DEDR(%04i)[%04i,%04i] = % .4e\n",gbisParams->sequence,id1,id2,forcedEdr);
00773       //CkPrintf("DASM(%04i)[%04i,%04i] = % .4e % .4e\n",gbisParams->sequence,id1, id2, deda1,deda2);
00774       //CkPrintf("P2RM(%04i)[%04i,%04i] = % .4e % .4e % .4e % .4e % .4e\n",gbisParams->sequence,id1, id2, r, bR1,bR2,epsilon_p_i,epsilon_s_i,kappa);
00775 /*CkPrintf("P2PAIR %05i %05i%9.5f%6.3f%6.3f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e\n",
00776 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
00777 bornRadI, bornRadJ, forcedEdr, dEdai, qiqj, Dij, scale
00778 );
00779 CkPrintf("P2PAIR %05i %05i%9.5f%6.3f%6.3f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e\n",
00780 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
00781 bornRadJ, bornRadI, forcedEdr, dEdaj, qiqj, Dij, scale
00782 );*/
00783 #endif
00784 
00785       forcedEdr *= r_i;
00786       fx = dx*forcedEdr;
00787       fy = dy*forcedEdr;
00788       fz = dz*forcedEdr;
00789 
00790       params->ff[1][j].x -= fx;
00791       params->ff[1][j].y -= fy;
00792       params->ff[1][j].z -= fz;
00793 
00794       fIx += fx;
00795       fIy += fy;
00796       fIz += fz;
00797 
00798 #ifdef BENCHMARK
00799       nops ++;//= (59 + 4*DIV_FLOPS + 2*EXP_FLOPS+SQRT_FLOPS);//56 w/o nops
00800 #endif
00801 
00802     }//end inner j
00803     gbisParams->dEdaSum[0][i] += dEdaSumI;
00804     params->ff[0][i].x += fIx;
00805     params->ff[0][i].y += fIy;
00806     params->ff[0][i].z += fIz;
00807 
00808     //self energy of each atom
00809     if (c == 0 && gbisParams->doEnergy && gbisParams->numPatches == 1) {
00810       float fij = bornRadI;//inf
00811       float expkappa = exp(-kappa*fij);//0
00812       float Dij = epsilon_p_i - expkappa*epsilon_s_i;
00813       float gbEij = qi*params->p[0][i].charge*Dij/fij;
00814       gbisParams->gbSelfEnergy += 0.5*gbEij;//self energy
00815     }
00816   }// end outer i
00817   for (int s = 0; s < strideIg; s++) {
00818     ngi+=params->p[0][ngi].nonbondedGroupSize;
00819   }//end i
00820   }//end ig
00821   }//end c
00822 #ifdef BENCHMARK
00823   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
00824   //double flops = 1.0 * nops / (t2 - t1);
00825   //double gflops = flops / 1000000000.0;
00826   
00827   CkPrintf("PHASE2.0: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00828 #endif
00829 
00830 /***********************************************************
00831 * GBIS Phase 3
00832 ***********************************************************/
00833 } else if (gbisParams->gbisPhase == 3 && gbisParams->doFullElectrostatics) {
00834 
00835 #ifdef BENCHMARK
00836   //int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
00837   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
00838   double t2;
00839   int nops = 0;
00840 #endif
00841   //CkPrintf("GBIS(%3i)[%2i]::P3 %3i(%3i) %3i(%3i)\n",gbisParams->sequence,gbisParams->cid, gbisParams->patchID[0],params->numAtoms[0],gbisParams->patchID[1],params->numAtoms[1]);
00842 
00843   register BigReal dx, dy, dz;
00844   register float  r2;
00845   register float r, r_i;
00846   register float rhoi0, rhois;
00847   float rhojs, rhoj0;
00848   float fx, fy, fz;
00849   float forceAlpha;
00850   register float fIx, fIy, fIz;
00851 
00852   float dhij;
00853   float dhji;
00854   int dij;
00855   int dji;
00856   register float dHdrPrefixI;
00857   float dHdrPrefixJ;
00858   register Position ri;
00859   register Position rj;
00860   register int c, numPairs, jj, j;
00861   register float k;
00862   register float da = DA;
00863   register float db = DB;
00864   register float dc = DC;
00865   register float dd = DD;
00866   register float de = DE;
00867   float r_i3;
00868 
00869   //piecewise 22
00870   c = 0;
00871   for (int ngi = minIg; ngi < maxI;  ) {
00872     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00873   for (int i = ngi; i < ngi+iGroupSize; i++) {
00874     ri = params->p[0][i].position;
00875     ri.x += offset_x;
00876     ri.y += offset_y;
00877     ri.z += offset_z;
00878     rhois = gbisParams->intRad[0][2*i+1];
00879     plint *pairs;
00880     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00881     fIx = fIy = fIz = 0.0;
00882     dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
00883     for (jj = 0; jj < numPairs; jj++) {
00884       j = pairs[jj];
00885       rj = params->p[1][j].position;
00886 
00887       dx = (ri.x - rj.x);
00888       dy = (ri.y - rj.y);
00889       dz = (ri.z - rj.z);
00890       r2 = dx*dx + dy*dy + dz*dz;
00891       dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
00892 
00893       r_i = 1.0/sqrt(r2);//rptI takes 50% of loop time
00894       r_i3 = r_i*r_i*r_i;
00895 
00896       rhojs = gbisParams->intRad[1][2*j+1];
00897 
00898       k = rhojs*r_i; k*=k;//k=(rs/r)^2
00899       dhij = -rhojs*r_i3*k*
00900               (da+k*(db+k*(dc+k*(dd+k*de))));
00901 
00902       k = rhois*r_i; k*=k;//k=(rs/r)^2
00903       dhji = -rhois*r_i3*k*
00904               (da+k*(db+k*(dc+k*(dd+k*de))));
00905 
00906       //add dEda*dadr force
00907       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
00908       fx = dx * forceAlpha;
00909       fy = dy * forceAlpha;
00910       fz = dz * forceAlpha;
00911 
00912       params->fullf[1][j].x -= fx;
00913       params->fullf[1][j].y -= fy;
00914       params->fullf[1][j].z -= fz;
00915 
00916       fIx += fx;
00917       fIy += fy;
00918       fIz += fz;
00919 
00920 #if 1
00921 //#ifdef PRINT_COMP
00922       int id1 = params->pExt[0][i].id;
00923       int id2 = params->pExt[1][j].id;
00924       float h1 = dhij;
00925       float h2 = dhji;
00926       if (id1 > id2) {
00927         int tmp = id1;
00928         id1 = id2;
00929         id2 = tmp;
00930         h1 = dhji;
00931         h2 = dhij;
00932       }
00933 /*CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
00934 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
00935 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, 2
00936 );
00937 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
00938 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
00939 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, 2
00940 );*/
00941       //CkPrintf("DEDA(%04i)[%04i,%04i] = 22 % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,h1,h2, forceAlpha);
00942 #endif
00943 
00944 #ifdef BENCHMARK
00945       nops++;//= (24 + 2*DIV_FLOPS+SQRT_FLOPS);// 8 w/o nops
00946 #endif
00947 
00948     }//end inner j
00949     params->fullf[0][i].x += fIx;
00950     params->fullf[0][i].y += fIy;
00951     params->fullf[0][i].z += fIz;
00952   }//end outer i
00953   for (int s = 0; s < strideIg; s++) {
00954     ngi+=params->p[0][ngi].nonbondedGroupSize;
00955   }
00956   }
00957 
00958 #ifdef BENCHMARK
00959   t2 = 1.0*clock()/CLOCKS_PER_SEC;
00960   CkPrintf("PHASE3.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
00961 nops = 0;
00962   t1 = 1.0*clock()/CLOCKS_PER_SEC;
00963   nops = 0;
00964 #endif
00965 
00966   float a_cut_i = 1.0/a_cut;
00967   float a_cut_i2 = a_cut_i*a_cut_i;
00968   float a_cut2 = a_cut*a_cut;
00969   float rmrs;
00970   float rmrsi;
00971   float rmrs2;
00972   float rhois2, rhojs2;
00973   float logri, logrj;
00974   float r_i2;
00975 
00976   //piecewise 11
00977   c = 1;
00978   for (int ngi = minIg; ngi < maxI;  ) {
00979     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
00980   for (int i = ngi; i < ngi+iGroupSize; i++) {
00981     ri = params->p[0][i].position;
00982     ri.x += offset_x;
00983     ri.y += offset_y;
00984     ri.z += offset_z;
00985     rhois = gbisParams->intRad[0][2*i+1];
00986     rhois2 = rhois*rhois;
00987     plint *pairs;
00988     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
00989     fIx = fIy = fIz = 0.0;
00990     dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
00991     for (jj = 0; jj < numPairs; jj++) {
00992       j = pairs[jj];
00993       rj = params->p[1][j].position;
00994       dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
00995 
00996       dx = (ri.x - rj.x);
00997       dy = (ri.y - rj.y);
00998       dz = (ri.z - rj.z);
00999       r2 = dx*dx + dy*dy + dz*dz;
01000       r_i = 1.0/sqrt(r2);//rptI
01001       r = r2* r_i;
01002       r_i2 = r_i*r_i;
01003 
01004       rhojs = gbisParams->intRad[1][2*j+1];
01005       rhojs2 = rhojs*rhojs;
01006 
01007 
01008       rmrs = r-rhojs;// 4 times
01009       rmrsi = 1.0/rmrs;
01010       rmrs2 = rmrs*rmrs;
01011       logrj = log(rmrs*a_cut_i);
01012       dhij = r_i2*(-0.25*logrj - (a_cut2 - rmrs2)*(rhojs2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
01013 
01014 
01015       rmrs = r-rhois;// 4 times
01016       rmrsi = 1.0/rmrs;
01017       rmrs2 = rmrs*rmrs;
01018       logri = log(rmrs*a_cut_i);
01019       dhji = r_i2*(-0.25*logri - (a_cut2 - rmrs2)*(rhois2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
01020 
01021       //add dEda*dadr force
01022       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
01023       fx = dx * forceAlpha;
01024       fy = dy * forceAlpha;
01025       fz = dz * forceAlpha;
01026 
01027       params->fullf[1][j].x -= fx;
01028       params->fullf[1][j].y -= fy;
01029       params->fullf[1][j].z -= fz;
01030 
01031       fIx += fx;
01032       fIy += fy;
01033       fIz += fz;
01034 
01035 #if 1
01036 //#ifdef PRINT_COMP
01037       int id1 = params->pExt[0][i].id;
01038       int id2 = params->pExt[1][j].id;
01039       float h1 = dhij;
01040       float h2 = dhji;
01041       if (id1 > id2) {
01042         int tmp = id1;
01043         id1 = id2;
01044         id2 = tmp;
01045         h1 = dhji;
01046         h2 = dhij;
01047       }
01048       //CkPrintf("DEDA(%04i)[%04i,%04i] = 11 % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,h1,h2, forceAlpha);
01049 /*CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01050 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
01051 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, 1
01052 );
01053 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01054 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
01055 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, 1
01056 );*/
01057 #endif
01058 
01059 #ifdef BENCHMARK
01060       nops++;
01061 #endif
01062 
01063     }//end inner j
01064     params->fullf[0][i].x += fIx;
01065     params->fullf[0][i].y += fIy;
01066     params->fullf[0][i].z += fIz;
01067   }//end outer i
01068   for (int s = 0; s < strideIg; s++) {
01069     ngi+=params->p[0][ngi].nonbondedGroupSize;
01070   }
01071   }
01072 
01073 #ifdef BENCHMARK
01074   t2 = 1.0*clock()/CLOCKS_PER_SEC;
01075   CkPrintf("PHASE3.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
01076 nops = 0;
01077   t1 = 1.0*clock()/CLOCKS_PER_SEC;
01078   nops = 0;
01079 #endif
01080 
01081   //piecewise all others
01082   c = 2;
01083   for (int ngi = minIg; ngi < maxI;  ) {
01084     int iGroupSize = params->p[0][ngi].nonbondedGroupSize;
01085   for (int i = ngi; i < ngi+iGroupSize; i++) {
01086     ri = params->p[0][i].position;
01087     ri.x += offset_x;
01088     ri.y += offset_y;
01089     ri.z += offset_z;
01090     rhoi0 = gbisParams->intRad[0][2*i+0];
01091     rhois = gbisParams->intRad[0][2*i+1];
01092     plint *pairs;
01093     gbisParams->gbisStepPairlists[c]->nextlist(&pairs,&numPairs);
01094     fIx = fIy = fIz = 0.0;
01095     dHdrPrefixI = gbisParams->dHdrPrefix[0][i];
01096     for (jj = 0; jj < numPairs; jj++) {
01097       j = pairs[jj];
01098       rj = params->p[1][j].position;
01099       dHdrPrefixJ = gbisParams->dHdrPrefix[1][j];
01100 
01101       dx = (ri.x - rj.x);
01102       dy = (ri.y - rj.y);
01103       dz = (ri.z - rj.z);
01104       r2 = dx*dx + dy*dy + dz*dz;
01105 
01106       r_i = 1.0/sqrt(r2);//rptI
01107       r = r2* r_i;
01108 
01109       rhojs = gbisParams->intRad[1][2*j+1];
01110       rhoj0 = gbisParams->intRad[1][2*j+0];
01111 
01112       CalcDHPair(r,r2,r_i,a_cut,
01113           rhoi0,rhojs,
01114           rhoj0,rhois,
01115           dij,dji,dhij,dhji);
01116 
01117       //add dEda*dadr force
01118       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji); // *scaling ?
01119       fx = dx * forceAlpha;
01120       fy = dy * forceAlpha;
01121       fz = dz * forceAlpha;
01122 
01123       fIx += fx;
01124       fIy += fy;
01125       fIz += fz;
01126 
01127       params->fullf[1][j].x -= fx;
01128       params->fullf[1][j].y -= fy;
01129       params->fullf[1][j].z -= fz;
01130 
01131 #if 1
01132 //#ifdef PRINT_COMP
01133       int id1 = params->pExt[0][i].id;
01134       int id2 = params->pExt[1][j].id;
01135       int d1 = dij;
01136       int d2 = dji;
01137       float h1 = dhij;
01138       float h2 = dhji;
01139       if (id1 > id2) {
01140         int tmp = id1;
01141         id1 = id2;
01142         id2 = tmp;
01143         d1 = dji;
01144         d2 = dij;
01145         h1 = dhji;
01146         h2 = dhij;
01147       }
01148 //      CkPrintf("DEDA(%04i)[%04i,%04i] = %i%i % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1,h2, forceAlpha);
01149 //      CkPrintf("DEDA(%04i)[%04i,%04i] = %i%i % .4e % .4e % .4e\n",gbisParams->sequence,id1,id2,d1,d2,h1,h2, forceAlpha);
01150 /*if ( dij > 0 ) {
01151 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01152 params->pExt[0][i].id, params->pExt[1][j].id,sqrt(r2),
01153 dHdrPrefixI, dHdrPrefixJ, dhij, dhji, forceAlpha, dij
01154 );
01155 }
01156 if ( dji > 0 ) {
01157 CkPrintf("P3PAIR %05i %05i%9.5f% 13.5e% 13.5e% 13.5e% 13.5e% 13.5e %i\n",
01158 params->pExt[1][j].id, params->pExt[0][i].id,sqrt(r2),
01159 dHdrPrefixJ, dHdrPrefixI, dhji, dhij, forceAlpha, dji
01160 );
01161 }*/
01162 #endif
01163 
01164 #ifdef BENCHMARK
01165       nops++;
01166 #endif
01167 
01168     }//end inner j
01169     params->fullf[0][i].x += fIx;
01170     params->fullf[0][i].y += fIy;
01171     params->fullf[0][i].z += fIz;
01172   }//end outer i
01173   for (int s = 0; s < strideIg; s++) {
01174     ngi+=params->p[0][ngi].nonbondedGroupSize;
01175   }
01176   }
01177 
01178 
01179 #ifdef BENCHMARK
01180   t2 = 1.0*clock()/CLOCKS_PER_SEC;
01181   CkPrintf("PHASE3.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
01182 #endif
01183 
01184 }//end if gbisPhase
01185 
01186 }//end calcGBIS

Generated on Fri May 25 04:07:14 2012 for NAMD by  doxygen 1.3.9.1