9 #define CHECK_PRIORITIES 0 55 int unique = (gbisParams->
numPatches == 1) ? 1 : 0;
59 int cid = gbisParams->
cid;
65 float a_cut = gbisParams->
a_cut - fsMax;
66 float a_cut_ps2 = (a_cut+fsMax)*(a_cut+fsMax);
67 float r_cut = gbisParams->
cutoff;
68 float a_cut2 = a_cut*a_cut;
69 float r_cut2 = r_cut*r_cut;
73 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
79 float rhoi0, rhois, rhoj0, rhojs, rhois2, rhojs2, rhois2_16;
80 float ami2, amj2, api2, apj2;
81 int numGBISPairlists = 4;
83 float max_gcut2 = r_cut;
84 if (a_cut+fsMax > r_cut)
85 max_gcut2 = a_cut+fsMax;
88 max_gcut2 *= max_gcut2;
90 int maxGroupPairs = params->
numAtoms[1];
91 short *groupPairs =
new short[maxGroupPairs];
94 for (
int ngi = minIg; ngi < maxI; ) {
95 int numGroupPairs = 0;
105 dr = ngri.
x - ngrj.
x;
107 dr = ngri.
y - ngrj.
y;
109 dr = ngri.
z - ngrj.
z;
111 if (r2 < max_gcut2) {
112 groupPairs[numGroupPairs++] = ngj;
118 for (
int i=ngi; i<ngi+iGroupSize; i++) {
121 int *size =
new int[numGBISPairlists];
123 for (
int k = 0; k < numGBISPairlists; k++) {
134 rhois = gbisParams->
intRad[0][2*i+1];
135 api2 = a_cut + rhois;
137 ami2 = a_cut - rhois;
139 rhois2 = rhois*rhois;
140 rhois2_16 = 16.0*rhois2;
144 for (
int j = i+1; j < ngi+iGroupSize; j++) {
157 rhojs = gbisParams->
intRad[1][2*j+1];
158 rhojs2 = rhojs*rhojs;
160 amj2 = a_cut - rhojs;
163 apj2 = a_cut + rhojs;
167 if ( r2 < ami2 && r2 < amj2 &&
168 r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
170 pairs[0][size[0]++] = j;
171 }
else if ( r2 < api2 && r2 < apj2 &&
172 r2 > ami2 && r2 > amj2 ) {
174 pairs[1][size[1]++] = j;
175 }
else if ( r2 < a_cut_ps2 ) {
177 pairs[2][size[2]++] = j;
178 }
else if ( r2 < r_cut2 ) {
180 pairs[3][size[3]++] = j;
185 for (
int g = 0; g < numGroupPairs; g++) {
186 int ngj = groupPairs[g];
189 int maxJ = ngj+jGroupSize;
190 for (
int j = ngj; j < maxJ; j++) {
205 rhojs = gbisParams->
intRad[1][2*j+1];
206 rhojs2 = rhojs*rhojs;
208 amj2 = a_cut - rhojs;
211 apj2 = a_cut + rhojs;
214 if ( r2 < ami2 && r2 < amj2 &&
215 r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
218 pairs[0][size[0]++] = j;
219 }
else if ( r2 < api2 && r2 < apj2 &&
220 r2 > ami2 && r2 > amj2 ) {
223 pairs[1][size[1]++] = j;
224 }
else if ( r2 < a_cut_ps2 ) {
227 pairs[2][size[2]++] = j;
228 }
else if ( r2 < r_cut2 ) {
231 pairs[3][size[3]++] = j;
235 for (
int k = 0; k < numGBISPairlists; k++) {
244 for (
int s = 0; s < strideIg; s++) {
249 for (
int k = 0; k < numGBISPairlists; k++)
253 double t2 = 1.0*clock()/CLOCKS_PER_SEC;
254 CkPrintf(
"PHASELST: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
264 CkPrintf(
"PE%i, S%09i, P%i\n",CkMyPe(),gbisParams->
sequence,gbisParams->
gbisPhase);
274 for (
int s = 0; s < params->
minPart; s++) {
280 int unique = (gbisParams->
numPatches == 1) ? 1 : 0;
281 int numGBISPairlists = 4;
282 float r_cut = gbisParams->
cutoff;
284 float a_cut = gbisParams->
a_cut-fsMax;
285 float a_cut2 = a_cut*a_cut;
286 float a_cut_ps = a_cut + fsMax;
287 float r_cut2 = r_cut*r_cut;
288 float a_cut_ps2 = a_cut_ps*a_cut_ps;
290 for (
int k = 0; k < numGBISPairlists; k++)
304 int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
306 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
312 register float r, r_i, r2_i;
313 float rhoi0, rhois, rhojs, rhoj0;
317 register float ta =
TA;
318 register float tb =
TB;
319 register float tc =
TC;
320 register float td =
TD;
321 register float te =
TE;
323 register float hij,hji;
326 float rhois2, rhojs2;
330 for (
int ngi = minIg; ngi < maxI; ) {
332 for (
int i = ngi; i < ngi+iGroupSize; i++) {
337 rhoi0 = gbisParams->
intRad[0][2*i+0];
338 rhois = gbisParams->
intRad[0][2*i+1];
339 rhois2 = rhois*rhois;
343 for (
register int jj = 0; jj < numPairs; jj++) {
358 rhoj0 = gbisParams->
intRad[1][2*j+0];
359 rhojs = gbisParams->
intRad[1][2*j+1];
360 rhojs2 = rhojs*rhojs;
363 hij = rhojs*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
366 hji = rhois*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
370 int id1 = params->
pExt[0][i].
id;
371 int id2 = params->
pExt[1][j].
id;
404 gbisParams->
psiSum[1][j] += hji;
406 gbisParams->
psiSum[0][i] += psiI;
408 for (
int s = 0; s < strideIg; s++) {
413 double t2 = 1.0*clock()/CLOCKS_PER_SEC;
417 CkPrintf(
"PHASE1.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
419 t1 = 1.0*clock()/CLOCKS_PER_SEC;
429 float a_cut_i = 1.0 / a_cut;
430 float a_cut_i2 = a_cut_i*a_cut_i;
434 for (
int ngi = minIg; ngi < maxI; ) {
436 for (
int i = ngi; i < ngi+iGroupSize; i++) {
441 rhoi0 = gbisParams->
intRad[0][2*i+0];
442 rhois = gbisParams->
intRad[0][2*i+1];
446 for (
register int jj = 0; jj < numPairs; jj++) {
459 rhoj0 = gbisParams->
intRad[1][2*j+0];
460 rhojs = gbisParams->
intRad[1][2*j+1];
462 float tmp1 = 0.125*r_i;
463 float tmp2 = r2 - 4.0*a_cut*r;
469 logri = log(rmris*a_cut_i);
470 logrj = log(rmrjs*a_cut_i);
475 hij = tmp1*(1 + rr*rmrsi +
476 a_cut_i2*(tmp2 - rs2) + logrj+logrj);
482 hji = tmp1*(1 + rr*rmrsi +
483 a_cut_i2*(tmp2 - rs2) + 2.0* logri);
486 int id1 = params->
pExt[0][i].
id;
487 int id2 = params->
pExt[1][j].
id;
524 gbisParams->
psiSum[1][j] += hji;
526 gbisParams->
psiSum[0][i] += psiI;
528 for (
int s = 0; s < strideIg; s++) {
533 t2 = 1.0*clock()/CLOCKS_PER_SEC;
534 CkPrintf(
"PHASE1.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
536 t1 = 1.0*clock()/CLOCKS_PER_SEC;
542 for (
int ngi = minIg; ngi < maxI; ) {
544 for (
int i = ngi; i < ngi+iGroupSize; i++) {
549 rhoi0 = gbisParams->
intRad[0][2*i+0];
550 rhois = gbisParams->
intRad[0][2*i+1];
554 for (
register int jj = 0; jj < numPairs; jj++) {
564 rhojs = gbisParams->
intRad[1][2*j+1];
565 rhoj0 = gbisParams->
intRad[1][2*j+0];
575 int id1 = params->
pExt[0][i].
id;
576 int id2 = params->
pExt[1][j].
id;
620 gbisParams->
psiSum[1][j] += hji;
622 gbisParams->
psiSum[0][i] += psiI;
624 for (
int s = 0; s < strideIg; s++) {
630 t2 = 1.0*clock()/CLOCKS_PER_SEC;
631 CkPrintf(
"PHASE1.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
641 float epsilon_s_i = 1/epsilon_s;
642 float epsilon_p_i = 1/epsilon_p;
643 float kappa = gbisParams->
kappa;
646 float r_cut_2 = 1.0 / r_cut2;
647 float r_cut_4 = 4.0*r_cut_2*r_cut_2;
648 float coulEij=0,ddrCoulEij=0,gbEij=0,ddrGbEij=0;
649 float dEdai=0,dEdaj=0, qiqj=0;
650 float scale=0, ddrScale=0;
651 float rnx=0,rny=0,rnz=0;
652 float fx=0,fy=0,fz=0,forceCoul=0, forcedEdr=0;
655 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
662 float bornRadI, bornRadJ;
665 float aiaj,expr2aiaj4,fij,f_i,expkappa,Dij;
666 float aiaj4,ddrDij,ddrf_i,ddrfij,tmp_dEda;
668 for (
int c = 0; c < 4; c++) {
669 for (
int ngi = minIg; ngi < maxI; ) {
671 for (
int i = ngi; i < ngi+iGroupSize; i++) {
681 fIx = fIy = fIz = 0.0;
683 bornRadI = gbisParams->
bornRad[0][i];
684 for (
int jj = 0; jj < numPairs; jj++) {
692 r2 = dx*dx + dy*dy + dz*dz;
693 if (r2 > r_cut2)
continue;
694 qiqj = qi*params->
p[1][j].
charge;
695 bornRadJ = gbisParams->
bornRad[1][j];
709 aiaj = bornRadI*bornRadJ;
711 expr2aiaj4 = exp(-r2/aiaj4);
712 fij = sqrt(r2+aiaj*expr2aiaj4);
714 expkappa = (kappa > 0) ? exp(-kappa*fij) : 1.0;
715 Dij = epsilon_p_i - expkappa*epsilon_s_i;
716 gbEij = qiqj*Dij*f_i;
719 ddrfij = r*f_i*(1 - 0.25*expr2aiaj4);
720 ddrf_i = -ddrfij*f_i*f_i;
721 ddrDij = kappa*expkappa*ddrfij*epsilon_s_i;
722 ddrGbEij = qiqj*(ddrDij*f_i+Dij*ddrf_i);
729 scale = r2 * r_cut_2 - 1;
731 ddrScale = r*(r2-r_cut2)*r_cut_4;
734 forcedEdr = -(ddrGbEij)*scale-(gbEij)*ddrScale;
737 forcedEdr = -ddrGbEij;
743 tmp_dEda = 0.5*qiqj*f_i*f_i
744 *(kappa*epsilon_s_i*expkappa-Dij*f_i)
745 *(aiaj+0.25*r2)*expr2aiaj4;
746 dEdai = tmp_dEda/bornRadI;
747 dEdaj = tmp_dEda/bornRadJ;
748 dEdaSumI += dEdai*scale;
749 gbisParams->
dEdaSum[1][j] += dEdaj*scale;
754 int id1 = params->
pExt[0][i].
id;
755 int id2 = params->
pExt[1][j].
id;
758 float bR1 = bornRadI;
759 float bR2 = bornRadJ;
787 params->
ff[1][j].
x -= fx;
788 params->
ff[1][j].
y -= fy;
789 params->
ff[1][j].
z -= fz;
800 gbisParams->
dEdaSum[0][i] += dEdaSumI;
801 params->
ff[0][i].
x += fIx;
802 params->
ff[0][i].
y += fIy;
803 params->
ff[0][i].
z += fIz;
807 float fij = bornRadI;
808 float expkappa = exp(-kappa*fij);
809 float Dij = epsilon_p_i - expkappa*epsilon_s_i;
810 float gbEij = qi*params->
p[0][i].
charge*Dij/fij;
814 for (
int s = 0; s < strideIg; s++) {
820 double t2 = 1.0*clock()/CLOCKS_PER_SEC;
824 CkPrintf(
"PHASE2.0: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
834 double t1 = 1.0*clock()/CLOCKS_PER_SEC;
842 register float r, r_i;
843 register float rhoi0, rhois;
847 register float fIx, fIy, fIz;
853 register float dHdrPrefixI;
857 register int c, numPairs, jj, j;
859 register float da =
DA;
860 register float db =
DB;
861 register float dc =
DC;
862 register float dd =
DD;
863 register float de =
DE;
868 for (
int ngi = minIg; ngi < maxI; ) {
870 for (
int i = ngi; i < ngi+iGroupSize; i++) {
875 rhois = gbisParams->
intRad[0][2*i+1];
878 fIx = fIy = fIz = 0.0;
880 for (jj = 0; jj < numPairs; jj++) {
887 r2 = dx*dx + dy*dy + dz*dz;
893 rhojs = gbisParams->
intRad[1][2*j+1];
896 dhij = -rhojs*r_i3*k*
897 (da+k*(db+k*(dc+k*(dd+k*de))));
900 dhji = -rhois*r_i3*k*
901 (da+k*(db+k*(dc+k*(dd+k*de))));
904 forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
905 fx = dx * forceAlpha;
906 fy = dy * forceAlpha;
907 fz = dz * forceAlpha;
909 params->
fullf[1][j].
x -= fx;
910 params->
fullf[1][j].
y -= fy;
911 params->
fullf[1][j].
z -= fz;
919 int id1 = params->
pExt[0][i].
id;
920 int id2 = params->
pExt[1][j].
id;
946 params->
fullf[0][i].
x += fIx;
947 params->
fullf[0][i].
y += fIy;
948 params->
fullf[0][i].
z += fIz;
950 for (
int s = 0; s < strideIg; s++) {
956 t2 = 1.0*clock()/CLOCKS_PER_SEC;
957 CkPrintf(
"PHASE3.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
959 t1 = 1.0*clock()/CLOCKS_PER_SEC;
963 float a_cut_i = 1.0/a_cut;
964 float a_cut_i2 = a_cut_i*a_cut_i;
965 float a_cut2 = a_cut*a_cut;
969 float rhois2, rhojs2;
975 for (
int ngi = minIg; ngi < maxI; ) {
977 for (
int i = ngi; i < ngi+iGroupSize; i++) {
982 rhois = gbisParams->
intRad[0][2*i+1];
983 rhois2 = rhois*rhois;
986 fIx = fIy = fIz = 0.0;
988 for (jj = 0; jj < numPairs; jj++) {
996 r2 = dx*dx + dy*dy + dz*dz;
1001 rhojs = gbisParams->
intRad[1][2*j+1];
1002 rhojs2 = rhojs*rhojs;
1008 logrj = log(rmrs*a_cut_i);
1009 dhij = r_i2*(-0.25*logrj - (a_cut2 - rmrs2)*(rhojs2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
1015 logri = log(rmrs*a_cut_i);
1016 dhji = r_i2*(-0.25*logri - (a_cut2 - rmrs2)*(rhois2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
1019 forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
1020 fx = dx * forceAlpha;
1021 fy = dy * forceAlpha;
1022 fz = dz * forceAlpha;
1024 params->
fullf[1][j].
x -= fx;
1025 params->
fullf[1][j].
y -= fy;
1026 params->
fullf[1][j].
z -= fz;
1034 int id1 = params->
pExt[0][i].
id;
1035 int id2 = params->
pExt[1][j].
id;
1061 params->
fullf[0][i].
x += fIx;
1062 params->
fullf[0][i].
y += fIy;
1063 params->
fullf[0][i].
z += fIz;
1065 for (
int s = 0; s < strideIg; s++) {
1071 t2 = 1.0*clock()/CLOCKS_PER_SEC;
1072 CkPrintf(
"PHASE3.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
1074 t1 = 1.0*clock()/CLOCKS_PER_SEC;
1080 for (
int ngi = minIg; ngi < maxI; ) {
1082 for (
int i = ngi; i < ngi+iGroupSize; i++) {
1087 rhoi0 = gbisParams->
intRad[0][2*i+0];
1088 rhois = gbisParams->
intRad[0][2*i+1];
1091 fIx = fIy = fIz = 0.0;
1093 for (jj = 0; jj < numPairs; jj++) {
1101 r2 = dx*dx + dy*dy + dz*dz;
1106 rhojs = gbisParams->
intRad[1][2*j+1];
1107 rhoj0 = gbisParams->
intRad[1][2*j+0];
1115 forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
1116 fx = dx * forceAlpha;
1117 fy = dy * forceAlpha;
1118 fz = dz * forceAlpha;
1124 params->
fullf[1][j].
x -= fx;
1125 params->
fullf[1][j].
y -= fy;
1126 params->
fullf[1][j].
z -= fz;
1130 int id1 = params->
pExt[0][i].
id;
1131 int id2 = params->
pExt[1][j].
id;
1166 params->
fullf[0][i].
x += fIx;
1167 params->
fullf[0][i].
y += fIy;
1168 params->
fullf[0][i].
z += fIz;
1170 for (
int s = 0; s < strideIg; s++) {
1177 t2 = 1.0*clock()/CLOCKS_PER_SEC;
1178 CkPrintf(
"PHASE3.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
void calcGBIS(nonbonded *params, GBISParamStruct *gbisParams)
static void h2(float r, float r2, float ri, float rc, float r0, float rs, float &h)
static void CalcDHPair(float r, float r2, float ri, float rc, float ri0, float rjs, float rj0, float ris, int &dij, int &dji, float &dhij, float &dhji)
void nextlist(plint **list, int *list_size)
static void h1(float r, float r2, float ri, float rc, float r0, float rs, float &h)
Pairlists * gbisStepPairlists[4]
static void CalcHPair(float r, float r2, float ri, float rc, float ri0, float rjs, float rj0, float ris, int &dij, int &dji, float &hij, float &hji)
void newsize(int list_size)
void pairlistFromAll(nonbonded *params, GBISParamStruct *gbisParams, int minIg, int strideIg, int maxI)