14 #include "WorkDistrib.decl.h"
28 #define FLOPS(X) flops += X;
38 int minPartition,
int maxPartition,
int numPartitions,
int numPatches)
39 :
Compute(c), workArrays(_workArrays),
40 minPart(minPartition), maxPart(maxPartition),
41 strideIg(numPartitions), numParts(numPartitions),
42 maxAtomRadius(1.9+1.4)
92 for (
int i=0; i<8; i++) {
169 for (
int pI = 0; pI < 8; pI++) {
173 for (
int pJ = 0; pJ < 8; pJ++) {
177 if ( ( gsa==1 && (jia>iia) != (idx[pJ][0]>idx[pI][0]) ) ||
178 ( gsb==1 && (jib>iib) != (idx[pJ][1]>idx[pI][1]) ) ||
179 ( gsc==1 && (jic>iic) != (idx[pJ][2]>idx[pI][2]) ) ||
194 for (
int i=0; i<8; i++) {
204 for (
int i=0; i<8; i++) {
227 if (
patch[0]->flags.doNonbonded) {
232 for (
int i=0; i<8; i++) {
252 if ( bounds[0][0] < bounds[0][1] ) {
253 if (x < bounds[0][0] || x >= bounds[0][1] )
256 if (x < bounds[0][0] && x >= bounds[0][1] )
261 if ( bounds[1][0] < bounds[1][1] ) {
262 if (y < bounds[1][0] || y >= bounds[1][1] )
265 if (y < bounds[1][0] && y >= bounds[1][1] )
270 if ( bounds[2][0] < bounds[2][1] ) {
271 if (z < bounds[2][0] || z >= bounds[2][1] )
274 if (z < bounds[2][0] && z >= bounds[2][1] )
282 return PI*ri*(2*ri-r-(ri*ri-rj*rj)/r);
294 Real probeRadius = 1.4f;
295 Real cutMargin = 0.f;
299 BigReal dxij, dyij, dzij, r2ij;
300 BigReal dxik, dyik, dzik, r2ik;
301 BigReal dxjk, dyjk, dzjk, r2jk;
312 double t_start = 1.0*clock()/CLOCKS_PER_SEC;
315 lcpoNeighborList.
reset();
317 cut2 = 2*maxAtomRadius+cutMargin; cut2 *= cut2;
320 for (
int pI = 0; pI < 8; pI++) {
325 for (
int s = 0; s <
minPart; s++) {
331 int numAtomsInBounds = 0;
334 for (
int ngi = minIg; ngi <
numAtoms[pI]; ) {
336 if ( isInBounds(ngir.
x, ngir.
y, ngir.
z) &&
lcpoType[pI][ngi] > 0 ) {
337 inAtoms[numAtomsInBounds++] = ngi;
338 ri = probeRadius+lcpoParams[
lcpoType[pI][ngi] ][0];
339 maxAtomRadius = (ri > maxAtomRadius) ? ri : maxAtomRadius;
343 for (
int pJ = 0; pJ < 8; pJ++) {
344 if (numAtoms[pJ] > 0) {
345 maxAtoms += numAtoms[pJ];
349 int numLcpoNeighbors = 0;
352 for (
int pJ = 0; pJ < 8; pJ++) {
354 if (!
valid[pI][pJ])
continue;
357 for (
int ngj = 0; ngj < numAtoms[pJ]; ) {
360 dxij = ngir.
x - ngjr.
x;
361 dyij = ngir.
y - ngjr.
y;
362 dzij = ngir.
z - ngjr.
z;
365 r2ij = dxij*dxij + dyij*dyij + dzij*dzij;
367 if (r2ij < cut2 && r2ij > 0.01) {
370 rj = probeRadius+lcpoParams[ lcpoType[pJ][ngj] ][0];
372 BigReal rirjcutMargin2 = ri+rj+cutMargin;
373 rirjcutMargin2 *= rirjcutMargin2;
374 if (r2ij < rirjcutMargin2 && r2ij > 0.0001 &&
375 lcpoType[pJ][ngj] > 0) {
376 lcpoNeighbors[numLcpoNeighbors].
x = ngjr.
x;
377 lcpoNeighbors[numLcpoNeighbors].
y = ngjr.
y;
378 lcpoNeighbors[numLcpoNeighbors].
z = ngjr.
z;
379 lcpoNeighbors[numLcpoNeighbors].
r = rj;
380 lcpoNeighbors[numLcpoNeighbors].
f =
384 maxAtomRadius = (rj > maxAtomRadius) ? rj : maxAtomRadius;
392 lcpoNeighborList.
newsize(numLcpoNeighbors);
395 for (
int s = 0; s < strideIg; s++) {
400 inAtomsPl.
newsize(numAtomsInBounds);
403 double t_stop = 1.0*clock()/CLOCKS_PER_SEC;
404 CkPrintf(
"LCPO_TIME_P %7.3f Gflops %9d @ %f\n", flops*1e-9/(t_stop-t_start),flops,(t_stop-t_start));
408 double t_start = 1.0*clock()/CLOCKS_PER_SEC;
414 lcpoNeighborList.
reset();
415 cut2 = maxAtomRadius*2; cut2 *= cut2;
428 for (
int pI = 0; pI < 8; pI++) {
433 inAtomsPl.
nextlist( &inAtoms, &numInAtoms );
435 for (
int i = 0; i < numInAtoms; i++) {
436 int iIndex = inAtoms[i];
441 const Real *lcpoParamI = lcpoParams[
lcpoType[pI][iIndex] ];
442 ri = probeRadius+lcpoParamI[0];
445 Real P1 = lcpoParamI[1];
446 Real P2 = lcpoParamI[2];
447 Real P3 = lcpoParamI[3];
448 Real P4 = lcpoParamI[4];
466 BigReal dAijdrijdxiAjkSum = 0.0;
467 BigReal dAijdrijdyiAjkSum = 0.0;
468 BigReal dAijdrijdziAjkSum = 0.0;
474 int numLcpoNeighbors;
475 lcpoNeighborList.
nextlist( &lcpoNeighbors, &numLcpoNeighbors );
477 for (
int j = 0; j < numLcpoNeighbors; j++) {
478 Real xj = lcpoNeighbors[j].
x;
479 Real yj = lcpoNeighbors[j].
y;
480 Real zj = lcpoNeighbors[j].
z;
481 Real rj = lcpoNeighbors[j].
r;
487 r2ij = dxij*dxij + dyij*dyij + dzij*dzij;
489 if (r2ij >= cut2 || r2ij < 0.01) {
continue; }
495 if ( r2ij >= rirj2 ) {
continue; }
508 BigReal dAijdrij =
PI*ri*(rij_1*rij_1*(ri*ri-rj*rj)-1);
509 BigReal dAijdrijdxj = dAijdrij*dxij*rij_1;
510 BigReal dAijdrijdyj = dAijdrij*dyij*rij_1;
511 BigReal dAijdrijdzj = dAijdrij*dzij*rij_1;
522 for (
int k = 0; k < numLcpoNeighbors; k++) {
523 Real xk = lcpoNeighbors[k].
x;
524 Real yk = lcpoNeighbors[k].
y;
525 Real zk = lcpoNeighbors[k].
z;
526 Real rk = lcpoNeighbors[k].
r;
532 r2ik = dxik*dxik + dyik*dyik + dzik*dzik;
534 if (r2ik >= cut2 || r2ik < 0.01) {
continue; }
540 r2jk = dxjk*dxjk + dyjk*dyjk + dzjk*dzjk;
542 if (r2jk >= cut2 || r2jk < 0.01) {
continue; }
548 if ( r2ik >= rirk2 ) {
continue; }
554 if ( r2jk >= rjrk2 ) {
continue; }
570 BigReal dAjkdrjk =
PI*rj*rjk_1*(rjk_1*rjk_1*(rj*rj-rk*rk) - 1.f);
571 BigReal dAjkdrjkdxj = -dAjkdrjk*dxjk;
572 BigReal dAjkdrjkdyj = -dAjkdrjk*dyjk;
573 BigReal dAjkdrjkdzj = -dAjkdrjk*dzjk;
574 lcpoNeighbors[k].
f->
x -= -dAjkdrjkdxj*(P3+P4*Aij)*surfTen;
575 lcpoNeighbors[k].
f->
y -= -dAjkdrjkdyj*(P3+P4*Aij)*surfTen;
576 lcpoNeighbors[k].
f->
z -= -dAjkdrjkdzj*(P3+P4*Aij)*surfTen;
578 dAjkdrjkdxjSum += dAjkdrjkdxj;
579 dAjkdrjkdyjSum += dAjkdrjkdyj;
580 dAjkdrjkdzjSum += dAjkdrjkdzj;
587 AijAjkSum += Aij*AjkjSum;
592 BigReal lastxj = dAijdrijdxj*AjkjSum + Aij*dAjkdrjkdxjSum;
593 BigReal lastyj = dAijdrijdyj*AjkjSum + Aij*dAjkdrjkdyjSum;
594 BigReal lastzj = dAijdrijdzj*AjkjSum + Aij*dAjkdrjkdzjSum;
595 BigReal dAidxj = (P2*dAijdrijdxj + P3*dAjkdrjkdxjSum + P4*lastxj);
596 BigReal dAidyj = (P2*dAijdrijdyj + P3*dAjkdrjkdyjSum + P4*lastyj);
597 BigReal dAidzj = (P2*dAijdrijdzj + P3*dAjkdrjkdzjSum + P4*lastzj);
598 lcpoNeighbors[j].
f->
x -= dAidxj*surfTen;
599 lcpoNeighbors[j].
f->
y -= dAidyj*surfTen;
600 lcpoNeighbors[j].
f->
z -= dAidzj*surfTen;
603 dAijdrijdxiSum -= dAijdrijdxj;
604 dAijdrijdyiSum -= dAijdrijdyj;
605 dAijdrijdziSum -= dAijdrijdzj;
606 dAijdrijdxiAjkSum -= dAijdrijdxj*AjkjSum;
607 dAijdrijdyiAjkSum -= dAijdrijdyj*AjkjSum;
608 dAijdrijdziAjkSum -= dAijdrijdzj*AjkjSum;
615 BigReal dAidxi = (P2*dAijdrijdxiSum + P4*dAijdrijdxiAjkSum);
616 BigReal dAidyi = (P2*dAijdrijdyiSum + P4*dAijdrijdyiAjkSum);
617 BigReal dAidzi = (P2*dAijdrijdziSum + P4*dAijdrijdziAjkSum);
625 BigReal SAi = P1*S1 + P2*AijSum + P3*AjkSum + P4*AijAjkSum;
628 totalSurfaceArea += SAi;
633 double t_stop = 1.0*clock()/CLOCKS_PER_SEC;
634 CkPrintf(
"LCPO_TIME_F %7.3f Gflops %9d @ %f\n", 1e-9*flops/(t_stop-t_start),flops, (t_stop-t_start));
650 const Real ComputeLCPO::lcpoParams[23][5] = {
651 { 0.00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00 },
652 { 1.70, 7.7887e-01, -2.8063e-01, -1.2968e-03, 3.9328e-04 },
653 { 1.70, 5.6482e-01, -1.9608e-01, -1.0219e-03, 2.6580e-04 },
654 { 1.70, 2.3348e-01, -7.2627e-02, -2.0079e-04, 7.9670e-05 },
655 { 1.70, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00 },
656 { 1.70, 5.1245e-01, -1.5966e-01, -1.9781e-04, 1.6392e-04 },
657 { 1.70, 7.0344e-02, -1.9015e-02, -2.2009e-05, 1.6875e-05 },
658 { 1.60, 7.7914e-01, -2.5262e-01, -1.6056e-03, 3.5071e-04 },
659 { 1.60, 4.9392e-01, -1.6038e-01, -1.5512e-04, 1.6453e-04 },
660 { 1.60, 6.8563e-01, -1.8680e-01, -1.3557e-03, 2.3743e-04 },
661 { 1.60, 8.8857e-01, -3.3421e-01, -1.8683e-03, 4.9372e-04 },
662 { 1.65, 7.8602e-02, -2.9198e-01, -6.5370e-04, 3.6247e-04 },
663 { 1.65, 2.2599e-01, -3.6648e-02, -1.2297e-03, 8.0038e-05 },
664 { 1.65, 5.1481e-02, -1.2603e-02, -3.2006e-04, 2.4774e-05 },
665 { 1.65, 7.3511e-01, -2.2116e-01, -8.9148e-04, 2.5230e-04 },
666 { 1.65, 4.1102e-01, -1.2254e-01, -7.5448e-05, 1.1804e-04 },
667 { 1.65, 6.2577e-02, -1.7874e-02, -8.3120e-05, 1.9849e-05 },
668 { 1.90, 7.7220e-01, -2.6393e-01, 1.0629e-03, 2.1790e-04 },
669 { 1.90, 5.4581e-01, -1.9477e-01, -1.2873e-03, 2.9247e-04 },
670 { 1.90, 3.8650e-01, -1.8249e-01, -3.6598e-03, 4.2640e-04 },
671 { 1.90, 3.8730e-02, -8.9339e-03, 8.3582e-06, 3.0381e-06 },
672 { 1.80, 9.8318e-01, -4.0437e-01, 1.1249e-04, 4.9901e-04 },
673 { 1.18, 4.9392e-01, -1.6038e-01, -1.5512e-04, 1.6453e-04 }
void setNumPatches(int n)
#define PROXY_RESULTS_PRIORITY
unsigned int hydrogenGroupSize
Box< Patch, int > * lcpoTypeBox[8]
int gridsize_c(void) const
static PatchMap * Object()
BigReal max_c(int pid) const
BigReal min_a(int pid) const
SimParameters * simParameters
int index_a(int pid) const
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
void startWork(const LDObjHandle &handle)
virtual void initialize()
SubmitReduction * willSubmit(int setID, int size=-1)
static ReductionMgr * Object(void)
Box< Patch, int > * registerLcpoTypePickup(Compute *cid)
Patch * patch(PatchID pid)
BigReal min_b(int pid) const
Box< Patch, Results > * forceBox[8]
virtual void atomUpdate()
void nextlist(plint **list, int *list_size)
int gridsize_a(void) const
virtual void initialize()
void skipWork(const LDObjHandle &handle)
int index_b(int pid) const
void newsize(int list_size)
Box< Patch, CompAtom > * positionBox[8]
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
static LdbCoordinator * Object()
LCPOAtom * newlist(int max_size)
BigReal max_b(int pid) const
int index_c(int pid) const
ComputeLCPO(ComputeID c, PatchID pid[], int t[], ComputeNonbondedWorkArrays *_workArrays, int minPartition, int maxPartition, int numPartitions, int numPatches)
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
BigReal max_a(int pid) const
void endWork(const LDObjHandle &handle)
BigReal calcOverlap(BigReal r, Real ri, Real rj)
void newsize(int list_size)
BigReal min_c(int pid) const
void nextlist(LCPOAtom **list, int *list_size)
plint * newlist(int max_size)
SubmitReduction * reduction
int gridsize_b(void) const
void unregisterLcpoTypePickup(Compute *cid, Box< Patch, int > **const box)
void close(Data **const t)
Box< Patch, CompAtom > * registerPositionPickup(Compute *cid)
#define PATCH_PRIORITY(PID)
CompAtomExt * getCompAtomExtInfo()
Box< Patch, Results > * registerForceDeposit(Compute *cid)