// NOTE(review): this chunk is a garbled extraction — original file line numbers
// are fused into the text and many lines are elided. Comments below describe
// only the statements that are visible; code bytes are left untouched.
//
// Fragments of the ComputePmeCUDA constructor(s) and destructor:
//  - ctor records the patch ID(s) it is responsible for (patches[i].patchID),
//  - dtor (presumably) unregisters the position/avgPosition/force boxes it
//    holds — TODO confirm against the full source,
//  - a Charm++ converse lock is created (CmiCreateLock),
//  - lesOn and pairInteractionOn are rejected via NAMD_bug as unimplemented.
4 #include <cuda_runtime.h> 7 #include <hip/hip_runtime.h> 21 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 29 patches[i].patchID = pids[i];
// Single-patch ctor variant: only patches[0] is initialized.
43 patches[0].patchID = pid;
// Destructor fragments: each box is (presumably) unregistered only if it was
// actually registered — bodies of these ifs are elided in this chunk.
55 if (patches[i].positionBox != NULL) {
58 if (patches[i].avgPositionBox != NULL) {
61 if (patches[i].forceBox != NULL) {
73 lock = CmiCreateLock();
// Guard against unsupported simulation modes at construction time.
77 if (
simParams->lesOn)
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
78 if (
simParams->pairInteractionOn)
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
// Atom-send state starts "not done" for the first step.
80 sendAtomsDone =
false;
// Fragments of ComputePmeCUDA::initialize() (original lines elided in this
// chunk). Visible behavior: sanity-check that no box/patch pointer was already
// set (i.e. initialize() was not called twice), then register position,
// force, and average-position pickup/deposit boxes with the owning Patch and
// finally map each patch to its PME pencil via setupActivePencils().
98 if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
99 || patches[i].forceBox != NULL || patches[i].patch != NULL)
100 NAMD_bug(
"ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
// Elided code above (not visible here) presumably looked the patch up first;
// reaching this bug() means the lookup failed — TODO confirm.
102 NAMD_bug(
"ComputePmeCUDA::initialize() patch not found");
104 patches[i].positionBox = patches[i].patch->registerPositionPickup(
this);
105 patches[i].forceBox = patches[i].patch->registerForceDeposit(
this);
106 patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(
this);
109 setupActivePencils();
// Maps each local patch to its home PME pencil: stores the pencil's (y,z)
// coordinates and the node that owns it (mgr->getNode), then registers the
// patch with the ComputePmeCUDAMgr on that node. Lines 120-126 and 130-132
// of the original file are elided here (presumably the loop header, the
// getHomePencil() call, and construction of the registration msg — confirm
// against full source).
119 void ComputePmeCUDA::setupActivePencils() {
127 patches[i].homePencilY = homey;
128 patches[i].homePencilZ = homez;
129 patches[i].homePencilNode = mgr->
getNode(homey,homez);
133 computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
// Fragment of the "no work this step" path (looks like ComputePmeCUDA::noWork
// — TODO confirm name against full source). When full electrostatics ARE
// requested this step, return 0 (meaning real work follows); otherwise skip
// the position/force boxes for every patch and, for patch 0 only, tell the
// home pencil's manager to skip as well. sendAtomsDone is reset/set around
// the skip — the surrounding control flow (orig lines 151-165) is elided.
142 if (patches[0].patch->flags.doFullElectrostatics)
return 0;
147 patches[i].positionBox->skip();
148 patches[i].forceBox->skip();
150 if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
162 sendAtomsDone =
false;
166 sendAtomsDone =
true;
// ComputePmeCUDA::sendAtoms(): packs each patch's atoms into a PmeAtomMsg in
// fractional (reciprocal-space) coordinates and ships it to the home pencil's
// manager. Large portions of the body are elided in this chunk; comments
// describe visible statements only.
173 void ComputePmeCUDA::sendAtoms() {
175 Lattice& lattice = patches[0].patch->lattice;
// Unpack lattice origin and the three reciprocal vectors into scalars so the
// per-atom loop below can compute fractional coords without member access.
180 double ox = origin.
x;
181 double oy = origin.
y;
182 double oz = origin.
z;
183 double r1x = recip1.
x;
184 double r1y = recip1.
y;
185 double r1z = recip1.
z;
186 double r2x = recip2.
x;
187 double r2y = recip2.
y;
188 double r2z = recip2.
z;
189 double r3x = recip3.
x;
190 double r3y = recip3.
y;
191 double r3z = recip3.
z;
// A pending force message means the previous step's forces were never
// consumed — fatal.
195 if (patches[i].pmeForceMsg != NULL)
196 NAMD_bug(
"ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
201 bool doMolly = patches[i].patch->flags.doMolly;
202 bool doEnergy = patches[i].patch->flags.doEnergy;
203 bool doVirial = patches[i].patch->flags.doVirial;
// Open the position box; when MOLLY is active the plain positions are closed
// and the averaged positions are opened instead (branching elided).
211 CompAtom *x = patches[i].positionBox->open();
213 patches[i].positionBox->close(&x);
214 x = patches[i].avgPositionBox->open();
217 int numAtoms = patches[i].patch->getNumAtoms();
// Self-energy is recomputed on the first step, or whenever it is still zero
// (e.g. after a restart) — and dispatched to the FEP/TI/plain variant.
223 const bool isFirstStep = (patches[0].patch->flags.step ==
simParams->firstTimestep) || (selfEnergy == 0);
226 calcSelfEnergyFEP(numAtoms, x, isFirstStep);
229 calcSelfEnergyTI(numAtoms, x, isFirstStep);
231 calcSelfEnergy(numAtoms, x, isFirstStep);
// Alchemical setups allocate extra per-grid charge-factor arrays inside the
// message (sizes multiplied by 0/1 flags so non-alch runs allocate nothing).
242 const int alchGrid =
simParams->alchOn ? 1 : 0;
243 const int alchDecoupleGrid =
simParams->alchDecouple ? 1: 0;
244 const int alchSoftCoreOrTI = (
simParams->alchElecLambdaStart > 0 ||
simParams->alchThermIntOn) ? 1 : 0;
245 msg =
new (numAtoms, alchGrid * numAtoms, alchGrid * numAtoms,
246 alchDecoupleGrid * numAtoms, alchDecoupleGrid * numAtoms,
// Destination pencil coordinates travel in the message header.
257 msg->
i = patches[i].homePencilY;
258 msg->
j = patches[i].homePencilZ;
// Per-atom packing loop. factor1..factor5 are per-grid charge scale factors
// (defaulting to 1); the switch on partition that sets them is elided except
// for its default/bug branch.
273 for (
int j=0;j < numAtoms;j++) {
275 float factor1 = 1.0f;
276 float factor2 = 1.0f;
277 float factor3 = 1.0f;
278 float factor4 = 1.0f;
279 float factor5 = 1.0f;
// Project the (origin-shifted, elided) position onto the reciprocal vectors
// to get fractional coordinates ...
286 double wx = px*r1x + py*r1y + pz*r1z;
287 double wy = px*r2x + py*r2y + pz*r2z;
288 double wz = px*r3x + py*r3y + pz*r3z;
// ... then wrap each into [0,1) centered form.
292 wx = (wx - (floor(wx + 0.5) - 0.5));
293 wy = (wy - (floor(wy + 0.5) - 0.5));
294 wz = (wz - (floor(wz + 0.5) - 0.5));
304 if (atom.x >= 1.0f) atom.x -= 1.0f;
305 if (atom.y >= 1.0f) atom.y -= 1.0f;
306 if (atom.z >= 1.0f) atom.z -= 1.0f;
// Charge is pre-multiplied by sqrt(Coulomb constant) so the FFT side needs
// no further scaling — presumably; confirm against kernel code.
307 atom.q = (float)(q*coulomb_sqrt);
348 default:
NAMD_bug(
"Invalid partition number");
break;
351 atomChargeFactors1[j] = factor1;
352 atomChargeFactors2[j] = factor2;
354 atomChargeFactors3[j] = factor3;
355 atomChargeFactors4[j] = factor4;
358 atomChargeFactors5[j] = factor5;
// Alternate (non-wrapped) packing path — elided branch; positions copied as
// floats, charge copied raw.
365 atom.x = (float) x[j].position.x;
367 atom.z = (float) x[j].position.z;
368 atom.q = x[j].charge;
// Debug dump of the packed xyzq array for one patch when NTESTPID is set.
371 #if defined(NTESTPID) 372 if (NTESTPID == patches[i].patch->getPatchID()) {
375 sprintf(fname,
"pme_xyzq_soa_pid%d_step%d.bin", NTESTPID,
376 patches[i].patch->flags.step);
377 sprintf(remark,
"SOA PME xyzq, patch %d, step %d", NTESTPID,
378 patches[i].patch->flags.step);
379 TestArray_write<float>(fname, remark, (
float *) atoms, 4*numAtoms);
// Deliver the message — the same-node fast path (elided body) vs. the proxy
// send to the remote home pencil node.
390 if (patches[i].homePencilNode == CkMyNode()) {
393 computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
// Close whichever box was left open (avg positions under MOLLY, otherwise
// plain positions).
397 patches[i].avgPositionBox->close(&x);
399 patches[i].positionBox->close(&x);
// ComputePmeCUDA::calcSelfEnergy(): plain (non-alchemical) Ewald self-energy.
// With constant atomic charges the sum only needs to be computed on the first
// step; the per-atom accumulation body (orig lines 433+) is elided here.
402 #ifdef NODEGROUP_FORCE_REGISTER 427 void ComputePmeCUDA::calcSelfEnergy(
int numAtoms,
CompAtom *x,
bool isFirstStep) {
// Hard-coded true: charges are assumed constant, so recomputation happens
// only when isFirstStep — TODO confirm this assumption holds for all modes.
429 bool constantAtomicCharges =
true;
430 if (isFirstStep || !constantAtomicCharges) {
432 for (
int i=0;i < numAtoms;i++) {
// ComputePmeCUDA::calcSelfEnergyFEP(): Ewald self-energy for FEP alchemical
// runs. Caches the four electrostatic lambda scale factors in thread_local
// statics and recomputes the q^2 sums only when the step is the first one or
// any lambda changed. Portions of the body (loop interior, closing braces)
// are elided in this chunk.
441 void ComputePmeCUDA::calcSelfEnergyFEP(
int numAtoms,
CompAtom *atoms,
bool isFirstStep) {
// Guard: this path must only run in FEP builds/configs.
444 NAMD_bug(
"Called calcSelfEnergyFEP() in non-FEP code!");
447 bool constantAtomicCharges =
true;
448 BigReal scaleLambda1, scaleLambda2;
// Current-step lambdas for the two FEP windows.
449 const BigReal alchLambda1 =
simParams->getCurrentLambda(patches[0].patch->flags.step);
450 const BigReal alchLambda2 =
simParams->getCurrentLambda2(patches[0].patch->flags.step);
// thread_local caches of the electrostatic scale factors for the "up"
// (appearing) and "down" (disappearing) partitions at both lambdas.
451 static thread_local
BigReal lambda1Up =
simParams->getElecLambda(alchLambda1);
452 static thread_local
BigReal lambda2Up =
simParams->getElecLambda(alchLambda2);
453 static thread_local
BigReal lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
454 static thread_local
BigReal lambda2Down =
simParams->getElecLambda(1.0 - alchLambda2);
// Recompute when anything that feeds the sums changed since the cache was
// filled (first step, non-constant charges, or any lambda moved).
455 if (isFirstStep || !constantAtomicCharges ||
456 lambda1Up !=
simParams->getElecLambda(alchLambda1) ||
457 lambda2Up !=
simParams->getElecLambda(alchLambda2) ||
458 lambda1Down !=
simParams->getElecLambda(1.0 - alchLambda1) ||
459 lambda2Down !=
simParams->getElecLambda(1.0 - alchLambda2))
461 lambda1Up =
simParams->getElecLambda(alchLambda1);
462 lambda2Up =
simParams->getElecLambda(alchLambda2);
463 lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
464 lambda2Down =
simParams->getElecLambda(1.0 - alchLambda2);
// Per-atom loop: pick the up/down scale pair by the atom's partition
// (switch cases partially elided), accumulate lambda-scaled q^2 into the
// self-energy for window 1 and window 2.
467 for (
int i = 0; i < numAtoms; ++i) {
475 scaleLambda1 = lambda1Up;
476 scaleLambda2 = lambda2Up;
480 scaleLambda1 = lambda1Down;
481 scaleLambda2 = lambda2Down;
484 default:
NAMD_bug(
"Invalid partition number");
486 selfEnergy += scaleLambda1 * (atoms[i].
charge * atoms[i].
charge);
487 selfEnergyFEP += scaleLambda2 * (atoms[i].
charge * atoms[i].
charge);
// Complementary (1-lambda) contribution — branch context elided; presumably
// the alchDecouple path. TODO confirm against full source.
489 selfEnergy += (1.0 - scaleLambda1) * (atoms[i].charge * atoms[i].charge);
490 selfEnergyFEP += (1.0 - scaleLambda2) * (atoms[i].charge * atoms[i].charge);
// ComputePmeCUDA::calcSelfEnergyTI(): Ewald self-energy plus its lambda
// derivatives (TI1/TI2) for thermodynamic-integration runs. Mirrors the FEP
// variant: thread_local lambda caches, recompute-on-change guard, per-atom
// q^2 accumulation. Parts of the loop body are elided in this chunk.
500 void ComputePmeCUDA::calcSelfEnergyTI(
int numAtoms,
CompAtom *atoms,
bool isFirstStep) {
// Guard: TI-only path. (Message says "non-FEP" — shared wording with the
// FEP variant; presumably intentional, but possibly a copy-paste slip.
// Cannot verify from this fragment.)
503 NAMD_bug(
"Called calcSelfEnergyTI() in non-FEP code!");
506 bool constantAtomicCharges =
true;
507 const BigReal alchLambda1 =
simParams->getCurrentLambda(patches[0].patch->flags.step);
508 static thread_local
BigReal lambda1Up =
simParams->getElecLambda(alchLambda1);
509 static thread_local
BigReal lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
// Recompute only when lambda moved, charges changed, or it is step one.
510 if (isFirstStep || !constantAtomicCharges ||
511 lambda1Up !=
simParams->getElecLambda(alchLambda1) ||
512 lambda1Down !=
simParams->getElecLambda(1.0 - alchLambda1))
514 lambda1Up =
simParams->getElecLambda(alchLambda1);
515 lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
// factor_ti_1/2 are the dU/dlambda weights per partition (assignments to
// them are elided along with the non-default switch cases).
520 double factor_ti_1 = 0.0;
521 double factor_ti_2 = 0.0;
522 for (
int i = 0; i < numAtoms; ++i) {
531 elecLambda1 = lambda1Up;
537 elecLambda1 = lambda1Down;
542 default:
NAMD_bug(
"Invalid partition number");
544 selfEnergy += elecLambda1 * (atoms[i].
charge * atoms[i].
charge);
545 selfEnergyTI1 += factor_ti_1 * (atoms[i].
charge * atoms[i].
charge);
546 selfEnergyTI2 += factor_ti_2 * (atoms[i].
charge * atoms[i].
charge);
// Complementary branch (context elided): remaining (1-lambda) energy share,
// with the TI derivative terms entering with opposite sign.
548 selfEnergy += (1.0 - elecLambda1) * (atoms[i].charge * atoms[i].charge);
549 selfEnergyTI1 -= factor_ti_1 * (atoms[i].
charge * atoms[i].
charge);
550 selfEnergyTI2 -= factor_ti_2 * (atoms[i].
charge * atoms[i].
charge);
// ComputePmeCUDA::recvForces(): consumes the PmeForceMsg stored for each
// patch, converts the fractional-space PME forces back to Cartesian using the
// reciprocal lattice vectors, scales alchemical grid contributions by the
// electrostatic lambdas, and deposits the result into the patch's force box.
// Portions of the body are elided in this chunk.
559 void ComputePmeCUDA::recvForces() {
561 Lattice& lattice = patches[0].patch->lattice;
// Reciprocal vector components, unpacked for the inner loop.
566 double r1x = recip1.
x;
567 double r1y = recip1.
y;
568 double r1z = recip1.
z;
569 double r2x = recip2.
x;
570 double r2y = recip2.
y;
571 double r2z = recip2.
z;
572 double r3x = recip3.
x;
573 double r3y = recip3.
y;
574 double r3z = recip3.
z;
// Alchemical blend weights: lambda1Up/Down scale grids 1/2, and
// lambda3Up/Down (their complements) scale the decoupled grids 3/4.
578 double alchLambda1, lambda1Up, lambda1Down;
579 double lambda3Up, lambda3Down;
581 alchLambda1 =
simParams->getCurrentLambda(patches[0].patch->flags.step);
582 lambda1Up =
simParams->getElecLambda(alchLambda1);
583 lambda1Down =
simParams->getElecLambda(1 - alchLambda1);
585 lambda3Up = 1 - lambda1Up;
586 lambda3Down = 1 - lambda1Down;
// A missing message means sendAtoms/recvForces got out of step — fatal.
592 if (patches[i].pmeForceMsg == NULL)
593 NAMD_bug(
"ComputePmeCUDA::recvForces, no message in pmeForceMsg");
// Up to five force arrays: one per PME grid used by the alchemical setup.
595 const CudaForce* force = patches[i].pmeForceMsg->force;
596 const CudaForce* force2 = patches[i].pmeForceMsg->force2;
597 const CudaForce* force3 = patches[i].pmeForceMsg->force3;
598 const CudaForce* force4 = patches[i].pmeForceMsg->force4;
599 const CudaForce* force5 = patches[i].pmeForceMsg->force5;
600 Results *r = patches[i].forceBox->open();
601 int numAtoms = patches[i].pmeForceMsg->numAtoms;
// Debug dump (TESTPID): scaled PME forces and the lattice, pre-conversion.
604 if (patches[i].patchID == TESTPID) {
605 fprintf(stderr,
"Storing scaled PME CudaForce array for patch %d\n",
607 TestArray_write<float>(
"scaled_pme_force_good.bin",
608 "scaled PME CudaForce good",
609 (
float *) force, 3*numAtoms);
610 TestArray_write<double>(
"lattice_good.bin",
"Lattice good",
611 (
double *) &lattice, 3*7);
614 #if defined(NTESTPID) 615 if (NTESTPID == patches[i].patch->getPatchID()) {
618 sprintf(fname,
"pme_fxyz_soa_pid%d_step%d.bin", NTESTPID,
619 patches[i].patch->flags.step);
620 sprintf(remark,
"SOA PME fxyz, patch %d, step %d", NTESTPID,
621 patches[i].patch->flags.step);
622 TestArray_write<float>(fname, remark, (
float *) force, 3*numAtoms);
// Forces are only applied when no atoms strayed out of their pencil and the
// run is not communication-only.
625 if (!patches[i].pmeForceMsg->numStrayAtoms && !
simParams->commOnly) {
626 for(
int j=0;j < numAtoms;j++) {
// Blend grid-1 (appearing, lambda1Up) and grid-2 (disappearing,
// lambda1Down) contributions into fractional force components f1..f3.
631 f1 += force2[j].
x * lambda1Down + force[j].
x * lambda1Up;
632 f2 += force2[j].
y * lambda1Down + force[j].
y * lambda1Up;
633 f3 += force2[j].
z * lambda1Down + force[j].
z * lambda1Up;
// Decoupled grids 3/4 enter with the complementary weights.
635 f1 += force3[j].
x * lambda3Up + force4[j].
x * lambda3Down;
636 f2 += force3[j].
y * lambda3Up + force4[j].
y * lambda3Down;
637 f3 += force3[j].
z * lambda3Up + force4[j].
z * lambda3Down;
// Grid 5 correction, weighted by -(lambda1Up + lambda1Down - 1).
640 f1 += force5[j].
x * (lambda1Up + lambda1Down - 1.0) * (-1.0);
641 f2 += force5[j].
y * (lambda1Up + lambda1Down - 1.0) * (-1.0);
642 f3 += force5[j].
z * (lambda1Up + lambda1Down - 1.0) * (-1.0);
// Fractional -> Cartesian conversion via the reciprocal vectors, added
// into the patch's (slow) force array.
649 f[j].x += f1*r1x + f2*r2x + f3*r3x;
650 f[j].y += f1*r1y + f2*r2y + f3*r3y;
651 f[j].z += f1*r1z + f2*r2z + f3*r3z;
// Debug dump (TESTPID): final Cartesian slow forces.
656 if (patches[i].patchID == TESTPID) {
657 fprintf(stderr,
"Storing slow force array for patch %d\n",
659 TestArray_write<double>(
"pme_force_good.bin",
"PME force good",
660 (
double *) f, 3*numAtoms);
// Deposit forces and release the consumed message.
664 patches[i].forceBox->close(&r);
665 delete patches[i].pmeForceMsg;
666 patches[i].pmeForceMsg = NULL;
// Fragment of ComputePmeCUDA::storePmeForceMsg(): stashes an incoming force
// message on its patch slot, guarding against a slot that is already
// occupied. The patchCounter decrement and the matching-patch search (orig
// lines 668-681) are elided; patchCounter == 0 presumably signals that all
// patches have received their message — TODO confirm.
677 if (patchCounter == 0) {
682 if (patches[i].pmeForceMsg != NULL)
683 NAMD_bug(
"ComputePmeCUDA::storePmeForceMsg, already contains message");
684 patches[i].pmeForceMsg = msg;
void setNumPatches(int n)
#define NAMD_EVENT_STOP(eon, id)
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
static BigReal dielectric_1
static void partition(int *order, const FullAtom *atoms, int begin, int end)
static PatchMap * Object()
void getHomePencil(PatchID patchID, int &homey, int &homez)
SimParameters * simParameters
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
void recvAtoms(PmeAtomMsg *msg)
SubmitReduction * willSubmit(int setID, int size=-1)
static ReductionMgr * Object(void)
NodeReduction * reduction
Patch * patch(PatchID pid)
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
bool storePmeForceMsg(PmeForceMsg *msg)
#define NAMD_EVENT_START(eon, id)
void NAMD_bug(const char *err_msg)
NAMD_HOST_DEVICE Vector a_r() const
NAMD_HOST_DEVICE Vector b_r() const
NAMD_HOST_DEVICE Vector c_r() const
virtual ~ComputePmeCUDA()
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
int getNode(int i, int j)
#define SET_PRIORITY(MSG, SEQ, PRIO)
NAMD_HOST_DEVICE Vector origin() const