// NOTE(review): this whole chunk is extraction-garbled. The integers fused into
// the text (4, 7, 21, 29, ...) are the ORIGINAL file's line numbers, and long
// runs of the original file are missing between fragments. Code left
// byte-identical; comments only.
//
// Fragments below: includes / #if guard, then pieces of the
// ComputePmeCUDA constructors (PatchIDList and single-PatchID variants),
// the destructor's box unregistration, and initialize()'s box registration.
4 #include <cuda_runtime.h> 7 #include <hip/hip_runtime.h> 21 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 29 patches[i].patchID = pids[i];
// Single-patch constructor variant: one entry, pid from the argument.
43 patches[0].patchID = pid;
// Destructor fragments: unregister each box only if it was registered
// (non-NULL). The unregister calls themselves are in missing lines.
55 if (patches[i].positionBox != NULL) {
58 if (patches[i].avgPositionBox != NULL) {
61 if (patches[i].forceBox != NULL) {
// Charm++ lock guarding shared state (presumably msg storage -- TODO confirm).
73 lock = CmiCreateLock();
// Unsupported simulation modes abort early with NAMD_bug.
77 if (
simParams->lesOn)
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
78 if (
simParams->pairInteractionOn)
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
80 sendAtomsDone =
false;
// NODEGROUP build: fetch the PatchData nodegroup's local branch.
82 #ifdef NODEGROUP_FORCE_REGISTER 83 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
84 PatchData *patchData = cpdata.ckLocalBranch();
// Resolve the local branch of the ComputePmeCUDAMgr BOC group; a NULL
// branch (check on missing line 92) is fatal.
90 computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
91 mgr = computePmeCUDAMgrProxy.ckLocalBranch();
93 NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
// initialize(): boxes must still be NULL (i.e. not initialized twice).
97 if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
98 || patches[i].forceBox != NULL || patches[i].patch != NULL)
NAMD_bug(
"ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
101 NAMD_bug(
"ComputePmeCUDA::initialize() patch not found");
// Register pickup/deposit boxes with the owning Patch for this compute.
103 patches[i].positionBox = patches[i].patch->registerPositionPickup(
this);
104 patches[i].forceBox = patches[i].patch->registerForceDeposit(
this);
105 patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(
this);
108 setupActivePencils();
// setupActivePencils(): map each patch to its home PME pencil (y,z) and the
// node owning that pencil, then register the patch with the manager on that
// node. (homey/homez come from a getHomePencil call on a missing line --
// see the scraped declaration getHomePencil(PatchID, int&, int&).)
118 void ComputePmeCUDA::setupActivePencils() {
126 patches[i].homePencilY = homey;
127 patches[i].homePencilZ = homez;
128 patches[i].homePencilNode = mgr->
getNode(homey,homez);
// Remote registration message; msg construction is on missing lines.
132 computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
// noWork() fragments: when full electrostatics runs this step, return 0
// (= there IS work). Otherwise skip all boxes, and patch 0 also tells the
// manager's pencil to skip this step.
141 if (patches[0].patch->flags.doFullElectrostatics)
return 0;
146 patches[i].positionBox->skip();
147 patches[i].forceBox->skip();
149 if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
// sendAtomsDone toggles: cleared (original line 161) and set (line 165) in
// two separate methods whose signatures fall on missing lines --
// presumably doWork()/sendAtoms completion handshaking; TODO confirm.
161 sendAtomsDone =
false;
165 sendAtomsDone =
true;
// sendAtoms(): for each patch, open the position (or, under Molly, the
// averaged-position) box, scale coordinates into fractional PME grid space
// via the reciprocal lattice vectors, compute the Ewald self-energy on the
// first step, pack atoms (+ per-grid alchemical charge factors) into a
// PmeAtomMsg, and deliver it to the home pencil's node.
// Much of the loop body (px/py/pz setup, the alchemical partition switch,
// msg field fills) falls on missing original lines.
172 void ComputePmeCUDA::sendAtoms() {
174 Lattice& lattice = patches[0].patch->lattice;
// Unpack lattice origin and the three reciprocal vectors into scalars
// (each assignment is split mid-token by the extraction).
179 double ox = origin.
x;
180 double oy = origin.
y;
181 double oz = origin.
z;
182 double r1x = recip1.
x;
183 double r1y = recip1.
y;
184 double r1z = recip1.
z;
185 double r2x = recip2.
x;
186 double r2y = recip2.
y;
187 double r2z = recip2.
z;
188 double r3x = recip3.
x;
189 double r3y = recip3.
y;
190 double r3z = recip3.
z;
// Precondition: the previous step's force message must have been consumed.
194 if (patches[i].pmeForceMsg != NULL)
195 NAMD_bug(
"ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
200 bool doMolly = patches[i].patch->flags.doMolly;
201 bool doEnergy = patches[i].patch->flags.doEnergy;
202 bool doVirial = patches[i].patch->flags.doVirial;
// Under Molly, swap the plain positions for averaged positions
// (plain box opened then closed on missing-line branch logic -- TODO confirm).
210 CompAtom *x = patches[i].positionBox->open();
212 patches[i].positionBox->close(&x);
213 x = patches[i].avgPositionBox->open();
216 int numAtoms = patches[i].patch->getNumAtoms();
// Self-energy is recomputed on the first timestep or if never computed
// (selfEnergy == 0 sentinel); FEP/TI variants when alchemy is on.
222 const bool isFirstStep = (patches[0].patch->flags.step ==
simParams->firstTimestep) || (selfEnergy == 0);
225 calcSelfEnergyFEP(numAtoms, x, isFirstStep);
228 calcSelfEnergyTI(numAtoms, x, isFirstStep);
230 calcSelfEnergy(numAtoms, x, isFirstStep);
// Varsized-message slot counts: extra per-atom charge-factor arrays exist
// only for the alchemical grids that are active this run.
241 const int alchGrid =
simParams->alchOn ? 1 : 0;
242 const int alchDecoupleGrid =
simParams->alchDecouple ? 1: 0;
243 const int alchSoftCoreOrTI = (
simParams->alchElecLambdaStart > 0 ||
simParams->alchThermIntOn) ? 1 : 0;
244 msg =
new (numAtoms, alchGrid * numAtoms, alchGrid * numAtoms,
245 alchDecoupleGrid * numAtoms, alchDecoupleGrid * numAtoms,
// Destination pencil coordinates travel in the message.
256 msg->
i = patches[i].homePencilY;
257 msg->
j = patches[i].homePencilZ;
272 for (
int j=0;j < numAtoms;j++) {
// factor1..5: per-PME-grid charge scale factors for this atom's
// alchemical partition; default 1.0f, overwritten by the (missing)
// switch on the atom's partition number.
274 float factor1 = 1.0f;
275 float factor2 = 1.0f;
276 float factor3 = 1.0f;
277 float factor4 = 1.0f;
278 float factor5 = 1.0f;
// Fractional coordinates: w = r . recip_k, then wrapped into [-0.5, 0.5)
// shifted by +0.5 so the final atom coords land in [0, 1).
285 double wx = px*r1x + py*r1y + pz*r1z;
286 double wy = px*r2x + py*r2y + pz*r2z;
287 double wz = px*r3x + py*r3y + pz*r3z;
291 wx = (wx - (floor(wx + 0.5) - 0.5));
292 wy = (wy - (floor(wy + 0.5) - 0.5));
293 wz = (wz - (floor(wz + 0.5) - 0.5));
// Guard against float rounding pushing a coordinate to exactly 1.0f.
303 if (atom.x >= 1.0f) atom.x -= 1.0f;
304 if (atom.y >= 1.0f) atom.y -= 1.0f;
305 if (atom.z >= 1.0f) atom.z -= 1.0f;
306 atom.q = (float)(q*coulomb_sqrt);
// End of the partition-number switch (cases on missing lines 307-346).
347 default:
NAMD_bug(
"Invalid partition number");
break;
// Store the per-grid factors into the message's varsized arrays
// (arrays 3-5 are conditional on the grids allocated above).
350 atomChargeFactors1[j] = factor1;
351 atomChargeFactors2[j] = factor2;
353 atomChargeFactors3[j] = factor3;
354 atomChargeFactors4[j] = factor4;
357 atomChargeFactors5[j] = factor5;
// Non-alchemical (?) packing path: raw position + charge. TODO confirm
// which branch this belongs to; the branch header is on a missing line.
364 atom.x = (float) x[j].position.x;
366 atom.z = (float) x[j].position.z;
367 atom.q = x[j].charge;
// Debug-only dump of the packed xyzq array for patch NTESTPID.
370 #if defined(NTESTPID) 371 if (NTESTPID == patches[i].patch->getPatchID()) {
374 sprintf(fname,
"pme_xyzq_soa_pid%d_step%d.bin", NTESTPID,
375 patches[i].patch->flags.step);
376 sprintf(remark,
"SOA PME xyzq, patch %d, step %d", NTESTPID,
377 patches[i].patch->flags.step);
378 TestArray_write<float>(fname, remark, (
float *) atoms, 4*numAtoms);
// Delivery: local-node shortcut branch vs. remote entry-method send
// (the local-branch body is on missing lines 390-391).
389 if (patches[i].homePencilNode == CkMyNode()) {
392 computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
// Close whichever box was opened (Molly vs. plain path).
396 patches[i].avgPositionBox->close(&x);
398 patches[i].positionBox->close(&x);
// calcSelfEnergy(): accumulate the Ewald self-energy (sum of q_i^2 terms;
// exact formula on missing lines) over the patch's atoms. Recomputed only
// on the first step, since constantAtomicCharges is hard-wired true here --
// the flag exists so a variable-charge mode could force per-step recompute.
415 void ComputePmeCUDA::calcSelfEnergy(
int numAtoms,
CompAtom *x,
bool isFirstStep) {
417 bool constantAtomicCharges =
true;
418 if (isFirstStep || !constantAtomicCharges) {
420 for (
int i=0;i < numAtoms;i++) {
// calcSelfEnergyFEP(): self-energy for FEP alchemical runs. Atoms in the
// "up" (appearing) vs "down" (disappearing) partitions are scaled by the
// electrostatic lambda at alchLambda1 / alchLambda2; both selfEnergy and
// selfEnergyFEP accumulators are updated.
429 void ComputePmeCUDA::calcSelfEnergyFEP(
int numAtoms,
CompAtom *atoms,
bool isFirstStep) {
// Guard (condition on missing line 431) aborts if FEP is not enabled.
432 NAMD_bug(
"Called calcSelfEnergyFEP() in non-FEP code!");
435 bool constantAtomicCharges =
true;
436 BigReal scaleLambda1, scaleLambda2;
437 const BigReal alchLambda1 =
simParams->getCurrentLambda(patches[0].patch->flags.step);
438 const BigReal alchLambda2 =
simParams->getCurrentLambda2(patches[0].patch->flags.step);
// thread_local caches of the last-used elec-lambda values: the energy is
// only re-accumulated when any lambda changed (or on the first step).
// NOTE(review): thread_local + static initializer means first-call
// initialization per thread -- verify this interacts correctly with
// Charm++ threading in the full file.
439 static thread_local
BigReal lambda1Up =
simParams->getElecLambda(alchLambda1);
440 static thread_local
BigReal lambda2Up =
simParams->getElecLambda(alchLambda2);
441 static thread_local
BigReal lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
442 static thread_local
BigReal lambda2Down =
simParams->getElecLambda(1.0 - alchLambda2);
443 if (isFirstStep || !constantAtomicCharges ||
444 lambda1Up !=
simParams->getElecLambda(alchLambda1) ||
445 lambda2Up !=
simParams->getElecLambda(alchLambda2) ||
446 lambda1Down !=
simParams->getElecLambda(1.0 - alchLambda1) ||
447 lambda2Down !=
simParams->getElecLambda(1.0 - alchLambda2))
// Refresh the cached values before re-accumulating.
449 lambda1Up =
simParams->getElecLambda(alchLambda1);
450 lambda2Up =
simParams->getElecLambda(alchLambda2);
451 lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
452 lambda2Down =
simParams->getElecLambda(1.0 - alchLambda2);
455 for (
int i = 0; i < numAtoms; ++i) {
// Partition switch (cases partly on missing lines): up vs down scaling.
463 scaleLambda1 = lambda1Up;
464 scaleLambda2 = lambda2Up;
468 scaleLambda1 = lambda1Down;
469 scaleLambda2 = lambda2Down;
472 default:
NAMD_bug(
"Invalid partition number");
// Scaled q^2 contributions for the two lambda windows.
474 selfEnergy += scaleLambda1 * (atoms[i].
charge * atoms[i].
charge);
475 selfEnergyFEP += scaleLambda2 * (atoms[i].
charge * atoms[i].
charge);
// Complementary-branch contribution (enclosing condition on missing
// line 476 -- presumably non-alchemical atoms; TODO confirm).
477 selfEnergy += (1.0 - scaleLambda1) * (atoms[i].charge * atoms[i].charge);
478 selfEnergyFEP += (1.0 - scaleLambda2) * (atoms[i].charge * atoms[i].charge);
// calcSelfEnergyTI(): self-energy for thermodynamic-integration runs,
// accumulating selfEnergy plus the TI derivative terms selfEnergyTI1/TI2.
// Structure parallels calcSelfEnergyFEP above (cached thread_local lambdas,
// recompute only when lambda or step conditions change).
488 void ComputePmeCUDA::calcSelfEnergyTI(
int numAtoms,
CompAtom *atoms,
bool isFirstStep) {
// NOTE(review): this guard's message says "non-FEP code" inside the TI
// routine -- likely copy-pasted from calcSelfEnergyFEP; string left
// untouched here, but flagging for a fix ("non-TI") upstream.
491 NAMD_bug(
"Called calcSelfEnergyTI() in non-FEP code!");
494 bool constantAtomicCharges =
true;
495 const BigReal alchLambda1 =
simParams->getCurrentLambda(patches[0].patch->flags.step);
// Per-thread caches of the last-used electrostatic lambdas (see FEP
// variant for the same caching idiom).
496 static thread_local
BigReal lambda1Up =
simParams->getElecLambda(alchLambda1);
497 static thread_local
BigReal lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
498 if (isFirstStep || !constantAtomicCharges ||
499 lambda1Up !=
simParams->getElecLambda(alchLambda1) ||
500 lambda1Down !=
simParams->getElecLambda(1.0 - alchLambda1))
502 lambda1Up =
simParams->getElecLambda(alchLambda1);
503 lambda1Down =
simParams->getElecLambda(1.0 - alchLambda1);
// factor_ti_1/2: dE/dlambda weights set inside the (partly missing)
// partition switch below.
508 double factor_ti_1 = 0.0;
509 double factor_ti_2 = 0.0;
510 for (
int i = 0; i < numAtoms; ++i) {
519 elecLambda1 = lambda1Up;
525 elecLambda1 = lambda1Down;
530 default:
NAMD_bug(
"Invalid partition number");
// Alchemical-atom contributions: energy plus TI derivative terms.
532 selfEnergy += elecLambda1 * (atoms[i].
charge * atoms[i].
charge);
533 selfEnergyTI1 += factor_ti_1 * (atoms[i].
charge * atoms[i].
charge);
534 selfEnergyTI2 += factor_ti_2 * (atoms[i].
charge * atoms[i].
charge);
// Complementary branch (condition on missing line 535): derivative terms
// enter with opposite sign.
536 selfEnergy += (1.0 - elecLambda1) * (atoms[i].charge * atoms[i].charge);
537 selfEnergyTI1 -= factor_ti_1 * (atoms[i].
charge * atoms[i].
charge);
538 selfEnergyTI2 -= factor_ti_2 * (atoms[i].
charge * atoms[i].
charge);
// recvForces(): consume each patch's PmeForceMsg, combine the up-to-five
// per-grid CudaForce arrays with the appropriate alchemical lambda weights,
// transform the fractional-space forces back to Cartesian via the
// reciprocal vectors, and deposit into the patch's force box.
547 void ComputePmeCUDA::recvForces() {
549 Lattice& lattice = patches[0].patch->lattice;
// Reciprocal vectors unpacked to scalars (split mid-token by extraction).
554 double r1x = recip1.
x;
555 double r1y = recip1.
y;
556 double r1z = recip1.
z;
557 double r2x = recip2.
x;
558 double r2y = recip2.
y;
559 double r2z = recip2.
z;
560 double r3x = recip3.
x;
561 double r3y = recip3.
y;
562 double r3z = recip3.
z;
// Alchemical weights: lambda1Up/Down for grids 1/2, and their complements
// lambda3Up/Down for the decoupled grids 3/4.
566 double alchLambda1, lambda1Up, lambda1Down;
567 double lambda3Up, lambda3Down;
569 alchLambda1 =
simParams->getCurrentLambda(patches[0].patch->flags.step);
570 lambda1Up =
simParams->getElecLambda(alchLambda1);
571 lambda1Down =
simParams->getElecLambda(1 - alchLambda1);
573 lambda3Up = 1 - lambda1Up;
574 lambda3Down = 1 - lambda1Down;
// Precondition: sendAtoms' reply must have arrived for this patch.
580 if (patches[i].pmeForceMsg == NULL)
581 NAMD_bug(
"ComputePmeCUDA::recvForces, no message in pmeForceMsg");
// force..force5: one array per PME grid (pointers may presumably be
// unused when the corresponding grid is inactive -- TODO confirm).
583 const CudaForce* force = patches[i].pmeForceMsg->force;
584 const CudaForce* force2 = patches[i].pmeForceMsg->force2;
585 const CudaForce* force3 = patches[i].pmeForceMsg->force3;
586 const CudaForce* force4 = patches[i].pmeForceMsg->force4;
587 const CudaForce* force5 = patches[i].pmeForceMsg->force5;
588 Results *r = patches[i].forceBox->open();
589 int numAtoms = patches[i].pmeForceMsg->numAtoms;
// Debug dump (TESTPID): scaled PME forces and the lattice.
592 if (patches[i].patchID == TESTPID) {
593 fprintf(stderr,
"Storing scaled PME CudaForce array for patch %d\n",
595 TestArray_write<float>(
"scaled_pme_force_good.bin",
596 "scaled PME CudaForce good",
597 (
float *) force, 3*numAtoms);
598 TestArray_write<double>(
"lattice_good.bin",
"Lattice good",
599 (
double *) &lattice, 3*7);
// Debug dump (NTESTPID): raw per-step fxyz array.
602 #if defined(NTESTPID) 603 if (NTESTPID == patches[i].patch->getPatchID()) {
606 sprintf(fname,
"pme_fxyz_soa_pid%d_step%d.bin", NTESTPID,
607 patches[i].patch->flags.step);
608 sprintf(remark,
"SOA PME fxyz, patch %d, step %d", NTESTPID,
609 patches[i].patch->flags.step);
610 TestArray_write<float>(fname, remark, (
float *) force, 3*numAtoms);
// Skip accumulation when atoms strayed off the grid or commOnly is set.
613 if (!patches[i].pmeForceMsg->numStrayAtoms && !
simParams->commOnly) {
614 for(
int j=0;j < numAtoms;j++) {
// Grids 1/2: blend "up" and "down" contributions by lambda1Up/Down.
619 f1 += force2[j].
x * lambda1Down + force[j].
x * lambda1Up;
620 f2 += force2[j].
y * lambda1Down + force[j].
y * lambda1Up;
621 f3 += force2[j].
z * lambda1Down + force[j].
z * lambda1Up;
// Grids 3/4 (alchDecouple): complementary weights lambda3Up/Down.
623 f1 += force3[j].
x * lambda3Up + force4[j].
x * lambda3Down;
624 f2 += force3[j].
y * lambda3Up + force4[j].
y * lambda3Down;
625 f3 += force3[j].
z * lambda3Up + force4[j].
z * lambda3Down;
// Grid 5: correction weighted by -(lambda1Up + lambda1Down - 1).
628 f1 += force5[j].
x * (lambda1Up + lambda1Down - 1.0) * (-1.0);
629 f2 += force5[j].
y * (lambda1Up + lambda1Down - 1.0) * (-1.0);
630 f3 += force5[j].
z * (lambda1Up + lambda1Down - 1.0) * (-1.0);
// Back-transform fractional-space force to Cartesian and accumulate
// into the slow-force array f (opened from the Results box).
637 f[j].x += f1*r1x + f2*r2x + f3*r3x;
638 f[j].y += f1*r1y + f2*r2y + f3*r3y;
639 f[j].z += f1*r1z + f2*r2z + f3*r3z;
// Debug dump (TESTPID): final accumulated slow forces.
644 if (patches[i].patchID == TESTPID) {
645 fprintf(stderr,
"Storing slow force array for patch %d\n",
647 TestArray_write<double>(
"pme_force_good.bin",
"PME force good",
648 (
double *) f, 3*numAtoms);
// Deposit results and release the consumed message.
652 patches[i].forceBox->close(&r);
653 delete patches[i].pmeForceMsg;
654 patches[i].pmeForceMsg = NULL;
// storePmeForceMsg() fragments: stash the incoming force message on its
// patch slot (must be empty), with patchCounter tracking when all patches
// have reported (decrement and the lock usage are on missing lines --
// TODO confirm against the full file; returns bool per the scraped
// declaration storePmeForceMsg(PmeForceMsg*)).
665 if (patchCounter == 0) {
670 if (patches[i].pmeForceMsg != NULL)
671 NAMD_bug(
"ComputePmeCUDA::storePmeForceMsg, already contains message");
672 patches[i].pmeForceMsg = msg;
void setNumPatches(int n)
#define NAMD_EVENT_STOP(eon, id)
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
static BigReal dielectric_1
static void partition(int *order, const FullAtom *atoms, int begin, int end)
static PatchMap * Object()
void getHomePencil(PatchID patchID, int &homey, int &homez)
virtual void submit(void)=0
SimParameters * simParameters
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
void recvAtoms(PmeAtomMsg *msg)
SubmitReduction * willSubmit(int setID, int size=-1)
static ReductionMgr * Object(void)
Patch * patch(PatchID pid)
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
bool storePmeForceMsg(PmeForceMsg *msg)
#define NAMD_EVENT_START(eon, id)
void NAMD_bug(const char *err_msg)
NAMD_HOST_DEVICE Vector a_r() const
NAMD_HOST_DEVICE Vector b_r() const
NAMD_HOST_DEVICE Vector c_r() const
virtual ~ComputePmeCUDA()
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
int getNode(int i, int j)
#define SET_PRIORITY(MSG, SEQ, PRIO)
NAMD_HOST_DEVICE Vector origin() const