4 #include <cuda_runtime.h>     7 #include <hip/hip_runtime.h>    21 #if defined(NAMD_CUDA) || defined(NAMD_HIP)    29     patches[i].patchID = pids[i];
    43   patches[0].patchID = pid;
    55         if (patches[i].positionBox != NULL) {
    58     if (patches[i].avgPositionBox != NULL) {
    61     if (patches[i].forceBox != NULL) {
    73   lock = CmiCreateLock();
    77   if (
simParams->lesOn) 
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
    78   if (
simParams->pairInteractionOn) 
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
    80   sendAtomsDone = 
false;
    82 #ifdef NODEGROUP_FORCE_REGISTER    83   CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
    84   PatchData *patchData = cpdata.ckLocalBranch();
    90   computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
    91   mgr = computePmeCUDAMgrProxy.ckLocalBranch();
    93     NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
    97     if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
    98       || patches[i].forceBox != NULL || patches[i].patch != NULL)
    99       NAMD_bug(
"ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
   101       NAMD_bug(
"ComputePmeCUDA::initialize() patch not found");
   103     patches[i].positionBox = patches[i].patch->registerPositionPickup(
this);
   104     patches[i].forceBox = patches[i].patch->registerForceDeposit(
this);
   105         patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(
this);
   108   setupActivePencils();
   118 void ComputePmeCUDA::setupActivePencils() {
   126     patches[i].homePencilY = homey;
   127     patches[i].homePencilZ = homez;
   128     patches[i].homePencilNode = mgr->
getNode(homey,homez);
   132     computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
   141   if (patches[0].patch->flags.doFullElectrostatics) 
return 0;
   146     patches[i].positionBox->skip();
   147     patches[i].forceBox->skip();
   149     if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
   161     sendAtomsDone = 
false;
   165     sendAtomsDone = 
true;
   172 void ComputePmeCUDA::sendAtoms() {
   174   Lattice& lattice = patches[0].patch->lattice;
   179   double ox = origin.
x;
   180   double oy = origin.
y;
   181   double oz = origin.
z;
   182   double r1x = recip1.
x;
   183   double r1y = recip1.
y;
   184   double r1z = recip1.
z;
   185   double r2x = recip2.
x;
   186   double r2y = recip2.
y;
   187   double r2z = recip2.
z;
   188   double r3x = recip3.
x;
   189   double r3y = recip3.
y;
   190   double r3z = recip3.
z;
   194     if (patches[i].pmeForceMsg != NULL)
   195       NAMD_bug(
"ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
   200         bool doMolly = patches[i].patch->flags.doMolly;
   201     bool doEnergy = patches[i].patch->flags.doEnergy;
   202     bool doVirial = patches[i].patch->flags.doVirial;
   210     CompAtom *x = patches[i].positionBox->open();
   212       patches[i].positionBox->close(&x);
   213       x = patches[i].avgPositionBox->open();
   216     int numAtoms = patches[i].patch->getNumAtoms();
   222     const bool isFirstStep = (patches[0].patch->flags.step == 
simParams->firstTimestep) || (selfEnergy == 0);
   225         calcSelfEnergyFEP(numAtoms, x, isFirstStep);
   228         calcSelfEnergyTI(numAtoms, x, isFirstStep);
   230         calcSelfEnergy(numAtoms, x, isFirstStep);
   241     const int alchGrid = 
simParams->alchOn ? 1 : 0;
   242     const int alchDecoupleGrid = 
simParams->alchDecouple ? 1: 0;
   243     const int alchSoftCoreOrTI = (
simParams->alchElecLambdaStart > 0 || 
simParams->alchThermIntOn) ? 1 : 0;
   244     msg = 
new (numAtoms, alchGrid * numAtoms, alchGrid * numAtoms,
   245                alchDecoupleGrid * numAtoms,  alchDecoupleGrid * numAtoms,
   256     msg->
i = patches[i].homePencilY;
   257     msg->
j = patches[i].homePencilZ;
   272     for (
int j=0;j < numAtoms;j++) {
   274         float factor1 = 1.0f;
   275         float factor2 = 1.0f;
   276         float factor3 = 1.0f;
   277         float factor4 = 1.0f;
   278         float factor5 = 1.0f;
   285       double wx = px*r1x + py*r1y + pz*r1z;
   286       double wy = px*r2x + py*r2y + pz*r2z;
   287       double wz = px*r3x + py*r3y + pz*r3z;
   291       wx = (wx - (floor(wx + 0.5) - 0.5));
   292       wy = (wy - (floor(wy + 0.5) - 0.5));
   293       wz = (wz - (floor(wz + 0.5) - 0.5));
   303       if (atom.x >= 1.0f) atom.x -= 1.0f;
   304       if (atom.y >= 1.0f) atom.y -= 1.0f;
   305       if (atom.z >= 1.0f) atom.z -= 1.0f;
   306         atom.q = (float)(q*coulomb_sqrt);
   347           default: 
NAMD_bug(
"Invalid partition number"); 
break;
   350         atomChargeFactors1[j] = factor1;
   351         atomChargeFactors2[j] = factor2;
   353           atomChargeFactors3[j] = factor3;
   354           atomChargeFactors4[j] = factor4;
   357           atomChargeFactors5[j] = factor5;
   364       atom.x = (float) x[j].position.x;
   366       atom.z = (float) x[j].position.z;
   367       atom.q = x[j].charge;
   370 #if defined(NTESTPID)   371     if (NTESTPID == patches[i].patch->getPatchID()) {
   374       sprintf(fname, 
"pme_xyzq_soa_pid%d_step%d.bin", NTESTPID,
   375           patches[i].patch->flags.step);
   376       sprintf(remark, 
"SOA PME xyzq, patch %d, step %d", NTESTPID,
   377           patches[i].patch->flags.step);
   378       TestArray_write<float>(fname, remark, (
float *) atoms, 4*numAtoms);
   389     if (patches[i].homePencilNode == CkMyNode()) {
   392       computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
   396       patches[i].avgPositionBox->close(&x);
   398       patches[i].positionBox->close(&x);
   415 void ComputePmeCUDA::calcSelfEnergy(
int numAtoms, 
CompAtom *x, 
bool isFirstStep) {
   417   bool constantAtomicCharges = 
true;
   418   if (isFirstStep || !constantAtomicCharges) {
   420     for (
int i=0;i < numAtoms;i++) {
   429 void ComputePmeCUDA::calcSelfEnergyFEP(
int numAtoms, 
CompAtom *atoms, 
bool isFirstStep) {
   432     NAMD_bug(
"Called calcSelfEnergyFEP() in non-FEP code!");
   435   bool constantAtomicCharges = 
true;
   436   BigReal scaleLambda1, scaleLambda2; 
   437   const  BigReal alchLambda1 = 
simParams->getCurrentLambda(patches[0].patch->flags.step);
   438   const  BigReal alchLambda2 = 
simParams->getCurrentLambda2(patches[0].patch->flags.step);
   439   static thread_local 
BigReal lambda1Up   = 
simParams->getElecLambda(alchLambda1);
   440   static thread_local 
BigReal lambda2Up   = 
simParams->getElecLambda(alchLambda2);
   441   static thread_local 
BigReal lambda1Down = 
simParams->getElecLambda(1.0 - alchLambda1);
   442   static thread_local 
BigReal lambda2Down = 
simParams->getElecLambda(1.0 - alchLambda2);
   443   if (isFirstStep || !constantAtomicCharges ||
   444       lambda1Up   != 
simParams->getElecLambda(alchLambda1) ||
   445       lambda2Up   != 
simParams->getElecLambda(alchLambda2) ||
   446       lambda1Down != 
simParams->getElecLambda(1.0 - alchLambda1) ||
   447       lambda2Down != 
simParams->getElecLambda(1.0 - alchLambda2))
   449     lambda1Up   = 
simParams->getElecLambda(alchLambda1);
   450     lambda2Up   = 
simParams->getElecLambda(alchLambda2);
   451     lambda1Down = 
simParams->getElecLambda(1.0 - alchLambda1);
   452     lambda2Down = 
simParams->getElecLambda(1.0 - alchLambda2);
   455     for (
int i = 0; i < numAtoms; ++i) {
   463           scaleLambda1 = lambda1Up; 
   464           scaleLambda2 = lambda2Up; 
   468           scaleLambda1 = lambda1Down; 
   469           scaleLambda2 = lambda2Down; 
   472         default: 
NAMD_bug(
"Invalid partition number");
   474       selfEnergy    += scaleLambda1 * (atoms[i].
charge * atoms[i].
charge);
   475       selfEnergyFEP += scaleLambda2 * (atoms[i].
charge * atoms[i].
charge);
   477         selfEnergy    += (1.0 - scaleLambda1) * (atoms[i].charge * atoms[i].charge);
   478         selfEnergyFEP += (1.0 - scaleLambda2) * (atoms[i].charge * atoms[i].charge);
   488 void ComputePmeCUDA::calcSelfEnergyTI(
int numAtoms, 
CompAtom *atoms, 
bool isFirstStep) {
   491     NAMD_bug(
"Called calcSelfEnergyTI() in non-FEP code!");
   494   bool constantAtomicCharges = 
true;
   495   const  BigReal alchLambda1   = 
simParams->getCurrentLambda(patches[0].patch->flags.step);
   496   static thread_local 
BigReal lambda1Up     = 
simParams->getElecLambda(alchLambda1);
   497   static thread_local 
BigReal lambda1Down   = 
simParams->getElecLambda(1.0 - alchLambda1);
   498   if (isFirstStep || !constantAtomicCharges ||
   499       lambda1Up   != 
simParams->getElecLambda(alchLambda1) ||
   500       lambda1Down != 
simParams->getElecLambda(1.0 - alchLambda1))
   502     lambda1Up   = 
simParams->getElecLambda(alchLambda1);
   503     lambda1Down = 
simParams->getElecLambda(1.0 - alchLambda1);
   508     double factor_ti_1  = 0.0;
   509     double factor_ti_2  = 0.0;
   510     for (
int i = 0; i < numAtoms; ++i) {
   519           elecLambda1 = lambda1Up;
   525           elecLambda1 = lambda1Down;
   530         default: 
NAMD_bug(
"Invalid partition number");
   532       selfEnergy    += elecLambda1 * (atoms[i].
charge * atoms[i].
charge);
   533       selfEnergyTI1 += factor_ti_1 * (atoms[i].
charge * atoms[i].
charge);
   534       selfEnergyTI2 += factor_ti_2 * (atoms[i].
charge * atoms[i].
charge);
   536         selfEnergy    += (1.0 - elecLambda1) * (atoms[i].charge * atoms[i].charge);
   537         selfEnergyTI1 -= factor_ti_1 * (atoms[i].
charge * atoms[i].
charge);
   538         selfEnergyTI2 -= factor_ti_2 * (atoms[i].
charge * atoms[i].
charge);
   547 void ComputePmeCUDA::recvForces() {
   549   Lattice& lattice = patches[0].patch->lattice;
   554   double r1x = recip1.
x;
   555   double r1y = recip1.
y;
   556   double r1z = recip1.
z;
   557   double r2x = recip2.
x;
   558   double r2y = recip2.
y;
   559   double r2z = recip2.
z;
   560   double r3x = recip3.
x;
   561   double r3y = recip3.
y;
   562   double r3z = recip3.
z;
   566   double alchLambda1, lambda1Up, lambda1Down;
   567   double lambda3Up, lambda3Down;
   569     alchLambda1 = 
simParams->getCurrentLambda(patches[0].patch->flags.step);
   570     lambda1Up   = 
simParams->getElecLambda(alchLambda1);
   571     lambda1Down = 
simParams->getElecLambda(1 - alchLambda1);
   573       lambda3Up   = 1 - lambda1Up;
   574       lambda3Down = 1 - lambda1Down;
   580     if (patches[i].pmeForceMsg == NULL)
   581       NAMD_bug(
"ComputePmeCUDA::recvForces, no message in pmeForceMsg");
   583     const CudaForce* force = patches[i].pmeForceMsg->force;
   584     const CudaForce* force2 = patches[i].pmeForceMsg->force2;
   585     const CudaForce* force3 = patches[i].pmeForceMsg->force3;
   586     const CudaForce* force4 = patches[i].pmeForceMsg->force4;
   587     const CudaForce* force5 = patches[i].pmeForceMsg->force5;
   588     Results *r = patches[i].forceBox->open();
   589     int numAtoms =  patches[i].pmeForceMsg->numAtoms;
   592     if (patches[i].patchID == TESTPID) {
   593       fprintf(stderr, 
"Storing scaled PME CudaForce array for patch %d\n",
   595       TestArray_write<float>(
"scaled_pme_force_good.bin",
   596           "scaled PME CudaForce good",
   597           (
float *) force, 3*numAtoms);
   598       TestArray_write<double>(
"lattice_good.bin", 
"Lattice good",
   599           (
double *) &lattice, 3*7);
   602 #if defined(NTESTPID)   603     if (NTESTPID == patches[i].patch->getPatchID()) {
   606       sprintf(fname, 
"pme_fxyz_soa_pid%d_step%d.bin", NTESTPID,
   607           patches[i].patch->flags.step);
   608       sprintf(remark, 
"SOA PME fxyz, patch %d, step %d", NTESTPID,
   609           patches[i].patch->flags.step);
   610       TestArray_write<float>(fname, remark, (
float *) force, 3*numAtoms);
   613     if (!patches[i].pmeForceMsg->numStrayAtoms && !
simParams->commOnly) {
   614       for(
int j=0;j < numAtoms;j++) {
   619           f1 += force2[j].
x * lambda1Down + force[j].
x * lambda1Up;
   620           f2 += force2[j].
y * lambda1Down + force[j].
y * lambda1Up;
   621           f3 += force2[j].
z * lambda1Down + force[j].
z * lambda1Up;
   623             f1 += force3[j].
x * lambda3Up + force4[j].
x * lambda3Down;
   624             f2 += force3[j].
y * lambda3Up + force4[j].
y * lambda3Down;
   625             f3 += force3[j].
z * lambda3Up + force4[j].
z * lambda3Down;
   628             f1 += force5[j].
x * (lambda1Up + lambda1Down - 1.0) * (-1.0);
   629             f2 += force5[j].
y * (lambda1Up + lambda1Down - 1.0) * (-1.0);
   630             f3 += force5[j].
z * (lambda1Up + lambda1Down - 1.0) * (-1.0);
   637         f[j].x += f1*r1x + f2*r2x + f3*r3x;
   638         f[j].y += f1*r1y + f2*r2y + f3*r3y;
   639         f[j].z += f1*r1z + f2*r2z + f3*r3z;
   644     if (patches[i].patchID == TESTPID) {
   645       fprintf(stderr, 
"Storing slow force array for patch %d\n",
   647       TestArray_write<double>(
"pme_force_good.bin", 
"PME force good",
   648           (
double *) f, 3*numAtoms);
   652     patches[i].forceBox->close(&r);
   653     delete patches[i].pmeForceMsg;
   654     patches[i].pmeForceMsg = NULL;
   665   if (patchCounter == 0) {
   670   if (patches[i].pmeForceMsg != NULL)
   671     NAMD_bug(
"ComputePmeCUDA::storePmeForceMsg, already contains message");
   672   patches[i].pmeForceMsg = msg;
 
void setNumPatches(int n)
#define NAMD_EVENT_STOP(eon, id)
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
static BigReal dielectric_1
static void partition(int *order, const FullAtom *atoms, int begin, int end)
static PatchMap * Object()
void getHomePencil(PatchID patchID, int &homey, int &homez)
virtual void submit(void)=0
SimParameters * simParameters
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
void recvAtoms(PmeAtomMsg *msg)
SubmitReduction * willSubmit(int setID, int size=-1)
static ReductionMgr * Object(void)
Patch * patch(PatchID pid)
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
bool storePmeForceMsg(PmeForceMsg *msg)
#define NAMD_EVENT_START(eon, id)
void NAMD_bug(const char *err_msg)
NAMD_HOST_DEVICE Vector a_r() const
NAMD_HOST_DEVICE Vector b_r() const
NAMD_HOST_DEVICE Vector c_r() const
virtual ~ComputePmeCUDA()
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
int getNode(int i, int j)
#define SET_PRIORITY(MSG, SEQ, PRIO)
NAMD_HOST_DEVICE Vector origin() const