ComputePmeCUDA.C

Go to the documentation of this file.
00001 #include <numeric>
00002 #include <algorithm>
00003 #ifdef NAMD_CUDA
00004 #include <cuda_runtime.h>
00005 #endif
00006 #include "Node.h"
00007 #include "SimParameters.h"
00008 #include "Priorities.h"
00009 #include "ComputeNonbondedUtil.h"
00010 #include "ComputePmeCUDA.h"
00011 #include "ComputePmeCUDAMgr.h"
00012 #include "PmeSolver.h"
00013 #include "HomePatch.h"
00014 
00015 #ifdef NAMD_CUDA
00016 //
00017 // Class creator, multiple patches
00018 //
00019 ComputePmeCUDA::ComputePmeCUDA(ComputeID c, PatchIDList& pids) : Compute(c) {
00020   setNumPatches(pids.size());
00021   patches.resize(getNumPatches());
00022   for (int i=0;i < getNumPatches();i++) {
00023     patches[i].patchID = pids[i];
00024   }
00025 }
00026 
00027 //
00028 // Class creator, single patch
00029 //
00030 ComputePmeCUDA::ComputePmeCUDA(ComputeID c, PatchID pid) : Compute(c) {
00031         setNumPatches(1);
00032   patches.resize(getNumPatches());
00033   patches[0].patchID = pid;
00034 }
00035 
00036 //
00037 // Class destructor
00038 //
00039 ComputePmeCUDA::~ComputePmeCUDA() {
00040   for (int i=0;i < getNumPatches();i++) {
00041         if (patches[i].positionBox != NULL) {
00042                 PatchMap::Object()->patch(patches[i].patchID)->unregisterPositionPickup(this, &patches[i].positionBox);
00043     }
00044     if (patches[i].avgPositionBox != NULL) {
00045         PatchMap::Object()->patch(patches[i].patchID)->unregisterAvgPositionPickup(this, &patches[i].avgPositionBox);
00046     }
00047     if (patches[i].forceBox != NULL) {
00048         PatchMap::Object()->patch(patches[i].patchID)->unregisterForceDeposit(this, &patches[i].forceBox);
00049     }
00050   }
00051   CmiDestroyLock(lock);
00052 }
00053 
00054 //
00055 // Initialize
00056 //
00057 void ComputePmeCUDA::initialize() {
00058   lock = CmiCreateLock();
00059 
00060   // Sanity Check
00061   SimParameters *simParams = Node::Object()->simParameters;
00062   if (simParams->alchFepOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, alchFepOn not yet implemented");
00063   if (simParams->alchThermIntOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, alchThermIntOn not yet implemented");
00064   if (simParams->lesOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
00065   if (simParams->pairInteractionOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
00066 
00067   sendAtomsDone = false;
00068   selfEnergyDone = false;
00069   // basePriority = PME_PRIORITY;
00070   patchCounter = getNumPatches();
00071 
00072   // Get proxy to ComputePmeCUDAMgr
00073   computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
00074   mgr = computePmeCUDAMgrProxy.ckLocalBranch();
00075   if (mgr == NULL)
00076     NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
00077   pmeGrid = mgr->getPmeGrid();
00078 
00079   for (int i=0;i < getNumPatches();i++) {
00080     if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
00081       || patches[i].forceBox != NULL || patches[i].patch != NULL)
00082       NAMD_bug("ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
00083     if (!(patches[i].patch = PatchMap::Object()->patch(patches[i].patchID))) {
00084       NAMD_bug("ComputePmeCUDA::initialize() patch not found");
00085     }
00086     patches[i].positionBox = patches[i].patch->registerPositionPickup(this);
00087     patches[i].forceBox = patches[i].patch->registerForceDeposit(this);
00088         patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(this);
00089   }
00090 
00091   setupActivePencils();
00092 }
00093 
00094 void ComputePmeCUDA::atomUpdate() {
00095   atomsChanged = true;
00096 }
00097 
00098 //
00099 // Setup, see which pencils overlap with the patches held by this compute
00100 //
00101 void ComputePmeCUDA::setupActivePencils() {
00102   PatchMap *patchMap = PatchMap::Object();
00103 
00104   for (int i=0;i < getNumPatches();i++) {
00105     int homey = -1;
00106     int homez = -1;
00107     mgr->getHomePencil(patches[i].patchID, homey, homez);
00108 
00109     patches[i].homePencilY = homey;
00110     patches[i].homePencilZ = homez;
00111     patches[i].homePencilNode = mgr->getNode(homey,homez);
00112     RegisterPatchMsg *msg = new RegisterPatchMsg();
00113     msg->i = homey;
00114     msg->j = homez;
00115     computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
00116   }
00117 
00118   atomsChanged = true;
00119 
00120 }
00121 
00122 int ComputePmeCUDA::noWork() {
00123 
00124   if (patches[0].patch->flags.doFullElectrostatics) return 0;
00125 
00126   for (int i=0;i < getNumPatches();i++) {
00127     patches[i].positionBox->skip();
00128     patches[i].forceBox->skip();
00129     // We only need to call skip() once
00130     if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
00131   }
00132 
00133   return 1;
00134 }
00135 
00136 void ComputePmeCUDA::doWork() {
00137   if (sendAtomsDone) {
00138     // Second part of computation: receive forces from ComputePmeCUDAMgr
00139     // basePriority = PME_OFFLOAD_PRIORITY;
00140     sendAtomsDone = false;
00141     recvForces();
00142   } else {
00143     // First part of computation: send atoms to ComputePmeCUDAMgr
00144     sendAtomsDone = true;
00145     // basePriority = COMPUTE_HOME_PRIORITY + PATCH_PRIORITY(patchID);
00146     sendAtoms();
00147   }
00148 }
00149 
00150 void ComputePmeCUDA::sendAtoms() {
00151 
00152   Lattice& lattice = patches[0].patch->lattice;
00153   Vector origin = lattice.origin();
00154   Vector recip1 = lattice.a_r();
00155   Vector recip2 = lattice.b_r();
00156   Vector recip3 = lattice.c_r();
00157   double ox = origin.x;
00158   double oy = origin.y;
00159   double oz = origin.z;
00160   double r1x = recip1.x;
00161   double r1y = recip1.y;
00162   double r1z = recip1.z;
00163   double r2x = recip2.x;
00164   double r2y = recip2.y;
00165   double r2z = recip2.z;
00166   double r3x = recip3.x;
00167   double r3y = recip3.y;
00168   double r3z = recip3.z;
00169 
00170   for (int i=0;i < getNumPatches();i++) {
00171     if (patches[i].pmeForceMsg != NULL)
00172       NAMD_bug("ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
00173 
00174         const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
00175                                      * ComputeNonbondedUtil::dielectric_1 );
00176 
00177         bool doMolly = patches[i].patch->flags.doMolly;
00178     bool doEnergy = patches[i].patch->flags.doEnergy;
00179     bool doVirial = patches[i].patch->flags.doVirial;
00180     PatchMap *patchMap = PatchMap::Object();
00181 
00182     // Send atom patch to pencil(s)
00183   // #ifdef NETWORK_PROGRESS
00184   //   CmiNetworkProgress();
00185   // #endif
00186 
00187     CompAtom *x = patches[i].positionBox->open();
00188     if ( doMolly ) {
00189       patches[i].positionBox->close(&x);
00190       x = patches[i].avgPositionBox->open();
00191     }
00192 
00193     int numAtoms = patches[i].patch->getNumAtoms();
00194 
00195     if (!selfEnergyDone) {
00196       double selfEnergy = calcSelfEnergy(numAtoms, x);
00197       // Send self-energy to one of the pencils, doesn't matter which one, the self energy is always
00198       // delivered to (0,0) pencil
00199       PmeSelfEnergyMsg *msg = new PmeSelfEnergyMsg();
00200       msg->energy = selfEnergy;
00201       computePmeCUDAMgrProxy[patches[i].homePencilNode].recvSelfEnergy(msg);
00202     }
00203 
00204     // const Vector ucenter = patches[i].patch->lattice.unscale(patchMap->center(patches[i].patchID));
00205     // const BigReal recip11 = patches[i].patch->lattice.a_r().x;
00206     // const BigReal recip22 = patches[i].patch->lattice.b_r().y;
00207     // const BigReal recip33 = patches[i].patch->lattice.c_r().z;
00208 
00209     PmeAtomMsg *msg = new (numAtoms, PRIORITY_SIZE) PmeAtomMsg;
00210     SET_PRIORITY(msg, sequence(), PME_PRIORITY)
00211     // NOTE:
00212     // patch already contains the centered coordinates and scaled charges
00213         //    memcpy(msg->atoms, patch->getCudaAtomList(), sizeof(CudaAtom)*numAtoms);
00214 
00215     msg->numAtoms = numAtoms;
00216     // msg->patchIndex = i;
00217     msg->i = patches[i].homePencilY;
00218     msg->j = patches[i].homePencilZ;
00219     msg->compute = this;
00220     msg->pe = CkMyPe();
00221     msg->doEnergy = doEnergy;
00222     msg->doVirial = doVirial;
00223     CudaAtom *atoms = msg->atoms;
00224     // BigReal miny = 1.0e20;
00225     // BigReal minz = 1.0e20;
00226     for (int j=0;j < numAtoms;j++) {
00227         CudaAtom atom;
00228       BigReal q = x[j].charge;
00229       // Convert atom positions to range [0,1)
00230       double px = x[j].position.x - ox;
00231       double py = x[j].position.y - oy;
00232       double pz = x[j].position.z - oz;
00233       double wx = px*r1x + py*r1y + pz*r1z;
00234       double wy = px*r2x + py*r2y + pz*r2z;
00235       double wz = px*r3x + py*r3y + pz*r3z;
00236       // double wx = x[j].position.x*recip11;
00237       // double wy = x[j].position.y*recip22;
00238       // double wz = x[j].position.z*recip33;
00239       wx = (wx - (floor(wx + 0.5) - 0.5));
00240       wy = (wy - (floor(wy + 0.5) - 0.5));
00241       wz = (wz - (floor(wz + 0.5) - 0.5));
00242       // wx = (wx - floor(wx));
00243       // wy = (wy - floor(wy));
00244       // wz = (wz - floor(wz));
00245       // if (wx >= 1.0) wx -= 1.0;
00246       // if (wy >= 1.0) wy -= 1.0;
00247       // if (wz >= 1.0) wz -= 1.0;
00248       atom.x = (float)wx;
00249       atom.y = (float)wy;
00250       atom.z = (float)wz;
00251       if (atom.x >= 1.0f) atom.x -= 1.0f;
00252       if (atom.y >= 1.0f) atom.y -= 1.0f;
00253       if (atom.z >= 1.0f) atom.z -= 1.0f;
00254         atom.q = (float)(q*coulomb_sqrt);
00255                 atoms[j] = atom;
00256       // miny = std::min(x[j].position.y, miny);
00257       // minz = std::min(x[j].position.z, minz);
00258     }
00259     // Calculate corner with minimum y and z for this patch
00260     // double wy = miny*recip22;
00261     // double wz = minz*recip33;
00262     // msg->miny = (int)((double)pmeGrid.K2*(wy - (floor(wy + 0.5) - 0.5)));
00263     // msg->minz = (int)((double)pmeGrid.K3*(wz - (floor(wz + 0.5) - 0.5)));
00264 
00265     // For local (within shared memory node), get pointer to memory location and do direct memcpy
00266     // For global (on different shread memory nodes), 
00267     if (patches[i].homePencilNode == CkMyNode()) {
00268       mgr->recvAtoms(msg);
00269     } else {
00270       computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
00271     }
00272     
00273     if ( doMolly )
00274       patches[i].avgPositionBox->close(&x);
00275     else 
00276       patches[i].positionBox->close(&x);
00277   }
00278 
00279   if (!selfEnergyDone) {
00280     selfEnergyDone = true;
00281   }
00282 
00283 }
00284 
00285 //
00286 // Calculate self-energy and send to PmeSolver
00287 //
00288 double ComputePmeCUDA::calcSelfEnergy(int numAtoms, CompAtom *x) {
00289   double selfEnergy = 0.0;
00290   for (int i=0;i < numAtoms;i++) {
00291     selfEnergy += x[i].charge*x[i].charge;
00292   }
00293   //const double SQRT_PI = 1.7724538509055160273; /* mathematica 15 digits*/
00294   selfEnergy *= -ComputeNonbondedUtil::ewaldcof*COULOMB * ComputeNonbondedUtil::scaling 
00295             * ComputeNonbondedUtil::dielectric_1 / SQRT_PI;
00296   return selfEnergy;
00297 }
00298 
00299 void ComputePmeCUDA::recvForces() {
00300 
00301   Lattice& lattice = patches[0].patch->lattice;
00302   Vector origin = lattice.origin();
00303   Vector recip1 = lattice.a_r();
00304   Vector recip2 = lattice.b_r();
00305   Vector recip3 = lattice.c_r();
00306   double r1x = recip1.x;
00307   double r1y = recip1.y;
00308   double r1z = recip1.z;
00309   double r2x = recip2.x;
00310   double r2y = recip2.y;
00311   double r2z = recip2.z;
00312   double r3x = recip3.x;
00313   double r3y = recip3.y;
00314   double r3z = recip3.z;
00315 
00316   SimParameters *simParams = Node::Object()->simParameters;
00317 
00318   for (int i=0;i < getNumPatches();i++) {
00319     if (patches[i].pmeForceMsg == NULL)
00320       NAMD_bug("ComputePmeCUDA::recvForces, no message in pmeForceMsg");
00321 
00322     CudaForce* force = patches[i].pmeForceMsg->force;
00323     Results *r = patches[i].forceBox->open();
00324     int numAtoms =  patches[i].pmeForceMsg->numAtoms;
00325 
00326     Force *f = r->f[Results::slow];
00327     if (!patches[i].pmeForceMsg->numStrayAtoms && !simParams->commOnly) {
00328       for(int j=0;j < numAtoms;j++) {
00329         double f1 = force[j].x;
00330         double f2 = force[j].y;
00331         double f3 = force[j].z;
00332         f[j].x += f1*r1x + f2*r2x + f3*r3x;
00333         f[j].y += f1*r1y + f2*r2y + f3*r3y;
00334         f[j].z += f1*r1z + f2*r2z + f3*r3z;
00335       }
00336     }
00337 
00338     patches[i].forceBox->close(&r);
00339     delete patches[i].pmeForceMsg;
00340     patches[i].pmeForceMsg = NULL;
00341   }
00342 
00343 }
00344 
00345 bool ComputePmeCUDA::storePmeForceMsg(PmeForceMsg *msg) {
00346   bool done = false;
00347   int i;
00348   CmiLock(lock);
00349   patchCounter--;
00350   i = patchCounter;
00351   if (patchCounter == 0) {
00352     patchCounter = getNumPatches();
00353     done = true;
00354   }
00355   CmiUnlock(lock);
00356   if (patches[i].pmeForceMsg != NULL)
00357     NAMD_bug("ComputePmeCUDA::storePmeForceMsg, already contains message");
00358   patches[i].pmeForceMsg = msg;
00359   return done;
00360 }
00361 #endif // NAMD_CUDA

Generated on Tue Sep 19 01:17:11 2017 for NAMD by  doxygen 1.4.7