NAMD
ComputePmeCUDA.C
Go to the documentation of this file.
1 #include <numeric>
2 #include <algorithm>
3 #ifdef NAMD_CUDA
4 #include <cuda_runtime.h>
5 #endif
6 #ifdef NAMD_HIP
7 #include <hip/hip_runtime.h>
8 #endif
9 #include "Node.h"
10 #include "SimParameters.h"
11 #include "Priorities.h"
12 #include "ComputeNonbondedUtil.h"
13 #include "ComputePmeCUDA.h"
14 #include "ComputePmeCUDAMgr.h"
15 #include "PmeSolver.h"
16 #include "HomePatch.h"
17 
18 #include "NamdEventsProfiling.h"
19 
20 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
21 //
22 // Class creator, multiple patches
23 //
25  setNumPatches(pids.size());
26  patches.resize(getNumPatches());
27  for (int i=0;i < getNumPatches();i++) {
28  patches[i].patchID = pids[i];
29  }
30 }
31 
32 //
33 // Class creator, single patch
34 //
36  setNumPatches(1);
37  patches.resize(getNumPatches());
38  patches[0].patchID = pid;
39 }
40 
41 //
42 // Class destructor
43 //
45  for (int i=0;i < getNumPatches();i++) {
46  if (patches[i].positionBox != NULL) {
47  PatchMap::Object()->patch(patches[i].patchID)->unregisterPositionPickup(this, &patches[i].positionBox);
48  }
49  if (patches[i].avgPositionBox != NULL) {
50  PatchMap::Object()->patch(patches[i].patchID)->unregisterAvgPositionPickup(this, &patches[i].avgPositionBox);
51  }
52  if (patches[i].forceBox != NULL) {
53  PatchMap::Object()->patch(patches[i].patchID)->unregisterForceDeposit(this, &patches[i].forceBox);
54  }
55  }
56  delete reduction;
57  CmiDestroyLock(lock);
58 }
59 
60 //
61 // Initialize
62 //
64  lock = CmiCreateLock();
65 
66  // Sanity Check
68  if (simParams->alchFepOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, alchFepOn not yet implemented");
69  if (simParams->alchThermIntOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, alchThermIntOn not yet implemented");
70  if (simParams->lesOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
71  if (simParams->pairInteractionOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
72 
73  sendAtomsDone = false;
75  // basePriority = PME_PRIORITY;
76  patchCounter = getNumPatches();
77 
78  // Get proxy to ComputePmeCUDAMgr
79  computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
80  mgr = computePmeCUDAMgrProxy.ckLocalBranch();
81  if (mgr == NULL)
82  NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
83  pmeGrid = mgr->getPmeGrid();
84 
85  for (int i=0;i < getNumPatches();i++) {
86  if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
87  || patches[i].forceBox != NULL || patches[i].patch != NULL)
88  NAMD_bug("ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
89  if (!(patches[i].patch = PatchMap::Object()->patch(patches[i].patchID))) {
90  NAMD_bug("ComputePmeCUDA::initialize() patch not found");
91  }
92  patches[i].positionBox = patches[i].patch->registerPositionPickup(this);
93  patches[i].forceBox = patches[i].patch->registerForceDeposit(this);
94  patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(this);
95  }
96 
97  setupActivePencils();
98 }
99 
101  atomsChanged = true;
102 }
103 
104 //
105 // Setup, see which pencils overlap with the patches held by this compute
106 //
107 void ComputePmeCUDA::setupActivePencils() {
108  PatchMap *patchMap = PatchMap::Object();
109 
110  for (int i=0;i < getNumPatches();i++) {
111  int homey = -1;
112  int homez = -1;
113  mgr->getHomePencil(patches[i].patchID, homey, homez);
114 
115  patches[i].homePencilY = homey;
116  patches[i].homePencilZ = homez;
117  patches[i].homePencilNode = mgr->getNode(homey,homez);
118  RegisterPatchMsg *msg = new RegisterPatchMsg();
119  msg->i = homey;
120  msg->j = homez;
121  computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
122  }
123 
124  atomsChanged = true;
125 
126 }
127 
129 
130  if (patches[0].patch->flags.doFullElectrostatics) return 0;
131 
132  reduction->submit();
133 
134  for (int i=0;i < getNumPatches();i++) {
135  patches[i].positionBox->skip();
136  patches[i].forceBox->skip();
137  // We only need to call skip() once
138  if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
139  }
140 
141  return 1;
142 }
143 
145  if (sendAtomsDone) {
146  // Second part of computation: receive forces from ComputePmeCUDAMgr
147  // basePriority = PME_OFFLOAD_PRIORITY;
148  sendAtomsDone = false;
149  recvForces();
150  } else {
151  // First part of computation: send atoms to ComputePmeCUDAMgr
152  sendAtomsDone = true;
153  // basePriority = COMPUTE_HOME_PRIORITY + PATCH_PRIORITY(patchID);
154  sendAtoms();
155  }
156 }
157 
158 void ComputePmeCUDA::sendAtoms() {
159 
160  Lattice& lattice = patches[0].patch->lattice;
161  Vector origin = lattice.origin();
162  Vector recip1 = lattice.a_r();
163  Vector recip2 = lattice.b_r();
164  Vector recip3 = lattice.c_r();
165  double ox = origin.x;
166  double oy = origin.y;
167  double oz = origin.z;
168  double r1x = recip1.x;
169  double r1y = recip1.y;
170  double r1z = recip1.z;
171  double r2x = recip2.x;
172  double r2y = recip2.y;
173  double r2z = recip2.z;
174  double r3x = recip3.x;
175  double r3y = recip3.y;
176  double r3z = recip3.z;
177 
178  double selfEnergy = 0.;
179 
180  for (int i=0;i < getNumPatches();i++) {
181  if (patches[i].pmeForceMsg != NULL)
182  NAMD_bug("ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
183 
184  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
186 
187  bool doMolly = patches[i].patch->flags.doMolly;
188  bool doEnergy = patches[i].patch->flags.doEnergy;
189  bool doVirial = patches[i].patch->flags.doVirial;
190  PatchMap *patchMap = PatchMap::Object();
191 
192  // Send atom patch to pencil(s)
193  // #ifdef NETWORK_PROGRESS
194  // CmiNetworkProgress();
195  // #endif
196 
197  CompAtom *x = patches[i].positionBox->open();
198  if ( doMolly ) {
199  patches[i].positionBox->close(&x);
200  x = patches[i].avgPositionBox->open();
201  }
202 
203  int numAtoms = patches[i].patch->getNumAtoms();
204 
205  if ( doEnergy ) selfEnergy += calcSelfEnergy(numAtoms, x);
206 
207  // const Vector ucenter = patches[i].patch->lattice.unscale(patchMap->center(patches[i].patchID));
208  // const BigReal recip11 = patches[i].patch->lattice.a_r().x;
209  // const BigReal recip22 = patches[i].patch->lattice.b_r().y;
210  // const BigReal recip33 = patches[i].patch->lattice.c_r().z;
211 
212  PmeAtomMsg *msg = new (numAtoms, PRIORITY_SIZE) PmeAtomMsg;
214  // NOTE:
215  // patch already contains the centered coordinates and scaled charges
216  // memcpy(msg->atoms, patch->getCudaAtomList(), sizeof(CudaAtom)*numAtoms);
217 
218  msg->numAtoms = numAtoms;
219  // msg->patchIndex = i;
220  msg->i = patches[i].homePencilY;
221  msg->j = patches[i].homePencilZ;
222  msg->compute = this;
223  msg->pe = CkMyPe();
224  msg->doEnergy = doEnergy;
225  msg->doVirial = doVirial;
226  msg->lattice = lattice;
227  CudaAtom *atoms = msg->atoms;
228  // BigReal miny = 1.0e20;
229  // BigReal minz = 1.0e20;
230  for (int j=0;j < numAtoms;j++) {
231  CudaAtom atom;
232  BigReal q = x[j].charge;
233  // Convert atom positions to range [0,1)
234  double px = x[j].position.x - ox;
235  double py = x[j].position.y - oy;
236  double pz = x[j].position.z - oz;
237  double wx = px*r1x + py*r1y + pz*r1z;
238  double wy = px*r2x + py*r2y + pz*r2z;
239  double wz = px*r3x + py*r3y + pz*r3z;
240  // double wx = x[j].position.x*recip11;
241  // double wy = x[j].position.y*recip22;
242  // double wz = x[j].position.z*recip33;
243  wx = (wx - (floor(wx + 0.5) - 0.5));
244  wy = (wy - (floor(wy + 0.5) - 0.5));
245  wz = (wz - (floor(wz + 0.5) - 0.5));
246  // wx = (wx - floor(wx));
247  // wy = (wy - floor(wy));
248  // wz = (wz - floor(wz));
249  // if (wx >= 1.0) wx -= 1.0;
250  // if (wy >= 1.0) wy -= 1.0;
251  // if (wz >= 1.0) wz -= 1.0;
252  atom.x = (float)wx;
253  atom.y = (float)wy;
254  atom.z = (float)wz;
255  if (atom.x >= 1.0f) atom.x -= 1.0f;
256  if (atom.y >= 1.0f) atom.y -= 1.0f;
257  if (atom.z >= 1.0f) atom.z -= 1.0f;
258  atom.q = (float)(q*coulomb_sqrt);
259  atoms[j] = atom;
260  // miny = std::min(x[j].position.y, miny);
261  // minz = std::min(x[j].position.z, minz);
262  }
263  // Calculate corner with minimum y and z for this patch
264  // double wy = miny*recip22;
265  // double wz = minz*recip33;
266  // msg->miny = (int)((double)pmeGrid.K2*(wy - (floor(wy + 0.5) - 0.5)));
267  // msg->minz = (int)((double)pmeGrid.K3*(wz - (floor(wz + 0.5) - 0.5)));
268 
269  // For local (within shared memory node), get pointer to memory location and do direct memcpy
270  // For global (on different shread memory nodes),
271  if (patches[i].homePencilNode == CkMyNode()) {
272  mgr->recvAtoms(msg);
273  } else {
274  computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
275  }
276 
277  if ( doMolly )
278  patches[i].avgPositionBox->close(&x);
279  else
280  patches[i].positionBox->close(&x);
281  }
282 
283  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += selfEnergy;
284  reduction->submit();
285 
286 }
287 
288 //
289 // Calculate self-energy and send to PmeSolver
290 //
291 double ComputePmeCUDA::calcSelfEnergy(int numAtoms, CompAtom *x) {
292  double selfEnergy = 0.0;
293  for (int i=0;i < numAtoms;i++) {
294  selfEnergy += x[i].charge*x[i].charge;
295  }
296  //const double SQRT_PI = 1.7724538509055160273; /* mathematica 15 digits*/
299  return selfEnergy;
300 }
301 
302 void ComputePmeCUDA::recvForces() {
303 
304  Lattice& lattice = patches[0].patch->lattice;
305  Vector origin = lattice.origin();
306  Vector recip1 = lattice.a_r();
307  Vector recip2 = lattice.b_r();
308  Vector recip3 = lattice.c_r();
309  double r1x = recip1.x;
310  double r1y = recip1.y;
311  double r1z = recip1.z;
312  double r2x = recip2.x;
313  double r2y = recip2.y;
314  double r2z = recip2.z;
315  double r3x = recip3.x;
316  double r3y = recip3.y;
317  double r3z = recip3.z;
318 
320 
321  for (int i=0;i < getNumPatches();i++) {
322  if (patches[i].pmeForceMsg == NULL)
323  NAMD_bug("ComputePmeCUDA::recvForces, no message in pmeForceMsg");
324 
325  CudaForce* force = patches[i].pmeForceMsg->force;
326  Results *r = patches[i].forceBox->open();
327  int numAtoms = patches[i].pmeForceMsg->numAtoms;
328 
329  Force *f = r->f[Results::slow];
330  if (!patches[i].pmeForceMsg->numStrayAtoms && !simParams->commOnly) {
331  for(int j=0;j < numAtoms;j++) {
332  double f1 = force[j].x;
333  double f2 = force[j].y;
334  double f3 = force[j].z;
335  f[j].x += f1*r1x + f2*r2x + f3*r3x;
336  f[j].y += f1*r1y + f2*r2y + f3*r3y;
337  f[j].z += f1*r1z + f2*r2z + f3*r3z;
338  }
339  }
340 
341  patches[i].forceBox->close(&r);
342  delete patches[i].pmeForceMsg;
343  patches[i].pmeForceMsg = NULL;
344  }
345 
346 }
347 
349  bool done = false;
350  int i;
351  CmiLock(lock);
352  patchCounter--;
353  i = patchCounter;
354  if (patchCounter == 0) {
355  patchCounter = getNumPatches();
356  done = true;
357  }
358  CmiUnlock(lock);
359  if (patches[i].pmeForceMsg != NULL)
360  NAMD_bug("ComputePmeCUDA::storePmeForceMsg, already contains message");
361  patches[i].pmeForceMsg = msg;
362  return done;
363 }
364 #endif // NAMD_CUDA
static Node * Object()
Definition: Node.h:86
void setNumPatches(int n)
Definition: Compute.h:52
int sequence(void)
Definition: Compute.h:64
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:140
Vector a_r() const
Definition: Lattice.h:268
int ComputeID
Definition: NamdTypes.h:183
static PatchMap * Object()
Definition: PatchMap.h:27
void getHomePencil(PatchID patchID, int &homey, int &homez)
Definition: Vector.h:64
SimParameters * simParameters
Definition: Node.h:178
Vector c_r() const
Definition: Lattice.h:270
static __thread atom * atoms
#define COULOMB
Definition: common.h:46
BigReal & item(int i)
Definition: ReductionMgr.h:312
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
Definition: Patch.C:239
float x
Definition: NamdTypes.h:158
void recvAtoms(PmeAtomMsg *msg)
BigReal z
Definition: Vector.h:66
Position position
Definition: NamdTypes.h:53
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:365
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:278
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
Vector origin() const
Definition: Lattice.h:262
Bool pairInteractionOn
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
Vector b_r() const
Definition: Lattice.h:269
#define SQRT_PI
Definition: ComputeExt.C:30
bool storePmeForceMsg(PmeForceMsg *msg)
Charge charge
Definition: NamdTypes.h:54
#define PRIORITY_SIZE
Definition: Priorities.h:13
#define PME_PRIORITY
Definition: Priorities.h:29
void NAMD_bug(const char *err_msg)
Definition: common.C:129
Force * f[maxNumForces]
Definition: PatchTypes.h:67
BigReal x
Definition: Vector.h:66
float z
Definition: NamdTypes.h:158
int PatchID
Definition: NamdTypes.h:182
virtual ~ComputePmeCUDA()
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:122
#define simParams
Definition: Output.C:127
int getNumPatches()
Definition: Compute.h:53
BigReal y
Definition: Vector.h:66
int getNode(int i, int j)
float y
Definition: NamdTypes.h:158
void submit(void)
Definition: ReductionMgr.h:323
int size(void) const
Definition: ResizeArray.h:127
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
gridSize x
double BigReal
Definition: common.h:114
for(int i=0;i< n1;++i)