NAMD
ComputePmeCUDAMgr.h
Go to the documentation of this file.
1 #ifndef COMPUTEPMECUDAMGR_H
2 #define COMPUTEPMECUDAMGR_H
3 
4 #ifdef NAMD_CUDA
5 #include <cuda_runtime.h> // Needed for cudaStream_t that is used in ComputePmeCUDAMgr -class
6 #endif
7 #ifdef NAMD_HIP
8 #include <hip/hip_runtime.h>
9 #endif
10 
11 #include <vector>
12 
13 #include "PmeBase.h"
14 #include "PmeSolver.h"
15 #include "PmeSolverUtil.h"
16 #include "ComputePmeCUDAMgr.decl.h"
17 #include "HipDefines.h"
18 
19 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
20 class ComputePmeCUDA;
21 
22 //
23 // Base class for thread safe atom storage
24 //
26 public:
27  PmeAtomStorage(const bool useIndex) : useIndex(useIndex) {
28  numAtoms = 0;
29  atomCapacity = 0;
30  atom = NULL;
31  atomIndexCapacity = 0;
32  atomIndex = NULL;
33  overflowStart = 0;
34  overflowEnd = 0;
35  overflowAtomCapacity = 0;
36  overflowAtom = NULL;
37  overflowAtomIndexCapacity = 0;
38  overflowAtomIndex = NULL;
40  atomElecFactorArrays = std::vector<float*>(0);
41  overflowAtomElecFactorArrays = std::vector<float*>(0);
42  overflowAtomElecFactorCapacities = std::vector<int>(0);
43  alchOn = false;
44  alchFepOn = false;
45  alchDecouple = false;
46  lock_ = CmiCreateLock();
47  }
48  virtual ~PmeAtomStorage() {
49  CmiDestroyLock(lock_);
50  }
51 
52  int addAtoms(const int natom, const CudaAtom* src, const std::vector<float*>& lambdaArrays) {
53  return addAtoms_(natom, src, NULL, lambdaArrays);
54  }
55 
56  int addAtomsWithIndex(const int natom, const CudaAtom* src, const int* index, const std::vector<float*>& lambdaArrays) {
57  return addAtoms_(natom, src, index, lambdaArrays);
58  }
59 
60  // Finish up, must be called after "done" is returned by addAtoms.
61  // Only the last thread that gets the "done" signal from addAtoms can enter here.
62  void finish() {
63  if (overflowEnd-overflowStart > 0) {
64  resize_((void **)&atom, numAtoms, atomCapacity, sizeof(CudaAtom));
65  if (useIndex) resize_((void **)&atomIndex, numAtoms, atomIndexCapacity, sizeof(int));
66  if (alchOn) {
67  for (unsigned int i = 0; i < totalFactorArrays; ++i) {
68  if (enabledGrid[i] == true)
69  resize_((void **)&(atomElecFactorArrays[i]), numAtoms, overflowAtomCapacity, sizeof(float));
70  }
71  }
72  memcpy_(atom+overflowStart, overflowAtom, (overflowEnd - overflowStart)*sizeof(CudaAtom));
73  if (useIndex) memcpy_(atomIndex+overflowStart, overflowAtomIndex, (overflowEnd - overflowStart)*sizeof(int));
74  if (alchOn) {
75  for (unsigned int i = 0; i < totalFactorArrays; ++i) {
76  if (enabledGrid[i] == true)
77  memcpy_(atomElecFactorArrays[i]+overflowStart, overflowAtomElecFactorArrays[i], (overflowEnd - overflowStart)*sizeof(float));
78  }
79  }
80  overflowStart = 0;
81  overflowEnd = 0;
82  }
83  }
84 
85  // Clear and reset storage to initial stage.
86  // Only the last thread that gets the "done" signal from addAtoms can enter here.
87  void clear() {
88  patchPos.clear();
89  numAtoms = 0;
90  }
91 
92  // Return pointer to atom data
94  return atom;
95  }
96 
97  // CHC: return pointer to alchemical lambdas
98  float* getAtomElecFactors(unsigned int iGrid) {
99  return atomElecFactorArrays[iGrid];
100  }
102  alchOn = simParams.alchOn;
103  alchFepOn = simParams.alchFepOn;
104  alchDecouple = simParams.alchDecouple;
105  if (alchOn) {
106  enabledGrid.resize(NUM_GRID_MAX, false);
107  totalFactorArrays = 2;
108  enabledGrid[0] = true;
109  enabledGrid[1] = true;
110  if (alchDecouple) {
111  totalFactorArrays = 4;
112  enabledGrid[2] = true;
113  enabledGrid[3] = true;
114  }
115  if (bool(simParams.alchElecLambdaStart)) {
116  totalFactorArrays = 5;
117  enabledGrid[4] = true;
118  }
119  if (simParams.alchThermIntOn) {
120  totalFactorArrays = 5;
121  enabledGrid[4] = true;
122  }
126  }
127  }
128 
129  // Return pointer to patch positions
130  int* getPatchPos() {
131  return patchPos.data();
132  }
133 
135  return patchPos.size();
136  }
137 
138  int getNumAtoms() {
139  return numAtoms;
140  }
141 
142  int* getAtomIndex() {
143  if (!useIndex)
144  NAMD_bug("PmeAtomStorage::getAtomIndex, no indexing enabled");
145  return atomIndex;
146  }
147 
148 protected:
149  // Atom array
151  // Atom index array
152  int* atomIndex;
153  // Overflow atom array
155  // Overflow atom index array
157  // CHC: factor arrays for alchemical transformation
158  std::vector<float*> atomElecFactorArrays;
159  std::vector<float*> overflowAtomElecFactorArrays;
161  std::vector<bool> enabledGrid;
162  unsigned int totalFactorArrays;
163  bool alchOn;
164  bool alchFepOn;
166 
167 private:
168  // If true, uses indexed atom arrays
169  const bool useIndex;
170  // Node lock
171  CmiNodeLock lock_;
172  // Data overflow
173  int overflowAtomCapacity;
174  int overflowAtomIndexCapacity;
175  int overflowStart;
176  int overflowEnd;
177  // Number of atoms currently in storage
178  int numAtoms;
179  // Atom patch position
180  std::vector<int> patchPos;
181  // Atom array capacity
182  int atomCapacity;
183  // Atom index array capacity
184  int atomIndexCapacity;
185 
186  // Resize array with 1.5x extra storage
187  void resize_(void **array, int sizeRequested, int& arrayCapacity, const size_t sizeofType) {
188  // If array is not NULL and has enough capacity => we have nothing to do
189  if (*array != NULL && arrayCapacity >= sizeRequested) return;
190 
191  // Otherwise, allocate new array
192  int newArrayCapacity = (int)(sizeRequested*1.5);
193  void* newArray = alloc_(sizeofType*newArrayCapacity);
194 
195  if (*array != NULL) {
196  // We have old array => copy contents to new array
197  memcpy_(newArray, *array, arrayCapacity*sizeofType);
198  // De-allocate old array
199  dealloc_(*array);
200  }
201 
202  // Set new capacity and array pointer
203  arrayCapacity = newArrayCapacity;
204  *array = newArray;
205  }
206 
207  virtual void memcpy_(void *dst, const void* src, const int size) {
208  memcpy(dst, src, size);
209  }
210 
// Gather-copy for indexed batches: dst[i] = src[indexSrc[i]] for the first
// natom entries. Templated so one routine serves CudaAtom, float, etc.
// Non-virtual: no derived class overrides it.
template <typename array_type>
void copyWithIndex_(array_type* dst, const array_type* src, const int natom, const int* indexSrc) {
  const int* idx = indexSrc;
  const int* const idxEnd = indexSrc + natom;
  while (idx != idxEnd) {
    *dst++ = src[*idx++];
  }
}
219 
220  // Allocate array of size bytes
221  virtual void* alloc_(const size_t size)=0;
222 
223  // Deallocate array
224  virtual void dealloc_(void *p)=0;
225 
226  // Add atoms in thread-safe manner.
227  // Returns the patch index where the atoms were added
228  int addAtoms_(const int natom, const CudaAtom* src, const int* index, const std::vector<float*>& lambdaArrays) {
229  CmiLock(lock_);
230  // Accumulate position for patches:
231  // atoms for patch i are in the range [ patchPos[i-1], patchPos[i]-1 ]
232  int patchInd = patchPos.size();
233  int ppos = (patchInd == 0) ? natom : patchPos[patchInd-1] + natom;
234  patchPos.push_back(ppos);
235  int pos = numAtoms;
236  bool overflow = false;
237  numAtoms += natom;
238  // Check for overflow
239  if (numAtoms > atomCapacity || (useIndex && numAtoms > atomIndexCapacity)) {
240  // number of atoms exceeds capacity, store into overflow buffer
241  // Note: storing to overflow should be very infrequent, most likely only
242  // in the initial call
243  if (overflowEnd-overflowStart == 0) {
244  overflowStart = pos;
245  overflowEnd = pos;
246  }
247  overflowEnd += natom;
248  if (overflowEnd-overflowStart > overflowAtomCapacity) {
249  resize_((void **)&overflowAtom, overflowEnd-overflowStart, overflowAtomCapacity, sizeof(CudaAtom));
250  }
251  if (useIndex && overflowEnd-overflowStart > overflowAtomIndexCapacity) {
252  resize_((void **)&overflowAtomIndex, overflowEnd-overflowStart, overflowAtomIndexCapacity, sizeof(int));
253  }
254  // CHC: copy alchemical lambda value
255  if (alchOn) {
256  for (unsigned int i = 0; i < totalFactorArrays; ++i) {
257  if (lambdaArrays[i] != NULL && overflowEnd-overflowStart > overflowAtomElecFactorCapacities[i]) {
258  resize_((void **)&(overflowAtomElecFactorArrays[i]), overflowEnd-overflowStart, overflowAtomElecFactorCapacities[i], sizeof(float));
259  }
260  }
261  }
262  if (index != NULL) {
263  if (useIndex) memcpy_(overflowAtomIndex+overflowEnd-overflowStart-natom, index, natom*sizeof(int));
264  copyWithIndex_(overflowAtom+overflowEnd-overflowStart-natom, src, natom, index);
265  if (alchOn) {
266  for (unsigned int i = 0; i < totalFactorArrays; ++i) {
267  if (lambdaArrays[i] != NULL) {
268  copyWithIndex_(overflowAtomElecFactorArrays[i]+overflowEnd-overflowStart-natom, lambdaArrays[i], natom, index);
269  }
270  }
271  }
272  } else {
273  memcpy_(overflowAtom+overflowEnd-overflowStart-natom, src, natom*sizeof(CudaAtom));
274  if (alchOn) {
275  for (unsigned int i = 0; i < totalFactorArrays; ++i) {
276  if (lambdaArrays[i] != NULL) {
277  memcpy_(overflowAtomElecFactorArrays[i]+overflowEnd-overflowStart-natom, lambdaArrays[i], natom*sizeof(float));
278  }
279  }
280  }
281  }
282  overflow = true;
283  }
284  CmiUnlock(lock_);
285  // If no overflow, copy to final position
286  if (!overflow) {
287  if (index != NULL) {
288  if (useIndex) memcpy_(atomIndex+pos, index, natom*sizeof(int));
289  copyWithIndex_(atom+pos, src, natom, index);
290  if (alchOn) {
291  for (unsigned int i = 0; i < totalFactorArrays; ++i) {
292  if (lambdaArrays[i] != NULL) {
293  copyWithIndex_(atomElecFactorArrays[i]+pos, lambdaArrays[i], natom, index);
294  }
295  }
296  }
297  } else {
298  memcpy_(atom+pos, src, natom*sizeof(CudaAtom));
299  if (alchOn) {
300  for (unsigned int i = 0; i < totalFactorArrays; ++i) {
301  if (lambdaArrays[i] != NULL) {
302  memcpy_(atomElecFactorArrays[i]+pos, lambdaArrays[i], natom*sizeof(float));
303  }
304  }
305  }
306  }
307  }
308  return patchInd;
309  }
310 
311 };
312 
313 class PmeAtomMsg : public CMessage_PmeAtomMsg {
314 public:
321  int numAtoms;
322  int i, j;
324  int pe;
328  // int miny, minz;
329 };
330 
331 class PmeForceMsg : public CMessage_PmeForceMsg {
332 public:
335  CudaForce *force3; // force from the third grid of alchDecouple
339  int pe;
340  int numAtoms;
342  bool zeroCopy;
344 };
345 
346 class PmeLaunchMsg : public CMessage_PmeLaunchMsg {
347 public:
349  int natom;
350  int pe;
352 };
353 
354 class RegisterPatchMsg : public CMessage_RegisterPatchMsg {
355 public:
356  int i, j;
357 };
358 
359 class NumDevicesMsg : public CMessage_NumDevicesMsg {
360 public:
363 };
364 
365 class PmeAtomPencilMsg : public CMessage_PmeAtomPencilMsg {
366 public:
373  int numAtoms;
374  int y, z;
375  int srcY, srcZ;
379 };
380 
381 class PmeForcePencilMsg : public CMessage_PmeForcePencilMsg {
382 public:
385  CudaForce *force3; // force from the third grid of alchDecouple
388  int numAtoms;
389  int y, z;
390  int srcY, srcZ;
391 };
392 
393 class CProxy_ComputePmeCUDADevice;
394 class RecvDeviceMsg : public CMessage_RecvDeviceMsg {
395 public:
396  CProxy_ComputePmeCUDADevice* dev;
398 };
399 
400 class PmeAtomFiler : public CBase_PmeAtomFiler {
401 public:
402  PmeAtomFiler();
403  PmeAtomFiler(CkMigrateMessage *);
404  ~PmeAtomFiler();
405  void fileAtoms(const int numAtoms, const CudaAtom* atoms, Lattice &lattice, const PmeGrid &pmeGrid,
406  const int pencilIndexY, const int pencilIndexZ, const int ylo, const int yhi, const int zlo, const int zhi);
407  // static inline int yBlock(int p) {return p % 3;}
408  // static inline int zBlock(int p) {return p / 3;}
409  int getNumAtoms(int p) {return pencilSize[p];}
410  int* getAtomIndex(int p) {return pencil[p];}
411 private:
412  // 9 Pencils + 1 Stay atom pencil
413  int pencilSize[9+1];
414  int pencilCapacity[9+1];
415  int* pencil[9+1];
416 };
417 
418 
419 class CProxy_ComputePmeCUDAMgr;
420 class ComputePmeCUDADevice : public CBase_ComputePmeCUDADevice {
421 public:
422  // ComputePmeCUDADevice_SDAG_CODE;
424  ComputePmeCUDADevice(CkMigrateMessage *m);
426  void initialize(PmeGrid& pmeGrid_in, int pencilIndexY_in, int pencilIndexZ_in,
427  int deviceID_in, int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in,
428  CProxy_PmeAtomFiler pmeAtomFiler_in);
429  int getDeviceID();
430  cudaStream_t getStream();
431  CProxy_ComputePmeCUDAMgr getMgrProxy();
432  void setPencilProxy(CProxy_CudaPmePencilXYZ pmePencilXYZ_in);
433  void setPencilProxy(CProxy_CudaPmePencilXY pmePencilXY_in);
434  void setPencilProxy(CProxy_CudaPmePencilX pmePencilX_in);
435  void activate_pencils();
436  void initializePatches(int numHomePatches_in);
437  void registerNeighbor();
438  void recvAtoms(PmeAtomMsg *msg);
439  void sendAtomsToNeighbors();
440  void sendAtomsToNeighbor(int y, int z, int atomIval);
443  void spreadCharge();
444  void gatherForce();
445  void gatherForceDone(unsigned int iGrid);
446  void sendForcesToNeighbors();
448  void mergeForcesOnPatch(int homePatchIndex);
449  void sendForcesToPatch(PmeForceMsg *forceMsg);
450 
451  void gatherForceDoneSubset(int first, int last);
452 
453  bool isGridEnabled(unsigned int i) const;
454 
455 private:
456  // Store updated lattice
457  Lattice lattice;
458  // Store virial and energy flags
459  bool doVirial, doEnergy;
460  // PME grid definiton
461  PmeGrid pmeGrid;
462  // PME pencil type
463  int pmePencilType;
464  // Neighboring pencil bounds, [-1,1]
465  int ylo, yhi, zlo, zhi;
466  // Size of the neighboring pencil grid, maximum value 3. yNBlocks = yhi - ylo + 1
467  int yNBlocks, zNBlocks;
468  // Number of home patches for this device
469  int numHomePatches;
470  // Pencil location for this device
471  int pencilIndexY, pencilIndexZ;
472 
473  // Number of neighbors expected to provide atoms including self
474  int numNeighborsExpected;
475 
476  // Number of stray atoms
477  int numStrayAtoms;
478 
479  // Node locks
480  CmiNodeLock lock_numHomePatchesMerged;
481  CmiNodeLock lock_numPencils;
482  CmiNodeLock lock_numNeighborsRecv;
483  CmiNodeLock lock_recvAtoms;
484 
485  int atomI, forceI;
486 
487  //----------------------------------------------------------------------------------
488  // Book keeping
489  // NOTE: We keep two copies of pmeAtomStorage and homePatchIndexList so that forces can be
490  // merged while next patch of atoms is already being received
491  //----------------------------------------------------------------------------------
492  // Storage for each pencil on the yNBlocks x zNBlocks grid
493  std::vector< PmeAtomStorage* > pmeAtomStorage[2];
494  std::vector<bool> pmeAtomStorageAllocatedHere;
495 
496  // Size numHomePatches:
497  // Tells how many pencils have contributed to home patch
498  std::vector<int> numPencils[2];
499 
500  // Pencil location
501  struct PencilLocation {
502  // Pencil index
503  int pp;
504  // Patch location in the pencil
505  int pencilPatchIndex;
506  PencilLocation(int pp, int pencilPatchIndex) : pp(pp), pencilPatchIndex(pencilPatchIndex) {}
507  };
508 
509  // Size numHomePatches
510  std::vector< std::vector<PencilLocation> > plList[2];
511 
512  // Size numHomePatches
513  std::vector< PmeForceMsg* > homePatchForceMsgs[2];
514 
515  // // Size numHomePatches
516  // std::vector<int> numHomeAtoms[2];
517 
518  std::vector< std::vector<int> > homePatchIndexList[2];
519 
520  // Number of neighbors from which we have received atoms
521  int numNeighborsRecv;
522 
523  // Number of home patches we have received atom from
524  int numHomePatchesRecv;
525 
526  // Number of home patches we have merged forces for
527  int numHomePatchesMerged;
528 
529  // Size yNBlocks*zNBlocks
530  std::vector< PmeForcePencilMsg* > neighborForcePencilMsgs;
531  // std::vector< PmeForcePencil > neighborForcePencils;
532 
533  // Size yNBlocks*zNBlocks
534  std::vector<int> neighborPatchIndex;
535  //----------------------------------------------------------------------------------
536 
537  // CUDA stream
538  cudaStream_t stream;
539  bool streamCreated;
540  // Device ID
541  int deviceID;
542  // Charge spreading and force gathering
543 
544  // Used for alchemical transformation
545  int simulationStep;
546 
547  // multiple PmeRealSpaceCompute for multiple grids
548  std::array<PmeRealSpaceCompute*, NUM_GRID_MAX> pmeRealSpaceComputes;
549  std::array<bool, NUM_GRID_MAX> enabledGrid;
550  // Host memory force array
551  std::array<size_t, NUM_GRID_MAX> forceCapacities;
552  std::array<CudaForce*, NUM_GRID_MAX> forces;
553 
554  // Array for avoiding race conditions in force collection
555  // forceReady[i] == -1 => grid i is not enabled
556  // forceReady[i] == 0 => force is not ready
557  // forceReady[i] == 1 => force is ready
558  // For example, if only grid 1 and 2 are used , then forceReady is initialized to {0, 0, -1, -1, -1}
559  // When pmeRealSpaceComputes[i] calls gatherForceDone by the callback gatherForceSetCallback(this), forceReady[i] is set to 1
560  // The actual stuff or force merging that gatherForceDone does will launch when forceReady becomes {1, 1, -1, -1, -1}
561  // After that forceReady will be reset to {0, 0, -1, -1, -1} again.
562  std::array<int, NUM_GRID_MAX> forceReady;
563 
564  // Proxy for the manager
565  CProxy_ComputePmeCUDAMgr mgrProxy;
566 
567  // Atom filer proxy
568  CProxy_PmeAtomFiler pmeAtomFiler;
569 
570  // Pencil proxy
571  CProxy_CudaPmePencilXYZ pmePencilXYZ;
572  CProxy_CudaPmePencilXY pmePencilXY;
573  CProxy_CudaPmePencilX pmePencilX;
574 
575  // For event tracing
576  double beforeWalltime;
577 };
578 
579 class ComputePmeCUDAMgr : public CBase_ComputePmeCUDAMgr {
580 public:
583  ComputePmeCUDAMgr(CkMigrateMessage *);
585  void setupPencils();
586  void initialize(CkQdMsg *msg);
587  void initialize_pencils(CkQdMsg *msg);
588  void activate_pencils(CkQdMsg *msg);
589  PmeGrid getPmeGrid() {return pmeGrid;}
590  int getNode(int i, int j);
591  int getDevice(int i, int j);
592  int getDevicePencilY(int i, int j);
593  int getDevicePencilZ(int i, int j);
594  int getDeviceIDPencilX(int i, int j);
595  int getDeviceIDPencilY(int i, int j);
596  int getDeviceIDPencilZ(int i, int j);
597  void recvPencils(CProxy_CudaPmePencilXYZ xyz);
598  void recvPencils(CProxy_CudaPmePencilXY xy, CProxy_CudaPmePencilZ z);
599  void recvPencils(CProxy_CudaPmePencilX x, CProxy_CudaPmePencilY y, CProxy_CudaPmePencilZ z);
600 
602  void recvDevices(RecvDeviceMsg* msg);
603  void recvAtomFiler(CProxy_PmeAtomFiler filer);
604  void skip();
605  void recvAtoms(PmeAtomMsg *msg);
606  void getHomePencil(PatchID patchID, int& homey, int& homez);
607  int getHomeNode(PatchID patchID);
608 
609  bool isPmePe(int pe);
610  bool isPmeNode(int node);
611  bool isPmeDevice(int deviceID);
612 
614  CProxy_ComputePmeCUDAMgr mgrProxy(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
615  return mgrProxy.ckLocalBranch();
616  }
617 protected:
618 
619 private:
620  void restrictToMaxPMEPencils();
621 
622  // ---------------------------------------------
623  // For .ci file
624  // Counter for determining numDevicesMax
625  int numNodesContributed;
626  int numDevicesMax;
627 
628  // Number of home patches for each device on this manager
629  std::vector<int> numHomePatchesList;
630 
631  // Counter for "registerPatchDone"
632  int numTotalPatches;
633  // ---------------------------------------------
634 
635  // PME pencil type: 1=column, 2=slab, 3=box
636  int pmePencilType;
637 
638  PmeGrid pmeGrid;
639 
640  // Number of CUDA devices on this node that are used for PME computation
641  int numDevices;
642 
643  std::vector<int> xPes;
644  std::vector<int> yPes;
645  std::vector<int> zPes;
646 
647  // List of pencil coordinates (i,j) for each device held by this node
648  struct IJ {
649  int i, j;
650  };
651  std::vector<IJ> ijPencilX;
652  std::vector<IJ> ijPencilY;
653  std::vector<IJ> ijPencilZ;
654 
655  struct NodeDevice {
656  int node;
657  int device;
658  };
659  std::vector<NodeDevice> nodeDeviceList;
660 
661  // Atom filer proxy
662  CProxy_PmeAtomFiler pmeAtomFiler;
663 
664  // Device proxies
665  std::vector<CProxy_ComputePmeCUDADevice> deviceProxy;
666 
667  // Extra devices
668  struct ExtraDevice {
669  int deviceID;
670  cudaStream_t stream;
671  };
672  std::vector<ExtraDevice> extraDevices;
673 
674  // Pencil proxies
675  CProxy_CudaPmePencilXYZ pmePencilXYZ;
676  CProxy_CudaPmePencilXY pmePencilXY;
677  CProxy_CudaPmePencilX pmePencilX;
678  CProxy_CudaPmePencilY pmePencilY;
679  CProxy_CudaPmePencilZ pmePencilZ;
680 
681 };
682 #else // NAMD_CUDA
683 class ComputePmeCUDAMgr : public CBase_ComputePmeCUDAMgr {
684 };
685 #endif // NAMD_CUDA
686 
687 #endif // COMPUTEPMECUDAMGR_H
int getHomeNode(PatchID patchID)
float * chargeFactors3
void sendAtomsToNeighbor(int y, int z, int atomIval)
void recvForcesFromNeighbor(PmeForcePencilMsg *msg)
CProxy_ComputePmeCUDAMgr getMgrProxy()
void initialize(CkQdMsg *msg)
virial xy
ComputePmeCUDA * compute
void gatherForceDone(unsigned int iGrid)
int getDeviceIDPencilX(int i, int j)
void sendForcesToPatch(PmeForceMsg *forceMsg)
void setPencilProxy(CProxy_CudaPmePencilXYZ pmePencilXYZ_in)
void getHomePencil(PatchID patchID, int &homey, int &homez)
CudaForce * force3
int addAtomsWithIndex(const int natom, const CudaAtom *src, const int *index, const std::vector< float *> &lambdaArrays)
void recvAtomFiler(CProxy_PmeAtomFiler filer)
void recvAtoms(PmeAtomMsg *msg)
CudaForce * force4
int addAtoms(const int natom, const CudaAtom *src, const std::vector< float *> &lambdaArrays)
void setupAlch(const SimParameters &simParams)
float * chargeFactors1
std::vector< bool > enabledGrid
void mergeForcesOnPatch(int homePatchIndex)
PmeAtomStorage(const bool useIndex)
const unsigned int NUM_GRID_MAX
Definition: PmeSolverUtil.h:9
void recvDevices(RecvDeviceMsg *msg)
int getDeviceIDPencilZ(int i, int j)
std::vector< int > overflowAtomElecFactorCapacities
void fileAtoms(const int numAtoms, const CudaAtom *atoms, Lattice &lattice, const PmeGrid &pmeGrid, const int pencilIndexY, const int pencilIndexZ, const int ylo, const int yhi, const int zlo, const int zhi)
float * chargeFactors4
int getNumAtoms(int p)
CudaAtom * overflowAtom
void initialize(PmeGrid &pmeGrid_in, int pencilIndexY_in, int pencilIndexZ_in, int deviceID_in, int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in, CProxy_PmeAtomFiler pmeAtomFiler_in)
static ComputePmeCUDAMgr * Object()
bool isGridEnabled(unsigned int i) const
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int getDevicePencilZ(int i, int j)
CudaAtom * atoms
void recvAtomsFromNeighbor(PmeAtomPencilMsg *msg)
virtual ~PmeAtomStorage()
void initializePatches(int numHomePatches_in)
unsigned int totalFactorArrays
CudaForce * force
int getDevice(int i, int j)
float * chargeFactors5
int getDeviceIDPencilY(int i, int j)
void recvAtoms(PmeAtomMsg *msg)
#define simParams
Definition: Output.C:129
void activate_pencils(CkQdMsg *msg)
CudaForce * force2
CudaAtom * getAtoms()
CudaForce * force5
bool isPmeNode(int node)
int getNode(int i, int j)
void initialize_pencils(CkQdMsg *msg)
std::vector< float * > overflowAtomElecFactorArrays
ComputePmeCUDA * compute
int getDevicePencilY(int i, int j)
int * getAtomIndex(int p)
void recvPencils(CProxy_CudaPmePencilXYZ xyz)
float * chargeFactors2
CProxy_ComputePmeCUDADevice * dev
void gatherForceDoneSubset(int first, int last)
float * getAtomElecFactors(unsigned int iGrid)
NumDevicesMsg(int numDevices)
ComputePmeCUDA * compute
CudaForce * force
int32 PatchID
Definition: NamdTypes.h:277
bool isPmeDevice(int deviceID)
std::vector< float * > atomElecFactorArrays