NAMD
Public Member Functions | Static Public Member Functions | List of all members
ComputeCUDAMgr Class Reference

#include <ComputeCUDAMgr.h>

Inheritance diagram for ComputeCUDAMgr:

Public Member Functions

 ComputeCUDAMgr ()
 
 ComputeCUDAMgr (CkMigrateMessage *)
 
 ~ComputeCUDAMgr ()
 
void initialize (CkQdMsg *msg)
 
void initialize_devices (CkQdMsg *msg)
 
void update ()
 
CudaComputeNonbonded * createCudaComputeNonbonded (ComputeID c)
 
CudaComputeNonbonded * getCudaComputeNonbonded ()
 
CudaPmeOneDevice * createCudaPmeOneDevice ()
 
CudaPmeOneDevice * getCudaPmeOneDevice ()
 
std::shared_ptr< CudaGlobalMasterServer > createCudaGlobalMaster ()
 
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster ()
 

Static Public Member Functions

static ComputeCUDAMgr * getComputeCUDAMgr ()
 

Detailed Description

Definition at line 16 of file ComputeCUDAMgr.h.

Constructor & Destructor Documentation

◆ ComputeCUDAMgr() [1/2]

ComputeCUDAMgr::ComputeCUDAMgr ( )

Definition at line 18 of file ComputeCUDAMgr.C.

18  {
19  // __sdag_init();
20  numDevices = 0;
21  // numNodesContributed = 0;
22  // numDevicesMax = 0;
23  cudaPmeOneDevice = NULL; // XXX is this needed?
24 #ifdef NAMD_CUDA
25  cudaGlobalMasterObject = nullptr;
26 #endif // NAMD_CUDA
27 }

◆ ComputeCUDAMgr() [2/2]

ComputeCUDAMgr::ComputeCUDAMgr ( CkMigrateMessage *  )

Definition at line 32 of file ComputeCUDAMgr.C.

References NAMD_bug().

32  {
33  // __sdag_init();
34  NAMD_bug("ComputeCUDAMgr cannot be migrated");
35  numDevices = 0;
36  // numNodesContributed = 0;
37  // numDevicesMax = 0;
38  cudaPmeOneDevice = NULL; // XXX is this needed?
39 #ifdef NAMD_CUDA
40  cudaGlobalMasterObject = nullptr;
41 #endif // NAMD_CUDA
42 }
void NAMD_bug(const char *err_msg)
Definition: common.C:195

◆ ~ComputeCUDAMgr()

ComputeCUDAMgr::~ComputeCUDAMgr ( )

Definition at line 47 of file ComputeCUDAMgr.C.

47  {
48  for (int i=0;i < numDevices;i++) {
49  if (cudaNonbondedTablesList[i] != NULL) delete cudaNonbondedTablesList[i];
50  if (cudaComputeNonbondedList[i] != NULL) delete cudaComputeNonbondedList[i];
51 #ifdef BONDED_CUDA
52  if (computeBondedCUDAList[i] != NULL) delete computeBondedCUDAList[i];
53 #endif
54  }
55  delete cudaPmeOneDevice;
56 }

Member Function Documentation

◆ createCudaComputeNonbonded()

CudaComputeNonbonded * ComputeCUDAMgr::createCudaComputeNonbonded ( ComputeID  c)

Definition at line 238 of file ComputeCUDAMgr.C.

References SimParameters::CUDASOAintegrate, deviceCUDA, SimParameters::GBISOn, DeviceCUDA::getDeviceID(), DeviceCUDA::getNoStreaming(), NAMD_bug(), Node::Object(), and Node::simParameters.

Referenced by createCudaComputeNonbonded().

238  {
239  int deviceID = deviceCUDA->getDeviceID();
240  if (cudaComputeNonbondedList.at(deviceID) != NULL)
241  NAMD_bug("ComputeCUDAMgr::createCudaComputeNonbonded called twice");
242  if (cudaNonbondedTablesList.at(deviceID) == NULL)
243  NAMD_bug("ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
244  //bool doStreaming = !deviceCUDA->getNoStreaming() && !Node::Object()->simParameters->GBISOn && !Node::Object()->simParameters->CUDASOAintegrate;
246  cudaComputeNonbondedList[deviceID] = new CudaComputeNonbonded(c, deviceID, *cudaNonbondedTablesList[deviceID], doStreaming);
247  return cudaComputeNonbondedList[deviceID];
248 }
static Node * Object()
Definition: Node.h:86
SimParameters * simParameters
Definition: Node.h:181
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int getDeviceID()
Definition: DeviceCUDA.h:144
int getNoStreaming()
Definition: DeviceCUDA.h:130
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

◆ createCudaGlobalMaster()

std::shared_ptr< CudaGlobalMasterServer > ComputeCUDAMgr::createCudaGlobalMaster ( )

Definition at line 294 of file ComputeCUDAMgr.C.

References deviceCUDA, endi(), DeviceCUDA::getGlobalDevice(), iINFO(), iout, Node::Object(), Node::simParameters, and simParams.

Referenced by ComputeMgr::recvCudaGlobalMasterCreateMsg().

294  {
295  iout << iINFO << "Creating CUDAGlobalMaster on PE " << CkMyPe() << '\n' << endi;
296  if (cudaGlobalMasterObject) {
297  return cudaGlobalMasterObject;
298  }
299  const int deviceID = deviceCUDA->getGlobalDevice();
301  cudaGlobalMasterObject = std::make_shared<CudaGlobalMasterServer>(deviceID, simParams->cudaGlobalProfilingFreq);
302  return cudaGlobalMasterObject;
303 }
static Node * Object()
Definition: Node.h:86
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
SimParameters * simParameters
Definition: Node.h:181
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
#define iout
Definition: InfoStream.h:51
int getGlobalDevice() const
Definition: DeviceCUDA.h:171
#define simParams
Definition: Output.C:129
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

◆ createCudaPmeOneDevice()

CudaPmeOneDevice * ComputeCUDAMgr::createCudaPmeOneDevice ( )

Definition at line 202 of file ComputeCUDAMgr.C.

References PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, deviceCUDA, PmeGrid::dim2, PmeGrid::dim3, DeviceCUDA::getPmeDevice(), DeviceCUDA::getPmeDeviceIndex(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeGrid::order, Node::simParameters, simParams, PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.

202  {
203  // initialize pmeGrid from simParams
205  PmeGrid pmeGrid;
206  pmeGrid.K1 = simParams->PMEGridSizeX;
207  pmeGrid.K2 = simParams->PMEGridSizeY;
208  pmeGrid.K3 = simParams->PMEGridSizeZ;
209  pmeGrid.order = simParams->PMEInterpOrder;
210  pmeGrid.dim2 = pmeGrid.K2;
211  pmeGrid.dim3 = 2 * (pmeGrid.K3/2 + 1);
212  // override settings for PME pencils
213  pmeGrid.xBlocks = 1;
214  pmeGrid.yBlocks = 1;
215  pmeGrid.zBlocks = 1;
216  pmeGrid.block1 = pmeGrid.K1;
217  pmeGrid.block2 = pmeGrid.K2;
218  pmeGrid.block3 = pmeGrid.K3;
219  // use shared deviceID class
220  int deviceID = 0;
221  int deviceIndex = 0;
222 #ifdef NODEGROUP_FORCE_REGISTER
223  deviceID = deviceCUDA->getPmeDevice();
224  deviceIndex = deviceCUDA->getPmeDeviceIndex();
225 #endif
226  if (cudaPmeOneDevice != NULL) delete cudaPmeOneDevice;
227  cudaPmeOneDevice = new CudaPmeOneDevice(pmeGrid, deviceID, deviceIndex);
228  return cudaPmeOneDevice;
229 }
static Node * Object()
Definition: Node.h:86
int dim2
Definition: PmeBase.h:22
int zBlocks
Definition: PmeBase.h:25
int dim3
Definition: PmeBase.h:22
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
int getPmeDevice()
Definition: DeviceCUDA.h:165
int block1
Definition: PmeBase.h:24
int block2
Definition: PmeBase.h:24
int yBlocks
Definition: PmeBase.h:25
int getPmeDeviceIndex()
Definition: DeviceCUDA.h:167
int order
Definition: PmeBase.h:23
int block3
Definition: PmeBase.h:24
#define simParams
Definition: Output.C:129
int K3
Definition: PmeBase.h:21
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
int xBlocks
Definition: PmeBase.h:25

◆ getComputeCUDAMgr()

ComputeCUDAMgr * ComputeCUDAMgr::getComputeCUDAMgr ( )
static

Definition at line 193 of file ComputeCUDAMgr.C.

References NAMD_bug().

Referenced by createCudaComputeNonbonded(), getCudaComputeNonbonded(), ComputeMgr::recvCudaGlobalMasterCreateMsg(), ComputeMgr::recvCudaGlobalMasterRemoveMsg(), ComputeMgr::recvCudaGlobalMasterUpdateMsg(), and SimParameters::scriptSet().

193  {
194  // Get pointer to ComputeCUDAMgr on this node
195  CProxy_ComputeCUDAMgr computeCUDAMgrProxy = CkpvAccess(BOCclass_group).computeCUDAMgr;
196  ComputeCUDAMgr* computeCUDAMgr = computeCUDAMgrProxy.ckLocalBranch();
197  if (computeCUDAMgr == NULL)
198  NAMD_bug("getComputeCUDAMgr, unable to locate local branch of BOC entry ComputeCUDAMgr");
199  return computeCUDAMgr;
200 }
void NAMD_bug(const char *err_msg)
Definition: common.C:195

◆ getCudaComputeNonbonded()

CudaComputeNonbonded * ComputeCUDAMgr::getCudaComputeNonbonded ( )

Definition at line 253 of file ComputeCUDAMgr.C.

References deviceCUDA, DeviceCUDA::getDeviceID(), and NAMD_bug().

Referenced by getCudaComputeNonbonded().

253  {
254  // Get device ID for this Pe
255  int deviceID = deviceCUDA->getDeviceID();
256  CudaComputeNonbonded* p = cudaComputeNonbondedList[deviceID];
257  if (p == NULL)
258  NAMD_bug("ComputeCUDAMgr::getCudaComputeNonbonded(), device not created yet");
259  return p;
260 }
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int getDeviceID()
Definition: DeviceCUDA.h:144
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

◆ getCudaGlobalMaster()

std::shared_ptr< CudaGlobalMasterServer > ComputeCUDAMgr::getCudaGlobalMaster ( )

Definition at line 290 of file ComputeCUDAMgr.C.

Referenced by ComputeMgr::recvCudaGlobalMasterCreateMsg(), ComputeMgr::recvCudaGlobalMasterRemoveMsg(), and ComputeMgr::recvCudaGlobalMasterUpdateMsg().

290  {
291  return cudaGlobalMasterObject;
292 }

◆ getCudaPmeOneDevice()

CudaPmeOneDevice * ComputeCUDAMgr::getCudaPmeOneDevice ( )

Definition at line 231 of file ComputeCUDAMgr.C.

231  {
232  return cudaPmeOneDevice;
233 }

◆ initialize()

void ComputeCUDAMgr::initialize ( CkQdMsg *  msg)

Definition at line 62 of file ComputeCUDAMgr.C.

References cudaCheck, deviceCUDA, DeviceCUDA::getDeviceCount(), DeviceCUDA::getDeviceIDbyRank(), DeviceCUDA::getNumDevice(), and DeviceCUDA::isGpuReservedPme().

62  {
63  if (msg != NULL) delete msg;
64 
65  numDevices = deviceCUDA->getDeviceCount();
66 #ifdef NODEGROUP_FORCE_REGISTER
67  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
68  PatchData *pdata = cpdata.ckLocalBranch();
69  int ndevs = deviceCUDA->getNumDevice() + 1*deviceCUDA->isGpuReservedPme();
70  pdata->devData.resize(numDevices);
71 
72  {
73  // Pointers to SOA integration data
74  allocate_host<bool*>(&(pdata->h_devHasForces),ndevs);
75  allocate_host<double*>(&(pdata->h_soa_fb_x), ndevs);
76  allocate_host<double*>(&(pdata->h_soa_fb_y), ndevs);
77  allocate_host<double*>(&(pdata->h_soa_fb_z), ndevs);
78 
79  allocate_host<double*>(&(pdata->h_soa_fn_x), ndevs);
80  allocate_host<double*>(&(pdata->h_soa_fn_y), ndevs);
81  allocate_host<double*>(&(pdata->h_soa_fn_z), ndevs);
82 
83  allocate_host<double*>(&(pdata->h_soa_fs_x), ndevs);
84  allocate_host<double*>(&(pdata->h_soa_fs_y), ndevs);
85  allocate_host<double*>(&(pdata->h_soa_fs_z), ndevs);
86 
87  allocate_host<double*>(&(pdata->h_soa_pos_x), ndevs);
88  allocate_host<double*>(&(pdata->h_soa_pos_y), ndevs);
89  allocate_host<double*>(&(pdata->h_soa_pos_z), ndevs);
90 
91  allocate_host<double*>(&(pdata->h_soa_vel_x), deviceCUDA->getNumDevice());
92  allocate_host<double*>(&(pdata->h_soa_vel_y), deviceCUDA->getNumDevice());
93  allocate_host<double*>(&(pdata->h_soa_vel_z), deviceCUDA->getNumDevice());
94 
95  allocate_host<float*> (&(pdata->h_soa_charge), deviceCUDA->getNumDevice());
96 
97  allocate_host<int*> (&(pdata->h_soa_id), deviceCUDA->getNumDevice());
98  allocate_host<int*> (&(pdata->h_soa_vdwType), deviceCUDA->getNumDevice());
99  allocate_host<int*> (&(pdata->h_soa_sortOrder), deviceCUDA->getNumDevice());
100  allocate_host<int*> (&(pdata->h_soa_unsortOrder), deviceCUDA->getNumDevice());
101  allocate_host<double3*>(&(pdata->h_soa_patchCenter), deviceCUDA->getNumDevice());
102  allocate_host<int4*> (&(pdata->h_soa_migrationDestination), deviceCUDA->getNumDevice());
103  allocate_host<int*> (&(pdata->h_soa_sortSoluteIndex), deviceCUDA->getNumDevice());
104 
105  allocate_host<int*> (&(pdata->h_soa_partition), deviceCUDA->getNumDevice());
106 
107  allocate_host<FullAtom*>(&(pdata->h_atomdata_AoS), deviceCUDA->getNumDevice());
108  allocate_host<CudaLocalRecord*>(&(pdata->h_peer_record), deviceCUDA->getNumDevice());
109 
110  allocate_host<int*>(&(pdata->h_tupleCount.bond), deviceCUDA->getNumDevice());
111  allocate_host<int*>(&(pdata->h_tupleCount.angle), deviceCUDA->getNumDevice());
112  allocate_host<int*>(&(pdata->h_tupleCount.dihedral), deviceCUDA->getNumDevice());
113  allocate_host<int*>(&(pdata->h_tupleCount.improper), deviceCUDA->getNumDevice());
114  allocate_host<int*>(&(pdata->h_tupleCount.modifiedExclusion), deviceCUDA->getNumDevice());
115  allocate_host<int*>(&(pdata->h_tupleCount.exclusion), deviceCUDA->getNumDevice());
116  allocate_host<int*>(&(pdata->h_tupleCount.crossterm), deviceCUDA->getNumDevice());
117 
118  allocate_host<int*>(&(pdata->h_tupleOffset.bond), deviceCUDA->getNumDevice());
119  allocate_host<int*>(&(pdata->h_tupleOffset.angle), deviceCUDA->getNumDevice());
120  allocate_host<int*>(&(pdata->h_tupleOffset.dihedral), deviceCUDA->getNumDevice());
121  allocate_host<int*>(&(pdata->h_tupleOffset.improper), deviceCUDA->getNumDevice());
122  allocate_host<int*>(&(pdata->h_tupleOffset.modifiedExclusion), deviceCUDA->getNumDevice());
123  allocate_host<int*>(&(pdata->h_tupleOffset.exclusion), deviceCUDA->getNumDevice());
124  allocate_host<int*>(&(pdata->h_tupleOffset.crossterm), deviceCUDA->getNumDevice());
125 
126  allocate_host<CudaBondStage*>(&(pdata->h_tupleDataStage.bond), deviceCUDA->getNumDevice());
127  allocate_host<CudaAngleStage*>(&(pdata->h_tupleDataStage.angle), deviceCUDA->getNumDevice());
128  allocate_host<CudaDihedralStage*>(&(pdata->h_tupleDataStage.dihedral), deviceCUDA->getNumDevice());
129  allocate_host<CudaDihedralStage*>(&(pdata->h_tupleDataStage.improper), deviceCUDA->getNumDevice());
130  allocate_host<CudaExclusionStage*>(&(pdata->h_tupleDataStage.modifiedExclusion), deviceCUDA->getNumDevice());
131  allocate_host<CudaExclusionStage*>(&(pdata->h_tupleDataStage.exclusion), deviceCUDA->getNumDevice());
132  allocate_host<CudaCrosstermStage*>(&(pdata->h_tupleDataStage.crossterm), deviceCUDA->getNumDevice());
133  }
134 
135  // Allocate the work queues
136  allocate_host<unsigned int*>(&(pdata->d_queues), ndevs);
137  allocate_host<unsigned int>(&(pdata->d_queueCounters), ndevs);
138 
139  cudaCheck(cudaMemset(pdata->d_queueCounters, 0, sizeof(unsigned int)*ndevs));
140 
141  pdata->migrationFlagPerDevice.resize(deviceCUDA->getNumDevice());
142 
143  pdata->tupleReallocationFlagPerDevice.resize(deviceCUDA->getNumDevice());
144  pdata->atomReallocationFlagPerDevice.resize(deviceCUDA->getNumDevice());
145  pdata->maxNumBonds.store(0);
146  pdata->maxNumAngles.store(0);
147  pdata->maxNumDihedrals.store(0);
148  pdata->maxNumImpropers.store(0);
149  pdata->maxNumModifiedExclusions.store(0);
150  pdata->maxNumExclusions.store(0);
151  pdata->maxNumCrossterms.store(0);
152  pdata->devicePatchMapFlag.resize(CkNumPes(), 0);
153 #ifdef NAMD_NCCL_ALLREDUCE
154  // Allocate NCCL-related stuff
155  deviceCUDA->setupNcclUniqueId();
156  // After I do this, I can go ahead and register it in patchData
157  pdata->ncclId = deviceCUDA->getNcclUniqueId(); // registered in ngroup
158 #endif
159 #endif
160 
161  // Create pointers to devices
162  cudaNonbondedTablesList.resize(numDevices, NULL);
163  cudaComputeNonbondedList.resize(numDevices, NULL);
164 #ifdef BONDED_CUDA
165  computeBondedCUDAList.resize(numDevices, NULL);
166 #endif
167  if (cudaPmeOneDevice != NULL) delete cudaPmeOneDevice;
168  cudaPmeOneDevice = NULL;
169 
170  // Create CUDA non-bonded tables for all devices that are used for computation
171  for (int i=0;i < deviceCUDA->getNumDevice();i++) {
172  int deviceID = deviceCUDA->getDeviceIDbyRank(i);
173  cudaNonbondedTablesList[deviceID] = new CudaNonbondedTables(deviceID);
174  }
175 
176 
177 
178 }
int getDeviceCount()
Definition: DeviceCUDA.h:124
int getNumDevice()
Definition: DeviceCUDA.h:125
bool isGpuReservedPme()
Definition: DeviceCUDA.h:164
int getDeviceIDbyRank(int rank)
Definition: DeviceCUDA.h:145
#define cudaCheck(stmt)
Definition: CudaUtils.h:233
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

◆ initialize_devices()

void ComputeCUDAMgr::initialize_devices ( CkQdMsg *  msg)

◆ update()

void ComputeCUDAMgr::update ( )

Definition at line 184 of file ComputeCUDAMgr.C.

References deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), DeviceCUDA::getNumDevice(), and NAMD_bug().

Referenced by SimParameters::scriptSet().

184  {
185  if ( CkMyRank() ) NAMD_bug("ComputeCUDAMgr::update() should be called only by rank 0");
186  for (int i=0; i < deviceCUDA->getNumDevice(); i++) {
187  int deviceID = deviceCUDA->getDeviceIDbyRank(i);
188  // calls update function from CudaNonbondedTables
189  cudaNonbondedTablesList[deviceID]->updateTables();
190  }
191 }
int getNumDevice()
Definition: DeviceCUDA.h:125
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int getDeviceIDbyRank(int rank)
Definition: DeviceCUDA.h:145
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

The documentation for this class was generated from the following files: