NAMD
DeviceCUDA.h
Go to the documentation of this file.
1 #ifndef DEVICECUDA_H
2 #define DEVICECUDA_H
3 
4 #ifdef NAMD_CUDA
5 #include <cuda_runtime.h>
6 #ifdef NODEGROUP_FORCE_REGISTER
7 #ifdef NAMD_NCCL_ALLREDUCE
8 #include "nccl.h"
9 #endif
10 #endif
11 
12 #endif // NAMD_CUDA
13 
14 #ifdef NAMD_HIP
15 #include <hip/hip_runtime.h>
16 #ifdef NODEGROUP_FORCE_REGISTER
17 #ifdef NAMD_NCCL_ALLREDUCE
18 #include "rccl.h"
19 #endif
20 #endif
21 #endif // NAMD_HIP
22 
23 #include "HipDefines.h"
24 
25 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
26 
// Numeric IDs for trace events; each name indicates the kernel or phase
// that the ID tags.
#define CUDA_PME_SPREADCHARGE_EVENT 90
#define CUDA_PME_GATHERFORCE_EVENT 91
#define CUDA_BONDED_KERNEL_EVENT 92
#define CUDA_DEBUG_EVENT 93
#define CUDA_NONBONDED_KERNEL_EVENT 94
#define CUDA_GBIS1_KERNEL_EVENT 95
#define CUDA_GBIS2_KERNEL_EVENT 96
#define CUDA_GBIS3_KERNEL_EVENT 97

// Point events emitted through traceUserEvent() for remote / local polling.
#define CUDA_EVENT_ID_POLL_REMOTE 98
#define CUDA_TRACE_POLL_REMOTE \
  traceUserEvent(CUDA_EVENT_ID_POLL_REMOTE)
#define CUDA_EVENT_ID_POLL_LOCAL 99
#define CUDA_TRACE_POLL_LOCAL \
  traceUserEvent(CUDA_EVENT_ID_POLL_LOCAL)

// Bracketed (START..END) events, two IDs per device:
//   remote work -> CUDA_EVENT_ID_BASE + 2 * dev
//   local work  -> CUDA_EVENT_ID_BASE + 2 * dev + 1
// where dev is the device reported by cudaGetDevice().
//
// dev is initialized to 0 so that a failing cudaGetDevice() call can never
// make the event ID depend on an indeterminate value; in that case the
// event is attributed to device 0.  The status is discarded explicitly
// because a tracing macro must not alter program control flow.
#define CUDA_EVENT_ID_BASE 100
#define CUDA_TRACE_REMOTE(START,END) \
  do { int dev = 0; (void)cudaGetDevice(&dev); traceUserBracketEvent( \
    CUDA_EVENT_ID_BASE + 2 * dev, START, END); } while (0)
#define CUDA_TRACE_LOCAL(START,END) \
  do { int dev = 0; (void)cudaGetDevice(&dev); traceUserBracketEvent( \
    CUDA_EVENT_ID_BASE + 2 * dev + 1, START, END); } while (0)
49 
50 
51 //
52 // Class that handles PE <=> CUDA device mapping
53 //
54 class DeviceCUDA {
55 
56 private:
57  // Command line argument settings
58  char *devicelist;
59  int usedevicelist;
60  int devicesperreplica;
61  int ignoresharing;
62  int mergegrids;
63  int nomergegrids;
64  int nostreaming;
65 
66  // Number of devices on this physical node
67  int deviceCount;
68 
69  // Number of devices on this physical node that are used for computation
70  int ndevices;
71 
72  // List of device IDs on this physical node that are used for computation
73  int *devices;
74 
75  // Number of devices that are used for computation by this node
76  int nnodedevices;
77 
78  // List of device IDs that are used for computation by this node
79  int *nodedevices;
80 
81  // True when GPU is shared between PEs
82  bool sharedGpu;
83  // Index of next GPU sharing this GPU
84  int nextPeSharingGpu;
85  // Index of the master PE for this GPU
86  int masterPe;
87  // Number of PEs that share this GPU
88  int numPesSharingDevice;
89  // List of PEs that share this GPU
90  int *pesSharingDevice;
91  // True when what???
92  int gpuIsMine;
93 
94  // Device ID for this Pe
95  int deviceID;
96 
97  // Device properties for all devices on this node
98  cudaDeviceProp* deviceProps;
99 
100  int deviceIndex; // Position of device in devices[] list
101 #ifdef NODEGROUP_FORCE_REGISTER
102 #ifdef NAMD_NCCL_ALLREDUCE
103  ncclUniqueId ncclId;
104  ncclComm_t ncclComm;
105 #endif
106 #endif
107  bool reservePme; // true if a device is doing only PME
108  int pmeDevice; // holds deviceID of device doing PME
109  int pmeDeviceIndex; // holds positions of the device doing PME on the devices[] list
110  bool isPmeDevice; // true if pmeDevice == deviceID
111  int masterDevice; // holds index of device that should do I/O on CUDASOA
112  bool isMasterDevice; // true if this device should do IO
113  int globalDevice;
114  bool isGlobalDevice;
115 
116  void register_user_events();
117 
118 public:
119  DeviceCUDA();
120  ~DeviceCUDA();
121 
122  void initialize();
123 
124  int getDeviceCount() {return deviceCount;}
125  int getNumDevice() {return nnodedevices;}
126 
127  bool device_shared_with_pe(int pe);
128  bool one_device_per_node();
129 
130  int getNoStreaming() {return nostreaming;}
131  int getNoMergeGrids() {return nomergegrids;}
132  int getMergeGrids() {return mergegrids;}
133  void setMergeGrids(const int val) {mergegrids = val;}
134 
135  bool getSharedGpu() {return sharedGpu;}
136  int getNextPeSharingGpu() {return nextPeSharingGpu;}
137  int getMasterPe() {return masterPe;}
138  int getNumPesSharingDevice() {return numPesSharingDevice;}
139  int getPesSharingDevice(const int i) {return pesSharingDevice[i];}
140 
141  int getGpuIsMine() {return gpuIsMine;}
142  void setGpuIsMine(const int val) {gpuIsMine = val;}
143 
144  int getDeviceID() {return deviceID;}
145  int getDeviceIDbyRank(int rank) {return nodedevices[rank];}
146  int getDeviceIDforPe(int pe);
147  int getMasterPeForDeviceID(int deviceID);
148 
149  int getMaxNumThreads();
150  int getMaxNumBlocks();
151 
152  void setupDevicePeerAccess();
153 
154 #ifdef NODEGROUP_FORCE_REGISTER
155 #ifdef NAMD_NCCL_ALLREDUCE
156  ncclUniqueId getNcclUniqueId(){ return ncclId; }
157  ncclComm_t getNcclComm(){return ncclComm; }
158  void setNcclUniqueId(ncclUniqueId &other) { ncclId = other;}
159  void setupNcclUniqueId(); // this one gets called by rank 0
160  void setupNcclComm();
161 #endif
162 #endif
163 
164  bool isGpuReservedPme() { return reservePme; }
165  int getPmeDevice() { return pmeDevice; }
166  int getDeviceIndex() { return deviceIndex; }
167  int getPmeDeviceIndex() { return pmeDeviceIndex; }
168  bool getIsPmeDevice() { return isPmeDevice; }
169  bool getIsMasterDevice();
170 
171  int getGlobalDevice() const {return globalDevice;}
172  bool getIsGlobalDevice() const {return isGlobalDevice;}
173  const int* allDevices() const {return devices;}
174 };
175 #endif // NAMD_CUDA || NAMD_HIP
176 
177 #endif // DEVICECUDA_H
void initialize()
Definition: DeviceCUDA.C:107
int getDeviceCount()
Definition: DeviceCUDA.h:124
int getMaxNumThreads()
Definition: DeviceCUDA.C:558
bool getSharedGpu()
Definition: DeviceCUDA.h:135
void setMergeGrids(const int val)
Definition: DeviceCUDA.h:133
int getPmeDevice()
Definition: DeviceCUDA.h:165
int getNumDevice()
Definition: DeviceCUDA.h:125
int getMergeGrids()
Definition: DeviceCUDA.h:132
void setupDevicePeerAccess()
int getGlobalDevice() const
Definition: DeviceCUDA.h:171
int getPmeDeviceIndex()
Definition: DeviceCUDA.h:167
void setGpuIsMine(const int val)
Definition: DeviceCUDA.h:142
int getMasterPe()
Definition: DeviceCUDA.h:137
int getMasterPeForDeviceID(int deviceID)
Definition: DeviceCUDA.C:530
bool device_shared_with_pe(int pe)
Definition: DeviceCUDA.C:537
bool getIsGlobalDevice() const
Definition: DeviceCUDA.h:172
int getPesSharingDevice(const int i)
Definition: DeviceCUDA.h:139
int getMaxNumBlocks()
Definition: DeviceCUDA.C:564
const int * allDevices() const
Definition: DeviceCUDA.h:173
bool isGpuReservedPme()
Definition: DeviceCUDA.h:164
int getDeviceID()
Definition: DeviceCUDA.h:144
int getNoMergeGrids()
Definition: DeviceCUDA.h:131
int getNextPeSharingGpu()
Definition: DeviceCUDA.h:136
int getDeviceIDbyRank(int rank)
Definition: DeviceCUDA.h:145
bool getIsPmeDevice()
Definition: DeviceCUDA.h:168
int getNoStreaming()
Definition: DeviceCUDA.h:130
int getDeviceIndex()
Definition: DeviceCUDA.h:166
bool getIsMasterDevice()
int getGpuIsMine()
Definition: DeviceCUDA.h:141
bool one_device_per_node()
Definition: DeviceCUDA.C:547
int getDeviceIDforPe(int pe)
Definition: DeviceCUDA.C:523
int getNumPesSharingDevice()
Definition: DeviceCUDA.h:138