// Preprocessor section: runtime includes, trace event IDs and trace macros.
// NOTE(review): this line is a flattened run of directives; the number in front of
// each one looks like an original line number left behind by extraction, and the
// gaps in that numbering (7 -> 15, 17 -> 25, 48 -> 60) suggest lines are missing.
// Restore the section from the original header before compiling.
//
// Event IDs defined here: the named CUDA_*_EVENT constants are 90..97,
// CUDA_EVENT_ID_POLL_REMOTE is 98, CUDA_EVENT_ID_POLL_LOCAL is 99 and
// CUDA_EVENT_ID_BASE is 100.
// CUDA_TRACE_POLL_REMOTE / CUDA_TRACE_POLL_LOCAL call traceUserEvent with the
// matching poll ID.
// CUDA_TRACE_REMOTE(START,END) / CUDA_TRACE_LOCAL(START,END) call cudaGetDevice(&dev)
// and pass START and END to traceUserBracketEvent under the ID
// CUDA_EVENT_ID_BASE + 2 * dev (remote) or CUDA_EVENT_ID_BASE + 2 * dev + 1 (local).
// NOTE(review): both macros discard the value returned by cudaGetDevice and `dev`
// has no initializer, so a failed call would leave the event ID computed from an
// indeterminate value.
// NOTE(review): `devicesperreplica` at the end of the line is a plain int member;
// its meaning is not established by anything visible here.
5 #include <cuda_runtime.h> 6 #ifdef NODEGROUP_FORCE_REGISTER 7 #ifdef NAMD_NCCL_ALLREDUCE 15 #include <hip/hip_runtime.h> 16 #ifdef NODEGROUP_FORCE_REGISTER 17 #ifdef NAMD_NCCL_ALLREDUCE 25 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 27 #define CUDA_PME_SPREADCHARGE_EVENT 90 28 #define CUDA_PME_GATHERFORCE_EVENT 91 29 #define CUDA_BONDED_KERNEL_EVENT 92 30 #define CUDA_DEBUG_EVENT 93 31 #define CUDA_NONBONDED_KERNEL_EVENT 94 32 #define CUDA_GBIS1_KERNEL_EVENT 95 33 #define CUDA_GBIS2_KERNEL_EVENT 96 34 #define CUDA_GBIS3_KERNEL_EVENT 97 36 #define CUDA_EVENT_ID_POLL_REMOTE 98 37 #define CUDA_TRACE_POLL_REMOTE \ 38 traceUserEvent(CUDA_EVENT_ID_POLL_REMOTE) 39 #define CUDA_EVENT_ID_POLL_LOCAL 99 40 #define CUDA_TRACE_POLL_LOCAL \ 41 traceUserEvent(CUDA_EVENT_ID_POLL_LOCAL) 42 #define CUDA_EVENT_ID_BASE 100 43 #define CUDA_TRACE_REMOTE(START,END) \ 44 do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \ 45 CUDA_EVENT_ID_BASE + 2 * dev, START, END); } while (0) 46 #define CUDA_TRACE_LOCAL(START,END) \ 47 do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \ 48 CUDA_EVENT_ID_BASE + 2 * dev + 1, START, END); } while (0) 60 int devicesperreplica;
// NOTE(review): presumably the number of entries in pesSharingDevice - confirm
// against the code that fills it in.
88 int numPesSharingDevice;
// NOTE(review): raw int pointer; what it points at, its length and who owns the
// storage are not visible in this chunk - presumably sized by numPesSharingDevice.
90 int *pesSharingDevice;
// NOTE(review): pointer to cudaDeviceProp; whether it addresses a single struct or
// an array (and how it is indexed) is not visible in this chunk.
98 cudaDeviceProp* deviceProps;
// Declaration only: register_user_events takes no arguments and returns nothing;
// its body is not part of this chunk.
// NOTE(review): the two #ifdef guards on the same line have no matching #endif in
// view, and the numbering jumps from 102 to 116, so it is unclear from here whether
// this declaration actually sits inside those guards - confirm against the header.
101 #ifdef NODEGROUP_FORCE_REGISTER 102 #ifdef NAMD_NCCL_ALLREDUCE 116 void register_user_events();
// Accessor: returns ncclId by value (the return type is ncclUniqueId, not a reference).
// The method follows the NODEGROUP_FORCE_REGISTER and NAMD_NCCL_ALLREDUCE guards
// opened on the same line.
// NOTE(review): no matching #endif for either guard is visible in this chunk -
// confirm they are closed before the end of the header.
154 #ifdef NODEGROUP_FORCE_REGISTER 155 #ifdef NAMD_NCCL_ALLREDUCE 156 ncclUniqueId getNcclUniqueId(){
return ncclId; }
// Accessor: returns the current value of ncclComm; nothing else is read or modified.
// NOTE(review): nothing visible here guarantees ncclComm has been set before this
// is called - presumably setupNcclComm() is responsible for that; confirm.
157 ncclComm_t getNcclComm(){
return ncclComm; }
// Mutator: copy-assigns `other` into ncclId.
// `other` is taken by non-const reference but is only read in this body.
// NOTE(review): the parameter could be `const ncclUniqueId &` so that const objects
// and temporaries can be passed - confirm no caller depends on the exact signature.
158 void setNcclUniqueId(ncclUniqueId &other) { ncclId = other;}
// Declarations only: both functions take no arguments and return nothing; their
// bodies are not part of this chunk.
// NOTE(review): presumably these initialise the ncclId and ncclComm values exposed
// by the accessors above - confirm against the implementation file.
159 void setupNcclUniqueId();
160 void setupNcclComm();
177 #endif // DEVICECUDA_H
void setMergeGrids(const int val)
void setupDevicePeerAccess()
int getGlobalDevice() const
void setGpuIsMine(const int val)
int getMasterPeForDeviceID(int deviceID)
bool device_shared_with_pe(int pe)
bool getIsGlobalDevice() const
int getPesSharingDevice(const int i)
const int * allDevices() const
int getNextPeSharingGpu()
int getDeviceIDbyRank(int rank)
bool one_device_per_node()
int getDeviceIDforPe(int pe)
int getNumPesSharingDevice()