4 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
6 #include <cuda_runtime.h>
10 #include <hip/hip_runtime.h>
// Integer event IDs 90-97, one macro per ID.
// NOTE(review): the names suggest one ID per GPU kernel type (PME charge
// spreading / force gathering, bonded, nonbonded, GBIS phases 1-3) plus a
// debug ID, presumably registered by register_user_events() -- confirm
// against that function's definition.
12 #define CUDA_PME_SPREADCHARGE_EVENT 90
13 #define CUDA_PME_GATHERFORCE_EVENT 91
14 #define CUDA_BONDED_KERNEL_EVENT 92
15 #define CUDA_DEBUG_EVENT 93
16 #define CUDA_NONBONDED_KERNEL_EVENT 94
17 #define CUDA_GBIS1_KERNEL_EVENT 95
18 #define CUDA_GBIS2_KERNEL_EVENT 96
19 #define CUDA_GBIS3_KERNEL_EVENT 97
// Event ID that CUDA_TRACE_POLL_REMOTE passes to traceUserEvent().
21 #define CUDA_EVENT_ID_POLL_REMOTE 98
// Expands to a single traceUserEvent() call with the remote-poll event ID.
// The expansion has no trailing semicolon; the call site must supply it.
22 #define CUDA_TRACE_POLL_REMOTE \
23 traceUserEvent(CUDA_EVENT_ID_POLL_REMOTE)
// Event ID that CUDA_TRACE_POLL_LOCAL passes to traceUserEvent().
24 #define CUDA_EVENT_ID_POLL_LOCAL 99
// Expands to a single traceUserEvent() call with the local-poll event ID.
// The expansion has no trailing semicolon; the call site must supply it.
25 #define CUDA_TRACE_POLL_LOCAL \
26 traceUserEvent(CUDA_EVENT_ID_POLL_LOCAL)
// First ID of the per-device bracket events. Each device index `dev` owns two
// consecutive IDs: CUDA_TRACE_REMOTE uses BASE + 2 * dev and CUDA_TRACE_LOCAL
// uses BASE + 2 * dev + 1.
27 #define CUDA_EVENT_ID_BASE 100
// Emit a bracketed user trace event for the calling thread's current device.
//   START, END -- forwarded unchanged to traceUserBracketEvent(); each is
//                 evaluated at most once.
// The event ID is CUDA_EVENT_ID_BASE + 2 * dev (the even slot of the
// device's ID pair), where dev is the index reported by cudaGetDevice().
// dev is zero-initialized and the event is emitted only when cudaGetDevice()
// returns cudaSuccess, so an indeterminate device index can never be folded
// into the event ID. Wrapped in do { } while (0) so the macro behaves as a
// single statement; the call site supplies the trailing semicolon.
28 #define CUDA_TRACE_REMOTE(START,END) \
29 do { int dev = 0; if (cudaGetDevice(&dev) == cudaSuccess) traceUserBracketEvent( \
30 CUDA_EVENT_ID_BASE + 2 * dev, START, END); } while (0)
// Emit a bracketed user trace event for the calling thread's current device.
//   START, END -- forwarded unchanged to traceUserBracketEvent(); each is
//                 evaluated at most once.
// The event ID is CUDA_EVENT_ID_BASE + 2 * dev + 1 (the odd slot of the
// device's ID pair), where dev is the index reported by cudaGetDevice().
// dev is zero-initialized and the event is emitted only when cudaGetDevice()
// returns cudaSuccess, so an indeterminate device index can never be folded
// into the event ID. Wrapped in do { } while (0) so the macro behaves as a
// single statement; the call site supplies the trailing semicolon.
31 #define CUDA_TRACE_LOCAL(START,END) \
32 do { int dev = 0; if (cudaGetDevice(&dev) == cudaSuccess) traceUserBracketEvent( \
33 CUDA_EVENT_ID_BASE + 2 * dev + 1, START, END); } while (0)
44 int devicesperreplica;
72 int numPesSharingDevice;
74 int *pesSharingDevice;
82 cudaDeviceProp* deviceProps;
84 void register_user_events();
122 #endif // DEVICECUDA_H
void setMergeGrids(const int val)
void setGpuIsMine(const int val)
int getMasterPeForDeviceID(int deviceID)
bool device_shared_with_pe(int pe)
int getPesSharingDevice(const int i)
int getNextPeSharingGpu()
int getDeviceIDbyRank(int rank)
bool one_device_per_node()
int getDeviceIDforPe(int pe)
int getNumPesSharingDevice()