version 1.3 | version 1.4 |
---|---|
| |
#ifndef DEVICECUDA_H | #ifndef DEVICECUDA_H |
#define DEVICECUDA_H | #define DEVICECUDA_H |
| |
| #ifdef NAMD_CUDA |
| #include <cuda_runtime.h> |
| |
#define CUDA_EVENT_ID_POLL_REMOTE 98 | #define CUDA_EVENT_ID_POLL_REMOTE 98 |
#define CUDA_TRACE_POLL_REMOTE \ | #define CUDA_TRACE_POLL_REMOTE \ |
traceUserEvent(CUDA_EVENT_ID_POLL_REMOTE) | traceUserEvent(CUDA_EVENT_ID_POLL_REMOTE) |
| |
do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \ | do { int dev; cudaGetDevice(&dev); traceUserBracketEvent( \ |
CUDA_EVENT_ID_BASE + 2 * dev + 1, START, END); } while (0) | CUDA_EVENT_ID_BASE + 2 * dev + 1, START, END); } while (0) |
| |
#ifdef WIN32 | |
#define __thread __declspec(thread) | |
#endif | |
| |
// | // |
// Class that handles PE <=> CUDA device mapping | // Class that handles PE <=> CUDA device mapping |
// | // |
| |
int nomergegrids; | int nomergegrids; |
int nostreaming; | int nostreaming; |
| |
| // Number of devices on this node |
| int deviceCount; |
| |
| // Number of devices on this node that are used for computation |
| int ndevices; |
| |
| // List of device IDs that are used for computation |
| int *devices; |
| |
// True when GPU is shared between PEs | // True when GPU is shared between PEs |
bool sharedGpu; | bool sharedGpu; |
// Index of next GPU sharing this GPU | // Index of next GPU sharing this GPU |
| |
int *pesSharingDevice; | int *pesSharingDevice; |
// True when what??? | // True when what??? |
int gpuIsMine; | int gpuIsMine; |
| // Value of __CUDA_ARCH__ |
| int cuda_arch; |
| |
| // Device ID for this Pe |
| int deviceID; |
| |
| // Device properties for all devices on this node |
| cudaDeviceProp* deviceProps; |
| |
void register_user_events(); | void register_user_events(); |
| |
| |
| |
void initialize(); | void initialize(); |
| |
| int getDeviceCount() {return deviceCount;} |
| int getNumDevice() {return ndevices;} |
| |
bool device_shared_with_pe(int pe); | bool device_shared_with_pe(int pe); |
bool one_device_per_node(); | bool one_device_per_node(); |
| |
| |
| |
int getGpuIsMine() {return gpuIsMine;} | int getGpuIsMine() {return gpuIsMine;} |
void setGpuIsMine(const int val) {gpuIsMine = val;} | void setGpuIsMine(const int val) {gpuIsMine = val;} |
| |
| int get_cuda_arch() {return cuda_arch;} |
| |
| int getDeviceID() {return deviceID;} |
| int getDeviceIDbyRank(int rank) {return devices[rank];} |
| int getDeviceIDforPe(int pe); |
| int getMasterPeForDeviceID(int deviceID); |
| |
| int getMaxNumThreads(); |
| int getMaxNumBlocks(); |
}; | }; |
| #endif //NAMD_CUDA |
| |
#endif // DEVICECUDA_H | #endif // DEVICECUDA_H |