NAMD
GlobalGPUMgr.h
Go to the documentation of this file.
1 #ifndef GLOBAL_GPU_MGR_H
2 #define GLOBAL_GPU_MGR_H
3 
4 #include "charm++.h"
5 
6 #include "main.h"
7 #include "NamdTypes.h"
8 #include "ProcessorPrivate.h"
9 #include "CudaRecord.h"
10 #include "CudaUtils.h"
11 
12 #include "GlobalGPUMgr.decl.h"
13 
14 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
15 
16 /*
17  * @brief RunWithAffinity is used to run a block of code on a specific CPU core
18  *
19  * Creating a RunWithAffinity object will change the affinity of the thread to the given
20  * CPU core until the object goes out of scope. When the object goes out of scope
21  * and the destructor is called, the original affinity of the thread will be
22  * restored.
23  *
24  * This is useful when initializing libraries that create their own threads which
25  * inherit affinity from this thread. We can temporarily change the affinity of
26  * this thread, initialize the library, and then change this thread's affinity
27  * back.
28  *
29  */
31 {
32 public:
33  RunWithAffinity(const bool valid, const int tempCpu);
35 
36  static constexpr int kCudaOffset = 0;
37  static constexpr int kNvshmemOffset = 1;
38  static constexpr int kNcclOffset = 2;
39 
40 private:
41  cpu_set_t* current = nullptr;
42  cpu_set_t* temp = nullptr;
43  size_t size = 0;
44  bool validCpu = false;
45 };
46 
47 /*
48  * @brief Utility functions for information about all GPUs used in simulation
49  *
50  * The DeviceCUDA object only has scope for within a node, but the GPU resident
51  * code path needs information about the GPUs used across all nodes. This class
52  * generates a mapping between master PEs and devices as well as devices to master
53  * PEs
54  *
55  * This class also stores the managers for the various communication backends,
56  * like NCCL and NVSHMEM, that can be used
57  */
58 class GlobalGPUMgr : public CBase_GlobalGPUMgr
59 {
60 public:
61  static GlobalGPUMgr* Object() { return CkpvAccess(GlobalGPUMgr_instance); }
62  static GlobalGPUMgr* ObjectOnPe(const int pe) {
63  return CkpvAccessOther(GlobalGPUMgr_instance, CmiRankOf(pe));
64  }
65 
66  GlobalGPUMgr();
67  ~GlobalGPUMgr();
68 
69 /*
70  * @brief Sets up information about devices being used
71  *
72  * This function will set up the peToDeviceIDMap, peToDeviceIndexMap, and deviceIndexToPeMap on all
73  * PEs. It will also count the total number of devices being used in the run across all nodes and
74  * determine the index of this PEs device
75  */
76  void initialize();
77 
78  /*
79  * @brief Sets up various communication backends
80  */
81  void initializeBackends();
82 
83  /*
84  * @brief Tears down commuication backends
85  */
86  void finalize();
87 
88  /*
89  * @brief Prints debug info about device map
90  */
91  void printDeviceMaps();
92 
93  /*
94  * @brief Returns total number of devices used in simulation
95  */
96  int getNumDevices() { return numDevices; }
97 
98  /*
99  * @brief Returns index of device being used by this PE
100  */
101  int getDeviceIndex() { return deviceIndex; }
102 
103  /*
104  * @brief Returns if this is a master PE
105  */
106  int getIsMasterPe() { return isMasterPe; }
107 
108  /*
109  * @brief Returns the master PE for a given device
110  */
111  int getMasterPeForDeviceIndex(int device) { return deviceIndexToPeMap[device]; }
112 
113  /*
114  * @brief Returns a reference to the device index to PE map
115  */
116  const std::vector<int>& getDeviceIndexToPeMap() const { return deviceIndexToPeMap; }
117 
118  /*
119  * @brief Returns if this is the master PE of the master device
120  *
121  * Some operations should only be performed by one master PE accross all nodes, like
122  * generating unique IDS for NCCL.
123  */
124  bool getIsMasterDevice() { return isMasterDevice && isMasterPe; }
125 
126  /*
127  * @brief Returns true if this PE's device is involved in PME calculation
128  */
129  bool getIsPmeDevice();
130 
131 private:
132  int isMasterPe;
133  int numDevices = -1;
134  int deviceIndex = -1;
135  bool isMasterDevice;
136 
137  std::vector<int> deviceIndexToPeMap;
138  std::vector<int> peToDeviceIDMap;
139  std::vector<int> peToDeviceIndexMap;
140 };
141 
142 #endif /* NAMD_CUDA || NAMD_HIP */
143 #endif /* GLOBAL_GPU_MGR_H */
bool getIsMasterDevice()
Definition: GlobalGPUMgr.h:124
void initialize()
Definition: GlobalGPUMgr.C:58
static constexpr int kNvshmemOffset
Definition: GlobalGPUMgr.h:37
int getDeviceIndex()
Definition: GlobalGPUMgr.h:101
void printDeviceMaps()
void finalize()
Definition: GlobalGPUMgr.C:97
void initializeBackends()
Definition: GlobalGPUMgr.C:94
int getNumDevices()
Definition: GlobalGPUMgr.h:96
const std::vector< int > & getDeviceIndexToPeMap() const
Definition: GlobalGPUMgr.h:116
RunWithAffinity(const bool valid, const int tempCpu)
Definition: GlobalGPUMgr.C:20
static constexpr int kNcclOffset
Definition: GlobalGPUMgr.h:38
static GlobalGPUMgr * ObjectOnPe(const int pe)
Definition: GlobalGPUMgr.h:62
bool getIsPmeDevice()
Definition: GlobalGPUMgr.C:100
int getMasterPeForDeviceIndex(int device)
Definition: GlobalGPUMgr.h:111
static GlobalGPUMgr * Object()
Definition: GlobalGPUMgr.h:61
static constexpr int kCudaOffset
Definition: GlobalGPUMgr.h:36
int getIsMasterPe()
Definition: GlobalGPUMgr.h:106