namd/doxygen/GlobalGPUMgr_8h_source.html

 #ifndef GLOBAL_GPU_MGR_H
 #define GLOBAL_GPU_MGR_H

 #include "charm++.h"

 #include "main.h"
 #include "NamdTypes.h"
 #include "ProcessorPrivate.h"
 #include "CudaRecord.h"
 #include "CudaUtils.h"

 #include "GlobalGPUMgr.decl.h"

 #if defined(NAMD_CUDA) || defined(NAMD_HIP)

 /**
  * @brief RunWithAffinity is used to run a block of code on a specific CPU core
  *
  * Creating a RunWithAffinity object will change the affinity of the thread to the given
  * CPU core until the object goes out of scope. When the object goes out of scope
  * and the destructor is called, the original affinity of the thread will be
  * restored.
  *
  * This is useful when initializing libraries that create their own threads which
  * inherit affinity from this thread. We can temporarily change the affinity of
  * this thread, initialize the library, and then change this thread's affinity
  * back.
  *
  * The guard holds raw cpu_set_t pointers and has a user-declared destructor, so
  * it is made non-copyable: a copy would share those pointers and restore the
  * affinity a second time when it is destroyed.
  */
 class RunWithAffinity
 {
 public:
   /**
    * @brief Switches the calling thread to the temporary CPU core
    *
    * @param valid   NOTE(review): presumably whether tempCpu is usable and the
    *                affinity change should actually be applied; confirm in GlobalGPUMgr.C
    * @param tempCpu CPU core the thread should temporarily run on
    */
   RunWithAffinity(const bool valid, const int tempCpu);

   /**
    * @brief Restores the thread's original affinity
    */
   ~RunWithAffinity();

   // Scope guard semantics: exactly one object may undo the affinity change.
   RunWithAffinity(const RunWithAffinity&) = delete;
   RunWithAffinity& operator=(const RunWithAffinity&) = delete;

   // Per-library offsets.
   // NOTE(review): presumably added to a base core index so each library's
   // helper threads land on a distinct core; confirm against callers.
   static constexpr int kCudaOffset = 0;
   static constexpr int kNvshmemOffset = 1;
   static constexpr int kNcclOffset = 2;

 private:
   // NOTE(review): presumably the saved original affinity mask; confirm in GlobalGPUMgr.C
   cpu_set_t* current = nullptr;
   // NOTE(review): presumably the temporary single-core mask; confirm in GlobalGPUMgr.C
   cpu_set_t* temp = nullptr;
   // NOTE(review): presumably the allocation size of the masks above; confirm in GlobalGPUMgr.C
   size_t size = 0;
   bool validCpu = false;
 };

 /**
  * @brief Utility functions for information about all GPUs used in simulation
  *
  * The DeviceCUDA object only has scope for within a node, but the GPU resident
  * code path needs information about the GPUs used across all nodes. This class
  * generates a mapping between master PEs and devices as well as devices to master
  * PEs
  *
  * This class also stores the managers for the various communication backends,
  * like NCCL and NVSHMEM, that can be used
  */
 class GlobalGPUMgr : public CBase_GlobalGPUMgr
 {
 public:
   /**
    * @brief Returns the instance held in the GlobalGPUMgr_instance Ckpv variable
    */
   static GlobalGPUMgr* Object() { return CkpvAccess(GlobalGPUMgr_instance); }

   /**
    * @brief Returns the instance held by another PE
    *
    * The PE number is converted with CmiRankOf() before the lookup.
    */
   static GlobalGPUMgr* ObjectOnPe(const int pe) {
     return CkpvAccessOther(GlobalGPUMgr_instance, CmiRankOf(pe));
   }

   GlobalGPUMgr();
   ~GlobalGPUMgr();

   /**
    * @brief Sets up information about devices being used
    *
    * This function will set up the peToDeviceIDMap, peToDeviceIndexMap, and deviceIndexToPeMap on all
    * PEs. It will also count the total number of devices being used in the run across all nodes and
    * determine the index of this PE's device
    */
   void initialize();

   /**
    * @brief Sets up various communication backends
    */
   void initializeBackends();

   /**
    * @brief Tears down communication backends
    */
   void finalize();

   /**
    * @brief Prints debug info about device map
    */
   void printDeviceMaps();

   /**
    * @brief Returns total number of devices used in simulation
    *
    * The member starts at -1 and keeps that value until it is assigned.
    */
   int getNumDevices() const { return numDevices; }

   /**
    * @brief Returns index of device being used by this PE
    *
    * The member starts at -1 and keeps that value until it is assigned.
    */
   int getDeviceIndex() const { return deviceIndex; }

   /**
    * @brief Returns if this is a master PE
    */
   int getIsMasterPe() const { return isMasterPe; }

   /**
    * @brief Returns the master PE for a given device
    *
    * @param device Device index; must satisfy 0 <= device < deviceIndexToPeMap.size()
    *               because the lookup is an unchecked vector index.
    */
   int getMasterPeForDeviceIndex(int device) const { return deviceIndexToPeMap[device]; }

   /**
    * @brief Returns a reference to the device index to PE map
    */
   const std::vector<int>& getDeviceIndexToPeMap() const { return deviceIndexToPeMap; }

   /**
    * @brief Returns if this is the master PE of the master device
    *
    * Some operations should only be performed by one master PE across all nodes, like
    * generating unique IDs for NCCL.
    */
   bool getIsMasterDevice() const { return isMasterDevice && isMasterPe; }

   /**
    * @brief Returns true if this PE's device is involved in PME calculation
    */
   bool getIsPmeDevice();

 private:
   // Defaulted so the getters never read an indeterminate value, even if they
   // are called before the flags are assigned.
   int isMasterPe = 0;
   int numDevices = -1;
   int deviceIndex = -1;
   bool isMasterDevice = false;

   std::vector<int> deviceIndexToPeMap;
   std::vector<int> peToDeviceIDMap;
   std::vector<int> peToDeviceIndexMap;
 };

 #endif  /* NAMD_CUDA || NAMD_HIP */
 #endif  /* GLOBAL_GPU_MGR_H */
CudaUtils.h

GlobalGPUMgr::getIsMasterDevice
bool getIsMasterDevice()
Definition: GlobalGPUMgr.h:124

RunWithAffinity
Definition: GlobalGPUMgr.h:30

GlobalGPUMgr::initialize
void initialize()
Definition: GlobalGPUMgr.C:58

main.h

RunWithAffinity::kNvshmemOffset
static constexpr int kNvshmemOffset
Definition: GlobalGPUMgr.h:37

GlobalGPUMgr::getDeviceIndex
int getDeviceIndex()
Definition: GlobalGPUMgr.h:101

RunWithAffinity::~RunWithAffinity
~RunWithAffinity()
Definition: GlobalGPUMgr.C:38

GlobalGPUMgr::printDeviceMaps
void printDeviceMaps()

GlobalGPUMgr::GlobalGPUMgr
GlobalGPUMgr()
Definition: GlobalGPUMgr.C:47

GlobalGPUMgr::finalize
void finalize()
Definition: GlobalGPUMgr.C:97

GlobalGPUMgr::initializeBackends
void initializeBackends()
Definition: GlobalGPUMgr.C:94

GlobalGPUMgr::getNumDevices
int getNumDevices()
Definition: GlobalGPUMgr.h:96

GlobalGPUMgr::getDeviceIndexToPeMap
const std::vector< int > & getDeviceIndexToPeMap() const
Definition: GlobalGPUMgr.h:116

RunWithAffinity::RunWithAffinity
RunWithAffinity(const bool valid, const int tempCpu)
Definition: GlobalGPUMgr.C:20

ProcessorPrivate.h

NamdTypes.h

RunWithAffinity::kNcclOffset
static constexpr int kNcclOffset
Definition: GlobalGPUMgr.h:38

GlobalGPUMgr::ObjectOnPe
static GlobalGPUMgr * ObjectOnPe(const int pe)
Definition: GlobalGPUMgr.h:62

GlobalGPUMgr::getIsPmeDevice
bool getIsPmeDevice()
Definition: GlobalGPUMgr.C:100

GlobalGPUMgr::getMasterPeForDeviceIndex
int getMasterPeForDeviceIndex(int device)
Definition: GlobalGPUMgr.h:111

GlobalGPUMgr::Object
static GlobalGPUMgr * Object()
Definition: GlobalGPUMgr.h:61

RunWithAffinity::kCudaOffset
static constexpr int kCudaOffset
Definition: GlobalGPUMgr.h:36

GlobalGPUMgr
Definition: GlobalGPUMgr.h:58

GlobalGPUMgr::~GlobalGPUMgr
~GlobalGPUMgr()
Definition: GlobalGPUMgr.C:56

GlobalGPUMgr::getIsMasterPe
int getIsMasterPe()
Definition: GlobalGPUMgr.h:106

CudaRecord.h