CudaGlobalMasterServer.h
#ifndef CUDAGLOBALMASTERSERVER_H
#define CUDAGLOBALMASTERSERVER_H

#include <memory>
#include <new>     // std::bad_alloc
#include <utility> // std::forward
#include <unordered_map>
#include <vector>
#include "CudaRecord.h"
#ifdef NAMD_CUDA
#include "cuda_runtime.h"
#endif
#ifdef NAMD_HIP
#include <hip/hip_runtime.h>
#endif
#include "common.h"

namespace CudaGlobalMaster {
inline namespace CUDAGM_NS {
class CudaGlobalMasterClient;
}
} // namespace CudaGlobalMaster

class Lattice;
class AtomMap;
class SubmitReduction;
/// A class for copying atom information from SequencerCUDA to
/// CudaGlobalMasterClient
class CudaGlobalMasterServer {
public:
#if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(NODEGROUP_FORCE_REGISTER)

  /// Type used for the force components
  using tf_type = double;
  /// Describes how a single atom travels from a source device buffer
  /// to a client buffer
  struct CopyListTuple {
    /// Index of the source device
    int m_src_dev_index;
    /// SOA index of the atom on the source device
    int m_soa_index;
    /// Index of the destination client
    size_t m_client_index;
    /// Position of the atom in the client's buffer
    size_t m_client_atom_pos;
  };
  /// Device-side buffers of a single client
  struct ClientBuffer {
    double *d_data;    ///< destination for the copied quantity (positions or forces)
    float *d_mass;     ///< atom masses
    float *d_charge;   ///< atom charges
    char *d_transform; ///< periodic-image transforms
    double *d_vel;     ///< atom velocities
    size_t sz;         ///< number of atoms in the buffer
  };
  /// Per-device pointers to the SOA atom data on each GPU
  struct PeerAtomData {
    double **d_pos_x;
    double **d_pos_y;
    double **d_pos_z;
    double **d_vel_x;
    double **d_vel_y;
    double **d_vel_z;
    float **d_mass;
    float **d_charge;
    char3 **d_transform;
  };
  /// Per-device pointers to the total-force arrays on each GPU
  struct PeerTFArray {
    tf_type **d_f_normal_x;
    tf_type **d_f_normal_y;
    tf_type **d_f_normal_z;
    tf_type **d_f_saved_nbond_x;
    tf_type **d_f_saved_nbond_y;
    tf_type **d_f_saved_nbond_z;
    tf_type **d_f_saved_slow_x;
    tf_type **d_f_saved_slow_y;
    tf_type **d_f_saved_slow_z;
    int **d_atomFixed;
  };
  /// Per-device pointers to the applied-force arrays on each GPU
  struct PeerAFArray {
    tf_type **d_f_applied_x;
    tf_type **d_f_applied_y;
    tf_type **d_f_applied_z;
    int **d_atomFixed;
  };
  CudaGlobalMasterServer(int deviceID, int printProfilingFreq = -1);
  /// Register a client
  void addClient(std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client);
  /// Unregister a client
  void removeClient(std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client);
  /// Copy the requested atom data and the current lattice to all clients
  void communicateToClients(const Lattice *lat);
  /// Let every registered client run its calculation
  void calculate();
  /// Communicate the client results (forces, energy, virial) back to the simulation
  void communicateToMD(bool doEnergy, bool doVirial);
  /// Update the cached atom maps
  void updateAtomMaps();
  /// True if any client requests the total forces
  bool requestedTotalForces() const;
  /// True if any client will apply global forces
  bool willAddGlobalForces() const;
  /// Set the current simulation step
  void setStep(int64_t step);
  const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>> &
  getClients() const {
    return m_clients;
  }
  cudaStream_t getStream() { return m_stream; }
  /// Finish the energy and virial reductions
  void finishReductions();
  /// A minimal allocator that backs STL containers with pinned
  /// (page-locked), mapped host memory via cudaHostAlloc
  template <typename T>
  class CudaHostAllocator {
  public:
    using value_type = T;

    CudaHostAllocator() = default;

    template <typename U>
    constexpr CudaHostAllocator(const CudaHostAllocator<U> &) noexcept {}

    friend bool operator==(const CudaHostAllocator &, const CudaHostAllocator &) { return true; }
    friend bool operator!=(const CudaHostAllocator &, const CudaHostAllocator &) { return false; }

    T *allocate(size_t n) {
      T *ptr;
      if (cudaHostAlloc(&ptr, n * sizeof(T), cudaHostAllocMapped) != cudaSuccess) {
        throw std::bad_alloc();
      }
      return ptr;
    }
    void deallocate(T *ptr, size_t /* n */) noexcept { cudaFreeHost(ptr); }
    template <typename U, typename... Args>
    void construct(U *p, Args &&... args) {
      new (p) U(std::forward<Args>(args)...);
    }
    template <typename U>
    void destroy(U *p) noexcept {
      p->~U();
    }
  };
private:
  /// Copy the selected atom attributes to the client buffers
  void copyAtomsToClients(bool copyPositions, bool copyMasses, bool copyCharges,
                          bool copyTransforms, bool copyVelocities);
  /// Copy the total forces to the clients that request them
  void copyTotalForcesToClients();
  /// Add the client forces to the simulation's force arrays
  void addGlobalForces();
  /// Build the copy list for atom positions
  void buildAtomsCopyList();
  /// Build the copy list for total forces
  void buildAtomsTotalForcesCopyList();
  /// Build the copy list for the atoms the clients apply forces to
  void buildForcedAtomsCopyList();
  /// Allocate the peer pointer arrays
  void allocatePeerArrays();
  /// Copy the peer pointer arrays to the device
  void copyPeerArraysToDevice();
  SubmitReduction *getCurrentReduction() {
    // only supports GPU-resident mode
    return reductionGpuResident;
  }
private:
  int m_device_id;
  int64_t m_step;
  cudaStream_t m_stream;
  int m_num_devices;
  int m_clients_changed;
  int m_atom_maps_changed;
  int m_print_profiling_freq;
  std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>> m_clients;
  static constexpr int numCopyLists = 3;
  // Data structures for copying atomic positions to multiple clients
  std::vector<CopyListTuple, CudaHostAllocator<CopyListTuple>> m_atom_pos_copy_list;
  CopyListTuple *m_d_atom_pos_copy_list;
  ClientBuffer *m_d_atom_pos_client_buffers;
  // Data structures for copying total forces to multiple clients
  std::vector<CopyListTuple, CudaHostAllocator<CopyListTuple>> m_atom_total_force_copy_list;
  CopyListTuple *m_d_atom_total_force_copy_list;
  ClientBuffer *m_atom_total_force_client_buffers;
  // Data structures for copying the client forces back to the atoms
  std::vector<CopyListTuple, CudaHostAllocator<CopyListTuple>> m_forced_atom_copy_list;
  bool m_unique_forced_atoms;
  CopyListTuple *m_d_forced_atom_copy_list;
  ClientBuffer *m_d_forced_atom_client_buffers;
  // Data structures for mapping global atom ids to SOA ids
  std::vector<std::vector<AtomMap *>> m_atom_map_lists;
  std::vector<int> m_src_devs;
  std::vector<std::vector<CudaLocalRecord>> m_local_records;
  std::vector<int *> m_global_to_local_id;
  std::unordered_map<int, int> m_device_id_to_index;
  // Pointers to buffers of device arrays (for multiple GPUs)
  PeerAtomData m_h_peer_atom_data;
  PeerTFArray m_h_peer_tf_array;
  PeerAFArray m_h_peer_af_array;
  PeerAtomData m_d_peer_atom_data;
  PeerTFArray m_d_peer_tf_array;
  PeerAFArray m_d_peer_af_array;
  // CudaGlobalMasterServer only supports GPU-resident mode
  SubmitReduction *reductionGpuResident;
  // Lattice
  std::vector<double, CudaHostAllocator<double>> m_h_lattice;
#else
  CudaGlobalMasterServer(int deviceID, int printProfilingFreq = -1);
#endif // (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(NODEGROUP_FORCE_REGISTER)
};

#endif // CUDAGLOBALMASTERSERVER_H
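To make the declarations above concrete, a usage sketch of the nested CudaHostAllocator may help. It backs ordinary STL containers with pinned, mapped host memory, the same pattern the server uses for m_h_lattice and its copy lists. The example below is a minimal sketch, assuming the header is compiled with NAMD_CUDA and NODEGROUP_FORCE_REGISTER defined and NAMD's include paths available:

    #include <cstdio>
    #include <vector>
    #include <cuda_runtime.h>
    #include "CudaGlobalMasterServer.h"

    int main() {
      // A vector backed by pinned (page-locked), mapped host memory.
      // allocate() throws std::bad_alloc if cudaHostAlloc fails.
      std::vector<double, CudaGlobalMasterServer::CudaHostAllocator<double>> h_buf;
      h_buf.resize(6, 0.0);

      // Because the memory is allocated with cudaHostAllocMapped, a device
      // view of the same buffer can be obtained for use in kernels.
      double *d_view = nullptr;
      if (cudaHostGetDevicePointer(reinterpret_cast<void **>(&d_view),
                                   h_buf.data(), 0) == cudaSuccess) {
        std::printf("host %p is visible on the device as %p\n",
                    static_cast<void *>(h_buf.data()),
                    static_cast<void *>(d_view));
      }
      return 0;
    }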
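The copy-list machinery also suggests a flat gather: each CopyListTuple pairs a (source device, SOA slot) with a (client, client slot), so one kernel can serve every client at once. The kernel below is only a hypothetical illustration of that pattern, not NAMD's actual kernel; the packed x,y,z layout of d_data and the name gatherPositions are assumptions.

    #include "CudaGlobalMasterServer.h"

    // Hypothetical gather kernel: one thread per CopyListTuple entry.
    __global__ void gatherPositions(
        const CudaGlobalMasterServer::CopyListTuple *copyList, size_t numEntries,
        CudaGlobalMasterServer::PeerAtomData peers,
        CudaGlobalMasterServer::ClientBuffer *clients) {
      const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i >= numEntries) return;
      const CudaGlobalMasterServer::CopyListTuple &t = copyList[i];
      const int dev = t.m_src_dev_index;
      const int soa = t.m_soa_index;
      CudaGlobalMasterServer::ClientBuffer buf = clients[t.m_client_index];
      // Assumed layout: positions packed x,y,z per atom in the client buffer.
      buf.d_data[3 * t.m_client_atom_pos + 0] = peers.d_pos_x[dev][soa];
      buf.d_data[3 * t.m_client_atom_pos + 1] = peers.d_pos_y[dev][soa];
      buf.d_data[3 * t.m_client_atom_pos + 2] = peers.d_pos_z[dev][soa];
    }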
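Finally, the public interface suggests a natural per-step call order. The driver below is a guess assembled purely from the method names; the real sequencing is owned by NAMD's GPU-resident code path, and runStep, lat, doEnergy, and doVirial are hypothetical names.

    #include <cstdint>
    #include "CudaGlobalMasterServer.h"

    // Hypothetical per-step driver sketching how the public methods compose.
    void runStep(CudaGlobalMasterServer &server, const Lattice *lat,
                 int64_t step, bool doEnergy, bool doVirial) {
      server.setStep(step);              // tell clients which step this is
      server.communicateToClients(lat);  // copy atom data and lattice to clients
      server.calculate();                // run every registered client
      if (server.willAddGlobalForces()) {
        // apply client forces (and their energy/virial contributions)
        server.communicateToMD(doEnergy, doVirial);
      }
      server.finishReductions();         // submit energies to the reduction
    }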