namd/doxygen/CudaGlobalMasterServer_8h_source.html

 #ifndef CUDAGLOBALMASTERSERVER_H
 #define CUDAGLOBALMASTERSERVER_H

 #include <chrono>
 #include <memory>
 #include <unordered_map>
 #include <vector>
 #ifdef NAMD_CUDA
 #include "CudaRecord.h"
 #include "cuda_runtime.h"
 #endif

 #include "NamdTypes.h"

 // Forward declarations: these types are only used through pointers,
 // references or smart pointers in this header, so their full definitions
 // are not required here.
 class AtomMap;
 class CudaGlobalMasterClient;
 class Lattice;
 class NodeReduction;
 class SubmitReduction;

 // A class for copying atom information from SequencerCUDA to
 // CudaGlobalMasterClient.
 //
 // The complete interface is only declared when both NAMD_CUDA and
 // NODEGROUP_FORCE_REGISTER are defined; in every other configuration only
 // the constructor is declared.
 //
 // NOTE(review): the member functions are defined in the corresponding .C
 // file, which is not visible from this header. The one-line summaries below
 // are inferred from the declarations and should be confirmed against the
 // definitions.
 // NOTE(review): the class declares a destructor and stores raw pointers and
 // a cudaStream_t, yet it is implicitly copyable. If the destructor releases
 // these resources, copying an instance would be unsafe -- confirm and
 // consider deleting the copy operations.
 class CudaGlobalMasterServer {
 public:
 #if defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)

   // Floating-point type used by the force arrays in PeerTFArray and
   // PeerAFArray.
   using tf_type = double;

   // One entry of a copy list.
   // NOTE(review): presumably associates the atom at m_soa_index on source
   // device m_src_dev_index with slot m_client_atom_pos of client
   // m_client_index -- confirm against the copy-list builders.
   struct CopyListTuple {
     int m_src_dev_index;
     int m_soa_index;
     size_t m_client_index;
     size_t m_client_atom_pos;
   };

   // Raw buffer pointers belonging to one client, together with a size.
   // NOTE(review): the d_ prefix suggests device memory, and the unit of sz
   // (atoms vs. elements) is not visible here -- confirm.
   struct ClientBuffer {
     double *d_data;
     float *d_mass;
     float *d_charge;
     char *d_transform;
     double *d_vel;
     size_t sz;
   };

   // Arrays of pointers to per-component atom data (positions, velocities,
   // masses, charges and transforms).
   // NOTE(review): presumably one inner pointer per peer device -- confirm.
   struct PeerAtomData {
     double **d_pos_x;
     double **d_pos_y;
     double **d_pos_z;
     double **d_vel_x;
     double **d_vel_y;
     double **d_vel_z;
     float **d_mass;
     float **d_charge;
     char3 **d_transform;
   };

   // Arrays of pointers to the force components (normal, saved nonbonded and
   // saved slow) plus the fixed-atom flags.
   // NOTE(review): presumably the inputs for the total force, one inner
   // pointer per peer device -- confirm.
   struct PeerTFArray {
     tf_type **d_f_normal_x;
     tf_type **d_f_normal_y;
     tf_type **d_f_normal_z;
     tf_type **d_f_saved_nbond_x;
     tf_type **d_f_saved_nbond_y;
     tf_type **d_f_saved_nbond_z;
     tf_type **d_f_saved_slow_x;
     tf_type **d_f_saved_slow_y;
     tf_type **d_f_saved_slow_z;
     int **d_atomFixed;
   };

   // Arrays of pointers to the applied force components plus the fixed-atom
   // flags.
   // NOTE(review): presumably one inner pointer per peer device -- confirm.
   struct PeerAFArray {
     tf_type **d_f_applied_x;
     tf_type **d_f_applied_y;
     tf_type **d_f_applied_z;
     int **d_atomFixed;
   };

   // Constructs a server for the given device.
   // NOTE(review): printProfilingFreq defaults to -1, which presumably
   // disables the periodic profiling output -- confirm.
   CudaGlobalMasterServer(int deviceID, int printProfilingFreq = -1);
   ~CudaGlobalMasterServer();

   // Registers / unregisters a client with this server.
   void addClient(std::shared_ptr<CudaGlobalMasterClient> client);
   void removeClient(std::shared_ptr<CudaGlobalMasterClient> client);

   // Data exchange entry points (server -> clients, and clients -> MD).
   void communicateToClients(const Lattice* lat);
   void communicateToMD();
   void updateAtomMaps();

   // Queries about what the registered clients need from, or give to, the MD.
   bool requestedTotalForces() const;
   bool willAddGlobalForces() const;

   void setStep(int64_t step);

   // Returns a read-only view of the registered clients.
   const std::vector<std::shared_ptr<CudaGlobalMasterClient>> &getClients() const { return m_clients; }

   // Returns the CUDA stream stored in this server. The accessor does not
   // modify the object, so it is const like the other query functions.
   cudaStream_t getStream() const {
     return m_stream;
   }

   // Declared unconditionally here: the enclosing #if already guarantees that
   // NODEGROUP_FORCE_REGISTER is defined.
   void finishReductions(bool doEnergy, bool doVirial, NodeReduction *reduction);

 private:
   void copyAtomsToClients(bool copyPositions, bool copyMasses, bool copyCharges,
                           bool copyTransforms, bool copyVelocities);
   void copyTotalForcesToClients();
   void addGlobalForces();
   void buildAtomsCopyList();
   void buildAtomsTotalForcesCopyList();
   void buildForcedAtomsCopyList();
   void printProfiling() const;
   void allocatePeerArrays();
   void copyPeerArraysToDevice();

   int m_device_id;
   int64_t m_step;
   cudaStream_t m_stream;
   int m_num_devices;
   int m_clients_changed;
   int m_atom_maps_changed;
   int m_print_profiling_freq;
   std::vector<std::shared_ptr<CudaGlobalMasterClient>> m_clients;
   static constexpr int numCopyLists = 3;
   // Data structures for copying atomic positions to multiple clients
   std::vector<CopyListTuple> m_atom_pos_copy_list;
   CopyListTuple *m_d_atom_pos_copy_list;
   ClientBuffer *m_d_atom_pos_client_buffers;
   // Data structures for copying total forces to multiple clients
   std::vector<CopyListTuple> m_atom_total_force_copy_list;
   CopyListTuple *m_d_atom_total_force_copy_list;
   ClientBuffer *m_atom_total_force_client_buffers;
   // Data structures for the forced atoms of multiple clients
   // NOTE(review): presumably filled by buildForcedAtomsCopyList() and
   // consumed by addGlobalForces() -- confirm.
   std::vector<CopyListTuple> m_forced_atom_copy_list;
   bool m_unique_forced_atoms;
   CopyListTuple *m_d_forced_atom_copy_list;
   ClientBuffer *m_d_forced_atom_client_buffers;
   // Data structures for mapping global atom ids to SOA ids
   std::vector<std::vector<AtomMap *>> m_atom_map_lists;
   std::vector<int> m_src_devs;
   std::vector<std::vector<CudaLocalRecord>> m_local_records;
   std::vector<int *> m_global_to_local_id;
   std::unordered_map<int, int> m_device_id_to_index;
   // For profiling
   std::chrono::duration<float> m_t_build_copy_lists;
   std::chrono::duration<float> m_t_copy_atoms;
   std::chrono::duration<float> m_t_copy_total_forces;
   std::chrono::duration<float> m_t_add_global_forces;
   std::chrono::duration<float> m_t_calc;
   std::chrono::duration<float> m_t_reductions;
   // Pointers to buffers of device arrays (for multiple GPUs)
   // NOTE(review): the m_h_ / m_d_ prefixes presumably distinguish the
   // host-side and device-side copies of the same pointer tables -- confirm.
   PeerAtomData m_h_peer_atom_data;
   PeerTFArray m_h_peer_tf_array;
   PeerAFArray m_h_peer_af_array;
   PeerAtomData m_d_peer_atom_data;
   PeerTFArray m_d_peer_tf_array;
   PeerAFArray m_d_peer_af_array;
 #else
   // Builds without NAMD_CUDA and NODEGROUP_FORCE_REGISTER only declare the
   // constructor.
   CudaGlobalMasterServer(int deviceID, int printProfilingFreq = -1);
 #endif // defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)
 };

 #endif // CUDAGLOBALMASTERSERVER_H
NodeReduction
Definition: ReductionMgr.h:364

CudaGlobalMasterClient
A class for performing calculations on specific atoms selected by atom serial numbers.
Definition: CudaGlobalMasterClient.h:21

CudaGlobalMasterServer::CudaGlobalMasterServer
CudaGlobalMasterServer(int deviceID, int printProfilingFreq=-1)
Definition: CudaGlobalMasterServer.C:964

SubmitReduction
Definition: ReductionMgr.h:305

NamdTypes.h

AtomMap
Definition: AtomMap.h:33

Lattice
Definition: Lattice.h:17

CudaGlobalMasterServer
A class for copying atom information from SequencerCUDA to CudaGlobalMasterClient.
Definition: CudaGlobalMasterServer.h:25

CudaRecord.h