NAMD
CudaGlobalMasterServer.h
Go to the documentation of this file.
1 #ifndef CUDAGLOBALMASTERSERVER_H
2 #define CUDAGLOBALMASTERSERVER_H
3 
4 #include <chrono>
5 #include <memory>
6 #include <unordered_map>
7 #include <vector>
8 #ifdef NAMD_CUDA
9 #include "CudaRecord.h"
10 #include "cuda_runtime.h"
11 #endif
12 
13 #include "NamdTypes.h"
14 
16 class Lattice;
17 class AtomMap;
18 class NodeReduction;
19 class SubmitReduction;
20 
26 public:
27 #if defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)
28 
// Scalar type of the force arrays declared in PeerTFArray and PeerAFArray.
32  using tf_type = double;
// One entry of a copy list; the enclosing class stores these in its
// m_*_copy_list vectors and refers to them through the matching
// m_d_*_copy_list pointers.
// NOTE(review): the per-field notes are inferred from the member names only;
// confirm against the implementation file.
38  struct CopyListTuple {
    // presumably which source device holds the atom — TODO confirm
40  int m_src_dev_index;
    // presumably the atom's index in that device's SOA arrays — TODO confirm
42  int m_soa_index;
    // presumably which client the entry belongs to — TODO confirm
44  size_t m_client_index;
    // presumably the atom's slot inside that client's buffer — TODO confirm
46  size_t m_client_atom_pos;
47  };
// Five raw buffer pointers plus one size field; the enclosing class refers to
// these through its ClientBuffer* members.
// NOTE(review): the d_ prefix suggests device memory and sz an element count,
// but neither is established in this header — confirm.
53  struct ClientBuffer {
55  double *d_data;
57  float *d_mass;
59  float *d_charge;
    // NOTE(review): char* here, whereas PeerAtomData::d_transform is char3** —
    // confirm the intended element layout.
61  char *d_transform;
63  double *d_vel;
65  size_t sz;
66  };
// Pointer tables (one T** per quantity) for atom positions, velocities,
// masses, charges and transforms. The enclosing class keeps two instances,
// m_h_peer_atom_data and m_d_peer_atom_data.
// NOTE(review): presumably each table holds one array pointer per device —
// confirm against allocatePeerArrays().
71  struct PeerAtomData {
72  double **d_pos_x;
73  double **d_pos_y;
74  double **d_pos_z;
75  double **d_vel_x;
76  double **d_vel_y;
77  double **d_vel_z;
78  float **d_mass;
79  float **d_charge;
80  char3 **d_transform;
81  };
// Pointer tables for three force sets (normal, saved_nbond, saved_slow), each
// split into x/y/z arrays of tf_type, plus an int table named d_atomFixed.
// The enclosing class keeps m_h_peer_tf_array and m_d_peer_tf_array.
// NOTE(review): "TF" presumably stands for "total forces" — confirm.
86  struct PeerTFArray {
87  tf_type **d_f_normal_x;
88  tf_type **d_f_normal_y;
89  tf_type **d_f_normal_z;
90  tf_type **d_f_saved_nbond_x;
91  tf_type **d_f_saved_nbond_y;
92  tf_type **d_f_saved_nbond_z;
93  tf_type **d_f_saved_slow_x;
94  tf_type **d_f_saved_slow_y;
95  tf_type **d_f_saved_slow_z;
96  int **d_atomFixed;
97  };
// Pointer tables for the x/y/z components of the "applied" force arrays
// (tf_type), plus an int table named d_atomFixed. The enclosing class keeps
// m_h_peer_af_array and m_d_peer_af_array.
// NOTE(review): "AF" presumably stands for "applied forces" — confirm.
102  struct PeerAFArray {
103  tf_type **d_f_applied_x;
104  tf_type **d_f_applied_y;
105  tf_type **d_f_applied_z;
106  int **d_atomFixed;
107  };
// Public interface.
// NOTE(review): only getClients() and getStream() have bodies in this header;
// the notes on the other members are inferred from names and signatures —
// confirm against the implementation file.
// Constructor: printProfilingFreq defaults to -1.
113  CudaGlobalMasterServer(int deviceID, int printProfilingFreq = -1);
     // presumably registers a client with this server (shared_ptr taken by value)
122  void addClient(std::shared_ptr<CudaGlobalMasterClient> client);
     // presumably unregisters a previously added client
127  void removeClient(std::shared_ptr<CudaGlobalMasterClient> client);
     // presumably pushes atom data to the clients; takes the current Lattice by const pointer
132  void communicateToClients(const Lattice* lat);
     // presumably brings client results back to the MD side
136  void communicateToMD();
140  void updateAtomMaps();
144  bool requestedTotalForces() const;
148  bool willAddGlobalForces() const;
153  void setStep(int64_t step);
     // Returns a read-only reference to the m_clients vector.
157  const std::vector<std::shared_ptr<CudaGlobalMasterClient>> &getClients() const { return m_clients; }
158 
     // Returns m_stream by value.
     // NOTE(review): not const-qualified, unlike getClients().
159  cudaStream_t getStream() {
160  return m_stream;
161  }
     // NOTE(review): this guard is always true at this point, because the
     // enclosing #if already requires NODEGROUP_FORCE_REGISTER.
162 #ifdef NODEGROUP_FORCE_REGISTER
163 
169  void finishReductions(bool doEnergy, bool doVirial, NodeReduction *reduction);
170 #endif
171 private:
     // Private helpers; declarations only, so their behavior is not visible in
     // this header.
     // Five flags select which per-atom attributes are copied.
     // NOTE(review): the flags presumably correspond to the d_data / d_mass /
     // d_charge / d_transform / d_vel fields of ClientBuffer — confirm.
180  void copyAtomsToClients(bool copyPositions, bool copyMasses, bool copyCharges,
181  bool copyTransforms, bool copyVelocities);
185  void copyTotalForcesToClients();
189  void addGlobalForces();
     // The three build*CopyList() helpers presumably fill the three
     // m_*_copy_list members of this class — confirm.
193  void buildAtomsCopyList();
197  void buildAtomsTotalForcesCopyList();
201  void buildForcedAtomsCopyList();
205  void printProfiling() const;
     // presumably operate on the m_h_peer_* / m_d_peer_* members — confirm
209  void allocatePeerArrays();
213  void copyPeerArraysToDevice();
214 
// NOTE(review): second private: label in a row (one already precedes the
// helper declarations) — redundant but harmless.
215 private:
216  int m_device_id;
217  int64_t m_step;
     // Stream handle returned by getStream().
218  cudaStream_t m_stream;
219  int m_num_devices;
     // NOTE(review): the two *_changed members are int, not bool; presumably
     // used as dirty flags — confirm.
220  int m_clients_changed;
221  int m_atom_maps_changed;
222  int m_print_profiling_freq;
     // Client list exposed read-only through getClients().
223  std::vector<std::shared_ptr<CudaGlobalMasterClient>> m_clients;
     // presumably counts the three copy lists declared below (atom positions,
     // total forces, forced atoms) — confirm
224  static constexpr int numCopyLists = 3;
225  // Data structures for copying atomic positions to multiple clients
226  std::vector<CopyListTuple> m_atom_pos_copy_list;
227  CopyListTuple *m_d_atom_pos_copy_list;
228  ClientBuffer *m_d_atom_pos_client_buffers;
229  // Data structures for copying total forces to multiple clients
230  std::vector<CopyListTuple> m_atom_total_force_copy_list;
231  CopyListTuple *m_d_atom_total_force_copy_list;
     // NOTE(review): lacks the m_d_ prefix used by the two sibling ClientBuffer*
     // members — confirm whether this is also a device pointer.
232  ClientBuffer *m_atom_total_force_client_buffers;
233  // Data structures for the forced-atom copy list (atoms presumably receiving forces from clients — TODO confirm)
234  std::vector<CopyListTuple> m_forced_atom_copy_list;
235  bool m_unique_forced_atoms;
236  CopyListTuple *m_d_forced_atom_copy_list;
237  ClientBuffer *m_d_forced_atom_client_buffers;
238  // Data structures for mapping global atom ids to SOA ids
239  std::vector<std::vector<AtomMap *>> m_atom_map_lists;
240  std::vector<int> m_src_devs;
241  std::vector<std::vector<CudaLocalRecord>> m_local_records;
242  std::vector<int *> m_global_to_local_id;
     // presumably maps a device ID to its position in the per-device vectors
     // above — confirm
243  std::unordered_map<int, int> m_device_id_to_index;
244  // For profiling
     // std::chrono::duration<float> uses the default period, so each value is a
     // time span in seconds with float representation.
245  std::chrono::duration<float> m_t_build_copy_lists;
246  std::chrono::duration<float> m_t_copy_atoms;
247  std::chrono::duration<float> m_t_copy_total_forces;
248  std::chrono::duration<float> m_t_add_global_forces;
249  std::chrono::duration<float> m_t_calc;
250  std::chrono::duration<float> m_t_reductions;
251  // Pointers to buffers of device arrays (for multiple GPUs)
     // NOTE(review): m_h_* and m_d_* are presumably the host-side and
     // device-side copies of the same tables (cf. copyPeerArraysToDevice()) —
     // confirm.
252  PeerAtomData m_h_peer_atom_data;
253  PeerTFArray m_h_peer_tf_array;
254  PeerAFArray m_h_peer_af_array;
255  PeerAtomData m_d_peer_atom_data;
256  PeerTFArray m_d_peer_tf_array;
257  PeerAFArray m_d_peer_af_array;
258 #else
     // Fallback when NAMD_CUDA and NODEGROUP_FORCE_REGISTER are not both
     // defined: the class declares only its constructor, with the same
     // signature as in the full build.
259  CudaGlobalMasterServer(int deviceID, int printProfilingFreq = -1);
260 #endif // defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)
261 };
262 
263 #endif // CUDAGLOBALMASTERSERVER_H
A class for performing calculations on specific atoms selected by atom serial numbers.
CudaGlobalMasterServer(int deviceID, int printProfilingFreq=-1)
A class for copying atom information from SequencerCUDA to CudaGlobalMasterClient.