1 #ifndef CUDAGLOBALMASTERSERVER_H 2 #define CUDAGLOBALMASTERSERVER_H 6 #include <unordered_map> 10 #include "cuda_runtime.h" 27 #if defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER) 32 using tf_type = double;
38 struct CopyListTuple {
44 size_t m_client_index;
46 size_t m_client_atom_pos;
// Nine members of type tf_type** (tf_type is aliased to double near the top
// of this header): one pointer-to-pointer per x/y/z component for each of the
// "normal", "saved_nbond" and "saved_slow" force groups.
// NOTE(review): the d_ prefix and the extra level of indirection suggest an
// array of per-device device pointers — confirm against the code that fills
// these in; the enclosing struct header is not visible in this excerpt.
87 tf_type **d_f_normal_x;
88 tf_type **d_f_normal_y;
89 tf_type **d_f_normal_z;
90 tf_type **d_f_saved_nbond_x;
91 tf_type **d_f_saved_nbond_y;
92 tf_type **d_f_saved_nbond_z;
93 tf_type **d_f_saved_slow_x;
94 tf_type **d_f_saved_slow_y;
95 tf_type **d_f_saved_slow_z;
// Three tf_type** members (tf_type is double), one per x/y/z component of the
// "applied" force arrays.
// NOTE(review): presumably the destination for forces added by the clients,
// laid out like the total-force arrays — confirm against the enclosing struct,
// whose header is not visible in this excerpt.
103 tf_type **d_f_applied_x;
104 tf_type **d_f_applied_y;
105 tf_type **d_f_applied_z;
// Declaration only; the definition is not part of this header excerpt.
// The client is passed as a std::shared_ptr<CudaGlobalMasterClient> by value,
// so the callee receives its own shared reference.
// NOTE(review): presumably appends to m_clients — confirm in the definition.
122 void addClient(std::shared_ptr<CudaGlobalMasterClient> client);
// Declaration only; same parameter type and passing convention as addClient.
// NOTE(review): presumably removes the matching entry from m_clients; how a
// client that was never added is handled cannot be told from here.
127 void removeClient(std::shared_ptr<CudaGlobalMasterClient> client);
// Declaration only. Takes a pointer to a const Lattice; whether a null
// pointer is accepted is not visible from this header.
// NOTE(review): by name, this is the server -> clients half of a step —
// confirm the exact work done in the definition.
132 void communicateToClients(
const Lattice* lat);
// Declaration only; no parameters, no return value.
// NOTE(review): by name, the clients -> MD half of a step — confirm.
136 void communicateToMD();
// Declaration only; no parameters, no return value.
// NOTE(review): presumably refreshes the atom-map members declared further
// down (m_atom_map_lists and related) — confirm.
140 void updateAtomMaps();
// Const query returning bool; declaration only.
// NOTE(review): presumably true when at least one registered client asks for
// total forces — confirm in the definition.
144 bool requestedTotalForces()
const;
// Const query returning bool; declaration only.
// NOTE(review): presumably true when at least one registered client will
// apply forces — confirm in the definition.
148 bool willAddGlobalForces()
const;
// Declaration only. Takes the step as a 64-bit signed integer (int64_t).
// NOTE(review): no step member is visible in this excerpt, so where the
// value is stored or forwarded cannot be told from here.
153 void setStep(int64_t step);
// Read-only accessor: returns a const reference to the m_clients vector, so
// no copy of the vector or of the shared_ptr elements is made. Callable on a
// const server object.
157 const std::vector<std::shared_ptr<CudaGlobalMasterClient>> &getClients()
const {
return m_clients; }
159 cudaStream_t getStream() {
// NOTE(review): the #if at the top of this header already requires
// NODEGROUP_FORCE_REGISTER, so the inner #ifdef opened on the next line looks
// redundant; its matching #endif is not visible in this excerpt.
// finishReductions: declaration only. Takes two bool flags (doEnergy,
// doVirial) and a pointer to a NodeReduction.
// NOTE(review): presumably the flags select which quantities are submitted to
// *reduction — confirm in the definition.
162 #ifdef NODEGROUP_FORCE_REGISTER 169 void finishReductions(
bool doEnergy,
bool doVirial,
NodeReduction *reduction);
// Declaration only. Five independent bool flags, one per kind of per-atom
// data named in the parameter list (positions, masses, charges, transforms,
// velocities).
// NOTE(review): presumably each flag enables copying that field into the
// client buffers — confirm in the definition. All five parameters are plain
// bools, so call sites are easy to get wrong by swapping arguments.
180 void copyAtomsToClients(
bool copyPositions,
bool copyMasses,
bool copyCharges,
181 bool copyTransforms,
bool copyVelocities);
// Declaration only; no parameters, no return value.
// NOTE(review): presumably driven by m_atom_total_force_copy_list — confirm.
185 void copyTotalForcesToClients();
// Declaration only; no parameters, no return value.
// NOTE(review): presumably driven by m_forced_atom_copy_list — confirm.
189 void addGlobalForces();
// Three parameterless builders, declarations only.
// NOTE(review): their names line up one-to-one with the three
// std::vector<CopyListTuple> members declared below (m_atom_pos_copy_list,
// m_atom_total_force_copy_list, m_forced_atom_copy_list) and with
// numCopyLists == 3 — confirm that each builder fills the matching list.
193 void buildAtomsCopyList();
197 void buildAtomsTotalForcesCopyList();
201 void buildForcedAtomsCopyList();
// Const member, declaration only.
// NOTE(review): presumably reports the m_t_* duration members — confirm.
205 void printProfiling()
const;
// Declaration only; no parameters, no return value.
// NOTE(review): presumably sizes the m_h_peer_* / m_d_peer_* members — confirm.
209 void allocatePeerArrays();
// Declaration only; no parameters, no return value.
// NOTE(review): presumably copies the m_h_peer_* contents to the m_d_peer_*
// counterparts — confirm which stream, if any, is used.
213 void copyPeerArraysToDevice();
// CUDA stream handle held by the server.
// NOTE(review): getStream() is declared above but its body is cut off in this
// excerpt; presumably it returns this member — confirm. Creation and
// destruction of the stream are not visible here.
218 cudaStream_t m_stream;
// Two int members named as change markers for the client list and the atom maps.
// NOTE(review): stored as int rather than bool; whether they are flags or
// counters cannot be told from this header.
220 int m_clients_changed;
221 int m_atom_maps_changed;
// NOTE(review): presumably initialised from a constructor argument such as
// printProfilingFreq — confirm in the constructor definition.
222 int m_print_profiling_freq;
// Registered clients, held by shared_ptr; exposed read-only via getClients().
223 std::vector<std::shared_ptr<CudaGlobalMasterClient>> m_clients;
// Compile-time constant equal to 3.
// NOTE(review): matches the three copy-list members declared next — confirm
// that is what it counts.
224 static constexpr
int numCopyLists = 3;
// Three parallel groups, one each for atom positions, total forces and forced
// atoms. Every group has a std::vector<CopyListTuple>, a raw CopyListTuple*
// and a raw ClientBuffer*.
// NOTE(review): the m_d_ prefix presumably marks device-side pointers that
// mirror the host vectors — confirm; allocation and ownership of the raw
// pointers are not visible in this header.
226 std::vector<CopyListTuple> m_atom_pos_copy_list;
227 CopyListTuple *m_d_atom_pos_copy_list;
228 ClientBuffer *m_d_atom_pos_client_buffers;
230 std::vector<CopyListTuple> m_atom_total_force_copy_list;
231 CopyListTuple *m_d_atom_total_force_copy_list;
// NOTE(review): unlike the other two ClientBuffer* members
// (m_d_atom_pos_client_buffers, m_d_forced_atom_client_buffers) this name has
// no m_d_ prefix. Either the name is inconsistent or this pointer lives in a
// different memory space — confirm before relying on the naming convention.
232 ClientBuffer *m_atom_total_force_client_buffers;
234 std::vector<CopyListTuple> m_forced_atom_copy_list;
// Bool flag that sits inside the forced-atom group.
// NOTE(review): presumably true when no atom appears more than once in
// m_forced_atom_copy_list — confirm where it is set and read.
235 bool m_unique_forced_atoms;
236 CopyListTuple *m_d_forced_atom_copy_list;
237 ClientBuffer *m_d_forced_atom_client_buffers;
// Bookkeeping containers for atom lookup across devices.
// NOTE(review): the outer index of the vectors below is presumably a device
// index, with m_device_id_to_index translating a device ID into that index —
// confirm against updateAtomMaps().
// Vector of vectors of non-owning-looking raw AtomMap pointers; ownership is
// not visible here.
239 std::vector<std::vector<AtomMap *>> m_atom_map_lists;
240 std::vector<int> m_src_devs;
241 std::vector<std::vector<CudaLocalRecord>> m_local_records;
// Raw int* entries; who allocates and frees them is not visible in this header.
242 std::vector<int *> m_global_to_local_id;
// int -> int hash map (std::unordered_map).
243 std::unordered_map<int, int> m_device_id_to_index;
// Six timing members. std::chrono::duration<float> uses the default period
// std::ratio<1>, so each value is a number of seconds stored as a float.
// NOTE(review): presumably accumulated around the corresponding operations
// and reported by printProfiling() — confirm; single-precision accumulation
// loses resolution over long runs.
245 std::chrono::duration<float> m_t_build_copy_lists;
246 std::chrono::duration<float> m_t_copy_atoms;
247 std::chrono::duration<float> m_t_copy_total_forces;
248 std::chrono::duration<float> m_t_add_global_forces;
249 std::chrono::duration<float> m_t_calc;
250 std::chrono::duration<float> m_t_reductions;
// Two sets of the same three structs (PeerAtomData, PeerTFArray, PeerAFArray),
// held by value: one set prefixed m_h_ and one prefixed m_d_.
// NOTE(review): presumably the m_h_ set is staged on the host and the m_d_ set
// holds the device-side counterpart filled by copyPeerArraysToDevice() —
// confirm in the definitions of allocatePeerArrays()/copyPeerArraysToDevice().
252 PeerAtomData m_h_peer_atom_data;
253 PeerTFArray m_h_peer_tf_array;
254 PeerAFArray m_h_peer_af_array;
255 PeerAtomData m_d_peer_atom_data;
256 PeerTFArray m_d_peer_tf_array;
257 PeerAFArray m_d_peer_af_array;
260 #endif // defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER) 263 #endif // CUDAGLOBALMASTERSERVER_H
A class for performing calculations on specific atoms selected by atom serial numbers.
CudaGlobalMasterServer(int deviceID, int printProfilingFreq=-1)
A class for copying atom information from SequencerCUDA to CudaGlobalMasterClient.