1 #ifndef CUDAGLOBALMASTERSERVER_H 2 #define CUDAGLOBALMASTERSERVER_H 5 #include <unordered_map> 9 #include "cuda_runtime.h" 12 #include <hip/hip_runtime.h> 18 class CudaGlobalMasterClient;
32 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(NODEGROUP_FORCE_REGISTER) 37 using tf_type = double;
// Element type of the m_atom_pos_copy_list, m_atom_total_force_copy_list and
// m_forced_atom_copy_list vectors declared further down in this header.
// NOTE(review): only part of this struct is visible in the reviewed listing;
// any members declared between the opening line and m_client_index are not
// described here.
43 struct CopyListTuple {
// NOTE(review): presumably the index of the target client within the
// server's client list — confirm against the code that fills the copy lists.
49 size_t m_client_index;
// NOTE(review): presumably the atom's slot inside that client's buffer —
// confirm against the code that fills the copy lists.
51 size_t m_client_atom_pos;
// Pointer-to-pointer members of element type tf_type, grouped by force kind
// ("normal", "saved_nbond", "saved_slow") with one member per x/y/z suffix.
// NOTE(review): the enclosing struct declaration is not visible in the
// reviewed listing; the d_ prefix and the double indirection suggest one
// device array pointer per peer device — confirm.
92 tf_type **d_f_normal_x;
93 tf_type **d_f_normal_y;
94 tf_type **d_f_normal_z;
95 tf_type **d_f_saved_nbond_x;
96 tf_type **d_f_saved_nbond_y;
97 tf_type **d_f_saved_nbond_z;
98 tf_type **d_f_saved_slow_x;
99 tf_type **d_f_saved_slow_y;
100 tf_type **d_f_saved_slow_z;
// Same layout for the "applied" forces, one member per x/y/z suffix.
// NOTE(review): these appear to belong to a separate struct whose header is
// not visible here — confirm.
108 tf_type **d_f_applied_x;
109 tf_type **d_f_applied_y;
110 tf_type **d_f_applied_z;
// Takes a client by shared_ptr (passed by value).
// NOTE(review): presumably stores it in m_clients — definition not visible here.
127 void addClient(std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client);
// Counterpart of addClient with the same parameter type.
// NOTE(review): presumably removes the client from m_clients — confirm.
132 void removeClient(std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client);
// Server-to-client step; receives the lattice through a pointer-to-const.
// NOTE(review): whether lat may be null is not visible here — confirm.
137 void communicateToClients(
const Lattice* lat);
// Client-to-MD step; the two flags are named for energy and virial output.
// NOTE(review): their exact effect is defined in the implementation file.
145 void communicateToMD(
bool doEnergy,
bool doVirial);
// NOTE(review): presumably refreshes the atom-map related members
// (m_atom_map_lists, m_global_to_local_id, ...) — confirm.
149 void updateAtomMaps();
// Const query returning bool.
// NOTE(review): presumably true when any client requests total forces — confirm.
153 bool requestedTotalForces()
const;
// Const query returning bool.
// NOTE(review): presumably true when any client will add global forces — confirm.
157 bool willAddGlobalForces()
const;
// Receives a step number as int64_t.
// NOTE(review): presumably forwards it to the clients — confirm.
162 void setStep(int64_t step);
// Returns a const reference to the m_clients vector (no copy is made).
166 const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>> &getClients()
const {
return m_clients; }
// NOTE(review): the body is not visible in the reviewed listing; presumably
// returns m_stream — confirm.
168 cudaStream_t getStream() {
// NOTE(review): purpose not derivable from this declaration alone; see the
// implementation file.
174 void finishReductions();
179 template <
typename T>
180 class CudaHostAllocator {
182 using value_type = T;
184 CudaHostAllocator() =
default;
187 constexpr CudaHostAllocator(
const CudaHostAllocator<U>&) noexcept {}
189 friend bool operator==(
const CudaHostAllocator&,
const CudaHostAllocator&) {
return true; }
190 friend bool operator!=(
const CudaHostAllocator&,
const CudaHostAllocator&) {
return false; }
192 T* allocate(
size_t n) {
194 if (cudaHostAlloc(&ptr, n *
sizeof(T), cudaHostAllocMapped) != cudaSuccess) {
195 throw std::bad_alloc();
199 void deallocate(T* ptr,
size_t n) noexcept {
202 template<
typename U,
typename... Args>
203 void construct(U* p, Args&&... args) {
204 new(p) U(std::forward<Args>(args)...);
208 void destroy(U* p) noexcept {
// Takes one bool per atom attribute (positions, masses, charges, transforms,
// velocities).
// NOTE(review): presumably each flag selects whether that attribute is copied
// to the clients in this call — confirm in the implementation file.
221 void copyAtomsToClients(
bool copyPositions,
bool copyMasses,
bool copyCharges,
222 bool copyTransforms,
bool copyVelocities);
// NOTE(review): presumably driven by m_atom_total_force_copy_list — confirm.
226 void copyTotalForcesToClients();
// NOTE(review): presumably driven by m_forced_atom_copy_list — confirm.
230 void addGlobalForces();
// The three build*CopyList() declarations mirror the three copy-list members
// of this header (atom positions, total forces, forced atoms).
// NOTE(review): which list each one fills is inferred from the names — confirm.
234 void buildAtomsCopyList();
238 void buildAtomsTotalForcesCopyList();
242 void buildForcedAtomsCopyList();
// NOTE(review): presumably set up the m_h_peer_* members and mirror them into
// the m_d_peer_* members respectively — confirm.
246 void allocatePeerArrays();
250 void copyPeerArraysToDevice();
// NOTE(review): the header of the function owning this statement is not
// visible in the reviewed listing; the statement returns reductionGpuResident.
256 return reductionGpuResident;
// CUDA stream handle owned or used by the server.
// NOTE(review): creation and destruction are not visible in this header.
262 cudaStream_t m_stream;
// Integer state flags.
// NOTE(review): presumably non-zero means "needs rebuild" — confirm.
264 int m_clients_changed;
265 int m_atom_maps_changed;
// NOTE(review): presumably a step interval for printing profiling output;
// the constructor signature elsewhere in this listing defaults the matching
// argument to -1 — confirm what -1 means.
266 int m_print_profiling_freq;
// Registered clients, held by shared_ptr; exposed read-only via getClients().
267 std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>> m_clients;
// Matches the number of copy lists declared below (atom positions, total
// forces, forced atoms).
268 static constexpr
int numCopyLists = 3;
// Copy list for atom positions. The vector uses CudaHostAllocator, whose
// allocate() obtains memory through cudaHostAlloc(..., cudaHostAllocMapped).
270 std::vector<CopyListTuple, CudaHostAllocator<CopyListTuple>> m_atom_pos_copy_list;
// NOTE(review): the m_d_ prefix suggests device-side counterparts — confirm.
271 CopyListTuple *m_d_atom_pos_copy_list;
272 ClientBuffer *m_d_atom_pos_client_buffers;
// Copy list for total forces; same allocator as above.
274 std::vector<CopyListTuple, CudaHostAllocator<CopyListTuple>> m_atom_total_force_copy_list;
275 CopyListTuple *m_d_atom_total_force_copy_list;
// NOTE(review): unlike m_d_atom_pos_client_buffers and
// m_d_forced_atom_client_buffers, this member has no m_d_ prefix; check
// whether it really differs in kind or is just named inconsistently.
276 ClientBuffer *m_atom_total_force_client_buffers;
// Copy list for forced atoms; same allocator as above.
278 std::vector<CopyListTuple, CudaHostAllocator<CopyListTuple>> m_forced_atom_copy_list;
// NOTE(review): presumably true when no atom appears twice in the forced-atom
// copy list — confirm.
279 bool m_unique_forced_atoms;
280 CopyListTuple *m_d_forced_atom_copy_list;
281 ClientBuffer *m_d_forced_atom_client_buffers;
// Per-device bookkeeping containers.
// NOTE(review): the outer index of the nested vectors is presumably the
// device index given by m_device_id_to_index — confirm.
283 std::vector<std::vector<AtomMap *>> m_atom_map_lists;
284 std::vector<int> m_src_devs;
285 std::vector<std::vector<CudaLocalRecord>> m_local_records;
286 std::vector<int *> m_global_to_local_id;
// Maps an int key to an int value.
// NOTE(review): by name, device ID -> index into the per-device containers — confirm.
287 std::unordered_map<int, int> m_device_id_to_index;
// Peer array bundles, declared once with an m_h_ prefix and once with m_d_.
// NOTE(review): presumably host-side and device-side copies — confirm.
289 PeerAtomData m_h_peer_atom_data;
290 PeerTFArray m_h_peer_tf_array;
291 PeerAFArray m_h_peer_af_array;
292 PeerAtomData m_d_peer_atom_data;
293 PeerTFArray m_d_peer_tf_array;
294 PeerAFArray m_d_peer_af_array;
// Lattice values stored as doubles in memory obtained through CudaHostAllocator.
// NOTE(review): element count and ordering are not visible here.
298 std::vector<double, CudaHostAllocator<double>> m_h_lattice;
301 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(NODEGROUP_FORCE_REGISTER) 304 #endif // CUDAGLOBALMASTERSERVER_H
CudaGlobalMasterServer(int deviceID, int printProfilingFreq=-1)
int operator!=(const FourBodyConsts &f1, const FourBodyConsts &f2)
int operator==(const AtomSigInfo &s1, const AtomSigInfo &s2)
A class for copying atom information from SequencerCUDA to CudaGlobalMasterClient.