NAMD
CudaGlobalMasterServer.C
1 #include "AtomMap.h"
5 #include "CudaUtils.h"
6 #include "DeviceCUDA.h"
7 #include "InfoStream.h"
8 #include "Lattice.h"
9 #include "NamdEventsProfiling.h"
10 #include "NamdTypes.h"
11 #include "ProcessorPrivate.h"
12 #include "SequencerCUDA.h"
13 #include "SimParameters.h"
14 #include <type_traits>
15 #include <string>
16 
17 #ifdef DEBUGM
18 #include "Debug.h"
19 #define MIN_DEBUG_LEVEL 3
20 
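// Render a pointer value as a string (used by the debug messages below).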
23 std::string ptr_to_str(const void *ptr) {
24  std::ostringstream oss;
25  oss << ptr;
26  return oss.str();
27 }
28 #endif
29 
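// Helper struct to store the result of querying a local ID from a global ID.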
34 struct localIDResult {
35  bool found;
36  // int pe;
37  int soaIndex;
38 };
39 
40 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(NODEGROUP_FORCE_REGISTER)
41 
42 using CopyListTupleT = CudaGlobalMasterServer::CopyListTuple;
43 using ClientBufferT = CudaGlobalMasterServer::ClientBuffer;
44 
45 extern __thread DeviceCUDA *deviceCUDA;
46 
57 localIDResult queryLocalID(const AtomID globalID,
58  const std::vector<AtomMap *> &atomMapsList,
59  const std::vector<CudaLocalRecord> &localRecords,
60  const int *h_globalToLocalID) {
61  // The logic here is similar to ComputeRestraintsCUDA::updateRestrainedAtoms
62  localIDResult result{false, -1};
63  for (int i = 0; i < atomMapsList.size(); ++i) {
64  // Lookup a LocalID from the global atom ID
65  const LocalID lid = atomMapsList[i]->localID(globalID);
66  if (lid.pid != notUsed) {
67  // Atom found
68  result.found = true;
69  // Mapping from global patch ID to local patch ID
70  const int soaPid = h_globalToLocalID[lid.pid];
71  // Memory location of the atom = patch start offset + atom local index in the patch
72  result.soaIndex = localRecords[soaPid].bufferOffset + lid.index;
73 #ifdef DEBUGM
74  DebugM(1, "Atom " + std::to_string(globalID) + " found in local patch " +
75  std::to_string(lid.pid) + " with local index " +
76  std::to_string(lid.index) + ", SOA patch ID " +
77  std::to_string(soaPid) + ", SOA global index " +
78  std::to_string(result.soaIndex) + "\n");
79 #endif
80  break;
81  }
82  }
83 #ifdef DEBUGM
84  if (result.found == false) {
85  DebugM(3, "Atom " + std::to_string(globalID) + " not found.\n");
86  }
87 #endif
88  return result;
89 }
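
// Worked example (illustrative numbers only): if atom 4217 lives in global
// patch 12 and h_globalToLocalID[12] == 3, then with
// localRecords[3].bufferOffset == 1024 and a within-patch index of 7 the
// resulting SOA index is 1024 + 7 = 1031. buildCopyList() below repeats this
// lookup once per device until the atom is found.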
90 
109 template <typename F1, typename F2>
110 void buildCopyList(
111  F1 getAtomID, F2 getPosOrForceBuffer,
112  std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>> &clients,
113  const std::vector<std::vector<AtomMap *>> &atomMapsLists,
114  const std::vector<std::vector<CudaLocalRecord>> &localRecords,
115  const std::vector<int *> h_globalToLocalID,
116  const std::vector<int> &sourceDevicesList,
117  const std::unordered_map<int, int> &deviceToIndex,
118  ClientBufferT *&d_clientBuffers, std::vector<CopyListTupleT, CudaGlobalMasterServer::CudaHostAllocator<CopyListTupleT>> &hostCopyList,
119  CopyListTupleT *&d_copyList, cudaStream_t stream, bool* checkUniqueList = nullptr) {
120  // Number of devices == number of master PEs == number of
121  // "std::vector<AtomMap*>"s
122  std::vector<ClientBufferT> clientBuffers;
123  hostCopyList.clear();
124  const size_t numClients = clients.size();
125  const int numDevices = atomMapsLists.size();
126  for (size_t i = 0; i < numClients; ++i) {
127  const auto &client = clients[i];
128  // getPosOrForceBuffer should return the device memory address of the client
129  // for copying to/from
130  clientBuffers.push_back(ClientBufferT{
131  ((*client).*getPosOrForceBuffer)(), client->getMasses(),
132  client->getCharges(), client->getTransforms(), client->getVelocities(),
133  ((*client).*getAtomID)().size()});
134  const auto &requested_atoms = ((*client).*getAtomID)();
135  // iout << iINFO << "PE: " << CkMyPe() << ", client atoms address " << &requested_atoms << "\n" << endi;
136  // For each client, we try to map the global IDs of its
137  // requested atoms to local SOA array indices.
138  // Also, to account for multi-GPU runs, we keep track of
139  // the device index of the peer arrays.
140  for (size_t j = 0; j < requested_atoms.size(); ++j) {
141  // The global ID of an atom
142  const AtomID gid = requested_atoms[j];
143  localIDResult result{false, -1};
144  // We look for the global ID in the atomMaps associated with all master PEs
145  for (int k = 0; k < numDevices; ++k) {
146  result = queryLocalID(gid, atomMapsLists[k], localRecords[k],
147  h_globalToLocalID[k]);
148  if (result.found) {
149  hostCopyList.push_back(CopyListTupleT{
150  deviceToIndex.at(sourceDevicesList[k]), result.soaIndex, i, j});
151  break;
152  }
153  }
154  // The global ID should be found somewhere. Otherwise there is a bug.
155  if (!result.found) {
156  const std::string error =
157  "Cannot find the local ID in SOA arrays of atom " +
158  std::to_string(gid) + " requested by client[" + std::to_string(i) +
159  "] (" + client->name() + ")\n";
160  NAMD_bug(error.c_str());
161  }
162  }
163  }
164  // Sorting the copy list by SOA index might improve performance
165  std::sort(hostCopyList.begin(), hostCopyList.end(),
166  [](const CopyListTupleT &a, const CopyListTupleT &b) {
167  return a.m_soa_index < b.m_soa_index;
168  });
169  if (checkUniqueList) {
170  // Check if the list is unique
171  auto tmp_list = hostCopyList;
172  // std::sort(tmp_list.begin(), tmp_list.end(),
173  // [](const CopyListTupleT &a, const CopyListTupleT &b) {
174  // return a.m_soa_index < b.m_soa_index;
175  // });
176  auto last = std::unique(tmp_list.begin(), tmp_list.end(),
177  [](const CopyListTupleT &a, const CopyListTupleT &b){
178  return a.m_soa_index == b.m_soa_index;
179  });
180  if (last == tmp_list.end()) {
181  *checkUniqueList = true;
182  } else {
183  *checkUniqueList = false;
184  }
185  }
186  // Copy the copy list structure to the device memory
187  size_t copySize = sizeof(CopyListTupleT) * hostCopyList.size();
188 #ifdef DEBUGM
189  DebugM(3, "Will copy " + std::to_string(hostCopyList.size()) + " items.\n");
190 #endif
191  // std::cout << "Trying to assign a copyList with size of " <<
192  // std::to_string(copySize) << std::endl;
193  if (d_copyList != nullptr) {
194  cudaCheck(cudaFree(d_copyList));
195  }
196  cudaCheck(cudaMalloc(&d_copyList, copySize));
197  cudaCheck(cudaMemcpyAsync(d_copyList, hostCopyList.data(), copySize,
198  cudaMemcpyHostToDevice, stream));
199  copySize = sizeof(ClientBufferT) * clientBuffers.size();
200  if (d_clientBuffers != nullptr) {
201  cudaCheck(cudaFree(d_clientBuffers));
202  }
203  cudaCheck(cudaMalloc(&d_clientBuffers, copySize));
204  cudaCheck(cudaMemcpyAsync(d_clientBuffers, clientBuffers.data(), copySize,
205  cudaMemcpyHostToDevice, stream));
206 }
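
// Illustrative only: the device-side copies are performed by kernels such as
// copyAtomsToClientsCUDA() that are implemented elsewhere; the sketch below
// (kept disabled, like the debug helpers further down) only shows how a
// CopyListTuple/ClientBuffer pair could drive a position gather. It assumes
// ClientBuffer::d_data is a flat xyz double array, which may not match the
// real layout.
#if 0
__global__ void gatherPositionsSketch(
    const double *d_pos_x, const double *d_pos_y, const double *d_pos_z,
    const CopyListTupleT *copyList, size_t numCopies,
    const ClientBufferT *clientBuffers) {
  const size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i >= numCopies) return;
  const CopyListTupleT t = copyList[i];
  // Destination: the requesting client's buffer, at the slot matching the
  // position of this atom in the client's request list.
  double *out = (double *)clientBuffers[t.m_client_index].d_data;
  out[3 * t.m_client_atom_pos + 0] = d_pos_x[t.m_soa_index];
  out[3 * t.m_client_atom_pos + 1] = d_pos_y[t.m_soa_index];
  out[3 * t.m_client_atom_pos + 2] = d_pos_z[t.m_soa_index];
}
// A launch on the server stream might look like:
//   gatherPositionsSketch<<<(numCopies + 127) / 128, 128, 0, stream>>>(...);
#endif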
207 
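// The host-side lattice buffer used below holds 12 doubles laid out as
// {a.x, a.y, a.z, b.x, b.y, b.z, c.x, c.y, c.z, origin.x, origin.y, origin.z};
// see the packing in CudaGlobalMasterServer::communicateToClients().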
214 void copyLatticeToClient(const std::vector<double, CudaGlobalMasterServer::CudaHostAllocator<double>>& lat, int deviceID,
215  std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client, cudaStream_t stream) {
216  const size_t copySize = sizeof(double) * lat.size();
217  int savedDevice;
218  cudaCheck(cudaGetDevice(&savedDevice));
219  cudaCheck(cudaSetDevice(deviceID));
220  double *d_lattice = client->getLattice();
221  if (d_lattice) {
222  cudaCheck(cudaMemcpyAsync(d_lattice, lat.data(), copySize,
223  cudaMemcpyHostToDevice, stream));
224  } else {
225  const std::string error = "Failed to copy lattice to client " + client->name() +
226  " (lattice update requested but the client provided a "
227  "null device buffer pointer)";
228  NAMD_die(error.c_str());
229  }
230  cudaCheck(cudaSetDevice(savedDevice));
231 }
232 
233 #ifdef DEBUGM
234 template <typename T>
235 void debugCopyList(const std::string &name,
236  T &L) {
237  std::cout << "CudaGlobalMasterServer: the copylist is " << name << ", with "
238  << L.size() << " items.\n";
239  for (size_t i = 0; i < L.size(); ++i) {
240  fprintf(stdout,
241  "i = %lu, deviceIndex = %d, soaIndex = %d, clientIndex = %lu, "
242  "clientArrayIndex = %lu\n",
243  i, L[i].m_src_dev_index, L[i].m_soa_index, L[i].m_client_index,
244  L[i].m_client_atom_pos);
245  }
246 }
247 #endif
248 
249 #if 0
250 void debugClientBuffer(const std::string &name, ClientBufferT *B,
251  size_t numClients) {
252  std::vector<ClientBufferT> hB(numClients);
253  cudaPointerAttributes attributes;
254  cudaCheck(cudaPointerGetAttributes(&attributes, B));
255  std::cout << "CudaGlobalMasterServer: the clientBuffer is " << name
256  << ", with " << numClients << " items.\n";
257  std::cout << "deviceBuffer pointer = " << static_cast<void *>(B) << '\n';
258  std::cout << "deviceBuffer attributes:\n";
259  std::cout << "memory type: " << attributes.type << std::endl;
260  cudaCheck(cudaMemcpy(hB.data(), B, sizeof(ClientBufferT) * numClients,
261  cudaMemcpyDeviceToHost));
262  for (size_t i = 0; i < numClients; ++i) {
263  fprintf(stdout,
264  "i = %lu, d_data = %p, d_mass = %p, d_charge = %p, size = %lu\n", i,
265  hB[i].d_data, hB[i].d_mass, hB[i].d_charge, hB[i].sz);
266  }
267  std::cout << std::endl;
268 }
269 #endif
270 
271 CudaGlobalMasterServer::CudaGlobalMasterServer(int deviceID, int printProfilingFreq)
272  : m_device_id(deviceID), m_step(0),
273  m_num_devices(deviceCUDA->getNumDevice()), m_clients_changed(0),
274  m_atom_maps_changed(0), m_print_profiling_freq(printProfilingFreq),
275  reductionGpuResident(nullptr) {
276  iout << iINFO << "CudaGlobalMasterServer: initialized on PE " << CkMyPe()
277  << " and GPU device " << m_device_id << "\n"
278  << endi;
279  int savedDevice;
280  cudaCheck(cudaGetDevice(&savedDevice));
281  cudaCheck(cudaSetDevice(m_device_id));
282  cudaCheck(cudaStreamCreate(&m_stream));
283 #ifdef NAMD_NVTX_ENABLED
284  nvtxNameCuStreamA(m_stream, "CudaGlobalMaster stream");
285 #endif
286  m_atom_map_lists.resize(m_num_devices);
287  m_src_devs.resize(m_num_devices);
288  m_local_records.resize(m_num_devices);
289  m_global_to_local_id.resize(m_num_devices);
290  const int *allDevices = deviceCUDA->allDevices();
291  for (int i = 0; i < m_num_devices; ++i) {
292  const int currentDeviceID = allDevices[i];
293  m_device_id_to_index[currentDeviceID] = i;
294  m_src_devs[i] = currentDeviceID;
295  }
296  if (m_num_devices > 1) {
297  allocatePeerArrays();
298  }
299  // Copy lists
300  m_d_atom_pos_copy_list = nullptr;
301  m_d_atom_total_force_copy_list = nullptr;
302  m_d_forced_atom_copy_list = nullptr;
303  // Client buffers
304  m_d_atom_pos_client_buffers = nullptr;
305  m_atom_total_force_client_buffers = nullptr;
306  m_d_forced_atom_client_buffers = nullptr;
307  m_unique_forced_atoms = false;
308  cudaCheck(cudaSetDevice(savedDevice));
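  // The lattice buffer holds 12 doubles: lattice vectors a, b and c plus the origin, 3 components each.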
321  constexpr int m_h_lattice_size =
322  (sizeof(std::invoke_result_t<decltype(&Lattice::a), Lattice>) +
323  sizeof(std::invoke_result_t<decltype(&Lattice::b), Lattice>) +
324  sizeof(std::invoke_result_t<decltype(&Lattice::c), Lattice>) +
325  sizeof(std::invoke_result_t<decltype(&Lattice::origin), Lattice>)) /
326  sizeof(std::invoke_result_t<decltype(&Vector::operator[]), Vector, int>);
327  m_h_lattice.resize(m_h_lattice_size);
328  // Setup the reductions
329  auto* params = Node::Object()->simParameters;
330  if (!params->CUDASOAintegrateMode) {
331  NAMD_bug("CudaGlobalMasterServer only supports GPU-resident mode");
332  }
333  reductionGpuResident = ReductionMgr::Object()->willSubmit(REDUCTIONS_GPURESIDENT);
334 }
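
// A sketch of the per-step driving sequence, based only on the methods defined
// in this file (the actual call sites live elsewhere in NAMD's GPU-resident
// integration path):
//   server->setStep(step);               // propagate the step to all clients
//   server->communicateToClients(&lat);  // lattice/positions/total forces -> clients
//   server->calculate();                 // run every client's calculation
//   server->communicateToMD(doE, doV);   // applied forces and reductions -> NAMD
//   server->finishReductions();          // submit the reduction data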
335 
336 CudaGlobalMasterServer::~CudaGlobalMasterServer() {
337  if (m_d_atom_pos_copy_list != nullptr) {
338  cudaCheck(cudaFree(m_d_atom_pos_copy_list));
339  m_d_atom_pos_copy_list = nullptr;
340  }
341  if (m_d_atom_pos_client_buffers != nullptr) {
342  cudaCheck(cudaFree(m_d_atom_pos_client_buffers));
343  m_d_atom_pos_client_buffers = nullptr;
344  }
345  if (m_d_atom_total_force_copy_list != nullptr) {
346  cudaCheck(cudaFree(m_d_atom_total_force_copy_list));
347  m_d_atom_total_force_copy_list = nullptr;
348  }
349  if (m_atom_total_force_client_buffers != nullptr) {
350  cudaCheck(cudaFree(m_atom_total_force_client_buffers));
351  m_atom_total_force_client_buffers = nullptr;
352  }
353  if (m_d_forced_atom_copy_list != nullptr) {
354  cudaCheck(cudaFree(m_d_forced_atom_copy_list));
355  m_d_forced_atom_copy_list = nullptr;
356  }
357  if (m_d_forced_atom_client_buffers != nullptr) {
358  cudaCheck(cudaFree(m_d_forced_atom_client_buffers));
359  m_d_forced_atom_client_buffers = nullptr;
360  }
361  if (reductionGpuResident != nullptr) {
362  delete reductionGpuResident;
363  }
364  iout << iINFO << "CudaGlobalMasterServer: destructed on PE " << CkMyPe()
365  << "\n"
366  << endi;
367 }
368 
369 void CudaGlobalMasterServer::addClient(
370  std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client) {
371  auto it = std::find(m_clients.begin(), m_clients.end(), client);
372  if (it == m_clients.end()) {
373  iout << iINFO << "CudaGlobalMasterServer: adding client \""
374  << client->name() << "\"\n"
375  << endi;
376  m_clients.push_back(client);
377  m_clients_changed = CudaGlobalMasterServer::numCopyLists;
378  } else {
379  const std::string error =
380  "The client \"" + client->name() + "\" are being added twice.\n";
381  NAMD_die(error.c_str());
382  }
383 }
384 
385 void CudaGlobalMasterServer::removeClient(
386  std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client) {
387  auto it = std::find(m_clients.begin(), m_clients.end(), client);
388  if (it == m_clients.end()) {
389  iout << iWARN << "CudaGlobalMasterServer: the client \"" << client->name()
390  << "\" is not registered with CudaGlobalMasterServer\n"
391  << endi;
392  }
393  while (it != m_clients.end()) {
394  iout << iINFO << "CudaGlobalMasterServer: removing client \""
395  << client->name() << "\"\n"
396  << endi;
397  m_clients.erase(it);
398  m_clients_changed = CudaGlobalMasterServer::numCopyLists;
399  it = std::find(m_clients.begin(), m_clients.end(), client);
400  }
401 }
402 
403 void CudaGlobalMasterServer::updateAtomMaps() {
404  const int numPes = CkNumPes();
405  // Iterate over all PEs
406 #ifdef DEBUGM
407  DebugM(3, "updateAtomMaps: number of PEs = " + std::to_string(numPes) + "\n");
408 #endif
409  for (int i = 0; i < numPes; ++i) {
410  // Find the device ID of the i-th PE
411  const int peDevice = deviceCUDA->getDeviceIDforPe(i);
412  const int j = m_device_id_to_index.at(peDevice);
413  // Get the atom maps
414  AtomMap *amap = AtomMap::ObjectOnPe(i);
415 #ifdef DEBUGM
416  DebugM(3, "updateAtomMaps: PE " + std::to_string(i) + " atomMap " +
417  ptr_to_str(amap) + " on device " + std::to_string(peDevice) +
418  "\n");
419 #endif
420  m_atom_map_lists[j].push_back(amap);
421  }
422  const bool multi_gpu = m_num_devices > 1;
423  // Iterate over all devices to get the map of global patch ID to local patch
424  // ID
425  // TODO: I assume all devices are used. Is this correct?
426  for (int i = 0; i < m_num_devices; ++i) {
427  const int deviceID = m_src_devs[i];
428  // Get the master PE
429  const int masterPe = deviceCUDA->getMasterPeForDeviceID(deviceID);
430  // Get the corresponding SequencerCUDA instance
431  const SequencerCUDA *sequencer = SequencerCUDA::ObjectOnPe(masterPe);
432  m_global_to_local_id[i] = sequencer->globalToLocalID;
433  m_local_records[i] = sequencer->patchData->devData[deviceID].h_localPatches;
434  if (multi_gpu) {
435  m_h_peer_atom_data.d_pos_x[i] = sequencer->d_pos_x;
436  m_h_peer_atom_data.d_pos_y[i] = sequencer->d_pos_y;
437  m_h_peer_atom_data.d_pos_z[i] = sequencer->d_pos_z;
438  m_h_peer_atom_data.d_vel_x[i] = sequencer->d_vel_x;
439  m_h_peer_atom_data.d_vel_y[i] = sequencer->d_vel_y;
440  m_h_peer_atom_data.d_vel_z[i] = sequencer->d_vel_z;
441  m_h_peer_atom_data.d_mass[i] = sequencer->d_mass;
442  m_h_peer_atom_data.d_charge[i] = sequencer->d_charge;
443  m_h_peer_atom_data.d_transform[i] = sequencer->d_transform;
444 
445  m_h_peer_tf_array.d_f_normal_x[i] = sequencer->d_f_normal_x;
446  m_h_peer_tf_array.d_f_normal_y[i] = sequencer->d_f_normal_y;
447  m_h_peer_tf_array.d_f_normal_z[i] = sequencer->d_f_normal_z;
448  m_h_peer_tf_array.d_f_saved_nbond_x[i] = sequencer->d_f_saved_nbond_x;
449  m_h_peer_tf_array.d_f_saved_nbond_y[i] = sequencer->d_f_saved_nbond_y;
450  m_h_peer_tf_array.d_f_saved_nbond_z[i] = sequencer->d_f_saved_nbond_z;
451  m_h_peer_tf_array.d_f_saved_slow_x[i] = sequencer->d_f_saved_slow_x;
452  m_h_peer_tf_array.d_f_saved_slow_y[i] = sequencer->d_f_saved_slow_y;
453  m_h_peer_tf_array.d_f_saved_slow_z[i] = sequencer->d_f_saved_slow_z;
454  m_h_peer_tf_array.d_atomFixed[i] = sequencer->d_atomFixed;
455 
456  m_h_peer_af_array.d_f_applied_x[i] = sequencer->d_f_global_x;
457  m_h_peer_af_array.d_f_applied_y[i] = sequencer->d_f_global_y;
458  m_h_peer_af_array.d_f_applied_z[i] = sequencer->d_f_global_z;
459  m_h_peer_af_array.d_atomFixed[i] = sequencer->d_atomFixed;
460  }
461 #ifdef DEBUGM
462  DebugM(3, "updateAtomMaps: device " + std::to_string(deviceID) +
463  ", sequencer " + ptr_to_str(sequencer) + "\n");
464 #endif
465  }
466  if (multi_gpu) {
467  copyPeerArraysToDevice();
468  }
469  // Set the counter so that all copy lists get rebuilt
470  m_atom_maps_changed = CudaGlobalMasterServer::numCopyLists;
471 }
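
// After updateAtomMaps(), m_atom_map_lists[k] holds the AtomMap of every PE
// whose patches live on device m_src_devs[k], while m_global_to_local_id[k]
// and m_local_records[k] provide the patch-level indirection consumed by
// queryLocalID(). In multi-GPU runs the m_h_peer_* tables additionally cache
// one raw device pointer per device, so kernels launched on m_device_id can
// gather from peer GPUs via (pointer table)[m_src_dev_index].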
472 
473 void CudaGlobalMasterServer::setStep(int64_t step) {
474  m_step = step;
475  // Update the step number
476  for (auto& client: m_clients) {
477  client->setStep(step);
478  }
479 }
480 
481 void CudaGlobalMasterServer::communicateToClients(const Lattice* lat) {
482  // iout << iINFO << "PE: " << CkMyPe() << ", communicateToClients this address " << this << "\n" << endi;
483  const Vector lat_a = lat->a();
484  const Vector lat_b = lat->b();
485  const Vector lat_c = lat->c();
486  const Vector lat_o = lat->origin();
487  m_h_lattice[0] = lat_a.x;
488  m_h_lattice[1] = lat_a.y;
489  m_h_lattice[2] = lat_a.z;
490  m_h_lattice[3] = lat_b.x;
491  m_h_lattice[4] = lat_b.y;
492  m_h_lattice[5] = lat_b.z;
493  m_h_lattice[6] = lat_c.x;
494  m_h_lattice[7] = lat_c.y;
495  m_h_lattice[8] = lat_c.z;
496  m_h_lattice[9] = lat_o.x;
497  m_h_lattice[10] = lat_o.y;
498  m_h_lattice[11] = lat_o.z;
499  bool b_buildAtomsPositionCopyList = false;
500  bool b_buildAtomsTotalForcesCopyList = false;
501  for (auto&& client: m_clients) {
502  // Check whether any client's set of requested atoms has changed
503  if (client->requestedAtomsChanged()) {
504  b_buildAtomsPositionCopyList = true;
505  }
506  // Check whether any client's set of requested total-force atoms has changed
507  if (client->requestedTotalForcesAtomsChanged()) {
508  b_buildAtomsTotalForcesCopyList = true;
509  }
510  // Copy lattice to client buffers if necessary
511  if (client->requestUpdateLattice()) {
512  copyLatticeToClient(m_h_lattice, m_device_id, client, m_stream);
513  }
514  }
515  // Rebuild the list of atoms to copy if necessary
516  if (b_buildAtomsPositionCopyList || m_atom_maps_changed > 0 ||
517  m_clients_changed > 0) {
518  buildAtomsCopyList();
519  if (m_atom_maps_changed > 0)
520  m_atom_maps_changed--;
521  if (m_clients_changed > 0)
522  m_clients_changed--;
523  }
524  if (b_buildAtomsTotalForcesCopyList || m_atom_maps_changed > 0 ||
525  m_clients_changed > 0) {
526  buildAtomsTotalForcesCopyList();
527  if (m_atom_maps_changed > 0)
528  m_atom_maps_changed--;
529  if (m_clients_changed > 0)
530  m_clients_changed--;
531  }
532  // Check which per-atom quantities any client requests to have updated
533  bool b_copyPositions = false;
534  bool b_copyTotalForces = false;
535  bool b_copyMasses = false;
536  bool b_copyCharges = false;
537  bool b_copyTransforms = false;
538  bool b_copyVelocities = false;
539  for (auto&& client: m_clients) {
540  b_copyPositions |= client->requestUpdateAtomPositions();
541  b_copyTotalForces |= client->requestUpdateAtomTotalForces();
542  b_copyMasses |= client->requestUpdateMasses();
543  b_copyCharges |= client->requestUpdateCharges();
544  b_copyTransforms |= client->requestUpdateTransforms();
545  b_copyVelocities |= client->requestUpdateVelocities();
546  }
547  // Update the atomic positions if necessary
548  if (b_buildAtomsPositionCopyList || b_copyPositions || b_copyMasses ||
549  b_copyCharges || b_copyTransforms || b_copyVelocities) {
550  copyAtomsToClients(b_copyPositions, b_copyMasses, b_copyCharges,
551  b_copyTransforms, b_copyVelocities);
552  }
553  if (b_copyTotalForces || b_buildAtomsTotalForcesCopyList) {
554  copyTotalForcesToClients();
555  }
556  for (auto&& client: m_clients) {
557  client->onBuffersUpdated();
558  }
559 }
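
// Note on the counters: m_atom_maps_changed and m_clients_changed are set to
// numCopyLists whenever the atom maps or the client set change, and every
// rebuilt copy list decrements them, so the position, total-force and
// forced-atom lists (the last one in communicateToMD()) are each rebuilt
// exactly once after such a change.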
560 
561 bool CudaGlobalMasterServer::requestedTotalForces() const {
562  bool result = false;
563  for (auto&& client: m_clients) {
564  result |= (!(client->getRequestedForcedAtoms().empty()) &&
565  client->requestUpdateAtomTotalForces());
566  }
567  return result;
568 }
569 
570 void CudaGlobalMasterServer::buildAtomsCopyList() {
571 #ifdef DEBUGM
572  DebugM(3, "buildAtomsCopyList is called\n");
573 #endif
574  // Save the current device ID
575  int savedDevice;
576  cudaCheck(cudaGetDevice(&savedDevice));
577  cudaCheck(cudaSetDevice(m_device_id));
578  buildCopyList(&CudaGlobalMaster::CudaGlobalMasterClient::getRequestedAtoms,
579  &CudaGlobalMaster::CudaGlobalMasterClient::getPositions, m_clients,
580  m_atom_map_lists, m_local_records, m_global_to_local_id,
581  m_src_devs, m_device_id_to_index, m_d_atom_pos_client_buffers,
582  m_atom_pos_copy_list, m_d_atom_pos_copy_list, m_stream);
583  // cudaCheck(cudaStreamSynchronize(m_stream));
584  cudaCheck(cudaSetDevice(savedDevice));
585 }
586 
587 void CudaGlobalMasterServer::copyAtomsToClients(bool copyPositions,
588  bool copyMasses,
589  bool copyCharges,
590  bool copyTransforms,
591  bool copyVelocities) {
592 #ifdef DEBUGM
593  DebugM(1, "copyAtomsToClients is called\n");
594 #endif
595  NAMD_EVENT_START(1, NamdProfileEvent::CUDAGM_ATOMTOCLIENTS);
596  // Save the current device ID
597  int savedDevice;
598  cudaCheck(cudaGetDevice(&savedDevice));
599  cudaCheck(cudaSetDevice(m_device_id));
600  if (m_num_devices == 1) {
601 #ifdef DEBUGM
602 #ifdef MIN_DEBUG_LEVEL
603 #if MIN_DEBUG_LEVEL <= 1
604  debugCopyList("CudaGlobalMasterServer::copyAtomsToClients",
605  m_atom_pos_copy_list);
606 #endif
607 #endif
608 #endif
609  const int masterPe = deviceCUDA->getMasterPe();
610  const SequencerCUDA *sequencer = SequencerCUDA::ObjectOnPe(masterPe);
611  copyAtomsToClientsCUDA(
612  copyPositions, copyMasses, copyCharges, copyTransforms, copyVelocities,
613  sequencer->d_pos_x, sequencer->d_pos_y, sequencer->d_pos_z,
614  sequencer->d_vel_x, sequencer->d_vel_y, sequencer->d_vel_z,
615  sequencer->d_transform, sequencer->d_mass, sequencer->d_charge,
616  sequencer->myLattice, this->m_d_atom_pos_copy_list,
617  this->m_atom_pos_copy_list.size(), this->m_d_atom_pos_client_buffers,
618  m_clients.size(), m_stream);
619  } else {
620  const int masterPe = deviceCUDA->getMasterPe();
621  const SequencerCUDA *sequencer = SequencerCUDA::ObjectOnPe(masterPe);
622  copyAtomsToClientsCUDAMGPU(
623  copyPositions, copyMasses, copyCharges, copyTransforms, copyVelocities,
624  (const double **)m_h_peer_atom_data.d_pos_x,
625  (const double **)m_h_peer_atom_data.d_pos_y,
626  (const double **)m_h_peer_atom_data.d_pos_z,
627  (const double **)m_h_peer_atom_data.d_vel_x,
628  (const double **)m_h_peer_atom_data.d_vel_y,
629  (const double **)m_h_peer_atom_data.d_vel_z,
630  (const char3 **)m_h_peer_atom_data.d_transform,
631  (const float **)m_h_peer_atom_data.d_mass,
632  (const float **)m_h_peer_atom_data.d_charge, sequencer->myLattice,
633  this->m_d_atom_pos_copy_list, this->m_atom_pos_copy_list.size(),
634  this->m_d_atom_pos_client_buffers, m_clients.size(), m_stream);
635  }
636  // cudaCheck(cudaStreamSynchronize(m_stream));
637  cudaCheck(cudaSetDevice(savedDevice));
638  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDAGM_ATOMTOCLIENTS);
639 }
640 
641 void CudaGlobalMasterServer::copyTotalForcesToClients() {
642 #ifdef DEBUGM
643  DebugM(1, "copyTotalForcesToClients is called\n");
644 #endif
646  NAMD_EVENT_START(1, NamdProfileEvent::CUDAGM_TOTALFORCETOCLIENTS);
647  // Save the current device ID
648  int savedDevice;
649  cudaCheck(cudaGetDevice(&savedDevice));
650  cudaCheck(cudaSetDevice(m_device_id));
651  if (m_num_devices == 1) {
652 #ifdef DEBUGM
653 #ifdef MIN_DEBUG_LEVEL
654 #if MIN_DEBUG_LEVEL <= 1
655  debugCopyList("CudaGlobalMasterServer::copyTotalForcesToClients",
656  m_atom_total_force_copy_list);
657 #endif
658 #endif
659 #endif
660  const int masterPe = deviceCUDA->getMasterPe();
661  const SequencerCUDA *sequencer = SequencerCUDA::ObjectOnPe(masterPe);
662  copyTotalForcesToClientsCUDA(
663  simParams->fixedAtomsOn, sequencer->d_f_normal_x,
664  sequencer->d_f_normal_y, sequencer->d_f_normal_z,
665  sequencer->d_f_saved_nbond_x, sequencer->d_f_saved_nbond_y,
666  sequencer->d_f_saved_nbond_z, sequencer->d_f_saved_slow_x,
667  sequencer->d_f_saved_slow_y, sequencer->d_f_saved_slow_z,
668  sequencer->d_atomFixed, this->m_d_atom_total_force_copy_list,
669  this->m_atom_total_force_copy_list.size(),
670  this->m_atom_total_force_client_buffers, m_clients.size(), m_stream);
671  } else {
672  copyTotalForcesToClientsCUDAMGPU(
673  simParams->fixedAtomsOn,
674  (const double **)m_d_peer_tf_array.d_f_normal_x,
675  (const double **)m_d_peer_tf_array.d_f_normal_y,
676  (const double **)m_d_peer_tf_array.d_f_normal_z,
677  (const double **)m_d_peer_tf_array.d_f_saved_nbond_x,
678  (const double **)m_d_peer_tf_array.d_f_saved_nbond_y,
679  (const double **)m_d_peer_tf_array.d_f_saved_nbond_z,
680  (const double **)m_d_peer_tf_array.d_f_saved_slow_x,
681  (const double **)m_d_peer_tf_array.d_f_saved_slow_y,
682  (const double **)m_d_peer_tf_array.d_f_saved_slow_z,
683  (const int **)m_d_peer_tf_array.d_atomFixed,
684  this->m_d_atom_total_force_copy_list,
685  this->m_atom_total_force_copy_list.size(),
686  this->m_atom_total_force_client_buffers, m_clients.size(), m_stream);
687  }
688  // cudaCheck(cudaStreamSynchronize(m_stream));
689  cudaCheck(cudaSetDevice(savedDevice));
690  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDAGM_TOTALFORCETOCLIENTS);
691 }
692 
693 void CudaGlobalMasterServer::buildAtomsTotalForcesCopyList() {
694 #ifdef DEBUGM
695  DebugM(3, "buildAtomsTotalForcesCopyList is called\n");
696 #endif
697  // Save the current device ID
698  int savedDevice;
699  cudaCheck(cudaGetDevice(&savedDevice));
700  cudaCheck(cudaSetDevice(m_device_id));
701  buildCopyList(&CudaGlobalMaster::CudaGlobalMasterClient::getRequestedTotalForcesAtoms,
702  &CudaGlobalMaster::CudaGlobalMasterClient::getTotalForces, m_clients,
703  m_atom_map_lists, m_local_records, m_global_to_local_id,
704  m_src_devs, m_device_id_to_index,
705  m_atom_total_force_client_buffers, m_atom_total_force_copy_list,
706  m_d_atom_total_force_copy_list, m_stream);
707  cudaCheck(cudaSetDevice(savedDevice));
708 }
709 
710 void CudaGlobalMasterServer::buildForcedAtomsCopyList() {
711 #ifdef DEBUGM
712  DebugM(3, "buildForcedAtomsCopyList is called\n");
713 #endif
714  // Save the current device ID
715  int savedDevice;
716  cudaCheck(cudaGetDevice(&savedDevice));
717  cudaCheck(cudaSetDevice(m_device_id));
718  buildCopyList(&CudaGlobalMaster::CudaGlobalMasterClient::getRequestedForcedAtoms,
719  &CudaGlobalMaster::CudaGlobalMasterClient::getAppliedForces, m_clients,
720  m_atom_map_lists, m_local_records, m_global_to_local_id,
721  m_src_devs, m_device_id_to_index,
722  m_d_forced_atom_client_buffers, m_forced_atom_copy_list,
723  m_d_forced_atom_copy_list, m_stream, &m_unique_forced_atoms);
724  cudaCheck(cudaSetDevice(savedDevice));
725 }
726 
727 void CudaGlobalMasterServer::calculate() {
728  NAMD_EVENT_START(1, NamdProfileEvent::CUDAGM_CALCULATECLIENTS);
729  for (auto&& client: m_clients) {
730  client->calculate();
731  }
732  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDAGM_CALCULATECLIENTS);
733 }
734 
735 void CudaGlobalMasterServer::communicateToMD(bool doEnergy, bool doVirial) {
736  // iout << iINFO << "PE: " << CkMyPe() << ", communicateToMD this address " << this << "\n" << endi;
737 #ifdef DEBUGM
738  DebugM(1, "Calling communicateToMD at step " + std::to_string(m_step));
739 #endif
740  // Check whether any client's set of forced atoms has changed
741  bool b_buildForcedAtomsCopyList = false;
742  for (auto&& client: m_clients) {
743  if (client->requestedForcedAtomsChanged()) {
744  b_buildForcedAtomsCopyList = true;
745  }
746  }
747  if (b_buildForcedAtomsCopyList || m_atom_maps_changed > 0 ||
748  m_clients_changed > 0) {
749  buildForcedAtomsCopyList();
750  if (m_atom_maps_changed > 0)
751  m_atom_maps_changed--;
752  if (m_clients_changed > 0)
753  m_clients_changed--;
754  }
755  bool b_copyForcedAtoms = false;
756  for (auto&& client: m_clients) {
757  if (client->requestUpdateForcedAtoms()) {
758  b_copyForcedAtoms = true;
759  }
760  }
761  NAMD_EVENT_START(1, NamdProfileEvent::CUDAGM_ADDGLOBALFORCES);
762  if (b_copyForcedAtoms) {
763  addGlobalForces();
764  }
765  auto* currentReduction = getCurrentReduction();
766  for (auto& client: m_clients) {
767  client->finishReductions(doEnergy, doVirial, currentReduction);
768  }
769  cudaCheck(cudaStreamSynchronize(m_stream));
770  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDAGM_ADDGLOBALFORCES);
771 }
772 
773 bool CudaGlobalMasterServer::willAddGlobalForces() const {
774  bool result = false;
775  for (auto&& client: m_clients) {
776  result |= (!(client->getRequestedForcedAtoms().empty()) &&
777  client->requestUpdateForcedAtoms());
778  }
779  return result;
780 }
781 
782 void CudaGlobalMasterServer::addGlobalForces() {
783 #ifdef DEBUGM
784  DebugM(1, "Calling addGlobalForces at step " + std::to_string(m_step));
785 #endif
787  // Save the current device ID
788  int savedDevice;
789  cudaCheck(cudaGetDevice(&savedDevice));
790  cudaCheck(cudaSetDevice(m_device_id));
791  if (m_num_devices == 1) {
792  const int masterPe = deviceCUDA->getMasterPe();
793  const SequencerCUDA *sequencer = SequencerCUDA::ObjectOnPe(masterPe);
794  addGlobalForcesFromClients(
795  simParams->fixedAtomsOn, m_unique_forced_atoms, sequencer->d_f_global_x,
796  sequencer->d_f_global_y, sequencer->d_f_global_z,
797  sequencer->d_atomFixed, this->m_d_forced_atom_copy_list,
798  this->m_forced_atom_copy_list.size(),
799  this->m_d_forced_atom_client_buffers, m_clients.size(), m_stream);
800  } else {
801  addGlobalForcesFromClientsMGPU(
802  simParams->fixedAtomsOn, m_unique_forced_atoms, m_d_peer_af_array.d_f_applied_x,
803  m_d_peer_af_array.d_f_applied_y, m_d_peer_af_array.d_f_applied_z,
804  (const int **)m_d_peer_af_array.d_atomFixed,
805  this->m_d_forced_atom_copy_list, this->m_forced_atom_copy_list.size(),
806  this->m_d_forced_atom_client_buffers, m_clients.size(), m_stream);
807  }
808  // cudaCheck(cudaStreamSynchronize(m_stream));
809  cudaCheck(cudaSetDevice(savedDevice));
810 }
811 
812 void CudaGlobalMasterServer::allocatePeerArrays() {
813 #ifdef DEBUGM
814  DebugM(3, "CudaGlobalMasterServer::allocatePeerArrays");
815 #endif
816  allocate_host<double *>(&(m_h_peer_atom_data.d_pos_x), m_num_devices);
817  allocate_host<double *>(&(m_h_peer_atom_data.d_pos_y), m_num_devices);
818  allocate_host<double *>(&(m_h_peer_atom_data.d_pos_z), m_num_devices);
819  allocate_host<double *>(&(m_h_peer_atom_data.d_vel_x), m_num_devices);
820  allocate_host<double *>(&(m_h_peer_atom_data.d_vel_y), m_num_devices);
821  allocate_host<double *>(&(m_h_peer_atom_data.d_vel_z), m_num_devices);
822  allocate_host<float *>(&(m_h_peer_atom_data.d_mass), m_num_devices);
823  allocate_host<float *>(&(m_h_peer_atom_data.d_charge), m_num_devices);
824  allocate_host<char3 *>(&(m_h_peer_atom_data.d_transform), m_num_devices);
825 
826  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_normal_x), m_num_devices);
827  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_normal_y), m_num_devices);
828  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_normal_z), m_num_devices);
829  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_saved_nbond_x),
830  m_num_devices);
831  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_saved_nbond_y),
832  m_num_devices);
833  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_saved_nbond_z),
834  m_num_devices);
835  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_saved_slow_x),
836  m_num_devices);
837  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_saved_slow_y),
838  m_num_devices);
839  allocate_host<tf_type *>(&(m_h_peer_tf_array.d_f_saved_slow_z),
840  m_num_devices);
841  allocate_host<int *>(&(m_h_peer_tf_array.d_atomFixed), m_num_devices);
842 
843  allocate_host<tf_type *>(&(m_h_peer_af_array.d_f_applied_x), m_num_devices);
844  allocate_host<tf_type *>(&(m_h_peer_af_array.d_f_applied_y), m_num_devices);
845  allocate_host<tf_type *>(&(m_h_peer_af_array.d_f_applied_z), m_num_devices);
846  allocate_host<int *>(&(m_h_peer_af_array.d_atomFixed), m_num_devices);
847 
848  // Save the current device ID
849  int savedDevice;
850  cudaCheck(cudaGetDevice(&savedDevice));
851  cudaCheck(cudaSetDevice(m_device_id));
852  allocate_device<double *>(&(m_d_peer_atom_data.d_pos_x), m_num_devices);
853  allocate_device<double *>(&(m_d_peer_atom_data.d_pos_y), m_num_devices);
854  allocate_device<double *>(&(m_d_peer_atom_data.d_pos_z), m_num_devices);
855  allocate_device<double *>(&(m_d_peer_atom_data.d_vel_x), m_num_devices);
856  allocate_device<double *>(&(m_d_peer_atom_data.d_vel_y), m_num_devices);
857  allocate_device<double *>(&(m_d_peer_atom_data.d_vel_z), m_num_devices);
858  allocate_device<float *>(&(m_d_peer_atom_data.d_mass), m_num_devices);
859  allocate_device<float *>(&(m_d_peer_atom_data.d_charge), m_num_devices);
860  allocate_device<char3 *>(&(m_d_peer_atom_data.d_transform), m_num_devices);
861 
862  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_normal_x), m_num_devices);
863  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_normal_y), m_num_devices);
864  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_normal_z), m_num_devices);
865  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_saved_nbond_x),
866  m_num_devices);
867  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_saved_nbond_y),
868  m_num_devices);
869  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_saved_nbond_z),
870  m_num_devices);
871  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_saved_slow_x),
872  m_num_devices);
873  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_saved_slow_y),
874  m_num_devices);
875  allocate_device<tf_type *>(&(m_d_peer_tf_array.d_f_saved_slow_z),
876  m_num_devices);
877  allocate_device<int *>(&(m_d_peer_tf_array.d_atomFixed), m_num_devices);
878 
879  allocate_device<tf_type *>(&(m_d_peer_af_array.d_f_applied_x), m_num_devices);
880  allocate_device<tf_type *>(&(m_d_peer_af_array.d_f_applied_y), m_num_devices);
881  allocate_device<tf_type *>(&(m_d_peer_af_array.d_f_applied_z), m_num_devices);
882  allocate_device<int *>(&(m_d_peer_af_array.d_atomFixed), m_num_devices);
883  cudaCheck(cudaSetDevice(savedDevice));
884 }
885 
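// The m_h_peer_* structures are host-side tables of raw device pointers, one
// entry per device; copyPeerArraysToDevice() mirrors them into the m_d_peer_*
// tables on m_device_id so that the multi-GPU kernels receive a single
// device-resident pointer table indexed by CopyListTuple::m_src_dev_index.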
886 void CudaGlobalMasterServer::copyPeerArraysToDevice() {
887 #ifdef DEBUGM
888  DebugM(3, "CudaGlobalMasterServer::copyPeerArraysToDevice");
889 #endif
890  // Save the current device ID
891  int savedDevice;
892  cudaCheck(cudaGetDevice(&savedDevice));
893  cudaCheck(cudaSetDevice(m_device_id));
894  copy_HtoD(m_h_peer_atom_data.d_pos_x, m_d_peer_atom_data.d_pos_x,
895  m_num_devices, m_stream);
896  copy_HtoD(m_h_peer_atom_data.d_pos_y, m_d_peer_atom_data.d_pos_y,
897  m_num_devices, m_stream);
898  copy_HtoD(m_h_peer_atom_data.d_pos_z, m_d_peer_atom_data.d_pos_z,
899  m_num_devices, m_stream);
900  copy_HtoD(m_h_peer_atom_data.d_vel_x, m_d_peer_atom_data.d_vel_x,
901  m_num_devices, m_stream);
902  copy_HtoD(m_h_peer_atom_data.d_vel_y, m_d_peer_atom_data.d_vel_y,
903  m_num_devices, m_stream);
904  copy_HtoD(m_h_peer_atom_data.d_vel_z, m_d_peer_atom_data.d_vel_z,
905  m_num_devices, m_stream);
906  copy_HtoD(m_h_peer_atom_data.d_mass, m_d_peer_atom_data.d_mass, m_num_devices,
907  m_stream);
908  copy_HtoD(m_h_peer_atom_data.d_charge, m_d_peer_atom_data.d_charge,
909  m_num_devices, m_stream);
910  copy_HtoD(m_h_peer_atom_data.d_transform, m_d_peer_atom_data.d_transform,
911  m_num_devices, m_stream);
912 
913  copy_HtoD(m_h_peer_tf_array.d_f_normal_x, m_d_peer_tf_array.d_f_normal_x,
914  m_num_devices, m_stream);
915  copy_HtoD(m_h_peer_tf_array.d_f_normal_y, m_d_peer_tf_array.d_f_normal_y,
916  m_num_devices, m_stream);
917  copy_HtoD(m_h_peer_tf_array.d_f_normal_z, m_d_peer_tf_array.d_f_normal_z,
918  m_num_devices, m_stream);
919  copy_HtoD(m_h_peer_tf_array.d_f_saved_nbond_x,
920  m_d_peer_tf_array.d_f_saved_nbond_x, m_num_devices, m_stream);
921  copy_HtoD(m_h_peer_tf_array.d_f_saved_nbond_y,
922  m_d_peer_tf_array.d_f_saved_nbond_y, m_num_devices, m_stream);
923  copy_HtoD(m_h_peer_tf_array.d_f_saved_nbond_z,
924  m_d_peer_tf_array.d_f_saved_nbond_z, m_num_devices, m_stream);
925  copy_HtoD(m_h_peer_tf_array.d_f_saved_slow_x,
926  m_d_peer_tf_array.d_f_saved_slow_x, m_num_devices, m_stream);
927  copy_HtoD(m_h_peer_tf_array.d_f_saved_slow_y,
928  m_d_peer_tf_array.d_f_saved_slow_y, m_num_devices, m_stream);
929  copy_HtoD(m_h_peer_tf_array.d_f_saved_slow_z,
930  m_d_peer_tf_array.d_f_saved_slow_z, m_num_devices, m_stream);
931  copy_HtoD(m_h_peer_tf_array.d_atomFixed, m_d_peer_tf_array.d_atomFixed,
932  m_num_devices, m_stream);
933 
934  copy_HtoD(m_h_peer_af_array.d_f_applied_x, m_d_peer_af_array.d_f_applied_x,
935  m_num_devices, m_stream);
936  copy_HtoD(m_h_peer_af_array.d_f_applied_y, m_d_peer_af_array.d_f_applied_y,
937  m_num_devices, m_stream);
938  copy_HtoD(m_h_peer_af_array.d_f_applied_z, m_d_peer_af_array.d_f_applied_z,
939  m_num_devices, m_stream);
940  copy_HtoD(m_h_peer_af_array.d_atomFixed, m_d_peer_af_array.d_atomFixed,
941  m_num_devices, m_stream);
942 
943  // cudaCheck(cudaStreamSynchronize(m_stream));
944  cudaCheck(cudaSetDevice(savedDevice));
945 }
946 
947 void CudaGlobalMasterServer::finishReductions() {
948 #ifdef DEBUGM
949  DebugM(1, "Calling finishReductions at step " + std::to_string(m_step));
950 #endif
951  // CkPrintf("Calling CudaGlobalMasterServer::finishReductions\n");
952  auto* currentReduction = getCurrentReduction();
953  currentReduction->submit();
954 }
955 
956 #else
957 
958 CudaGlobalMasterServer::CudaGlobalMasterServer(
959  int deviceID, int printProfilingFreq /* = -1 */
960 ) {
961  NAMD_die(
962  "CudaGlobalMasterServer requires to build NAMD with CUDA support.\n");
963 }
964 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(NODEGROUP_FORCE_REGISTER)