NAMD
ComputeMgr.C
1 /**
2 ***  Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by
3 ***  The Board of Trustees of the University of Illinois.
4 ***  All rights reserved.
5 **/
6 
7 #include "SimParameters.h"
8 #include "dlloader.h"
10 #include "InfoStream.h"
11 #include "ProcessorPrivate.h"
12 #include "middle-conv.h"
13 
14 //#define DEBUGM
15 #define MIN_DEBUG_LEVEL 3
16 #include "Debug.h"
17 
18 #include "BOCgroup.h"
19 #include "ComputeMgr.decl.h"
20 #include "ComputeMgr.h"
21 #include "ProxyMgr.decl.h"
22 #include "ProxyMgr.h"
23 
24 #include "Node.h"
25 #include "ComputeMap.h"
26 #include "PatchMap.h"
27 #include "PatchMap.inl"
28 
29 #include "Compute.h"
30 #include "ComputeNonbondedUtil.h"
31 #include "ComputeNonbondedSelf.h"
32 #include "ComputeNonbondedPair.h"
33 #include "ComputeAngles.h"
34 #include "ComputeDihedrals.h"
35 #include "ComputeImpropers.h"
36 #include "ComputeThole.h"
37 #include "ComputeAniso.h"
38 #include "ComputeCrossterms.h"
39 #include "ComputeOneFourNbTholes.h"
40 // JLai
41 #include "ComputeGromacsPair.h"
42 #include "ComputeBonds.h"
43 #include "ComputeNonbondedCUDAExcl.h"
44 #include "ComputeFullDirect.h"
45 #include "ComputeGlobal.h"
46 #include "ComputeGlobalMsgs.h"
47 #include "ComputeExt.h"
48 #include "ComputeQM.h"
49 #include "ComputeGBISser.h"
50 #include "ComputeLCPO.h"
51 #include "ComputeFmmSerial.h"
52 #include "ComputeMsmSerial.h"
53 #include "ComputeLjPmeSerial.h"
54 #include "ComputeMsmMsa.h"
55 #include "ComputeMsm.h"
56 #include "ComputeDPMTA.h"
57 #include "ComputeDPME.h"
58 #include "ComputeDPMEMsgs.h"
59 #include "ComputePme.h"
60 // #ifdef NAMD_CUDA
61 #include "ComputePmeCUDA.h"
62 #include "ComputeCUDAMgr.h"
63 #include "CudaComputeNonbonded.h"
64 #include "ComputePmeCUDAMgr.h"
65 // #endif
66 #include "ComputeEwald.h"
67 #include "ComputeEField.h"
68 /* BEGIN gf */
69 #include "ComputeGridForce.h"
70 /* END gf */
71 #include "ComputeStir.h"
72 #include "ComputeSphericalBC.h"
73 #include "ComputeCylindricalBC.h"
74 #include "ComputeTclBC.h"
75 #include "ComputeRestraints.h"
76 #include "ComputeConsForce.h"
77 #include "ComputeConsForceMsgs.h"
78 #include "WorkDistrib.h"
79 
80 #include "LdbCoordinator.h"
81 
82 /* include all of the specific masters we need here */
83 #include "FreeEnergyEnums.h"
84 #include "FreeEnergyAssert.h"
85 #include "FreeEnergyGroup.h"
86 #include "FreeEnergyVector.h"
87 #include "FreeEnergyRestrain.h"
88 #include "FreeEnergyRMgr.h"
89 #include "FreeEnergyLambda.h"
90 #include "FreeEnergyLambdMgr.h"
91 
92 #include "GlobalMasterTest.h"
93 #include "GlobalMasterIMD.h"
94 #include "GlobalMasterTcl.h"
95 #include "GlobalMasterSMD.h"
96 #include "GlobalMasterTMD.h"
97 #include "GlobalMasterSymmetry.h"
98 #include "GlobalMasterEasy.h"
99 #include "GlobalMasterMisc.h"
100 #include "GlobalMasterFreeEnergy.h"
101 #include "GlobalMasterColvars.h"
102 
103 #include "PatchData.h"
104 #include "NamdEventsProfiling.h"
105 #include "DeviceCUDA.h"
106 
107 #include "CudaGlobalMasterServer.h"
108 #include "strlib.h"
109 
110 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
111 #ifdef WIN32
112 #define __thread __declspec(thread)
113 #endif
114 extern __thread DeviceCUDA *deviceCUDA;
115 #endif
116 
117 ComputeMgr::ComputeMgr()
118 {
119  CkpvAccess(BOCclass_group).computeMgr = thisgroup;
120  computeGlobalObject = 0;
121  computeGlobalResultsMsgSeq = -1;
122  computeGlobalResultsMsgMasterSeq = -1;
123  computeDPMEObject = 0;
124  computeEwaldObject = 0;
125  computeNonbondedWorkArrays = new ComputeNonbondedWorkArrays;
126  skipSplitting = 0;
127  masterServerObject = NULL;
128 }
129 
130 ComputeMgr::~ComputeMgr(void)
131 {
132  delete computeNonbondedWorkArrays;
133  if (masterServerObject != NULL) delete masterServerObject;
134  for (auto& loader: CudaGlobalMasterClientDlloaders) {
135  if (loader) {
136  iout << iINFO << "Close library " << loader->LibName() << "\n" << endi;
137  loader->DLCloseLib();
138  }
139  }
140 }
141 
142 void ComputeMgr::updateComputes(int ep, CkGroupID chareID)
143 {
144  updateComputesReturnEP = ep;
145  updateComputesReturnChareID = chareID;
146  updateComputesCount = CkNumPes();
147 
148  if (CkMyPe())
149  {
150  NAMD_bug("updateComputes signaled on wrong Pe!");
151  }
152 
153  CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0),&thishandle);
154 }
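// Rough sketch of the compute-map update chain as wired below:
// updateComputes() (PE 0 only) -> quiescence -> updateComputes2() ->
// WorkDistrib::saveComputeMapChanges() -> updateComputes3() ->
// splitComputes()/splitComputes2() -> updateLocalComputes()..updateLocalComputes5()
// -> doneUpdateLocalComputes(), which finally sends a zero-length message
// back to the entry point recorded in updateComputesReturnEP.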
155 
156 void ComputeMgr::updateComputes2(CkQdMsg *msg)
157 {
158  delete msg;
159 
160  CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
161  WorkDistrib *workDistrib = wd.ckLocalBranch();
162  workDistrib->saveComputeMapChanges(CkIndex_ComputeMgr::updateComputes3(),thisgroup);
163 }
164 
165 void ComputeMgr::updateComputes3()
166 {
167  if ( skipSplitting ) {
168  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
169  } else {
170  CProxy_ComputeMgr(thisgroup).splitComputes();
171  skipSplitting = 1;
172  }
173 }
174 
175 void ComputeMgr::splitComputes()
176 {
177  if ( ! CkMyRank() ) {
178  ComputeMap *computeMap = ComputeMap::Object();
179  const int nc = computeMap->numComputes();
180 
181  for (int i=0; i<nc; i++) {
182  int nnp = computeMap->newNumPartitions(i);
183  if ( nnp > 0 ) {
184  if ( computeMap->numPartitions(i) != 1 ) {
185  CkPrintf("Warning: unable to partition compute %d\n", i);
186  computeMap->setNewNumPartitions(i,0);
187  continue;
188  }
189  //CkPrintf("splitting compute %d by %d\n",i,nnp);
190  computeMap->setNumPartitions(i,nnp);
191  if (computeMap->newNode(i) == -1) {
192  computeMap->setNewNode(i,computeMap->node(i));
193  }
194  for ( int j=1; j<nnp; ++j ) {
195  int newcid = computeMap->cloneCompute(i,j);
196  //CkPrintf("compute %d partition %d is %d\n",i,j,newcid);
197  }
198  }
199  }
200  computeMap->extendPtrs();
201  }
202 
203  if (!CkMyPe())
204  {
205  CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
206  }
207 }
208 
209 void ComputeMgr::splitComputes2(CkQdMsg *msg)
210 {
211  delete msg;
212  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
213 }
214 
215 void ComputeMgr::updateLocalComputes()
216 {
217  ComputeMap *computeMap = ComputeMap::Object();
218  CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
219  ProxyMgr *proxyMgr = pm.ckLocalBranch();
220  LdbCoordinator *ldbCoordinator = LdbCoordinator::Object();
221 
222  computeFlag.resize(0);
223 
224  const int nc = computeMap->numComputes();
225  for (int i=0; i<nc; i++) {
226 
227  if ( computeMap->node(i) == CkMyPe() &&
228  computeMap->newNumPartitions(i) > 1 ) {
229  Compute *c = computeMap->compute(i);
230  ldbCoordinator->Migrate(c->ldObjHandle,CkMyPe());
231  delete c;
232  computeMap->registerCompute(i,NULL);
233  if ( computeMap->newNode(i) == CkMyPe() ) computeFlag.add(i);
234  } else
235  if (computeMap->newNode(i) == CkMyPe() && computeMap->node(i) != CkMyPe())
236  {
237  computeFlag.add(i);
238  for (int n=0; n < computeMap->numPids(i); n++)
239  {
240  proxyMgr->createProxy(computeMap->pid(i,n));
241  }
242  }
243  else if (computeMap->node(i) == CkMyPe() &&
244  (computeMap->newNode(i) != -1 && computeMap->newNode(i) != CkMyPe() ))
245  {
246  // CkPrintf("delete compute %d on pe %d\n",i,CkMyPe());
247  delete computeMap->compute(i);
248  computeMap->registerCompute(i,NULL);
249  }
250  }
251 
252  if (!CkMyPe())
253  {
254  CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
255  }
256 }
257 
258 void
259 ComputeMgr::updateLocalComputes2(CkQdMsg *msg)
260 {
261  delete msg;
262  CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
263 }
264 
265 void
266 ComputeMgr::updateLocalComputes3()
267 {
268  ComputeMap *computeMap = ComputeMap::Object();
269  CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
270  ProxyMgr *proxyMgr = pm.ckLocalBranch();
271 
273 
274  const int nc = computeMap->numComputes();
275 
276  if ( ! CkMyRank() ) {
277  for (int i=0; i<nc; i++) {
278  computeMap->setNewNumPartitions(i,0);
279  if (computeMap->newNode(i) != -1) {
280  computeMap->setNode(i,computeMap->newNode(i));
281  computeMap->setNewNode(i,-1);
282  }
283  }
284  }
285 
286  for(int i=0; i<computeFlag.size(); i++) createCompute(computeFlag[i], computeMap);
287  computeFlag.clear();
288 
289  proxyMgr->removeUnusedProxies();
290 
291  if (!CkMyPe())
292  {
293  CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
294  }
295 }
296 
297 void
298 ComputeMgr::updateLocalComputes4(CkQdMsg *msg)
299 {
300  delete msg;
301  CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
302 
303  // store the latest compute map
304  SimParameters *simParams = Node::Object()->simParameters;
305  if (simParams->storeComputeMap) {
306  ComputeMap *computeMap = ComputeMap::Object();
307  computeMap->saveComputeMap(simParams->computeMapFilename);
308  }
309 }
310 
311 #if 0
312 int firstphase = 1;
313 #endif
314 
315 void
316 ComputeMgr::updateLocalComputes5()
317 {
318  if ( ! CkMyRank() ) {
319  ComputeMap::Object()->checkMap();
320  PatchMap::Object()->checkMap();
321  }
322 
323  // we always use the centralized building of spanning tree
324  // distributed building of ST called in Node.C only
325  if (proxySendSpanning || proxyRecvSpanning)
326  ProxyMgr::Object()->buildProxySpanningTree2();
327 
328  // this code needs to be turned on if we want to
329  // shift the creation of ST to the load balancer
330 
331 #if 0
332  if (proxySendSpanning || proxyRecvSpanning)
333  {
334  if (firstphase)
335  ProxyMgr::Object()->buildProxySpanningTree2();
336  else
337  if (CkMyPe() == 0)
338  ProxyMgr::Object()->sendSpanningTrees();
339 
340  firstphase = 0;
341  }
342 #endif
343 
344  if (!CkMyPe())
345  CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
346 }
347 
348 void ComputeMgr::doneUpdateLocalComputes()
349 {
350 
351 // if (!--updateComputesCount) {
352  DebugM(4, "doneUpdateLocalComputes on Pe("<<CkMyPe()<<")\n");
353  void *msg = CkAllocMsg(0,0,0);
354  CkSendMsgBranch(updateComputesReturnEP,msg,0,updateComputesReturnChareID);
355 // }
356 }
357 
358 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
359 // Helper functions for creating and getting pointers to CUDA computes
360 CudaComputeNonbonded* createCudaComputeNonbonded(ComputeID c, ComputeMgr* computeMgr) {
361  return ComputeCUDAMgr::getComputeCUDAMgr()->createCudaComputeNonbonded(c, computeMgr);
362 }
363 
364 CudaComputeNonbonded* getCudaComputeNonbonded() {
365  return ComputeCUDAMgr::getComputeCUDAMgr()->getCudaComputeNonbonded();
366 }
367 
368 #ifdef BONDED_CUDA
369 ComputeBondedCUDA* getComputeBondedCUDA() {
370  return ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
371 }
372 
373 ComputeBondedCUDA* createComputeBondedCUDA(ComputeID c, ComputeMgr* computeMgr) {
374  return ComputeCUDAMgr::getComputeCUDAMgr()->createComputeBondedCUDA(c, computeMgr);
375 }
376 #endif
377 #endif
378 
379 //
380 void
381 ComputeMgr::createCompute(ComputeID i, ComputeMap *map)
382 {
383  Compute *c;
384  PatchID pid2[2];
385  PatchIDList pids;
386  int trans2[2];
387  SimParameters *simParams = Node::Object()->simParameters;
388 
389  PatchID pid8[8];
390  int trans8[8];
391 #ifdef NODEGROUP_FORCE_REGISTER
392  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
393  PatchData *patchData = cpdata.ckLocalBranch();
394  suspendCounter=&(patchData->suspendCounter);
395 #endif
396 
397  switch ( map->type(i) )
398  {
399  case computeNonbondedSelfType:
400 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
401  getCudaComputeNonbonded()->registerComputeSelf(i, map->computeData[i].pids[0].pid);
402 #else
403  c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
404  computeNonbondedWorkArrays,
405  map->partition(i),map->partition(i)+1,
406  map->numPartitions(i)); // unknown delete
407  map->registerCompute(i,c);
408  c->initialize();
409 #endif
410  break;
411  case computeLCPOType:
412  for (int j = 0; j < 8; j++) {
413  pid8[j] = map->computeData[i].pids[j].pid;
414  trans8[j] = map->computeData[i].pids[j].trans;
415  }
416  c = new ComputeLCPO(i,pid8,trans8,
417  computeNonbondedWorkArrays,
418  map->partition(i),map->partition(i)+1,
419  map->numPartitions(i), 8);
420  map->registerCompute(i,c);
421  c->initialize();
422 
423  break;
424  case computeNonbondedPairType:
425  pid2[0] = map->computeData[i].pids[0].pid;
426  trans2[0] = map->computeData[i].pids[0].trans;
427  pid2[1] = map->computeData[i].pids[1].pid;
428  trans2[1] = map->computeData[i].pids[1].trans;
429 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
430  getCudaComputeNonbonded()->registerComputePair(i, pid2, trans2);
431 #else
432  c = new ComputeNonbondedPair(i,pid2,trans2,
433  computeNonbondedWorkArrays,
434  map->partition(i),map->partition(i)+1,
435  map->numPartitions(i)); // unknown delete
436  map->registerCompute(i,c);
437  c->initialize();
438 #endif
439  break;
440 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
441  case computeNonbondedCUDA2Type:
442  c = createCudaComputeNonbonded(i, this);
443  map->registerCompute(i,c);
444  // NOTE: initialize() is called at the end of createComputes(),
445  // after all computes have been created
446  //c->initialize();
447  break;
448 #ifdef BONDED_CUDA
449  case computeBondedCUDAType:
450  c = createComputeBondedCUDA(i, this);
451  map->registerCompute(i,c);
452  break;
453 #endif
454 #endif
455  case computeExclsType:
456 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
457  if (simParams->bondedCUDA & NAMD_BONDEDGPU_EXCLS)
458  {
459  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
460  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
461  } else
462 #endif
463  {
464  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
465  c = new ComputeExcls(i,pids); // unknown delete
466  map->registerCompute(i,c);
467  c->initialize();
468  }
469  break;
470  case computeBondsType:
471 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
472  if (simParams->bondedCUDA & NAMD_BONDEDGPU_BONDS)
473  {
474  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
475  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
476  } else
477 #endif
478  {
479  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
480  c = new ComputeBonds(i,pids); // unknown delete
481  map->registerCompute(i,c);
482  c->initialize();
483  }
484  break;
485  case computeAnglesType:
486 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
487  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ANGLES)
488  {
489  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
490  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
491  } else
492 #endif
493  {
494  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
495  c = new ComputeAngles(i,pids); // unknown delete
496  map->registerCompute(i,c);
497  c->initialize();
498  }
499  break;
500  case computeDihedralsType:
501 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
502  if (simParams->bondedCUDA & NAMD_BONDEDGPU_DIHEDRALS)
503  {
504  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
505  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
506  } else
507 #endif
508  {
509  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
510  c = new ComputeDihedrals(i,pids); // unknown delete
511  map->registerCompute(i,c);
512  c->initialize();
513  }
514  break;
515  case computeImpropersType:
516 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
517  if (simParams->bondedCUDA & NAMD_BONDEDGPU_IMPROPERS)
518  {
519  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
520  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
521  } else
522 #endif
523  {
524  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
525  c = new ComputeImpropers(i,pids); // unknown delete
526  map->registerCompute(i,c);
527  c->initialize();
528  }
529  break;
530  case computeTholeType:
531 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
532  if (simParams->bondedCUDA & NAMD_BONDEDGPU_THOLES) {
533  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
534  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
535  } else
536 #endif
537  {
538  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
539  c = new ComputeThole(i,pids); // unknown delete
540  map->registerCompute(i,c);
541  c->initialize();
542  }
543  break;
544  case computeAnisoType:
545 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
546  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ANISOS) {
547  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
548  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
549  } else
550 #endif
551  {
552  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
553  c = new ComputeAniso(i,pids); // unknown delete
554  map->registerCompute(i,c);
555  c->initialize();
556  }
557  break;
558  case computeCrosstermsType:
559 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
560  if (simParams->bondedCUDA & NAMD_BONDEDGPU_CROSSTERMS)
561  {
562  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
563  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
564  } else
565 #endif
566  {
567  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
568  c = new ComputeCrossterms(i,pids); // unknown delete
569  map->registerCompute(i,c);
570  c->initialize();
571  }
572  break;
573  case computeOneFourNbTholeType:
574 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
575  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ONEFOURENBTHOLES)
576  {
577  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
578  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
579  } else
580 #endif
581  {
582  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
583  c = new ComputeOneFourNbThole(i, pids);
584  map->registerCompute(i,c);
585  c->initialize();
586  }
587  break;
588  // JLai
589  case computeGromacsPairType:
590  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
591  c = new ComputeGromacsPair(i,pids); // unknown delete
592  map->registerCompute(i,c);
593  c->initialize();
594  break;
595  case computeSelfGromacsPairType:
596  c = new ComputeSelfGromacsPair(i,map->computeData[i].pids[0].pid); // unknown delete
597  map->registerCompute(i,c);
598  c->initialize();
599  break;
600  // End of JLai
601  case computeSelfExclsType:
602 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
603  if (simParams->bondedCUDA & NAMD_BONDEDGPU_EXCLS)
604  {
605  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
606  } else
607 #endif
608  {
609  c = new ComputeSelfExcls(i,map->computeData[i].pids[0].pid);
610  map->registerCompute(i,c);
611  c->initialize();
612  }
613  break;
614  case computeSelfBondsType:
615 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
616  if (simParams->bondedCUDA & NAMD_BONDEDGPU_BONDS)
617  {
618  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
619  } else
620 #endif
621  {
622  c = new ComputeSelfBonds(i,map->computeData[i].pids[0].pid);
623  map->registerCompute(i,c);
624  c->initialize();
625  }
626  break;
627  case computeSelfAnglesType:
628 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
629  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ANGLES)
630  {
631  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
632  } else
633 #endif
634  {
635  c = new ComputeSelfAngles(i,map->computeData[i].pids[0].pid);
636  map->registerCompute(i,c);
637  c->initialize();
638  }
639  break;
640  case computeSelfDihedralsType:
641 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
642  if (simParams->bondedCUDA & NAMD_BONDEDGPU_DIHEDRALS)
643  {
644  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
645  } else
646 #endif
647  {
648  c = new ComputeSelfDihedrals(i,map->computeData[i].pids[0].pid);
649  map->registerCompute(i,c);
650  c->initialize();
651  }
652  break;
653  case computeSelfImpropersType:
654 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
655  if (simParams->bondedCUDA & NAMD_BONDEDGPU_IMPROPERS)
656  {
657  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
658  } else
659 #endif
660  {
661  c = new ComputeSelfImpropers(i,map->computeData[i].pids[0].pid);
662  map->registerCompute(i,c);
663  c->initialize();
664  }
665  break;
666  case computeSelfTholeType:
667 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
668  if (simParams->bondedCUDA & NAMD_BONDEDGPU_THOLES) {
669  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
670  } else
671 #endif
672  {
673  c = new ComputeSelfThole(i,map->computeData[i].pids[0].pid);
674  map->registerCompute(i,c);
675  c->initialize();
676  }
677  break;
678  case computeSelfAnisoType:
679 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
680  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ANISOS) {
681  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
682  } else
683 #endif
684  {
685  c = new ComputeSelfAniso(i,map->computeData[i].pids[0].pid);
686  map->registerCompute(i,c);
687  c->initialize();
688  }
689  break;
690  case computeSelfCrosstermsType:
691 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
692  if (simParams->bondedCUDA & NAMD_BONDEDGPU_CROSSTERMS)
693  {
694  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
695  } else
696 #endif
697  {
698  c = new ComputeSelfCrossterms(i,map->computeData[i].pids[0].pid);
699  map->registerCompute(i,c);
700  c->initialize();
701  }
702  break;
703  case computeSelfOneFourNbTholeType:
704 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
705  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ONEFOURENBTHOLES) {
706  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
707  } else
708 #endif
709  {
710  c = new ComputeSelfOneFourNbThole(i,map->computeData[i].pids[0].pid);
711  map->registerCompute(i,c);
712  c->initialize();
713  }
714  break;
715 #ifdef DPMTA
716  case computeDPMTAType:
717  c = new ComputeDPMTA(i); // unknown delete
718  map->registerCompute(i,c);
719  c->initialize();
720  break;
721 #endif
722 #ifdef DPME
723  case computeDPMEType:
724  c = computeDPMEObject = new ComputeDPME(i,this); // unknown delete
725  map->registerCompute(i,c);
726  c->initialize();
727  break;
728 #endif
729  case computePmeType:
730  c = new ComputePme(i,map->computeData[i].pids[0].pid); // unknown delete
731  map->registerCompute(i,c);
732  c->initialize();
733  break;
734 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
735  case computePmeCUDAType:
736  // PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
737  // c = new ComputePmeCUDA(i, pids);
738  c = new ComputePmeCUDA(i, map->computeData[i].pids[0].pid);
739  map->registerCompute(i,c);
740  c->initialize();
741  break;
742 #endif
743  case computeEwaldType:
744  c = computeEwaldObject = new ComputeEwald(i,this); // unknown delete
745  map->registerCompute(i,c);
746  c->initialize();
747  break;
748  case computeFullDirectType:
749  c = new ComputeFullDirect(i); // unknown delete
750  map->registerCompute(i,c);
751  c->initialize();
752  break;
753  case computeGlobalType:
754  c = computeGlobalObject = new ComputeGlobal(i,this); // unknown delete
755  map->registerCompute(i,c);
756  c->initialize();
757  break;
758  case computeStirType:
759  c = new ComputeStir(i,map->computeData[i].pids[0].pid); // unknown delete
760  map->registerCompute(i,c);
761  c->initialize();
762  break;
763  case computeExtType:
764  c = new ComputeExt(i); // unknown delete
765  map->registerCompute(i,c);
766  c->initialize();
767  break;
768  case computeQMType:
769  c = new ComputeQM(i);
770  map->registerCompute(i,c);
771  c->initialize();
772  break;
773  case computeGBISserType: //gbis serial
774  c = new ComputeGBISser(i);
775  map->registerCompute(i,c);
776  c->initialize();
777  break;
778  case computeFmmType: // FMM serial
779  c = new ComputeFmmSerial(i);
780  map->registerCompute(i,c);
781  c->initialize();
782  break;
783  case computeMsmSerialType: // MSM serial
784  c = new ComputeMsmSerial(i);
785  map->registerCompute(i,c);
786  c->initialize();
787  break;
788  case computeLjPmeSerialType: // LJ-PME serial
789  c = new ComputeLjPmeSerial(i);
790  map->registerCompute(i,c);
791  c->initialize();
792  break;
793 #ifdef CHARM_HAS_MSA
794  case computeMsmMsaType: // MSM parallel long-range part using MSA
795  c = new ComputeMsmMsa(i);
796  map->registerCompute(i,c);
797  c->initialize();
798  break;
799 #endif
800  case computeMsmType: // MSM parallel
801  c = new ComputeMsm(i);
802  map->registerCompute(i,c);
803  c->initialize();
804  break;
805  case computeEFieldType:
806  c = new ComputeEField(i,map->computeData[i].pids[0].pid); // unknown delete
807  map->registerCompute(i,c);
808  c->initialize();
809  break;
810  /* BEGIN gf */
811  case computeGridForceType:
812  c = new ComputeGridForce(i,map->computeData[i].pids[0].pid);
813  map->registerCompute(i,c);
814  c->initialize();
815  break;
816  /* END gf */
817  case computeSphericalBCType:
818  c = new ComputeSphericalBC(i,map->computeData[i].pids[0].pid); // unknown delete
819  map->registerCompute(i,c);
820  c->initialize();
821  break;
822  case computeCylindricalBCType:
823  c = new ComputeCylindricalBC(i,map->computeData[i].pids[0].pid); // unknown delete
824  map->registerCompute(i,c);
825  c->initialize();
826  break;
827  case computeTclBCType:
828  c = new ComputeTclBC(i); // unknown delete
829  map->registerCompute(i,c);
830  c->initialize();
831  break;
832  case computeRestraintsType:
833  c = new ComputeRestraints(i,map->computeData[i].pids[0].pid); // unknown delete
834  map->registerCompute(i,c);
835  c->initialize();
836  break;
837  case computeConsForceType:
838  c = new ComputeConsForce(i,map->computeData[i].pids[0].pid);
839  map->registerCompute(i,c);
840  c->initialize();
841  break;
842  case computeConsTorqueType:
843  c = new ComputeConsTorque(i,map->computeData[i].pids[0].pid);
844  map->registerCompute(i,c);
845  c->initialize();
846  break;
847  default:
848  NAMD_bug("Unknown compute type in ComputeMgr::createCompute().");
849  break;
850  }
851 }
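// createCompute() dispatches on ComputeMap::type(): CPU compute objects are
// allocated here, registered with the map, and initialized immediately, while
// the CUDA/HIP paths instead register patches or tuples with the shared
// per-device objects (CudaComputeNonbonded, ComputeBondedCUDA), whose
// initialize() is deferred to the end of createComputes().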
852 
853 void ComputeMgr::registerUserEventsForAllComputeObjs(void) {
854 #ifdef TRACE_COMPUTE_OBJECTS
855  ComputeMap *map = ComputeMap::Object();
856  PatchMap *pmap = PatchMap::Object();
857  char user_des[50];
858  int p1, p2;
859  int adim, bdim, cdim;
860  int t1, t2;
861  int x1, y1, z1, x2, y2, z2;
862  int dx, dy, dz;
863  for (int i=0; i<map->numComputes(); i++)
864  {
865  memset(user_des, 0, 50);
866  switch ( map->type(i) )
867  {
869  sprintf(user_des, "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
870  break;
871  case computeLCPOType:
872  sprintf(user_des, "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
873  break;
874  case computeNonbondedPairType:
875  adim = pmap->gridsize_a();
876  bdim = pmap->gridsize_b();
877  cdim = pmap->gridsize_c();
878  p1 = map->pid(i, 0);
879  t1 = map->trans(i, 0);
880  x1 = pmap->index_a(p1) + adim * Lattice::offset_a(t1);
881  y1 = pmap->index_b(p1) + bdim * Lattice::offset_b(t1);
882  z1 = pmap->index_c(p1) + cdim * Lattice::offset_c(t1);
883  p2 = map->pid(i, 1);
884  t2 = map->trans(i, 1);
885  x2 = pmap->index_a(p2) + adim * Lattice::offset_a(t2);
886  y2 = pmap->index_b(p2) + bdim * Lattice::offset_b(t2);
887  z2 = pmap->index_c(p2) + cdim * Lattice::offset_c(t2);
888  dx = abs(x1-x2);
889  dy = abs(y1-y2);
890  dz = abs(z1-z2);
891  sprintf(user_des, "computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
892  break;
893 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
894 #ifdef BONDED_CUDA
895  case computeBondedCUDAType:
896  sprintf(user_des, "computeBondedCUDAType_%d", i);
897  break;
898 #endif
899 #endif
900  case computeExclsType:
901  sprintf(user_des, "computeExclsType_%d", i);
902  break;
903  case computeBondsType:
904  sprintf(user_des, "computeBondsType_%d", i);
905  break;
906  case computeAnglesType:
907  sprintf(user_des, "computeAnglesType_%d", i);
908  break;
910  sprintf(user_des, "computeDihedralsType_%d", i);
911  break;
913  sprintf(user_des, "computeImpropersType_%d", i);
914  break;
915  case computeTholeType:
916  sprintf(user_des, "computeTholeType_%d", i);
917  break;
918  case computeAnisoType:
919  sprintf(user_des, "computeAnisoType_%d", i);
920  break;
922  sprintf(user_des, "computeCrosstermsType_%d", i);
923  break;
925  sprintf(user_des, "computeOneFourNbTholeType_%d", i);
926  break;
928  sprintf(user_des, "computeSelfExclsType_%d", i);
929  break;
931  sprintf(user_des, "computeSelfBondsType_%d", i);
932  break;
934  sprintf(user_des, "computeSelfAnglesType_%d", i);
935  break;
937  sprintf(user_des, "computeSelfDihedralsType_%d", i);
938  break;
940  sprintf(user_des, "computeSelfImpropersType_%d", i);
941  break;
943  sprintf(user_des, "computeSelfTholeType_%d", i);
944  break;
946  sprintf(user_des, "computeSelfAnisoType_%d", i);
947  break;
949  sprintf(user_des, "computeSelfCrosstermsType_%d", i);
950  break;
952  sprintf(user_des, "computeSelfOneFourNbTholeType_%d", i);
953  break;
954 #ifdef DPMTA
955  case computeDPMTAType:
956  sprintf(user_des, "computeDPMTAType_%d", i);
957  break;
958 #endif
959 #ifdef DPME
960  case computeDPMEType:
961  sprintf(user_des, "computeDPMEType_%d", i);
962  break;
963 #endif
964  case computePmeType:
965  sprintf(user_des, "computePMEType_%d", i);
966  break;
967 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
968  case computePmeCUDAType:
969  sprintf(user_des, "computePMECUDAType_%d", i);
970  break;
971 #endif
972  case computeEwaldType:
973  sprintf(user_des, "computeEwaldType_%d", i);
974  break;
976  sprintf(user_des, "computeFullDirectType_%d", i);
977  break;
978  case computeGlobalType:
979  sprintf(user_des, "computeGlobalType_%d", i);
980  break;
981  case computeStirType:
982  sprintf(user_des, "computeStirType_%d", i);
983  break;
984  case computeExtType:
985  sprintf(user_des, "computeExtType_%d", i);
986  break;
987  case computeQMType:
988  sprintf(user_des, "computeQMType_%d", i);
989  break;
990  case computeEFieldType:
991  sprintf(user_des, "computeEFieldType_%d", i);
992  break;
993  /* BEGIN gf */
995  sprintf(user_des, "computeGridForceType_%d", i);
996  break;
997  /* END gf */
999  sprintf(user_des, "computeSphericalBCType_%d", i);
1000  break;
1002  sprintf(user_des, "computeCylindricalBCType_%d", i);
1003  break;
1004  case computeTclBCType:
1005  sprintf(user_des, "computeTclBCType_%d", i);
1006  break;
1007  case computeRestraintsType:
1008  sprintf(user_des, "computeRestraintsType_%d", i);
1009  break;
1010  case computeConsForceType:
1011  sprintf(user_des, "computeConsForceType_%d", i);
1012  break;
1013  case computeConsTorqueType:
1014  sprintf(user_des, "computeConsTorqueType_%d", i);
1015  break;
1016  default:
1017  NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
1018  break;
1019  }
1020  int user_des_len = strlen(user_des);
1021  char *user_des_cst = new char[user_des_len+1];
1022  memcpy(user_des_cst, user_des, user_des_len);
1023  user_des_cst[user_des_len] = 0;
1024  //Since the argument in traceRegisterUserEvent is supposed
1025  //to be a const string which will not be copied inside the
1026  //function when a new user event is created, user_des_cst
1027  //has to be allocated in heap.
1028  int reEvenId = traceRegisterUserEvent(user_des_cst, TRACE_COMPOBJ_IDOFFSET+i);
1029  //printf("Register user event (%s) with id (%d)\n", user_des, reEvenId);
1030  }
1031 #else
1032  return;
1033 #endif
1034 }
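// Without TRACE_COMPUTE_OBJECTS defined this routine is a no-op; with it,
// every compute object gets a Projections user event named after its type
// and compute ID (offset by TRACE_COMPOBJ_IDOFFSET).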
1035 
1036 void
1037 ComputeMgr::createComputes(ComputeMap *map)
1038 {
1039 // #ifdef NAMD_CUDA
1040 // int ComputePmeCUDACounter = 0;
1041 // #endif
1042  Node *node = Node::Object();
1043  SimParameters *simParams = node->simParameters;
1044  int myNode = node->myid();
1045 
1046  if ( simParams->globalForcesOn && !myNode )
1047  {
1048  DebugM(4,"Mgr running on Node "<<CkMyPe()<<"\n");
1049  /* create a master server to allow multiple masters */
1050  masterServerObject = new GlobalMasterServer(this,
1051  PatchMap::Object()->numNodesWithPatches());
1052 
1053  #ifdef NODEGROUP_FORCE_REGISTER
1054  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1055  PatchData *patchData = cpdata.ckLocalBranch();
1056  patchData->master_mgr = this;
1057  #endif
1058 
1059  /* create the individual global masters */
1060  // masterServerObject->addClient(new GlobalMasterTest());
1061  if (simParams->tclForcesOn)
1062  masterServerObject->addClient(new GlobalMasterTcl());
1063  if (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces) )
1064  masterServerObject->addClient(new GlobalMasterIMD());
1065  // SMD is implemented separately in the GPU-resident version of NAMD (NAMD 3), so this client is only added when CUDASOAintegrateMode is off
1066  if (simParams->SMDOn && !simParams->CUDASOAintegrateMode)
1067  masterServerObject->addClient(
1068  new GlobalMasterSMD(simParams->SMDk, simParams->SMDk2,
1069  simParams->SMDVel,
1070  simParams->SMDDir, simParams->SMDOutputFreq,
1071  simParams->firstTimestep, simParams->SMDFile,
1072  node->molecule->numAtoms)
1073  );
1074 
1075  if (simParams->symmetryOn &&
1076  (simParams->firstTimestep < simParams->symmetryLastStep ||
1077  simParams->symmetryLastStep == -1))
1078  masterServerObject->addClient(new GlobalMasterSymmetry());
1079  if (simParams->TMDOn)
1080  masterServerObject->addClient(new GlobalMasterTMD());
1081  if (simParams->miscForcesOn)
1082  masterServerObject->addClient(new GlobalMasterMisc());
1083  if ( simParams->freeEnergyOn )
1084  masterServerObject->addClient(new GlobalMasterFreeEnergy());
1085  if ( simParams->colvarsOn )
1086  masterServerObject->addClient(new GlobalMasterColvars());
1087 
1088  }
1089 
1090  if ( !myNode && simParams->IMDon && (simParams->IMDignore || simParams->IMDignoreForces) ) {
1091  // GlobalMasterIMD constructor saves pointer to node->IMDOutput object
1092  new GlobalMasterIMD();
1093  }
1094 
1095 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1096  bool deviceIsMine = ( deviceCUDA->getMasterPe() == CkMyPe() );
1097 #ifdef BONDED_CUDA
1098  // Place bonded forces on Pe different from non-bonded forces
1099  int bondedMasterPe = deviceCUDA->getMasterPe();
1100  // for (int i=0;i < deviceCUDA->getNumPesSharingDevice();i++) {
1101  // int pe = deviceCUDA->getPesSharingDevice(i);
1102  // if (pe != deviceCUDA->getMasterPe()) {
1103  // bondedMasterPe = pe;
1104  // }
1105  // }
1106  bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
1107 #endif
1108 #endif
1109 
1110  for (int i=0; i < map->nComputes; i++)
1111  {
1112  if ( ! ( i % 100 ) )
1113  {
1114  }
1115 
1116 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1117  switch ( map->type(i) )
1118  {
1119  // case computePmeCUDAType:
1120  // // Only create single ComputePmeCUDA object per Pe
1121  // if ( map->computeData[i].node != myNode ) continue;
1122  // if (ComputePmeCUDACounter > 0) continue;
1123  // ComputePmeCUDACounter++;
1124  // break;
1125  case computeNonbondedSelfType:
1126  if ( ! deviceIsMine ) continue;
1127  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1128  break;
1129 
1130  case computeNonbondedPairType:
1131  if ( ! deviceIsMine ) continue;
1132  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1133  break;
1134 
1135 #ifdef BONDED_CUDA
1136  case computeSelfBondsType:
1137  case computeBondsType:
1138  if (simParams->bondedCUDA & NAMD_BONDEDGPU_BONDS) {
1139  if ( ! deviceIsMineBonded ) continue;
1140  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1141  } else {
1142  if ( map->computeData[i].node != myNode ) continue;
1143  }
1144  break;
1145 
1146  case computeSelfAnglesType:
1147  case computeAnglesType:
1148  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ANGLES) {
1149  if ( ! deviceIsMineBonded ) continue;
1150  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1151  } else {
1152  if ( map->computeData[i].node != myNode ) continue;
1153  }
1154  break;
1155 
1156  case computeSelfDihedralsType:
1157  case computeDihedralsType:
1158  if (simParams->bondedCUDA & NAMD_BONDEDGPU_DIHEDRALS) {
1159  if ( ! deviceIsMineBonded ) continue;
1160  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1161  } else {
1162  if ( map->computeData[i].node != myNode ) continue;
1163  }
1164  break;
1165 
1166  case computeSelfImpropersType:
1167  case computeImpropersType:
1168  if (simParams->bondedCUDA & NAMD_BONDEDGPU_IMPROPERS) {
1169  if ( ! deviceIsMineBonded ) continue;
1170  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1171  } else {
1172  if ( map->computeData[i].node != myNode ) continue;
1173  }
1174  break;
1175 
1176  case computeSelfExclsType:
1177  case computeExclsType:
1178  if (simParams->bondedCUDA & NAMD_BONDEDGPU_EXCLS) {
1179  if ( ! deviceIsMineBonded ) continue;
1180  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1181  } else {
1182  if ( map->computeData[i].node != myNode ) continue;
1183  }
1184  break;
1185 
1186  case computeSelfCrosstermsType:
1187  case computeCrosstermsType:
1188  if (simParams->bondedCUDA & NAMD_BONDEDGPU_CROSSTERMS) {
1189  if ( ! deviceIsMineBonded ) continue;
1190  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1191  } else {
1192  if ( map->computeData[i].node != myNode ) continue;
1193  }
1194  break;
1195 
1196  case computeSelfTholeType:
1197  case computeTholeType:
1198  if (simParams->bondedCUDA & NAMD_BONDEDGPU_THOLES) {
1199  if ( ! deviceIsMineBonded ) continue;
1200  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1201  } else {
1202  if ( map->computeData[i].node != myNode ) continue;
1203  }
1204  break;
1205 
1206  case computeSelfAnisoType:
1207  case computeAnisoType:
1208  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ANISOS) {
1209  if ( ! deviceIsMineBonded ) continue;
1210  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1211  } else {
1212  if ( map->computeData[i].node != myNode ) continue;
1213  }
1214  break;
1215 
1216  case computeSelfOneFourNbTholeType:
1217  case computeOneFourNbTholeType:
1218  if (simParams->bondedCUDA & NAMD_BONDEDGPU_ONEFOURENBTHOLES) {
1219  if ( ! deviceIsMineBonded ) continue;
1220  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1221  } else {
1222  if ( map->computeData[i].node != myNode ) continue;
1223  }
1224  break;
1225 
1226  case computeBondedCUDAType:
1227  if ( ! deviceIsMineBonded ) continue;
1228  if ( map->computeData[i].node != myNode ) continue;
1229  break;
1230 #endif // BONDED_CUDA
1231 
1232  case computeNonbondedCUDA2Type:
1233  if ( ! deviceIsMine ) continue;
1234 // #ifdef BONDED_CUDA
1235 // case computeBondedCUDAType:
1236 // #endif
1237  default:
1238  if ( map->computeData[i].node != myNode ) continue;
1239  }
1240 #else // defined(NAMD_CUDA) || defined(NAMD_HIP)
1241  if ( map->computeData[i].node != myNode ) continue;
1242 #endif
1243  DebugM(1,"Compute " << i << '\n');
1244  DebugM(1," node = " << map->computeData[i].node << '\n');
1245  DebugM(1," type = " << map->computeData[i].type << '\n');
1246  DebugM(1," numPids = " << map->computeData[i].numPids << '\n');
1247 // DebugM(1," numPidsAllocated = " << map->computeData[i].numPidsAllocated << '\n');
1248  for (int j=0; j < map->computeData[i].numPids; j++)
1249  {
1250  DebugM(1," pid " << map->computeData[i].pids[j].pid << '\n');
1251  if (!((j+1) % 6))
1252  DebugM(1,'\n');
1253  }
1254  DebugM(1,"\n---------------------------------------");
1255  DebugM(1,"---------------------------------------\n");
1256 
1257  createCompute(i, map);
1258 
1259  }
1260 
1261 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1262  if (deviceIsMine) {
1263  getCudaComputeNonbonded()->assignPatches(this);
1264  getCudaComputeNonbonded()->initialize();
1265  }
1266 #ifdef BONDED_CUDA
1267  if (simParams->bondedCUDA) {
1268  if (deviceIsMineBonded) {
1269  getComputeBondedCUDA()->initialize();
1270  }
1271  }
1272 #endif
1273 #endif
1274 }
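// createComputes() instantiates only the computes this PE owns: for CPU
// computes the owner is map->computeData[i].node, while GPU computes are
// created only on the device's master PE (and, with BONDED_CUDA, on the
// bonded master PE) on behalf of all PEs sharing that device.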
1275 
1276 #if 0
1277 void ComputeMgr:: sendComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
1278 {
1279  (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeGlobalConfig(msg);
1280 }
1281 
1282 void ComputeMgr:: recvComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
1283 {
1284  if ( computeGlobalObject )
1285  {
1286  computeGlobalObject->recvConfig(msg);
1287  }
1288  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1289  else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
1290 }
1291 #endif
1292 #ifdef NODEGROUP_FORCE_REGISTER
1293 #endif
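// GPU-resident (CUDASOAintegrate) path below: every PE deposits its data
// into GlobalMasterServer under a node lock, with stowSuspendULT() barriers
// ordering PE 0 last so that the final recvComputeGlobalData() call runs the
// global-master clients; sendComputeGlobalResults() then copies the results
// message into each PE's ComputeGlobalResultsMsg_instance slot, which each
// PE consumes locally via recvComputeGlobalResults().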
1294 void ComputeMgr::sendComputeGlobalData(ComputeGlobalDataMsg *msg)
1295 {
1296  NAMD_EVENT_START(1, NamdProfileEvent::GM_SEND_COMP_DATA);
1297  // CkPrintf("*** [%d] Calling sendComputeGlobalData\n", CkMyPe());
1298  #ifdef NODEGROUP_FORCE_REGISTER
1299  SimParameters *sp = Node::Object()->simParameters;
1300  if (sp->CUDASOAintegrate) {
1301  NAMD_EVENT_START(1, NamdProfileEvent::GM_NODELOCK);
1302  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1303  PatchData *patchData = cpdata.ckLocalBranch();
1304  CmiNodeLock &nl = patchData->nodeLock;
1305  // atomic access to GlobalMasterServer to simulate queueing
1306  if (CkMyPe() != 0)
1307  {
1308  CmiLock(nl);
1309  //CkPrintf("*** [%d] Acquired nodelock!\n", CkMyPe());
1310  patchData->master_mgr->recvComputeGlobalData(msg);
1311  CmiUnlock(nl);
1312  }
1313  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_NODELOCK);
1314  NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
1315  // Barrier to make sure 0 goes last, since invocation of the clients and
1316  // message coordination has to happen on PE 0 and the last PE to call
1317  // recvComputeGlobalData will trigger all of that on itself
1318  // CmiNodeBarrier();
1319  // CkPrintf("*** sendComputeGlobalData entering barrier 1 on PE %d \n", CkMyPe());
1320  stowSuspendULT();
1321 
1322  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
1323  if (CkMyPe() == 0)
1324  {
1325  CmiLock(nl);
1326  patchData->master_mgr->recvComputeGlobalData(msg);
1327  CmiUnlock(nl);
1328  }
1329  else
1330  {
1331  // All PEs other than 0 wait here while the clients run and the global
1332  // results messages are prepared and copied into their slots (happens from
1333  // sendComputeGlobalResults on PE0)
1334  // CmiNodeBarrier();
1335  // CkPrintf("before call to stow %d\n",CkMyPe());
1336  // CkPrintf("*** sendComputeGlobalData barrier 3 on PE %d \n", CkMyPe());
1337  stowSuspendULT();
1338  // CkPrintf("*** sendComputeGlobalData out barrier 3 on PE %d \n", CkMyPe());
1339  // CkPrintf("returned from call to stow %d\n",CkMyPe());
1340  }
1341  // Get the message from the slot for this PE and resume execution
1342  ComputeGlobalResultsMsg* resultsMsg = CkpvAccess(ComputeGlobalResultsMsg_instance);
1343  DebugM(3,"["<<CkMyPe()<<"] calling recvComputeGlobalResults\n");
1344  recvComputeGlobalResults(resultsMsg);
1345  } else {
1346  #endif
1347  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1348  DebugM(3,"["<<CkMyPe()<<"] msg to recvComputeGlobalData\n");
1349  cm[0].recvComputeGlobalData(msg);
1350  #ifdef NODEGROUP_FORCE_REGISTER
1351  }
1352  #endif
1353  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_SEND_COMP_DATA);
1354  DebugM(3,"["<<CkMyPe()<<"] done sendComputeGlobalData\n");
1355 }
1356 
1357 void ComputeMgr::recvComputeGlobalData(ComputeGlobalDataMsg *msg)
1358 {
1359  NAMD_EVENT_START(1, NamdProfileEvent::GM_RECV_COMP_DATA);
1360  if (masterServerObject) // make sure it has been initialized
1361  {
1362  DebugM(3, "["<<CkMyPe()<<"] recvComputeGlobalData calling recvData\n");
1363  masterServerObject->recvData(msg);
1364  }
1365  else NAMD_die("ComputeMgr::masterServerObject is NULL!");
1366  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_RECV_COMP_DATA);
1367 }
1368 
1369 void ComputeMgr::sendComputeGlobalResults(ComputeGlobalResultsMsg *msg)
1370 {
1371  NAMD_EVENT_START(1, NamdProfileEvent::GM_SEND_COMP_RESULTS);
1372  msg->seq = ++computeGlobalResultsMsgMasterSeq;
1373  DebugM(3,"["<< CkMyPe()<< "] sendComputeGlobalResults seq "<<msg->seq<<"\n");
1374 
1375  #ifdef NODEGROUP_FORCE_REGISTER
1376  SimParameters *sp = Node::Object()->simParameters;
1377  if (sp->CUDASOAintegrate) {
1378  // Only PE 0 runs this code
1379  // Copy the message into each PE's slot (Assumes single-node with multicore build)
1380  for (int pe = 0; pe < CkMyNodeSize(); pe++) {
1381  if(CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe)!=nullptr)
1382  {
1383  // make sure msg delete happens on the same PE as made the msg to
1384  // avoid unbounded memory pool growth for these unsent messages
1385  delete CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe);
1386  }
1387  CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe) = (ComputeGlobalResultsMsg*)CkCopyMsg((void**)&msg);
1388  }
1389  delete msg;
1390  // Now that copies are done, trigger the barrier to resume the other PEs
1391  // (most other PEs call this barrier from sendComputeGlobalData)
1392  // CkPrintf("this is where we would call awaken\n",CkMyPe());
1393  //CmiNodeBarrier();
1394  // CkPrintf("*** sendComputeGlobalResults entering barrier 2 on PE %d \n", CkMyPe());
1395  stowSuspendULT();
1396  //thisProxy.recvComputeGlobalResults(msg);
1397  } else {
1398  #endif
1399  DebugM(3,"["<< CkMyPe() << "] ComputeMgr::sendComputeGlobalResults invoking bcast recvComputeGlobalResults\n");
1400  thisProxy.recvComputeGlobalResults(msg);
1401  #ifdef NODEGROUP_FORCE_REGISTER
1402  }
1403  #endif
1404  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_SEND_COMP_RESULTS);
1405 }
1406 
1407 void ComputeMgr::enableComputeGlobalResults()
1408 {
1409  NAMD_EVENT_START(1, NamdProfileEvent::GM_ENABLE_COMP_RESULTS);
1410  computeGlobalResultsMsgSeq++;
1411  DebugM(3,"["<<CkMyPe() <<"] enableComputeGlobalResults for "<< computeGlobalResultsMsgs.size() <<" messages seq "<< computeGlobalResultsMsgSeq <<"\n");
1412  for ( int i=0; i<computeGlobalResultsMsgs.size(); ++i ) {
1413  if ( computeGlobalResultsMsgs[i]->seq == computeGlobalResultsMsgSeq ) {
1414  ComputeGlobalResultsMsg* msg = computeGlobalResultsMsgs[i];
1415  computeGlobalResultsMsgs.del(i);
1416  recvComputeGlobalResults(msg);
1417  break;
1418  }
1419  }
1420  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ENABLE_COMP_RESULTS);
1421  DebugM(3,"["<<CkMyPe() <<"] exiting enableComputeGlobalResults for "<< computeGlobalResultsMsgs.size() <<" messages seq "<< computeGlobalResultsMsgSeq <<"\n");
1422 }
1423 
1424 void ComputeMgr::recvComputeGlobalResults(ComputeGlobalResultsMsg *msg)
1425 {
1426  NAMD_EVENT_START(1, NamdProfileEvent::GM_RCV_COMP_RESULTS);
1427  DebugM(3,"[" << CkMyPe() << "] recvComputeGlobalResults msg->seq "<< msg->seq << " computeGlobalResultsMsgSeq " << computeGlobalResultsMsgSeq << "\n");
1428  if ( computeGlobalObject )
1429  {
1430  if ( msg->seq == computeGlobalResultsMsgSeq ) {
1431  CmiEnableUrgentSend(1);
1432 
1433  computeGlobalObject->recvResults(msg);
1434  // CkPrintf("*** past recvResults on PE %d \n", CkMyPe());
1435  CmiEnableUrgentSend(0);
1436  } else {
1437  // CkPrintf("*** Adding recvComputeGlobalResults on PE %d \n", CkMyPe());
1438  computeGlobalResultsMsgs.add(msg);
1439  }
1440  }
1441  else if ( ! (PatchMap::Object())->numHomePatches() )
1442  {
1443  // CkPrintf("*** ignoring recvComputeGlobalResults on PE %d due to no home patch\n", CkMyPe());
1444  delete msg;
1445  }
1446  else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
1447  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_RCV_COMP_RESULTS);
1448  // CkPrintf("*** exiting recvComputeGlobalResults on PE %d \n", CkMyPe());
1449 }
1450 
1451 /*
1452  * Begin Ewald messages
1453  */
1454 void ComputeMgr::sendComputeEwaldData(ComputeEwaldMsg *msg)
1455 {
1456  if (computeEwaldObject)
1457  {
1458  int node = computeEwaldObject->getMasterNode();
1459  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1460  cm[node].recvComputeEwaldData(msg);
1461  }
1462  else if (!PatchMap::Object()->numHomePatches())
1463  {
1464  // CkPrintf("skipping message on Pe(%d)\n", CkMyPe());
1465  delete msg;
1466  }
1467  else NAMD_die("ComputeMgr::computeEwaldObject is NULL!");
1468 }
1469 
1470 void ComputeMgr::recvComputeEwaldData(ComputeEwaldMsg *msg)
1471 {
1472  if (computeEwaldObject)
1473  computeEwaldObject->recvData(msg);
1474  else NAMD_die("ComputeMgr::computeEwaldObject in recvData is NULL!");
1475 }
1476 
1477 void ComputeMgr::sendComputeEwaldResults(ComputeEwaldMsg *msg)
1478 {
1479  (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeEwaldResults(msg);
1480 }
1481 
1482 void ComputeMgr::recvComputeEwaldResults(ComputeEwaldMsg *msg)
1483 {
1484  if (computeEwaldObject) {
1485  CmiEnableUrgentSend(1);
1486  computeEwaldObject->recvResults(msg);
1487  CmiEnableUrgentSend(0);
1488  }
1489  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1490  else NAMD_die("ComputeMgr::computeEwaldObject in recvResults is NULL!");
1491 }
1492 
1493 void ComputeMgr::sendComputeDPMEData(ComputeDPMEDataMsg *msg)
1494 {
1495  if ( computeDPMEObject )
1496  {
1497 #ifdef DPME
1498  int node = computeDPMEObject->getMasterNode();
1499  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1500  cm.recvComputeDPMEData(msg,node);
1501 #endif
1502  }
1503  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1504  else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
1505 }
1506 
1507 void ComputeMgr::recvComputeDPMEData(ComputeDPMEDataMsg *msg)
1508 {
1509  if ( computeDPMEObject )
1510  {
1511 #ifdef DPME
1512  computeDPMEObject->recvData(msg);
1513 #endif
1514  }
1515  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1516  else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
1517 }
1518 
1519 void ComputeMgr::sendComputeDPMEResults(ComputeDPMEResultsMsg *msg, int node)
1520 {
1521  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1522  cm[node].recvComputeDPMEResults(msg);
1523 }
1524 
1525 void ComputeMgr::recvComputeDPMEResults(ComputeDPMEResultsMsg *msg)
1526 {
1527  if ( computeDPMEObject )
1528  {
1529 #ifdef DPME
1530  computeDPMEObject->recvResults(msg);
1531 #endif
1532  }
1533  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1534  else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
1535 }
1536 
1537 /*
1538  * Molecule now has only one instance per process, so this must only
1539  * be done once per process.
1540 
1541  * TODO: A molecule manager nodegroup would be the natural place
1542  * for entry methods that alter the molecule like this.
1543  */
1544 void ComputeMgr::recvComputeConsForceMsg(ComputeConsForceMsg *msg)
1545 {
1546  Molecule *m = Node::Object()->molecule;
1547  if(CkMyRank()==0){ // there is only one molecule per process
1548  delete [] m->consForceIndexes;
1549  delete [] m->consForce;
1550  int n = msg->aid.size();
1551  if (n > 0)
1552  {
1553  m->consForceIndexes = new int32[m->numAtoms];
1554  m->consForce = new Vector[n];
1555  int i;
1556  for (i=0; i<m->numAtoms; i++) m->consForceIndexes[i] = -1;
1557  for (i=0; i<msg->aid.size(); i++)
1558  {
1559  m->consForceIndexes[msg->aid[i]] = i;
1560  m->consForce[i] = msg->f[i];
1561  }
1562  }
1563  else
1564  {
1565  m->consForceIndexes = NULL;
1566  m->consForce = NULL;
1567  }
1568  }
1569  delete msg;
1570 #ifdef NODEGROUP_FORCE_REGISTER
1571  if(CkMyPe()==0)
1572  {
1573  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1574  cpdata.setDeviceKernelUpdateCounter();
1575  }
1576 #endif
1577 }
1578 
1579 void ComputeMgr::recvCudaGlobalMasterCreateMsg(std::vector<std::string> args) {
1580 #if (defined(NAMD_CUDA) || defined(NAMD_HIP))
1581  Node *node = Node::Object();
1582  SimParameters *simParams = node->simParameters;
1583  if (simParams->CUDASOAintegrate && simParams->useCudaGlobal) {
1584 #ifdef NODEGROUP_FORCE_REGISTER
1585  if (deviceCUDA->getMasterPe() == CkMyPe()) {
1586  if (deviceCUDA->getIsGlobalDevice()) {
1587  DebugM(3, "Call recvCudaGlobalMasterCreateMsg on master PE " << CkMyPe() << ".\n");
1588  ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
1589  cudaMgr->createCudaGlobalMaster();
1590  std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client = nullptr;
1591  const std::string library_name = args[0];
1592  // Find to see if library_name has been loaded
1593  std::shared_ptr<dlloader::DLLoader<CudaGlobalMaster::CudaGlobalMasterClient>> loader = nullptr;
1594  for (auto it = CudaGlobalMasterClientDlloaders.begin();
1595  it != CudaGlobalMasterClientDlloaders.end(); ++it) {
1596  if ((*it)->LibName() == library_name) {
1597  loader = (*it);
1598  break;
1599  }
1600  }
1601  // Create a new loader if not found
1602  if (loader == nullptr) {
1603  loader = std::shared_ptr<dlloader::DLLoader<CudaGlobalMaster::CudaGlobalMasterClient>>(new dlloader::DLLoader<CudaGlobalMaster::CudaGlobalMasterClient>(library_name));
1604  }
1605  try {
1606  iout << iINFO << "Loading library " << library_name
1607  << " on PE: " << CkMyPe() << "\n" << endi;
1608  loader->DLOpenLib();
1609  client = loader->DLGetInstance();
1610  } catch (std::exception& e) {
1611  iout << iERROR << "Cannot load the shared library " << library_name << "\n" << endi;
1612  NAMD_die(e.what());
1613  }
1614  // Try to initialize the client
1615  try {
1616  client->initialize(args,
1618  cudaMgr->getCudaGlobalMaster()->getStream());
1619  client->subscribe(cudaMgr->getCudaGlobalMaster());
1620  iout << iINFO << "CudaGlobalMaster client \"" << client->name()
1621  << "\"" << " initialized\n" << endi;
1622  } catch (std::exception& e) {
1623  iout << iERROR << "Cannot initialize the CudaGlobalMaster client from "
1624  << library_name << "\n" << endi;
1625  NAMD_die(e.what());
1626  }
1627  CudaGlobalMasterClientDlloaders.push_back(loader);
1628  } else {
1629  DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on master PE " <<
1630  CkMyPe() << " that is not scheduled for GPU-resident global master.\n");
1631  }
1632  } else {
1633  DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on non-master PE " << CkMyPe() << ".\n");
1634  }
1635 #endif // NODEGROUP_FORCE_REGISTER
1636  } else {
1637  if (!(simParams->CUDASOAintegrate)) {
1638  NAMD_die("GPU-resident mode is not enabled.\n");
1639  }
1640  if (!(simParams->useCudaGlobal)) {
1641  NAMD_die("GPU-resident external forces are not enabled.\n");
1642  }
1643  }
1644  // CmiNodeBarrier();
1645 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))
1646 }
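// A CudaGlobalMaster client library may be requested more than once: the
// dlloader for args[0] is cached in CudaGlobalMasterClientDlloaders, so a
// later create call reuses the already-opened handle, and the ComputeMgr
// destructor closes every cached library.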
1647 
1648 void ComputeMgr::recvCudaGlobalMasterRemoveMsg(std::vector<std::string> args) {
1649 #if (defined(NAMD_CUDA) || defined(NAMD_HIP))
1650  Node *node = Node::Object();
1651  SimParameters *simParams = node->simParameters;
1652  const std::string client_name_to_remove = args[0];
1653  if (simParams->CUDASOAintegrate && simParams->useCudaGlobal) {
1654 #ifdef NODEGROUP_FORCE_REGISTER
1655  if (deviceCUDA->getMasterPe() == CkMyPe()) {
1656  if (deviceCUDA->getIsGlobalDevice()) {
1657  ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
1658  std::shared_ptr<CudaGlobalMasterServer> gm = cudaMgr->getCudaGlobalMaster();
1659  if (gm) {
1660  std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> c = nullptr;
1661  const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>>& clients = gm->getClients();
1662  for (size_t i = 0; i < clients.size(); ++i) {
1663  if (client_name_to_remove == clients[i]->name()) {
1664  c = clients[i];
1665  break;
1666  }
1667  }
1668  if (c) {
1669  gm->removeClient(c);
1670  iout << iINFO << "CudaGlobalMasterClient \""
1671  << client_name_to_remove << "\" removed\n" << endi;
1672  } else {
1673  const std::string error = "CudaGlobalMasterClient \""
1674  + client_name_to_remove + "\" not found";
1675  NAMD_die(error.c_str());
1676  }
1677  }
1678  }
1679  }
1680 #endif // NODEGROUP_FORCE_REGISTER
1681  } else {
1682  if (!(simParams->CUDASOAintegrate)) {
1683  NAMD_die("GPU-resident mode is not enabled.\n");
1684  }
1685  if (!(simParams->useCudaGlobal)) {
1686  NAMD_die("GPU-resident external forces are not enabled.\n");
1687  }
1688  }
1689 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))
1690 }
1691 
1692 void ComputeMgr::recvCudaGlobalMasterUpdateMsg(std::vector<std::string> args) {
1693 #if (defined(NAMD_CUDA) || defined(NAMD_HIP))
1694  std::vector<std::string> result_args;
1695  Node *node = Node::Object();
1696  SimParameters *simParams = node->simParameters;
1697  const std::string client_name_to_update = args[0];
1698 #ifdef NAMD_TCL
1699  int error_code = TCL_OK;
1700 #else
1701  int error_code = 0;
1702 #endif
1703  if (simParams->CUDASOAintegrate && simParams->useCudaGlobal) {
1704 #ifdef NODEGROUP_FORCE_REGISTER
1705  if (deviceCUDA->getMasterPe() == CkMyPe()) {
1706  if (deviceCUDA->getIsGlobalDevice()) {
1707  ComputeCUDAMgr* cudaMgr = ComputeCUDAMgr::getComputeCUDAMgr();
1708  std::shared_ptr<CudaGlobalMasterServer> gm = cudaMgr->getCudaGlobalMaster();
1709  if (gm) {
1710  std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> c = nullptr;
1711  const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>>& clients = gm->getClients();
1712  for (size_t i = 0; i < clients.size(); ++i) {
1713  if (client_name_to_update == clients[i]->name()) {
1714  c = clients[i];
1715  break;
1716  }
1717  }
1718  if (c) {
1719  result_args.push_back(client_name_to_update);
1720  error_code = c->updateFromTCLCommand(args);
1721  result_args.push_back(c->getTCLUpdateResult());
1722  iout << iINFO << "CudaGlobalMasterClient \""
1723  << client_name_to_update << "\" updated\n" << endi;
1724  } else {
1725  const std::string error = "CudaGlobalMasterClient \""
1726  + client_name_to_update + "\" not found";
1727  NAMD_die(error.c_str());
1728  }
1729  }
1730  }
1731  }
1732 #endif // NODEGROUP_FORCE_REGISTER
1733  } else {
1734  if (!(simParams->CUDASOAintegrate)) {
1735  NAMD_die("GPU-resident mode is not enabled.\n");
1736  }
1737  if (!(simParams->useCudaGlobal)) {
1738  NAMD_die("GPU-resident external forces are not enabled.\n");
1739  }
1740  }
1741  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1742  cm[0].recvCudaGlobalMasterUpdateResultMsg(error_code, result_args);
1743 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))
1744 }
1745 
1746 void ComputeMgr::recvCudaGlobalMasterUpdateResultMsg(int tcl_error_code, std::vector<std::string> args) {
1747  if (CkMyPe() == 0) {
1748  if (!args.empty()) {
1749  CudaGlobalMasterClientUpdateResults[args[0]] = tcl_error_code;
1750  CudaGlobalMasterClientUpdateResultStrings[args[0]] = args[1];
1751  }
1752  } else {
1753  const std::string error =
1754  "recvCudaGlobalMasterUpdateResultMsg is called on " +
1755  std::to_string(CkMyPe()) + " but expected on PE 0!\n";
1756  NAMD_bug(error.c_str());
1757  }
1758 }
1759 
1760 int ComputeMgr::getCudaGlobalMasterUpdateResult(const std::string& client_name) const {
1761  return CudaGlobalMasterClientUpdateResults.at(client_name);
1762 }
1763 
1764 std::string ComputeMgr::getCudaGlobalMasterUpdateResultString(const std::string& client_name) const {
1765  return CudaGlobalMasterClientUpdateResultStrings.at(client_name);
1766 }
1767 
1768 void ComputeMgr::sendYieldDevice(int pe) {
1769  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1770  cm[pe].recvYieldDevice(CkMyPe());
1771 }
1772 
1773 void ComputeMgr::recvYieldDevice(int pe) {
1774  // XXX MIC support was only code using YieldDevice functionality
1775  // computeNonbondedMICObject->recvYieldDevice(pe);
1776 }
1777 
1778 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1779 class CudaComputeNonbondedMsg : public CMessage_CudaComputeNonbondedMsg {
1780 public:
1781  CudaComputeNonbonded* c;
1782  int i;
1783 };
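// These send/recv pairs exist because CudaComputeNonbonded work must run on
// specific PEs: each send*() wraps the compute pointer in a small Charm++
// message and invokes the matching recv*() entry method on the target PE(s),
// which then calls back into the compute object.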
1784 
1785 void ComputeMgr::sendAssignPatchesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
1786  for (int i=0;i < pes.size();i++) {
1787  CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
1788  msg->c = c;
1789  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
1790  }
1791 }
1792 
1793 void ComputeMgr::recvAssignPatchesOnPe(CudaComputeNonbondedMsg *msg) {
1794  msg->c->assignPatchesOnPe();
1795  delete msg;
1796 }
1797 
1798 void ComputeMgr::sendSkipPatchesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
1799  for (int i=0;i < pes.size();i++) {
1800  CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
1801  msg->c = c;
1802  thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
1803  }
1804 }
1805 
1806 void ComputeMgr::recvSkipPatchesOnPe(CudaComputeNonbondedMsg *msg) {
1807  msg->c->skipPatchesOnPe();
1808  delete msg;
1809 }
1810 
1811 void ComputeMgr::sendFinishPatchesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
1812  for (int i=0;i < pes.size();i++) {
1813  CudaComputeNonbondedMsg *msg = new (PRIORITY_SIZE) CudaComputeNonbondedMsg;
1814  SET_PRIORITY(msg, c->sequence(), COMPUTE_PROXY_PRIORITY);
1815  msg->c = c;
1816  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
1817  }
1818 }
1819 
1820 void ComputeMgr::recvFinishPatchesOnPe(CudaComputeNonbondedMsg *msg) {
1821  msg->c->finishPatchesOnPe();
1822  delete msg;
1823 }
1824 
1825 void ComputeMgr::sendFinishPatchOnPe(int pe, CudaComputeNonbonded* c, int i, PatchID patchID) {
1826  CudaComputeNonbondedMsg *msg = new (PRIORITY_SIZE) CudaComputeNonbondedMsg;
1827  SET_PRIORITY(msg, c->sequence(), PATCH_PRIORITY(patchID));
1828  msg->c = c;
1829  msg->i = i;
1830  thisProxy[pe].recvFinishPatchOnPe(msg);
1831 }
1832 
1833 void ComputeMgr::recvFinishPatchOnPe(CudaComputeNonbondedMsg *msg) {
1834  msg->c->finishPatchOnPe(msg->i);
1835  delete msg;
1836 }
1837 
1838 void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
1839  for (int i=0;i < pes.size();i++) {
1840  CudaComputeNonbondedMsg *msg = new (PRIORITY_SIZE) CudaComputeNonbondedMsg;
1841  SET_PRIORITY(msg, c->sequence(), PROXY_DATA_PRIORITY+1); // after bonded
1842  msg->c = c;
1843  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
1844  }
1845 }
1846 
1847 void ComputeMgr::recvOpenBoxesOnPe(CudaComputeNonbondedMsg *msg) {
1848  msg->c->openBoxesOnPe();
1849  delete msg;
1850 }
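// Editorial note on the priority pattern above: messages whose delivery
// order matters are allocated with room for priority bits and tagged before
// sending, as in sendOpenBoxesOnPe and sendFinishPatchesOnPe:
//
//   CudaComputeNonbondedMsg *msg = new (PRIORITY_SIZE) CudaComputeNonbondedMsg;
//   SET_PRIORITY(msg, c->sequence(), PROXY_DATA_PRIORITY+1);
//
// Helpers that allocate with plain new send at default priority instead.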
1851 
1852 void ComputeMgr::sendFinishReductions(int pe, CudaComputeNonbonded* c) {
1853  CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
1854  msg->c = c;
1855  thisProxy[pe].recvFinishReductions(msg);
1856 }
1857 
1858 void ComputeMgr::recvFinishReductions(CudaComputeNonbondedMsg *msg) {
1859  msg->c->finishReductions();
1860  delete msg;
1861 }
1862 
1863 void ComputeMgr::sendMessageEnqueueWork(int pe, CudaComputeNonbonded* c) {
1864  CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
1865  msg->c = c;
1866  thisProxy[pe].recvMessageEnqueueWork(msg);
1867 }
1868 
1869 void ComputeMgr::recvMessageEnqueueWork(CudaComputeNonbondedMsg *msg) {
1870  msg->c->messageEnqueueWork();
1871  delete msg;
1872 }
1873 
1874 void ComputeMgr::sendLaunchWork(int pe, CudaComputeNonbonded* c) {
1875  CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
1876  msg->c = c;
1877  thisProxy[pe].recvLaunchWork(msg);
1878 }
1879 
1880 void ComputeMgr::recvLaunchWork(CudaComputeNonbondedMsg *msg) {
1881  msg->c->launchWork();
1882  delete msg;
1883 }
1884 
1885 void ComputeMgr::sendUnregisterBoxesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
1886  for (int i=0;i < pes.size();i++) {
1887  CudaComputeNonbondedMsg *msg = new CudaComputeNonbondedMsg;
1888  msg->c = c;
1889  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
1890  }
1891 }
1892 
1893 void ComputeMgr::recvUnregisterBoxesOnPe(CudaComputeNonbondedMsg *msg) {
1894  msg->c->unregisterBoxesOnPe();
1895  delete msg;
1896 }
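// A caller-side sketch (editorial; the actual call sites are expected to be
// in CudaComputeNonbonded itself): a compute typically hands its PE list and
// itself to these helpers, and each destination PE then runs the matching
// *OnPe() method locally.
//
//   std::vector<int> pes = ...;  // hypothetical: PEs hosting this compute's patches
//   computeMgr->sendAssignPatchesOnPe(pes, this);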
1897 
1898 #ifdef BONDED_CUDA
1899 
1900 class ComputeBondedCUDAMsg : public CMessage_ComputeBondedCUDAMsg {
1901 public:
1902  ComputeBondedCUDA* c;
1903  int i;
1904 };
1905 
1906 void ComputeMgr::sendAssignPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1907  for (int i=0;i < pes.size();i++) {
1908  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1909  msg->c = c;
1910  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
1911  }
1912 }
1913 
1914 void ComputeMgr::recvAssignPatchesOnPe(ComputeBondedCUDAMsg *msg) {
1915  msg->c->assignPatchesOnPe();
1916  delete msg;
1917 }
1918 
1919 void ComputeMgr::sendMessageEnqueueWork(int pe, ComputeBondedCUDA* c) {
1920  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1921  msg->c = c;
1922  thisProxy[pe].recvMessageEnqueueWork(msg);
1923 }
1924 
1925 void ComputeMgr::recvMessageEnqueueWork(ComputeBondedCUDAMsg *msg) {
1926  msg->c->messageEnqueueWork();
1927  delete msg;
1928 }
1929 
1930 void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1931  for (int i=0;i < pes.size();i++) {
1932  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
1933  SET_PRIORITY(msg, c->sequence(), PROXY_DATA_PRIORITY);
1934  msg->c = c;
1935  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
1936  }
1937 }
1938 
1939 void ComputeMgr::recvOpenBoxesOnPe(ComputeBondedCUDAMsg *msg) {
1940  msg->c->openBoxesOnPe();
1941  delete msg;
1942 }
1943 
1944 void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1945  for (int i=0;i < pes.size();i++) {
1946  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1947  msg->c = c;
1948  thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
1949  }
1950 }
1951 
1952 void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
1953  msg->c->loadTuplesOnPe();
1954  delete msg;
1955 }
1956 
1957 void ComputeMgr::sendLaunchWork(int pe, ComputeBondedCUDA* c) {
1958  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1959  msg->c = c;
1960  thisProxy[pe].recvLaunchWork(msg);
1961 }
1962 
1963 void ComputeMgr::recvLaunchWork(ComputeBondedCUDAMsg *msg) {
1964  msg->c->launchWork();
1965  delete msg;
1966 }
1967 
1968 void ComputeMgr::sendFinishPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1969  for (int i=0;i < pes.size();i++) {
1970  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
1971  SET_PRIORITY(msg, c->sequence(), COMPUTE_PROXY_PRIORITY);
1972  msg->c = c;
1973  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
1974  }
1975 }
1976 
1977 void ComputeMgr::recvFinishPatchesOnPe(ComputeBondedCUDAMsg *msg) {
1978  msg->c->finishPatchesOnPe();
1979  delete msg;
1980 }
1981 
1982 void ComputeMgr::sendFinishReductions(int pe, ComputeBondedCUDA* c) {
1983  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1984  msg->c = c;
1985  thisProxy[pe].recvFinishReductions(msg);
1986 }
1987 
1988 void ComputeMgr::recvFinishReductions(ComputeBondedCUDAMsg *msg) {
1989  msg->c->finishReductions();
1990  delete msg;
1991 }
1992 
1993 void ComputeMgr::sendUnregisterBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1994  for (int i=0;i < pes.size();i++) {
1995  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1996  msg->c = c;
1997  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
1998  }
1999 }
2000 
2001 void ComputeMgr::recvUnregisterBoxesOnPe(ComputeBondedCUDAMsg *msg) {
2002  msg->c->unregisterBoxesOnPe();
2003  delete msg;
2004 }
2005 
2006 #endif // BONDED_CUDA
2007 
2008 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP)
2009 
2010 #include "ComputeMgr.def.h"
2011 