/*
 * NAMD — ComputeMgr.C
 * (File header recovered from a Doxygen page capture; the original
 *  navigation text "NAMD / ComputeMgr.C / Go to the documentation of
 *  this file." is preserved here as a comment so it no longer reads
 *  as stray tokens.)
 */
7 #include "dlloader.h"
9 #include "InfoStream.h"
10 #include "ProcessorPrivate.h"
11 #include "middle-conv.h"
12 
13 //#define DEBUGM
14 #define MIN_DEBUG_LEVEL 3
15 #include "Debug.h"
16 
17 #include "BOCgroup.h"
18 #include "ComputeMgr.decl.h"
19 #include "ComputeMgr.h"
20 #include "ProxyMgr.decl.h"
21 #include "ProxyMgr.h"
22 
23 #include "Node.h"
24 #include "ComputeMap.h"
25 #include "PatchMap.h"
26 #include "PatchMap.inl"
27 
28 #include "Compute.h"
29 #include "ComputeNonbondedUtil.h"
30 #include "ComputeNonbondedSelf.h"
31 #include "ComputeNonbondedPair.h"
32 #include "ComputeAngles.h"
33 #include "ComputeDihedrals.h"
34 #include "ComputeImpropers.h"
35 #include "ComputeThole.h"
36 #include "ComputeAniso.h"
37 #include "ComputeCrossterms.h"
38 // JLai
39 #include "ComputeGromacsPair.h"
40 #include "ComputeBonds.h"
42 #include "ComputeFullDirect.h"
43 #include "ComputeGlobal.h"
44 #include "ComputeGlobalMsgs.h"
45 #include "ComputeExt.h"
46 #include "ComputeQM.h"
47 #include "ComputeGBISser.h"
48 #include "ComputeLCPO.h"
49 #include "ComputeFmmSerial.h"
50 #include "ComputeMsmSerial.h"
51 #include "ComputeLjPmeSerial.h"
52 #include "ComputeMsmMsa.h"
53 #include "ComputeMsm.h"
54 #include "ComputeDPMTA.h"
55 #include "ComputeDPME.h"
56 #include "ComputeDPMEMsgs.h"
57 #include "ComputePme.h"
58 // #ifdef NAMD_CUDA
59 #include "ComputePmeCUDA.h"
60 #include "ComputeCUDAMgr.h"
61 #include "CudaComputeNonbonded.h"
62 #include "ComputePmeCUDAMgr.h"
63 // #endif
64 #include "ComputeEwald.h"
65 #include "ComputeEField.h"
66 /* BEGIN gf */
67 #include "ComputeGridForce.h"
68 /* END gf */
69 #include "ComputeStir.h"
70 #include "ComputeSphericalBC.h"
71 #include "ComputeCylindricalBC.h"
72 #include "ComputeTclBC.h"
73 #include "ComputeRestraints.h"
74 #include "ComputeConsForce.h"
75 #include "ComputeConsForceMsgs.h"
76 #include "WorkDistrib.h"
77 
78 #include "LdbCoordinator.h"
79 
80 /* include all of the specific masters we need here */
81 #include "FreeEnergyEnums.h"
82 #include "FreeEnergyAssert.h"
83 #include "FreeEnergyGroup.h"
84 #include "FreeEnergyVector.h"
85 #include "FreeEnergyRestrain.h"
86 #include "FreeEnergyRMgr.h"
87 #include "FreeEnergyLambda.h"
88 #include "FreeEnergyLambdMgr.h"
89 
90 #include "GlobalMasterTest.h"
91 #include "GlobalMasterIMD.h"
92 #include "GlobalMasterTcl.h"
93 #include "GlobalMasterSMD.h"
94 #include "GlobalMasterTMD.h"
95 #include "GlobalMasterSymmetry.h"
96 #include "GlobalMasterEasy.h"
97 #include "GlobalMasterMisc.h"
98 #include "GlobalMasterFreeEnergy.h"
99 #include "GlobalMasterColvars.h"
100 
101 #include "PatchData.h"
102 #include "NamdEventsProfiling.h"
103 #include "DeviceCUDA.h"
104 
105 #include "CudaGlobalMasterServer.h"
106 #include "strlib.h"
107 
108 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
109 #ifdef WIN32
110 #define __thread __declspec(thread)
111 #endif
112 extern __thread DeviceCUDA *deviceCUDA;
113 #endif
114 
// ComputeMgr constructor body.  NOTE(review): the signature line was lost
// in extraction — presumably ComputeMgr::ComputeMgr(); confirm against the
// repository source.
116 {
 // Publish this chare group's ID in the per-PE BOC table so other groups
 // can reach the ComputeMgr.
117  CkpvAccess(BOCclass_group).computeMgr = thisgroup;
 // DPME/Ewald compute objects are created lazily in createCompute().
121  computeDPMEObject = 0;
122  computeEwaldObject = 0;
 // Scratch arrays shared by the nonbonded computes; freed in the destructor.
123  computeNonbondedWorkArrays = new ComputeNonbondedWorkArrays;
 // First pass of updateComputes3() performs compute splitting; later passes
 // skip straight to updateLocalComputes() (see updateComputes3()).
124  skipSplitting = 0;
125  masterServerObject = NULL;
126 }
127 
// ComputeMgr destructor body.  NOTE(review): the signature line was lost in
// extraction — presumably ComputeMgr::~ComputeMgr().
129 {
130  delete computeNonbondedWorkArrays;
131  if (masterServerObject != NULL) delete masterServerObject;
 // Close every dynamically loaded CudaGlobalMaster client library that is
 // still open, logging each one.
132  for (auto& loader: CudaGlobalMasterClientDlloaders) {
133  if (loader) {
134  iout << iINFO << "Close library " << loader->LibName() << "\n" << endi;
135  loader->DLCloseLib();
136  }
137  }
138 }
139 
140 void ComputeMgr::updateComputes(int ep, CkGroupID chareID)
141 {
142  updateComputesReturnEP = ep;
143  updateComputesReturnChareID = chareID;
144  updateComputesCount = CkNumPes();
145 
146  if (CkMyPe())
147  {
148  NAMD_bug("updateComputes signaled on wrong Pe!");
149  }
150 
151  CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0),&thishandle);
152 }
153 
154 void ComputeMgr::updateComputes2(CkQdMsg *msg)
155 {
156  delete msg;
157 
158  CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
159  WorkDistrib *workDistrib = wd.ckLocalBranch();
160  workDistrib->saveComputeMapChanges(CkIndex_ComputeMgr::updateComputes3(),thisgroup);
161 }
162 
// Continuation after WorkDistrib has saved the compute-map changes.
// NOTE(review): the signature line was lost in extraction — this is the body
// of ComputeMgr::updateComputes3(), per the CkIndex reference above.
164 {
 // First time through, broadcast splitComputes() to partition computes;
 // on subsequent updates skip straight to updateLocalComputes().
165  if ( skipSplitting ) {
166  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
167  } else {
168  CProxy_ComputeMgr(thisgroup).splitComputes();
169  skipSplitting = 1;
170  }
171 }
172 
// Apply requested partition counts to the compute map (rank 0 of each node
// only), cloning a compute once per extra partition.  NOTE(review): the
// signature line was lost in extraction — this is the body of
// ComputeMgr::splitComputes(), per the broadcast in updateComputes3().
174 {
175  if ( ! CkMyRank() ) {
176  ComputeMap *computeMap = ComputeMap::Object();
177  const int nc = computeMap->numComputes();
178 
179  for (int i=0; i<nc; i++) {
 // nnp > 0 means the load balancer asked for this compute to be split.
180  int nnp = computeMap->newNumPartitions(i);
181  if ( nnp > 0 ) {
 // Already-partitioned computes cannot be re-split; drop the request.
182  if ( computeMap->numPartitions(i) != 1 ) {
183  CkPrintf("Warning: unable to partition compute %d\n", i);
184  computeMap->setNewNumPartitions(i,0);
185  continue;
186  }
187  //CkPrintf("splitting compute %d by %d\n",i,nnp);
188  computeMap->setNumPartitions(i,nnp);
 // A compute with no assigned destination stays on its current node.
189  if (computeMap->newNode(i) == -1) {
190  computeMap->setNewNode(i,computeMap->node(i));
191  }
 // Partition 0 reuses compute i; clone one compute per extra partition.
192  for ( int j=1; j<nnp; ++j ) {
193  int newcid = computeMap->cloneCompute(i,j);
194  //CkPrintf("compute %d partition %d is %d\n",i,j,newcid);
195  }
196  }
197  }
198  computeMap->extendPtrs();
199  }
200 
 // PE 0 drives the phase transition once all PEs are quiescent.
201  if (!CkMyPe())
202  {
203  CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
204  }
205 }
206 
207 void ComputeMgr::splitComputes2(CkQdMsg *msg)
208 {
209  delete msg;
210  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
211 }
212 
// Per-PE pass of the compute-map update: delete computes leaving this PE,
// record (in computeFlag) the IDs that must be created here, and create
// proxies for incoming computes' patches.  NOTE(review): the signature line
// was lost in extraction — this is the body of
// ComputeMgr::updateLocalComputes(), per the broadcasts above.
214 {
215  ComputeMap *computeMap = ComputeMap::Object();
216  CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
217  ProxyMgr *proxyMgr = pm.ckLocalBranch();
218  LdbCoordinator *ldbCoordinator = LdbCoordinator::Object();
219 
 // computeFlag collects the compute IDs this PE must instantiate in
 // updateLocalComputes3().
220  computeFlag.resize(0);
221 
222  const int nc = computeMap->numComputes();
223  for (int i=0; i<nc; i++) {
224 
 // Case 1: compute lives here and is being split — tell the load
 // balancer it "migrates" in place, delete the old object, and re-create
 // it later if it stays on this PE.
225  if ( computeMap->node(i) == CkMyPe() &&
226  computeMap->newNumPartitions(i) > 1 ) {
227  Compute *c = computeMap->compute(i);
228  ldbCoordinator->Migrate(c->ldObjHandle,CkMyPe());
229  delete c;
230  computeMap->registerCompute(i,NULL);
231  if ( computeMap->newNode(i) == CkMyPe() ) computeFlag.add(i);
232  } else
 // Case 2: compute is moving onto this PE — flag it for creation and
 // make sure we have a proxy for each patch it touches.
233  if (computeMap->newNode(i) == CkMyPe() && computeMap->node(i) != CkMyPe())
234  {
235  computeFlag.add(i);
236  for (int n=0; n < computeMap->numPids(i); n++)
237  {
238  proxyMgr->createProxy(computeMap->pid(i,n));
239  }
240  }
 // Case 3: compute is moving off this PE — delete the local object.
241  else if (computeMap->node(i) == CkMyPe() &&
242  (computeMap->newNode(i) != -1 && computeMap->newNode(i) != CkMyPe() ))
243  {
244  // CkPrintf("delete compute %d on pe %d\n",i,CkMyPe());
245  delete computeMap->compute(i);
246  computeMap->registerCompute(i,NULL);
247  }
248  }
249 
250  if (!CkMyPe())
251  {
252  CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
253  }
254 }
255 
// Quiescence callback: broadcast the next phase to all branches.
// NOTE(review): the name line was lost in extraction — this is
// ComputeMgr::updateLocalComputes2(CkQdMsg *msg), per the CkIndex
// reference in updateLocalComputes().
256 void
258 {
259  delete msg;
260  CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
261 }
262 
// Commit the new node assignments into the compute map (rank 0 of each node
// only), create the computes flagged for this PE, and drop proxies that are
// no longer referenced.  NOTE(review): the name line was lost in extraction —
// this is ComputeMgr::updateLocalComputes3(), per the CkIndex reference above.
263 void
265 {
266  ComputeMap *computeMap = ComputeMap::Object();
267  CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
268  ProxyMgr *proxyMgr = pm.ckLocalBranch();
269 
271 
272  const int nc = computeMap->numComputes();
273 
 // Only one rank per node rewrites the shared compute map: clear the
 // pending partition counts and fold newNode into node.
274  if ( ! CkMyRank() ) {
275  for (int i=0; i<nc; i++) {
276  computeMap->setNewNumPartitions(i,0);
277  if (computeMap->newNode(i) != -1) {
278  computeMap->setNode(i,computeMap->newNode(i));
279  setNewNode is reset to -1 below
280  }
281  }
282  }
283 
 // Instantiate every compute recorded for this PE in updateLocalComputes().
284  for(int i=0; i<computeFlag.size(); i++) createCompute(computeFlag[i], computeMap);
285  computeFlag.clear();
286 
287  proxyMgr->removeUnusedProxies();
288 
289  if (!CkMyPe())
290  {
291  CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
292  }
293 }
294 
// Quiescence callback: broadcast updateLocalComputes5() and, on this PE,
// optionally persist the updated compute map.  NOTE(review): the name line
// was lost in extraction — this is ComputeMgr::updateLocalComputes4(CkQdMsg*);
// the declaration of `simParams` (doxygen line 302, presumably
// SimParameters from Node) was also lost — confirm against the repository.
295 void
297 {
298  delete msg;
299  CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
300 
301  // store the latest compute map
303  if (simParams->storeComputeMap) {
304  ComputeMap *computeMap = ComputeMap::Object();
305  computeMap->saveComputeMap(simParams->computeMapFilename);
306  }
307 }
308 
// State flag for the disabled spanning-tree-rebuild code in
// updateLocalComputes5() below; compiled out together with it.
309 #if 0
310 int firstphase = 1;
311 #endif
312 
// Final per-PE phase of the compute-map update.  NOTE(review): this block is
// badly garbled by extraction — the name line (presumably
// ComputeMgr::updateLocalComputes5()), the statements inside the
// `! CkMyRank()` branch, and most of the disabled `#if 0` section were lost
// (gaps in the embedded numbering).  Recover from the repository source
// before editing.
313 void
315 {
316  if ( ! CkMyRank() ) {
319  }
320 
321  // we always use the centralized building of spanning tree
322  // distributed building of ST called in Node.C only
325 
326  // this code needs to be turned on if we want to
327  // shift the creation of ST to the load balancer
328 
329 #if 0
331  {
332  if (firstphase)
334  else
335  if (CkMyPe() == 0)
337 
338  firstphase = 0;
339  }
340 #endif
341 
 // PE 0 signals completion once everything above has quiesced.
342  if (!CkMyPe())
343  CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
344 }
345 
// Notify the requester of updateComputes() that the update is finished by
// sending an empty message to the saved entry point on PE 0.
// NOTE(review): the signature line was lost in extraction — this is the body
// of ComputeMgr::doneUpdateLocalComputes(), per the CkIndex reference above.
347 {
348 
349 // if (!--updateComputesCount) {
350  DebugM(4, "doneUpdateLocalComputes on Pe("<<CkMyPe()<<")\n");
 // Zero-size message: the notification itself is the payload.
351  void *msg = CkAllocMsg(0,0,0);
352  CkSendMsgBranch(updateComputesReturnEP,msg,0,updateComputesReturnChareID);
353 // }
354 }
355 
356 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
357 // Helper functions for creating and getting pointers to CUDA computes
360 }
361 
364 }
365 
366 #ifdef BONDED_CUDA
367 ComputeBondedCUDA* getComputeBondedCUDA() {
368  return ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
369 }
370 
371 ComputeBondedCUDA* createComputeBondedCUDA(ComputeID c, ComputeMgr* computeMgr) {
372  return ComputeCUDAMgr::getComputeCUDAMgr()->createComputeBondedCUDA(c, computeMgr);
373 }
374 #endif
375 #endif
376 
377 //
// Instantiate (or register with a GPU aggregate) the compute object with
// ID i as described by the compute map, then for most CPU computes register
// it with the map and call initialize().
// NOTE(review): this listing came from a Doxygen page capture; several
// `case` labels and a few statements were lost in extraction (gaps in the
// embedded original line numbers mark the missing lines) — recover them from
// the repository source before editing.
378 void
379 ComputeMgr::createCompute(ComputeID i, ComputeMap *map)
380 {
381  Compute *c;
382  PatchID pid2[2];
383  PatchIDList pids;
384  int trans2[2];
386 
387  PatchID pid8[8];
388  int trans8[8];
389 #ifdef NODEGROUP_FORCE_REGISTER
390  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
391  PatchData *patchData = cpdata.ckLocalBranch();
392  suspendCounter=&(patchData->suspendCounter);
393 #endif
394 
395  switch ( map->type(i) )
396  {
 // (missing `case` label — from the body this is the nonbonded self
 // compute: registered with the CUDA aggregate on GPU builds, otherwise
 // created as ComputeNonbondedSelf)
398 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
399  getCudaComputeNonbonded()->registerComputeSelf(i, map->computeData[i].pids[0].pid);
400 #else
401  c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
402  computeNonbondedWorkArrays,
403  map->partition(i),map->partition(i)+1,
404  map->numPartitions(i)); // unknown delete
405  map->registerCompute(i,c);
406  c->initialize();
407 #endif
408  break;
409  case computeLCPOType:
410  for (int j = 0; j < 8; j++) {
411  pid8[j] = map->computeData[i].pids[j].pid;
412  trans8[j] = map->computeData[i].pids[j].trans;
413  }
414  c = new ComputeLCPO(i,pid8,trans8,
415  computeNonbondedWorkArrays,
416  map->partition(i),map->partition(i)+1,
417  map->numPartitions(i), 8);
418  map->registerCompute(i,c);
419  c->initialize();
420 
421  break;
 // (missing `case` label — from the body this is the nonbonded pair
 // compute over two patches)
423  pid2[0] = map->computeData[i].pids[0].pid;
424  trans2[0] = map->computeData[i].pids[0].trans;
425  pid2[1] = map->computeData[i].pids[1].pid;
426  trans2[1] = map->computeData[i].pids[1].trans;
427 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
428  getCudaComputeNonbonded()->registerComputePair(i, pid2, trans2);
429 #else
430  c = new ComputeNonbondedPair(i,pid2,trans2,
431  computeNonbondedWorkArrays,
432  map->partition(i),map->partition(i)+1,
433  map->numPartitions(i)); // unknown delete
434  map->registerCompute(i,c);
435  c->initialize();
436 #endif
437  break;
 // (missing `case` label and assignment to c here — a CUDA compute whose
 // initialize() is deferred until the end of createComputes())
438 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
441  map->registerCompute(i,c);
442  // NOTE: initialize() is called at the end of createComputes(),
443  // after all computes have been created
444  //c->initialize();
445  break;
446 #ifdef BONDED_CUDA
447  case computeBondedCUDAType:
448  c = createComputeBondedCUDA(i, this);
449  map->registerCompute(i,c);
450  break;
451 #endif
452 #endif
 // Bonded pair/tuple computes: each routes to the bonded CUDA aggregate
 // when the matching bit of simParams->bondedCUDA is set, otherwise a
 // CPU compute object is created.
453  case computeExclsType:
454 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
455  if (simParams->bondedCUDA & 16)
456  {
457  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
458  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
459  } else
460 #endif
461  {
462  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
463  c = new ComputeExcls(i,pids); // unknown delete
464  map->registerCompute(i,c);
465  c->initialize();
466  }
467  break;
468  case computeBondsType:
469 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
470  if (simParams->bondedCUDA & 1)
471  {
472  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
473  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
474  } else
475 #endif
476  {
477  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
478  c = new ComputeBonds(i,pids); // unknown delete
479  map->registerCompute(i,c);
480  c->initialize();
481  }
482  break;
483  case computeAnglesType:
484 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
485  if (simParams->bondedCUDA & 2)
486  {
487  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
488  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
489  } else
490 #endif
491  {
492  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
493  c = new ComputeAngles(i,pids); // unknown delete
494  map->registerCompute(i,c);
495  c->initialize();
496  }
497  break;
 // (missing `case` label — dihedrals, per the ComputeDihedrals body)
499 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
500  if (simParams->bondedCUDA & 4)
501  {
502  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
503  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
504  } else
505 #endif
506  {
507  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
508  c = new ComputeDihedrals(i,pids); // unknown delete
509  map->registerCompute(i,c);
510  c->initialize();
511  }
512  break;
 // (missing `case` label — impropers, per the ComputeImpropers body)
514 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
515  if (simParams->bondedCUDA & 8)
516  {
517  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
518  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
519  } else
520 #endif
521  {
522  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
523  c = new ComputeImpropers(i,pids); // unknown delete
524  map->registerCompute(i,c);
525  c->initialize();
526  }
527  break;
528  case computeTholeType:
529  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
530  c = new ComputeThole(i,pids); // unknown delete
531  map->registerCompute(i,c);
532  c->initialize();
533  break;
534  case computeAnisoType:
535  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
536  c = new ComputeAniso(i,pids); // unknown delete
537  map->registerCompute(i,c);
538  c->initialize();
539  break;
 // (missing `case` label — crossterms, per the ComputeCrossterms body)
541 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
542  if (simParams->bondedCUDA & 32)
543  {
544  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
545  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
546  } else
547 #endif
548  {
549  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
550  c = new ComputeCrossterms(i,pids); // unknown delete
551  map->registerCompute(i,c);
552  c->initialize();
553  }
554  break;
555  // JLai
557  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
558  c = new ComputeGromacsPair(i,pids); // unknown delete
559  map->registerCompute(i,c);
560  c->initialize();
561  break;
563  c = new ComputeSelfGromacsPair(i,map->computeData[i].pids[0].pid); // unknown delete
564  map->registerCompute(i,c);
565  c->initialize();
566  break;
567  // End of JLai
 // Self (single-patch) variants of the bonded computes; same bondedCUDA
 // bit routing as above, with the corresponding `case` labels missing.
569 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
570  if (simParams->bondedCUDA & 16)
571  {
572  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
573  } else
574 #endif
575  {
576  c = new ComputeSelfExcls(i,map->computeData[i].pids[0].pid);
577  map->registerCompute(i,c);
578  c->initialize();
579  }
580  break;
582 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
583  if (simParams->bondedCUDA & 1)
584  {
585  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
586  } else
587 #endif
588  {
589  c = new ComputeSelfBonds(i,map->computeData[i].pids[0].pid);
590  map->registerCompute(i,c);
591  c->initialize();
592  }
593  break;
595 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
596  if (simParams->bondedCUDA & 2)
597  {
598  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
599  } else
600 #endif
601  {
602  c = new ComputeSelfAngles(i,map->computeData[i].pids[0].pid);
603  map->registerCompute(i,c);
604  c->initialize();
605  }
606  break;
608 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
609  if (simParams->bondedCUDA & 4)
610  {
611  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
612  } else
613 #endif
614  {
615  c = new ComputeSelfDihedrals(i,map->computeData[i].pids[0].pid);
616  map->registerCompute(i,c);
617  c->initialize();
618  }
619  break;
621 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
622  if (simParams->bondedCUDA & 8)
623  {
624  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
625  } else
626 #endif
627  {
628  c = new ComputeSelfImpropers(i,map->computeData[i].pids[0].pid);
629  map->registerCompute(i,c);
630  c->initialize();
631  }
632  break;
634  c = new ComputeSelfThole(i,map->computeData[i].pids[0].pid);
635  map->registerCompute(i,c);
636  c->initialize();
637  break;
639  c = new ComputeSelfAniso(i,map->computeData[i].pids[0].pid);
640  map->registerCompute(i,c);
641  c->initialize();
642  break;
644 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
645  if (simParams->bondedCUDA & 32)
646  {
647  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
648  } else
649 #endif
650  {
651  c = new ComputeSelfCrossterms(i,map->computeData[i].pids[0].pid);
652  map->registerCompute(i,c);
653  c->initialize();
654  }
655  break;
 // Long-range electrostatics and miscellaneous computes.
656 #ifdef DPMTA
657  case computeDPMTAType:
658  c = new ComputeDPMTA(i); // unknown delete
659  map->registerCompute(i,c);
660  c->initialize();
661  break;
662 #endif
663 #ifdef DPME
664  case computeDPMEType:
665  c = computeDPMEObject = new ComputeDPME(i,this); // unknown delete
666  map->registerCompute(i,c);
667  c->initialize();
668  break;
669 #endif
670  case computePmeType:
671  c = new ComputePme(i,map->computeData[i].pids[0].pid); // unknown delete
672  map->registerCompute(i,c);
673  c->initialize();
674  break;
675 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
676  case computePmeCUDAType:
677  // PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
678  // c = new ComputePmeCUDA(i, pids);
679  c = new ComputePmeCUDA(i, map->computeData[i].pids[0].pid);
680  map->registerCompute(i,c);
681  c->initialize();
682  break;
683 #endif
684  case computeEwaldType:
685  c = computeEwaldObject = new ComputeEwald(i,this); // unknown delete
686  map->registerCompute(i,c);
687  c->initialize();
688  break;
690  c = new ComputeFullDirect(i); // unknown delete
691  map->registerCompute(i,c);
692  c->initialize();
693  break;
694  case computeGlobalType:
695  c = computeGlobalObject = new ComputeGlobal(i,this); // unknown delete
696  map->registerCompute(i,c);
697  c->initialize();
698  break;
699  case computeStirType:
700  c = new ComputeStir(i,map->computeData[i].pids[0].pid); // unknown delete
701  map->registerCompute(i,c);
702  c->initialize();
703  break;
704  case computeExtType:
705  c = new ComputeExt(i); // unknown delete
706  map->registerCompute(i,c);
707  c->initialize();
708  break;
709  case computeQMType:
710  c = new ComputeQM(i);
711  map->registerCompute(i,c);
712  c->initialize();
713  break;
714  case computeGBISserType: //gbis serial
715  c = new ComputeGBISser(i);
716  map->registerCompute(i,c);
717  c->initialize();
718  break;
719  case computeFmmType: // FMM serial
720  c = new ComputeFmmSerial(i);
721  map->registerCompute(i,c);
722  c->initialize();
723  break;
724  case computeMsmSerialType: // MSM serial
725  c = new ComputeMsmSerial(i);
726  map->registerCompute(i,c);
727  c->initialize();
728  break;
729  case computeLjPmeSerialType: // LJ-PME serial
730  c = new ComputeLjPmeSerial(i);
731  map->registerCompute(i,c);
732  c->initialize();
733  break;
734 #ifdef CHARM_HAS_MSA
735  case computeMsmMsaType: // MSM parallel long-range part using MSA
736  c = new ComputeMsmMsa(i);
737  map->registerCompute(i,c);
738  c->initialize();
739  break;
740 #endif
741  case computeMsmType: // MSM parallel
742  c = new ComputeMsm(i);
743  map->registerCompute(i,c);
744  c->initialize();
745  break;
746  case computeEFieldType:
747  c = new ComputeEField(i,map->computeData[i].pids[0].pid); // unknown delete
748  map->registerCompute(i,c);
749  c->initialize();
750  break;
751  /* BEGIN gf */
753  c = new ComputeGridForce(i,map->computeData[i].pids[0].pid);
754  map->registerCompute(i,c);
755  c->initialize();
756  break;
757  /* END gf */
759  c = new ComputeSphericalBC(i,map->computeData[i].pids[0].pid); // unknown delete
760  map->registerCompute(i,c);
761  c->initialize();
762  break;
764  c = new ComputeCylindricalBC(i,map->computeData[i].pids[0].pid); // unknown delete
765  map->registerCompute(i,c);
766  c->initialize();
767  break;
768  case computeTclBCType:
769  c = new ComputeTclBC(i); // unknown delete
770  map->registerCompute(i,c);
771  c->initialize();
772  break;
774  c = new ComputeRestraints(i,map->computeData[i].pids[0].pid); // unknown delete
775  map->registerCompute(i,c);
776  c->initialize();
777  break;
779  c = new ComputeConsForce(i,map->computeData[i].pids[0].pid);
780  map->registerCompute(i,c);
781  c->initialize();
782  break;
784  c = new ComputeConsTorque(i,map->computeData[i].pids[0].pid);
785  map->registerCompute(i,c);
786  c->initialize();
787  break;
788  default:
789  NAMD_bug("Unknown compute type in ComputeMgr::createCompute().");
790  break;
791  }
792 }
793 
// Register one Projections user event per compute object so traced runs can
// attribute time to individual computes (TRACE_COMPUTE_OBJECTS builds only;
// otherwise a no-op).  NOTE(review): recovered from a Doxygen capture — the
// function's signature line and several `case` labels are missing (gaps in
// the embedded numbering); the NAMD_bug message below gives the function
// name.  Recover the missing lines from the repository source before editing.
795 {
796 #ifdef TRACE_COMPUTE_OBJECTS
798  PatchMap *pmap = PatchMap::Object();
799  char user_des[50];
800  int p1, p2;
801  int adim, bdim, cdim;
802  int t1, t2;
803  int x1, y1, z1, x2, y2, z2;
804  int dx, dy, dz;
805  for (int i=0; i<map->numComputes(); i++)
806  {
807  memset(user_des, 0, 50);
 // Build a human-readable event name for each compute type; for pair
 // computes the name encodes the inter-patch grid distance (dx,dy,dz).
808  switch ( map->type(i) )
809  {
811  sprintf(user_des, "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
812  break;
813  case computeLCPOType:
814  sprintf(user_des, "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
815  break;
817  adim = pmap->gridsize_a();
818  bdim = pmap->gridsize_b();
819  cdim = pmap->gridsize_c();
820  p1 = map->pid(i, 0);
821  t1 = map->trans(i, 0);
822  x1 = pmap->index_a(p1) + adim * Lattice::offset_a(t1);
823  y1 = pmap->index_b(p1) + bdim * Lattice::offset_b(t1);
824  z1 = pmap->index_c(p1) + cdim * Lattice::offset_c(t1);
825  p2 = map->pid(i, 1);
826  t2 = map->trans(i, 1);
827  x2 = pmap->index_a(p2) + adim * Lattice::offset_a(t2);
828  y2 = pmap->index_b(p2) + bdim * Lattice::offset_b(t2);
829  z2 = pmap->index_c(p2) + cdim * Lattice::offset_c(t2);
830  dx = abs(x1-x2);
831  dy = abs(y1-y2);
832  dz = abs(z1-z2);
833  sprintf(user_des, "computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
834  break;
835 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
836 #ifdef BONDED_CUDA
837  case computeBondedCUDAType:
838  sprintf(user_des, "computeBondedCUDAType_%d", i);
839  break;
840 #endif
841 #endif
842  case computeExclsType:
843  sprintf(user_des, "computeExclsType_%d", i);
844  break;
845  case computeBondsType:
846  sprintf(user_des, "computeBondsType_%d", i);
847  break;
848  case computeAnglesType:
849  sprintf(user_des, "computeAnglesType_%d", i);
850  break;
852  sprintf(user_des, "computeDihedralsType_%d", i);
853  break;
855  sprintf(user_des, "computeImpropersType_%d", i);
856  break;
857  case computeTholeType:
858  sprintf(user_des, "computeTholeType_%d", i);
859  break;
860  case computeAnisoType:
861  sprintf(user_des, "computeAnisoType_%d", i);
862  break;
864  sprintf(user_des, "computeCrosstermsType_%d", i);
865  break;
867  sprintf(user_des, "computeSelfExclsType_%d", i);
868  break;
870  sprintf(user_des, "computeSelfBondsType_%d", i);
871  break;
873  sprintf(user_des, "computeSelfAnglesType_%d", i);
874  break;
876  sprintf(user_des, "computeSelfDihedralsType_%d", i);
877  break;
879  sprintf(user_des, "computeSelfImpropersType_%d", i);
880  break;
882  sprintf(user_des, "computeSelfTholeType_%d", i);
883  break;
885  sprintf(user_des, "computeSelfAnisoType_%d", i);
886  break;
888  sprintf(user_des, "computeSelfCrosstermsType_%d", i);
889  break;
890 #ifdef DPMTA
891  case computeDPMTAType:
892  sprintf(user_des, "computeDPMTAType_%d", i);
893  break;
894 #endif
895 #ifdef DPME
896  case computeDPMEType:
897  sprintf(user_des, "computeDPMEType_%d", i);
898  break;
899 #endif
900  case computePmeType:
901  sprintf(user_des, "computePMEType_%d", i);
902  break;
903 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
904  case computePmeCUDAType:
905  sprintf(user_des, "computePMECUDAType_%d", i);
906  break;
907 #endif
908  case computeEwaldType:
909  sprintf(user_des, "computeEwaldType_%d", i);
910  break;
912  sprintf(user_des, "computeFullDirectType_%d", i);
913  break;
914  case computeGlobalType:
915  sprintf(user_des, "computeGlobalType_%d", i);
916  break;
917  case computeStirType:
918  sprintf(user_des, "computeStirType_%d", i);
919  break;
920  case computeExtType:
921  sprintf(user_des, "computeExtType_%d", i);
922  break;
923  case computeQMType:
924  sprintf(user_des, "computeQMType_%d", i);
925  break;
926  case computeEFieldType:
927  sprintf(user_des, "computeEFieldType_%d", i);
928  break;
929  /* BEGIN gf */
931  sprintf(user_des, "computeGridForceType_%d", i);
932  break;
933  /* END gf */
935  sprintf(user_des, "computeSphericalBCType_%d", i);
936  break;
938  sprintf(user_des, "computeCylindricalBCType_%d", i);
939  break;
940  case computeTclBCType:
941  sprintf(user_des, "computeTclBCType_%d", i);
942  break;
944  sprintf(user_des, "computeRestraintsType_%d", i);
945  break;
947  sprintf(user_des, "computeConsForceType_%d", i);
948  break;
950  sprintf(user_des, "computeConsTorqueType_%d", i);
951  break;
952  default:
953  NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
954  break;
955  }
 // Copy the name into a heap buffer that outlives this function (see the
 // original comment below about traceRegisterUserEvent not copying).
956  int user_des_len = strlen(user_des);
957  char *user_des_cst = new char[user_des_len+1];
958  memcpy(user_des_cst, user_des, user_des_len);
959  user_des_cst[user_des_len] = 0;
960  //Since the argument in traceRegisterUserEvent is supposed
961  //to be a const string which will not be copied inside the
962  //function when a new user event is created, user_des_cst
963  //has to be allocated in heap.
964  int reEvenId = traceRegisterUserEvent(user_des_cst, TRACE_COMPOBJ_IDOFFSET+i);
965  //printf("Register user event (%s) with id (%d)\n", user_des, reEvenId);
966  }
967 #else
968  return;
969 #endif
970 }
971 
972 void
974 {
975 // #ifdef NAMD_CUDA
976 // int ComputePmeCUDACounter = 0;
977 // #endif
978  Node *node = Node::Object();
980  int myNode = node->myid();
981 
982  if ( simParams->globalForcesOn && !myNode )
983  {
984  DebugM(4,"Mgr running on Node "<<CkMyPe()<<"\n");
985  /* create a master server to allow multiple masters */
986  masterServerObject = new GlobalMasterServer(this,
987  PatchMap::Object()->numNodesWithPatches());
988 
989  #ifdef NODEGROUP_FORCE_REGISTER
990  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
991  PatchData *patchData = cpdata.ckLocalBranch();
992  patchData->master_mgr = this;
993  #endif
994 
995  /* create the individual global masters */
996  // masterServerObject->addClient(new GlobalMasterTest());
997  if (simParams->tclForcesOn)
998  masterServerObject->addClient(new GlobalMasterTcl());
999  if (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces) )
1000  masterServerObject->addClient(new GlobalMasterIMD());
1001  // SMD is implemented on GPU resident version of NAMD (NAMD3)
1002  if (simParams->SMDOn && !simParams->CUDASOAintegrateMode)
1003  masterServerObject->addClient(
1004  new GlobalMasterSMD(simParams->SMDk, simParams->SMDk2,
1005  simParams->SMDVel,
1006  simParams->SMDDir, simParams->SMDOutputFreq,
1007  simParams->firstTimestep, simParams->SMDFile,
1008  node->molecule->numAtoms)
1009  );
1010 
1011  if (simParams->symmetryOn &&
1012  (simParams->firstTimestep < simParams->symmetryLastStep ||
1013  simParams->symmetryLastStep == -1))
1014  masterServerObject->addClient(new GlobalMasterSymmetry());
1015  if (simParams->TMDOn)
1016  masterServerObject->addClient(new GlobalMasterTMD());
1017  if (simParams->miscForcesOn)
1018  masterServerObject->addClient(new GlobalMasterMisc());
1019  if ( simParams->freeEnergyOn )
1020  masterServerObject->addClient(new GlobalMasterFreeEnergy());
1021  if ( simParams->colvarsOn )
1022  masterServerObject->addClient(new GlobalMasterColvars());
1023 
1024  }
1025 
1026  if ( !myNode && simParams->IMDon && (simParams->IMDignore || simParams->IMDignoreForces) ) {
1027  // GlobalMasterIMD constructor saves pointer to node->IMDOutput object
1028  new GlobalMasterIMD();
1029  }
1030 
1031 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1032  bool deviceIsMine = ( deviceCUDA->getMasterPe() == CkMyPe() );
1033 #ifdef BONDED_CUDA
1034  // Place bonded forces on Pe different from non-bonded forces
1035  int bondedMasterPe = deviceCUDA->getMasterPe();
1036  // for (int i=0;i < deviceCUDA->getNumPesSharingDevice();i++) {
1037  // int pe = deviceCUDA->getPesSharingDevice(i);
1038  // if (pe != deviceCUDA->getMasterPe()) {
1039  // bondedMasterPe = pe;
1040  // }
1041  // }
1042  bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
1043 #endif
1044 #endif
1045 
1046  for (int i=0; i < map->nComputes; i++)
1047  {
1048  if ( ! ( i % 100 ) )
1049  {
1050  }
1051 
1052 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1053  switch ( map->type(i) )
1054  {
1055  // case computePmeCUDAType:
1056  // // Only create single ComputePmeCUDA object per Pe
1057  // if ( map->computeData[i].node != myNode ) continue;
1058  // if (ComputePmeCUDACounter > 0) continue;
1059  // ComputePmeCUDACounter++;
1060  // break;
1062  if ( ! deviceIsMine ) continue;
1063  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1064  break;
1065 
1067  if ( ! deviceIsMine ) continue;
1068  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1069  break;
1070 
1071 #ifdef BONDED_CUDA
1072  case computeSelfBondsType:
1073  case computeBondsType:
1074  if (simParams->bondedCUDA & 1) {
1075  if ( ! deviceIsMineBonded ) continue;
1076  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1077  } else {
1078  if ( map->computeData[i].node != myNode ) continue;
1079  }
1080  break;
1081 
1082  case computeSelfAnglesType:
1083  case computeAnglesType:
1084  if (simParams->bondedCUDA & 2) {
1085  if ( ! deviceIsMineBonded ) continue;
1086  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1087  } else {
1088  if ( map->computeData[i].node != myNode ) continue;
1089  }
1090  break;
1091 
1093  case computeDihedralsType:
1094  if (simParams->bondedCUDA & 4) {
1095  if ( ! deviceIsMineBonded ) continue;
1096  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1097  } else {
1098  if ( map->computeData[i].node != myNode ) continue;
1099  }
1100  break;
1101 
1103  case computeImpropersType:
1104  if (simParams->bondedCUDA & 8) {
1105  if ( ! deviceIsMineBonded ) continue;
1106  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1107  } else {
1108  if ( map->computeData[i].node != myNode ) continue;
1109  }
1110  break;
1111 
1112  case computeSelfExclsType:
1113  case computeExclsType:
1114  if (simParams->bondedCUDA & 16) {
1115  if ( ! deviceIsMineBonded ) continue;
1116  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1117  } else {
1118  if ( map->computeData[i].node != myNode ) continue;
1119  }
1120  break;
1121 
1123  case computeCrosstermsType:
1124  if (simParams->bondedCUDA & 32) {
1125  if ( ! deviceIsMineBonded ) continue;
1126  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1127  } else {
1128  if ( map->computeData[i].node != myNode ) continue;
1129  }
1130  break;
1131 
1132  case computeBondedCUDAType:
1133  if ( ! deviceIsMineBonded ) continue;
1134  if ( map->computeData[i].node != myNode ) continue;
1135  break;
1136 #endif // BONDED_CUDA
1137 
1139  if ( ! deviceIsMine ) continue;
1140 // #ifdef BONDED_CUDA
1141 // case computeBondedCUDAType:
1142 // #endif
1143  default:
1144  if ( map->computeData[i].node != myNode ) continue;
1145  }
1146 #else // defined(NAMD_CUDA) || defined(NAMD_HIP)
1147  if ( map->computeData[i].node != myNode ) continue;
1148 #endif
1149  DebugM(1,"Compute " << i << '\n');
1150  DebugM(1," node = " << map->computeData[i].node << '\n');
1151  DebugM(1," type = " << map->computeData[i].type << '\n');
1152  DebugM(1," numPids = " << map->computeData[i].numPids << '\n');
1153 // DebugM(1," numPidsAllocated = " << map->computeData[i].numPidsAllocated << '\n');
1154  for (int j=0; j < map->computeData[i].numPids; j++)
1155  {
1156  DebugM(1," pid " << map->computeData[i].pids[j].pid << '\n');
1157  if (!((j+1) % 6))
1158  DebugM(1,'\n');
1159  }
1160  DebugM(1,"\n---------------------------------------");
1161  DebugM(1,"---------------------------------------\n");
1162 
1163  createCompute(i, map);
1164 
1165  }
1166 
1167 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1168  if (deviceIsMine) {
1171  }
1172 #ifdef BONDED_CUDA
1173  if (simParams->bondedCUDA) {
1174  if (deviceIsMineBonded) {
1175  getComputeBondedCUDA()->initialize();
1176  }
1177  }
1178 #endif
1179 #endif
1180 }
1181 
1182 #if 0
// Disabled (#if 0) legacy path: broadcast a GlobalMaster config message to
// every ComputeMgr in the group via the Charm++ group proxy.
void ComputeMgr:: sendComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
{
    (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeGlobalConfig(msg);
}
1187 
// Disabled (#if 0) legacy path: deliver a config message to the local
// ComputeGlobal object; PEs with no home patches may silently drop it.
void ComputeMgr:: recvComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
{
    if ( computeGlobalObject )
    {
      computeGlobalObject->recvConfig(msg);
    }
    // No home patches on this PE: the message is irrelevant here, just free it.
    else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
    else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
}
1197 #endif
1198 #ifdef NODEGROUP_FORCE_REGISTER
1199 #endif
// NOTE(review): extraction lost this function's signature (ComputeMgr.C:1200;
// the trailing index gives void ComputeMgr::sendComputeGlobalData(ComputeGlobalDataMsg *))
// and the declaration of `sp` (presumably a SimParameters pointer) -- restore
// both from the repository before building.
// Routes a PE's GlobalMaster data message. GPU-resident (CUDASOAintegrate)
// path: each PE feeds the node-shared master_mgr directly under the node
// lock, with PE 0 deliberately last (stowSuspendULT() coordinates ordering),
// then picks its results message out of its Ckpv slot and processes it
// inline. Message-driven path: forward the message to ComputeMgr on PE 0.
{
    NAMD_EVENT_START(1, NamdProfileEvent::GM_SEND_COMP_DATA);
    // CkPrintf("*** [%d] Calling sendComputeGlobalData\n", CkMyPe());
#ifdef NODEGROUP_FORCE_REGISTER
    if (sp->CUDASOAintegrate) {
      NAMD_EVENT_START(1, NamdProfileEvent::GM_NODELOCK);
      CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
      PatchData *patchData = cpdata.ckLocalBranch();
      CmiNodeLock &nl = patchData->nodeLock;
      // atomic access to GlobalMasterServer to simulate queueing
      if (CkMyPe() != 0)
      {
        CmiLock(nl);
        //CkPrintf("*** [%d] Acquired nodelock!\n", CkMyPe());
        patchData->master_mgr->recvComputeGlobalData(msg);
        CmiUnlock(nl);
      }
      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_NODELOCK);
      NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
      // Barrier to make sure 0 goes last, since invocation of the clients and
      // message coordination has to happen on PE 0 and the last PE to call
      // recvComputeGlobalData will trigger all of that on itself
      // CmiNodeBarrier();
      // CkPrintf("*** sendComputeGlobalData entering barrier 1 on PE %d \n", CkMyPe());
      stowSuspendULT();

      NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
      if (CkMyPe() == 0)
      {
        CmiLock(nl);
        patchData->master_mgr->recvComputeGlobalData(msg);
        CmiUnlock(nl);
      }
      else
      {
        // All PEs other than 0 wait here while the clients run and the global
        // results messages are prepared and copied into their slots (happens from
        // sendComputeGlobalResults on PE0)
        // CmiNodeBarrier();
        // CkPrintf("before call to stow %d\n",CkMyPe());
        // CkPrintf("*** sendComputeGlobalData barrier 3 on PE %d \n", CkMyPe());
        stowSuspendULT();
        // CkPrintf("*** sendComputeGlobalData out barrier 3 on PE %d \n", CkMyPe());
        // CkPrintf("returned from call to stow %d\n",CkMyPe());
      }
      // Get the message from the slot for this PE and resume execution
      ComputeGlobalResultsMsg* resultsMsg = CkpvAccess(ComputeGlobalResultsMsg_instance);
      DebugM(3,"["<<CkMyPe()<<"] calling recvComputeGlobalResults\n");
      recvComputeGlobalResults(resultsMsg);
    } else {
#endif
      CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
      DebugM(3,"["<<CkMyPe()<<"] msg to recvComputeGlobalData\n");
      cm[0].recvComputeGlobalData(msg);
#ifdef NODEGROUP_FORCE_REGISTER
    }
#endif
    NAMD_EVENT_STOP(1, NamdProfileEvent::GM_SEND_COMP_DATA);
    DebugM(3,"["<<CkMyPe()<<"] done sendComputeGlobalData\n");
}
1262 
// NOTE(review): the signature line (ComputeMgr.C:1263; per the index,
// void ComputeMgr::recvComputeGlobalData(ComputeGlobalDataMsg *)) was lost in
// extraction -- restore from the repository.
// PE 0 entry method: hand incoming per-PE data to the GlobalMasterServer.
{
    NAMD_EVENT_START(1, NamdProfileEvent::GM_RECV_COMP_DATA);
    if (masterServerObject) // make sure it has been initialized
    {
      DebugM(3, "["<<CkMyPe()<<"] recvComputeGlobalData calling recvData\n");
      masterServerObject->recvData(msg);
    }
    else NAMD_die("ComputeMgr::masterServerObject is NULL!");
    NAMD_EVENT_STOP(1, NamdProfileEvent::GM_RECV_COMP_DATA);
}
1274 
// NOTE(review): lost in extraction: the signature (ComputeMgr.C:1275;
// void ComputeMgr::sendComputeGlobalResults(ComputeGlobalResultsMsg *) per the
// index), one statement before the first DebugM (line 1278), and the
// declaration of `sp` (line 1282) -- restore from the repository.
// GPU-resident path (runs on PE 0 only): clone the results message into every
// PE's Ckpv slot (assumes a single-node multicore build), then release the
// PEs waiting in sendComputeGlobalData via stowSuspendULT(). Message-driven
// path: broadcast the message to all ComputeMgrs.
{
    NAMD_EVENT_START(1, NamdProfileEvent::GM_SEND_COMP_RESULTS);
    DebugM(3,"["<< CkMyPe()<< "] sendComputeGlobalResults seq "<<msg->seq<<"\n");

#ifdef NODEGROUP_FORCE_REGISTER
    if (sp->CUDASOAintegrate) {
      // Only PE 0 runs this code
      // Copy the message into each PE's slot (Assumes single-node with multicore build)
      for (int pe = 0; pe < CkMyNodeSize(); pe++) {
        if(CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe)!=nullptr)
        {
          // make sure msg delete happens on the same PE as made the msg to
          // avoid unbounded memory pool growth for these unsent messages
          delete CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe);
        }
        CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe) = (ComputeGlobalResultsMsg*)CkCopyMsg((void**)&msg);
      }
      delete msg;
      // Now that copies are done, trigger the barrier to resume the other PEs
      // (most other PEs call this barrier from sendComputeGlobalData)
      // CkPrintf("this is where we would call awaken\n",CkMyPe());
      //CmiNodeBarrier();
      // CkPrintf("*** sendComputeGlobalResults entering barrier 2 on PE %d \n", CkMyPe());
      stowSuspendULT();
      //thisProxy.recvComputeGlobalResults(msg);
    } else {
#endif
      DebugM(3,"["<< CkMyPe() << "] ComputeMgr::sendComputeGlobalResults invoking bcast recvComputeGlobalResults\n");
      thisProxy.recvComputeGlobalResults(msg);
#ifdef NODEGROUP_FORCE_REGISTER
    }
#endif
    NAMD_EVENT_STOP(1, NamdProfileEvent::GM_SEND_COMP_RESULTS);
}
1312 
// NOTE(review): lost in extraction: the signature (ComputeMgr.C:1313;
// void ComputeMgr::enableComputeGlobalResults() per the index) and the
// interior of the loop (lines ~1319-1323 -- presumably the seq-match test
// that delivers a queued results message whose seq equals
// computeGlobalResultsMsgSeq). The visible braces are unbalanced because of
// those dropped lines; restore from the repository before building.
{
    NAMD_EVENT_START(1, NamdProfileEvent::GM_ENABLE_COMP_RESULTS);
    DebugM(3,"["<<CkMyPe() <<"] enableComputeGlobalResults for "<< computeGlobalResultsMsgs.size() <<" messages seq "<< computeGlobalResultsMsgSeq <<"\n");
    for ( int i=0; i<computeGlobalResultsMsgs.size(); ++i ) {
        break;
      }
    }
    NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ENABLE_COMP_RESULTS);
    DebugM(3,"["<<CkMyPe() <<"] exiting enableComputeGlobalResults for "<< computeGlobalResultsMsgs.size() <<" messages seq "<< computeGlobalResultsMsgSeq <<"\n");
}
1329 
// NOTE(review): lost in extraction: the signature (ComputeMgr.C:1330;
// void ComputeMgr::recvComputeGlobalResults(ComputeGlobalResultsMsg *) per the
// index), the delivery call inside the seq-match branch (line 1339 --
// presumably computeGlobalObject->recvResults(msg)), and the queueing
// statement in the else branch (line 1344) -- restore from the repository.
// Delivers results for the current sequence immediately (with urgent sends
// enabled); out-of-sequence messages are queued; PEs without home patches
// drop the message.
{
    NAMD_EVENT_START(1, NamdProfileEvent::GM_RCV_COMP_RESULTS);
    DebugM(3,"[" << CkMyPe() << "] recvComputeGlobalResults msg->seq "<< msg->seq << " computeGlobalResultsMsgSeq " << computeGlobalResultsMsgSeq << "\n");
    if ( computeGlobalObject )
    {
      if ( msg->seq == computeGlobalResultsMsgSeq ) {
        CmiEnableUrgentSend(1);

        // CkPrintf("*** past recvResults on PE %d \n", CkMyPe());
        CmiEnableUrgentSend(0);
      } else {
        // CkPrintf("*** Adding recvComputeGlobalResults on PE %d \n", CkMyPe());
      }
    }
    else if ( ! (PatchMap::Object())->numHomePatches() )
    {
      // CkPrintf("*** ignoring recvComputeGlobalResults on PE %d due to no home patch\n", CkMyPe());
      delete msg;
    }
    else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
    NAMD_EVENT_STOP(1, NamdProfileEvent::GM_RCV_COMP_RESULTS);
    // CkPrintf("*** exiting recvComputeGlobalResults on PE %d \n", CkMyPe());
}
1356 
1357 /*
1358  * Begin Ewald messages
1359  */
// NOTE(review): signature lost in extraction (ComputeMgr.C:1360;
// void ComputeMgr::sendComputeEwaldData(ComputeEwaldMsg *) per the index).
// Forwards Ewald data to the ComputeMgr on the Ewald master node; PEs with
// no home patches may legitimately drop the message.
{
    if (computeEwaldObject)
    {
      int node = computeEwaldObject->getMasterNode();
      CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
      cm[node].recvComputeEwaldData(msg);
    }
    else if (!PatchMap::Object()->numHomePatches())
    {
      // CkPrintf("skipping message on Pe(%d)\n", CkMyPe());
      delete msg;
    }
    else NAMD_die("ComputeMgr::computeEwaldObject is NULL!");
}
1375 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1376;
// void ComputeMgr::recvComputeEwaldData(ComputeEwaldMsg *) per the index).
// Master-node entry method: hand the data message to the Ewald compute.
{
    if (computeEwaldObject)
      computeEwaldObject->recvData(msg);
    else NAMD_die("ComputeMgr::computeEwaldObject in recvData is NULL!");
}
1382 
// NOTE(review): signature lost in extraction (presumably
// void ComputeMgr::sendComputeEwaldResults(ComputeEwaldMsg *), ComputeMgr.C:1383).
// Broadcast Ewald results to every ComputeMgr in the group.
{
    (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeEwaldResults(msg);
}
1387 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1388;
// void ComputeMgr::recvComputeEwaldResults(ComputeEwaldMsg *) per the index).
// Delivers Ewald results with urgent sends enabled; PEs with no home patches
// drop the message.
{
    if (computeEwaldObject) {
      CmiEnableUrgentSend(1);
      computeEwaldObject->recvResults(msg);
      CmiEnableUrgentSend(0);
    }
    else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
    else NAMD_die("ComputeMgr::computeEwaldObject in recvResults is NULL!");
}
1398 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1399;
// void ComputeMgr::sendComputeDPMEData(ComputeDPMEDataMsg *) per the index).
// Forwards DPME data to the DPME master node (only when built with DPME).
{
    if ( computeDPMEObject )
    {
#ifdef DPME
      int node = computeDPMEObject->getMasterNode();
      CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
      cm.recvComputeDPMEData(msg,node);
#endif
    }
    else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
    else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
}
1412 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1413;
// void ComputeMgr::recvComputeDPMEData(ComputeDPMEDataMsg *) per the index).
// Master-node entry method: hand the data message to the DPME compute.
{
    if ( computeDPMEObject )
    {
#ifdef DPME
      computeDPMEObject->recvData(msg);
#endif
    }
    else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
    else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
}
1424 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1425;
// void ComputeMgr::sendComputeDPMEResults(ComputeDPMEResultsMsg *, int node)
// per the index). Sends DPME results to the named node's ComputeMgr.
{
    CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
    cm[node].recvComputeDPMEResults(msg);
}
1430 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1431;
// void ComputeMgr::recvComputeDPMEResults(ComputeDPMEResultsMsg *) per the
// index). Delivers DPME results to the local DPME compute.
{
    if ( computeDPMEObject )
    {
#ifdef DPME
      computeDPMEObject->recvResults(msg);
#endif
    }
    else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
    else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
}
1442 
1443 /*
1444  * Molecule now has only one instance per process, so this must only
1445  * be done once per process.
1446 
1447  * TODO: A molecule manager nodegroup would be the natural place
1448  * for entry methods that alter the molecule like this.
1449  */
// NOTE(review): signature lost in extraction (ComputeMgr.C:1450;
// void ComputeMgr::recvComputeConsForceMsg(ComputeConsForceMsg *) per the
// index). Installs a new constant-force table in the process-wide Molecule:
// consForceIndexes maps atom id -> slot in consForce (-1 = no force applied);
// consForce holds one Vector per affected atom. Only rank 0 mutates the
// Molecule since there is a single Molecule instance per process.
{
    Molecule *m = Node::Object()->molecule;
    if(CkMyRank()==0){ // there is only one molecule per process
      // Drop any previously installed table before rebuilding.
      delete [] m->consForceIndexes;
      delete [] m->consForce;
      int n = msg->aid.size();
      if (n > 0)
      {
        m->consForceIndexes = new int32[m->numAtoms];
        m->consForce = new Vector[n];
        int i;
        // Default every atom to "no constant force".
        for (i=0; i<m->numAtoms; i++) m->consForceIndexes[i] = -1;
        for (i=0; i<msg->aid.size(); i++)
        {
          m->consForceIndexes[msg->aid[i]] = i;
          m->consForce[i] = msg->f[i];
        }
      }
      else
      {
        // Empty atom list: clear the table entirely.
        m->consForceIndexes = NULL;
        m->consForce = NULL;
      }
    }
    delete msg;
#ifdef NODEGROUP_FORCE_REGISTER
    if(CkMyPe()==0)
    {
      // Notify GPU-resident kernels that host-side force data changed.
      CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
      cpdata.setDeviceKernelUpdateCounter();
    }
#endif
}
1484 
// Dynamically loads a CudaGlobalMaster client shared library and registers it
// with the GPU-resident global-master server. args[0] is the library path;
// the full args vector is forwarded to the client's initialize().
// NOTE(review): extraction lost the declarations of `simParams` and `cudaMgr`
// (between the visible lines) and at least one argument line in the
// client->initialize(...) call -- restore from the repository before building.
void ComputeMgr::recvCudaGlobalMasterCreateMsg(std::vector<std::string> args) {
#ifdef NAMD_CUDA
  Node *node = Node::Object();
  if (simParams->CUDASOAintegrate && simParams->useCudaGlobal) {
#ifdef NODEGROUP_FORCE_REGISTER
    // Only the master PE of the device designated for global work proceeds.
    if (deviceCUDA->getMasterPe() == CkMyPe()) {
      if (deviceCUDA->getIsGlobalDevice()) {
        DebugM(3, "Call recvCudaGlobalMasterCreateMsg on master PE " << CkMyPe() << ".\n");
        cudaMgr->createCudaGlobalMaster();
        std::shared_ptr<CudaGlobalMasterClient> client = nullptr;
        const std::string library_name = args[0];
        // Find to see if library_name has been loaded
        std::shared_ptr<dlloader::DLLoader<CudaGlobalMasterClient>> loader = nullptr;
        for (auto it = CudaGlobalMasterClientDlloaders.begin();
             it != CudaGlobalMasterClientDlloaders.end(); ++it) {
          if ((*it)->LibName() == library_name) {
            loader = (*it);
            break;
          }
        }
        // Create a new loader if not found
        if (loader == nullptr) {
          loader = std::shared_ptr<dlloader::DLLoader<CudaGlobalMasterClient>>(new dlloader::DLLoader<CudaGlobalMasterClient>(library_name));
        }
        try {
          iout << iINFO << "Loading library " << library_name
               << " on PE: " << CkMyPe() << "\n" << endi;
          loader->DLOpenLib();
          client = loader->DLGetInstance();
        } catch (std::exception& e) {
          iout << iERROR << "Cannot load the shared library " << library_name << "\n" << endi;
          NAMD_die(e.what());
        }
        // Try to initialize the client
        try {
          client->initialize(args,
                             cudaMgr->getCudaGlobalMaster()->getStream());
          client->subscribe(cudaMgr->getCudaGlobalMaster());
          iout << iINFO << "CudaGlobalMaster client \"" << client->name()
               << "\"" << " initialized\n" << endi;
        } catch (std::exception& e) {
          iout << iERROR << "Cannot initialize the CudaGlobalMaster client from "
               << library_name << "\n" << endi;
          NAMD_die(e.what());
        }
        // Remember the loader so the same library is not dlopen'ed twice.
        CudaGlobalMasterClientDlloaders.push_back(loader);
      } else {
        DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on master PE " <<
               CkMyPe() << " that is not scheduled for GPU-resident global master.\n");
      }
    } else {
      DebugM(3, "Skip recvCudaGlobalMasterCreateMsg on non-master PE " << CkMyPe() << ".\n");
    }
#endif // NODEGROUP_FORCE_REGISTER
  } else {
    if (!(simParams->CUDASOAintegrate)) {
      NAMD_die("GPU-resident mode is not enabled.\n");
    }
    if (!(simParams->useCudaGlobal)) {
      NAMD_die("GPU-resident external forces are not enabled.\n");
    }
  }
  // CmiNodeBarrier();
#endif
}
1553 
// Removes the named CudaGlobalMaster client (args[0]) from the GPU-resident
// global-master server; dies if no client of that name is registered.
// NOTE(review): extraction lost the declarations of `simParams` and `cudaMgr`
// (between the visible lines) -- restore from the repository before building.
void ComputeMgr::recvCudaGlobalMasterRemoveMsg(std::vector<std::string> args) {
#ifdef NAMD_CUDA
  Node *node = Node::Object();
  const std::string client_name_to_remove = args[0];
  if (simParams->CUDASOAintegrate && simParams->useCudaGlobal) {
#ifdef NODEGROUP_FORCE_REGISTER
    // Only the master PE of the device designated for global work proceeds.
    if (deviceCUDA->getMasterPe() == CkMyPe()) {
      if (deviceCUDA->getIsGlobalDevice()) {
        std::shared_ptr<CudaGlobalMasterServer> gm = cudaMgr->getCudaGlobalMaster();
        if (gm) {
          // Linear search of registered clients by name.
          std::shared_ptr<CudaGlobalMasterClient> c = nullptr;
          const std::vector<std::shared_ptr<CudaGlobalMasterClient>>& clients = gm->getClients();
          for (size_t i = 0; i < clients.size(); ++i) {
            if (client_name_to_remove == clients[i]->name()) {
              c = clients[i];
              break;
            }
          }
          if (c) {
            gm->removeClient(c);
            iout << iINFO << "CudaGlobalMasterClient \""
                 << client_name_to_remove << "\" removed\n" << endi;
          } else {
            const std::string error = "CudaGlobalMasterClient \""
              + client_name_to_remove + "\" not found";
            NAMD_die(error.c_str());
          }
        }
      }
    }
#endif // NODEGROUP_FORCE_REGISTER
  } else {
    if (!(simParams->CUDASOAintegrate)) {
      NAMD_die("GPU-resident mode is not enabled.\n");
    }
    if (!(simParams->useCudaGlobal)) {
      NAMD_die("GPU-resident external forces are not enabled.\n");
    }
  }
#endif
}
1597 
// Forwards an update command (args; args[0] is the client name) to the named
// CudaGlobalMaster client, then sends the {name, result} pair back to PE 0
// via recvCudaGlobalMasterUpdateResultMsg.
// NOTE(review): extraction lost the declarations of `simParams` and `cudaMgr`
// (between the visible lines) -- restore from the repository before building.
void ComputeMgr::recvCudaGlobalMasterUpdateMsg(std::vector<std::string> args) {
  // XXX Should this also be for NAMD_HIP ?
#ifdef NAMD_CUDA
  std::vector<std::string> result_args;
  Node *node = Node::Object();
  const std::string client_name_to_update = args[0];
  if (simParams->CUDASOAintegrate && simParams->useCudaGlobal) {
#ifdef NODEGROUP_FORCE_REGISTER
    // Only the master PE of the device designated for global work proceeds.
    if (deviceCUDA->getMasterPe() == CkMyPe()) {
      if (deviceCUDA->getIsGlobalDevice()) {
        std::shared_ptr<CudaGlobalMasterServer> gm = cudaMgr->getCudaGlobalMaster();
        if (gm) {
          // Linear search of registered clients by name.
          std::shared_ptr<CudaGlobalMasterClient> c = nullptr;
          const std::vector<std::shared_ptr<CudaGlobalMasterClient>>& clients = gm->getClients();
          for (size_t i = 0; i < clients.size(); ++i) {
            if (client_name_to_update == clients[i]->name()) {
              c = clients[i];
              break;
            }
          }
          if (c) {
            result_args.push_back(client_name_to_update);
            result_args.push_back(c->updateFromTCLCommand(args));
            iout << iINFO << "CudaGlobalMasterClient \""
                 << client_name_to_update << "\" updated\n" << endi;
          } else {
            const std::string error = "CudaGlobalMasterClient \""
              + client_name_to_update + "\" not found";
            NAMD_die(error.c_str());
          }
        }
      }
    }
#endif // NODEGROUP_FORCE_REGISTER
  } else {
    if (!(simParams->CUDASOAintegrate)) {
      NAMD_die("GPU-resident mode is not enabled.\n");
    }
    if (!(simParams->useCudaGlobal)) {
      NAMD_die("GPU-resident external forces are not enabled.\n");
    }
  }
  // Report the (possibly empty) result back to PE 0 unconditionally.
  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  cm[0].recvCudaGlobalMasterUpdateResultMsg(result_args);
#endif
}
1646 
1647 void ComputeMgr::recvCudaGlobalMasterUpdateResultMsg(std::vector<std::string> args) {
1648  if (CkMyPe() == 0) {
1649  if (!args.empty()) {
1650  CudaGlobalMasterClientUpdateResults[args[0]] = args[1];
1651  }
1652  } else {
1653  const std::string error =
1654  "recvCudaGlobalMasterUpdateResultMsg is called on " +
1655  std::to_string(CkMyPe()) + " but expected on PE 0!\n";
1656  NAMD_bug(error.c_str());
1657  }
1658 }
1659 
1660 std::string ComputeMgr::getCudaGlobalMasterUpdateResult(const std::string& client_name) const {
1661  return CudaGlobalMasterClientUpdateResults.at(client_name);
1662 }
1663 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1664;
// void ComputeMgr::sendYieldDevice(int pe) per the index).
// Asks the ComputeMgr on the given PE to yield the device, identifying the
// caller by its own PE number.
    CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
    cm[pe].recvYieldDevice(CkMyPe());
}
1668 
// NOTE(review): signature lost in extraction (ComputeMgr.C:1669;
// void ComputeMgr::recvYieldDevice(int pe) per the index). Body is
// intentionally empty -- MIC support was the only user of this hook.
    // XXX MIC support was only code using YieldDevice functionality
    // computeNonbondedMICObject->recvYieldDevice(pe);
}
1673 
1674 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
// Charm++ message carrying a CudaComputeNonbonded pointer (and an index) to
// the recv* entry methods below; allocated per send, deleted by the receiver.
// NOTE(review): extraction dropped the `CudaComputeNonbonded* c;` member
// (ComputeMgr.C:1677 per the trailing index) -- the recv methods below
// dereference msg->c, so restore it from the repository.
class CudaComputeNonbondedMsg : public CMessage_CudaComputeNonbondedMsg {
public:
  int i;  // patch index used by recvFinishPatchOnPe
};
1680 
// Fan an "assign patches" request for compute c out to every PE in pes.
// NOTE(review): extraction dropped this function's signature (presumably
// void ComputeMgr::sendAssignPatchesOnPe(std::vector<int>& pes,
// CudaComputeNonbonded* c), mirroring the BONDED_CUDA variant later in this
// file) and the CudaComputeNonbondedMsg allocation line -- restore both.
  for (int i=0;i < pes.size();i++) {
    msg->c = c;
    thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
  }
}
1688 
// Entry method: let the nonbonded compute claim its patches on this PE, then
// free the message. NOTE(review): signature lost in extraction
// (ComputeMgr.C:1689, recvAssignPatchesOnPe(CudaComputeNonbondedMsg*) per the index).
  msg->c->assignPatchesOnPe();
  delete msg;
}
1693 
// Fan a "skip patches" request for compute c out to every PE in pes.
// NOTE(review): extraction dropped the signature and the message allocation
// line -- restore from the repository.
  for (int i=0;i < pes.size();i++) {
    msg->c = c;
    thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
  }
}
1701 
// Entry method: skip this PE's patches for the step, then free the message.
// NOTE(review): signature lost in extraction (ComputeMgr.C:1702,
// recvSkipPatchesOnPe(CudaComputeNonbondedMsg*) per the index).
  msg->c->skipPatchesOnPe();
  delete msg;
}
1706 
// Fan a "finish patches" request for compute c out to every PE in pes.
// NOTE(review): extraction dropped the signature (ComputeMgr.C:1707,
// sendFinishPatchesOnPe per the index), the message allocation, and the
// SET_PRIORITY line -- restore from the repository.
  for (int i=0;i < pes.size();i++) {
    msg->c = c;
    thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
  }
}
1715 
// Entry method: finish all of this PE's patches, then free the message.
// NOTE(review): signature lost in extraction (ComputeMgr.C:1716,
// recvFinishPatchesOnPe(CudaComputeNonbondedMsg*) per the index).
  msg->c->finishPatchesOnPe();
  delete msg;
}
1720 
// Ask one PE to finish a single patch (index i) for compute c.
// NOTE(review): extraction dropped the signature (ComputeMgr.C:1721,
// sendFinishPatchOnPe(int pe, CudaComputeNonbonded *c, int i, PatchID patchID)
// per the index) and the message allocation/priority lines -- restore them.
  msg->c = c;
  msg->i = i;
  thisProxy[pe].recvFinishPatchOnPe(msg);
}
1728 
// Entry method: finish the single patch indexed by msg->i, then free the
// message. NOTE(review): signature lost in extraction (ComputeMgr.C:1729,
// recvFinishPatchOnPe(CudaComputeNonbondedMsg*) per the index).
  msg->c->finishPatchOnPe(msg->i);
  delete msg;
}
1733 
// Send an "open boxes" request to each PE in pes, prioritized to run after
// bonded work (PROXY_DATA_PRIORITY+1), tagged with the compute's sequence.
// NOTE(review): extraction dropped the CudaComputeNonbondedMsg allocation
// line (between the for and SET_PRIORITY) -- restore from the repository.
void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
  for (int i=0;i < pes.size();i++) {
    SET_PRIORITY(msg, c->sequence(), PROXY_DATA_PRIORITY+1); // after bonded
    msg->c = c;
    thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
  }
}
1742 
// Entry method: open this PE's patch boxes, then free the message.
// NOTE(review): signature lost in extraction (ComputeMgr.C:1743,
// recvOpenBoxesOnPe(CudaComputeNonbondedMsg*) per the index).
  msg->c->openBoxesOnPe();
  delete msg;
}
1747 
// Ask one PE to finish reductions for compute c.
// NOTE(review): extraction dropped the signature (ComputeMgr.C:1748,
// sendFinishReductions(int pe, CudaComputeNonbonded *c) per the index) and
// the message allocation line -- restore them.
  msg->c = c;
  thisProxy[pe].recvFinishReductions(msg);
}
1753 
// Entry method: finish reductions, then free the message.
// NOTE(review): signature lost in extraction (ComputeMgr.C:1754,
// recvFinishReductions(CudaComputeNonbondedMsg*) per the index).
  msg->c->finishReductions();
  delete msg;
}
1758 
// Ask one PE to enqueue work for compute c.
// NOTE(review): extraction dropped the signature (ComputeMgr.C:1759,
// sendMessageEnqueueWork(int pe, CudaComputeNonbonded *c) per the index) and
// the message allocation line -- restore them.
  msg->c = c;
  thisProxy[pe].recvMessageEnqueueWork(msg);
}
1764 
// Entry method: enqueue the compute's work, then free the message.
// NOTE(review): signature lost in extraction (ComputeMgr.C:1765,
// recvMessageEnqueueWork(CudaComputeNonbondedMsg*) per the index).
  msg->c->messageEnqueueWork();
  delete msg;
}
1769 
// Ask one PE to launch GPU work for compute c.
// NOTE(review): extraction dropped the signature (ComputeMgr.C:1770,
// sendLaunchWork(int pe, CudaComputeNonbonded *c) per the index) and the
// message allocation line -- restore them.
  msg->c = c;
  thisProxy[pe].recvLaunchWork(msg);
}
1775 
// Entry method: launch the compute's GPU work, then free the message.
// NOTE(review): signature lost in extraction (ComputeMgr.C:1776,
// recvLaunchWork(CudaComputeNonbondedMsg*) per the index).
  msg->c->launchWork();
  delete msg;
}
1780 
// Fan an "unregister boxes" request for compute c out to every PE in pes.
// NOTE(review): extraction dropped the signature (ComputeMgr.C:1781,
// sendUnregisterBoxesOnPe(std::vector<int>& pes, CudaComputeNonbonded *c) per
// the index) and the message allocation line -- restore them.
  for (int i=0;i < pes.size();i++) {
    msg->c = c;
    thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
  }
}
1788 
// Entry method: unregister this PE's patch boxes, then free the message.
// NOTE(review): signature lost in extraction (ComputeMgr.C:1789,
// recvUnregisterBoxesOnPe(CudaComputeNonbondedMsg*) per the index).
  msg->c->unregisterBoxesOnPe();
  delete msg;
}
1793 
1794 #ifdef BONDED_CUDA
1795 
// Charm++ message used to hand a ComputeBondedCUDA pointer (plus an optional
// index) to the bonded recv* entry methods below. Allocated once per send;
// the receiving entry method takes ownership and deletes it.
class ComputeBondedCUDAMsg : public CMessage_ComputeBondedCUDAMsg {
public:
  ComputeBondedCUDA* c;  // target compute; receiver invokes methods on it
  int i;                 // auxiliary index (not used by the handlers visible here)
};
1801 
1802 void ComputeMgr::sendAssignPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1803  for (int i=0;i < pes.size();i++) {
1804  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1805  msg->c = c;
1806  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
1807  }
1808 }
1809 
1810 void ComputeMgr::recvAssignPatchesOnPe(ComputeBondedCUDAMsg *msg) {
1811  msg->c->assignPatchesOnPe();
1812  delete msg;
1813 }
1814 
1815 void ComputeMgr::sendMessageEnqueueWork(int pe, ComputeBondedCUDA* c) {
1816  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1817  msg->c = c;
1818  thisProxy[pe].recvMessageEnqueueWork(msg);
1819 }
1820 
1821 void ComputeMgr::recvMessageEnqueueWork(ComputeBondedCUDAMsg *msg) {
1822  msg->c->messageEnqueueWork();
1823  delete msg;
1824 }
1825 
1826 void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1827  for (int i=0;i < pes.size();i++) {
1828  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
1829  SET_PRIORITY(msg, c->sequence(), PROXY_DATA_PRIORITY);
1830  msg->c = c;
1831  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
1832  }
1833 }
1834 
1835 void ComputeMgr::recvOpenBoxesOnPe(ComputeBondedCUDAMsg *msg) {
1836  msg->c->openBoxesOnPe();
1837  delete msg;
1838 }
1839 
1840 void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1841  for (int i=0;i < pes.size();i++) {
1842  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1843  msg->c = c;
1844  thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
1845  }
1846 }
1847 
1848 void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
1849  msg->c->loadTuplesOnPe();
1850  delete msg;
1851 }
1852 
1853 void ComputeMgr::sendLaunchWork(int pe, ComputeBondedCUDA* c) {
1854  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1855  msg->c = c;
1856  thisProxy[pe].recvLaunchWork(msg);
1857 }
1858 
1859 void ComputeMgr::recvLaunchWork(ComputeBondedCUDAMsg *msg) {
1860  msg->c->launchWork();
1861  delete msg;
1862 }
1863 
1864 void ComputeMgr::sendFinishPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1865  for (int i=0;i < pes.size();i++) {
1866  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
1867  SET_PRIORITY(msg, c->sequence(), COMPUTE_PROXY_PRIORITY);
1868  msg->c = c;
1869  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
1870  }
1871 }
1872 
1873 void ComputeMgr::recvFinishPatchesOnPe(ComputeBondedCUDAMsg *msg) {
1874  msg->c->finishPatchesOnPe();
1875  delete msg;
1876 }
1877 
1878 void ComputeMgr::sendFinishReductions(int pe, ComputeBondedCUDA* c) {
1879  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1880  msg->c = c;
1881  thisProxy[pe].recvFinishReductions(msg);
1882 }
1883 
1884 void ComputeMgr::recvFinishReductions(ComputeBondedCUDAMsg *msg) {
1885  msg->c->finishReductions();
1886  delete msg;
1887 }
1888 
1889 void ComputeMgr::sendUnregisterBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1890  for (int i=0;i < pes.size();i++) {
1891  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1892  msg->c = c;
1893  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
1894  }
1895 }
1896 
1897 void ComputeMgr::recvUnregisterBoxesOnPe(ComputeBondedCUDAMsg *msg) {
1898  msg->c->unregisterBoxesOnPe();
1899  delete msg;
1900 }
1901 
1902 #endif // BONDED_CUDA
1903 
1904 #endif // NAMD_CUDA
1905 
1906 #include "ComputeMgr.def.h"
1907 
static Node * Object()
Definition: Node.h:86
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
Definition: ComputeMgr.C:362
#define COMPUTE_PROXY_PRIORITY
Definition: Priorities.h:71
void recvComputeEwaldData(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1376
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
void updateLocalComputes()
Definition: ComputeMgr.C:213
void checkMap()
Definition: ComputeMap.C:46
#define NAMD_EVENT_STOP(eon, id)
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
void sendYieldDevice(int pe)
Definition: ComputeMgr.C:1664
void recvData(ComputeEwaldMsg *)
Definition: ComputeEwald.C:187
int sequence(void)
Definition: Compute.h:64
int size(void) const
Definition: ResizeArray.h:131
void recvComputeDPMEResults(ComputeDPMEResultsMsg *)
Definition: ComputeMgr.C:1431
void recvResults(ComputeEwaldMsg *)
Definition: ComputeEwald.C:204
void setNewNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:144
void recvResults(ComputeGlobalResultsMsg *)
void recvCudaGlobalMasterUpdateResultMsg(std::vector< std::string > args)
Definition: ComputeMgr.C:1647
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
int proxyRecvSpanning
Definition: ProxyMgr.C:45
int numComputes(void)
Definition: ComputeMap.h:101
void saveComputeMap(const char *fname)
Definition: ComputeMap.C:260
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
Definition: Node.h:78
#define TRACE_COMPOBJ_IDOFFSET
Definition: Compute.h:77
void updateLocalComputes5()
Definition: ComputeMgr.C:314
CudaComputeNonbonded * getCudaComputeNonbonded()
Definition: ComputeMgr.C:358
int32 ComputeID
Definition: NamdTypes.h:278
int getMasterNode() const
Definition: ComputeEwald.h:86
static PatchMap * Object()
Definition: PatchMap.h:27
void recvFinishPatchOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1729
void buildProxySpanningTree2()
Definition: ProxyMgr.C:576
void sendMessageEnqueueWork(int pe, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1759
Definition: Vector.h:72
SimParameters * simParameters
Definition: Node.h:181
void sendFinishReductions(int pe, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1748
void registerCompute(ComputeID cid, Compute *c)
Definition: ComputeMap.h:95
void basePatchIDList(int pe, PatchIDList &)
Definition: PatchMap.C:454
void setNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:138
int32_t int32
Definition: common.h:38
#define DebugM(x, y)
Definition: Debug.h:75
void recvComputeConsForceMsg(ComputeConsForceMsg *)
Definition: ComputeMgr.C:1450
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
virtual void initialize()
Definition: Compute.h:56
#define PROXY_DATA_PRIORITY
Definition: Priorities.h:40
void updateLocalComputes3()
Definition: ComputeMgr.C:264
void Migrate(LDObjHandle handle, int dest)
int index_a(int pid) const
Definition: PatchMap.h:86
LDObjHandle ldObjHandle
Definition: Compute.h:44
#define iout
Definition: InfoStream.h:51
void clear()
Definition: ResizeArray.h:91
void recvMessageEnqueueWork(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1765
std::string getCudaGlobalMasterUpdateResult(const std::string &client_name) const
Definition: ComputeMgr.C:1660
int add(const Elem &elem)
Definition: ResizeArray.h:101
void createComputes(ComputeMap *map)
Definition: ComputeMgr.C:973
void setNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:110
void recvLaunchWork(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1776
Molecule stores the structural information for the system.
Definition: Molecule.h:175
int computeGlobalResultsMsgSeq
Definition: ComputeMgr.h:156
void updateLocalComputes2(CkQdMsg *)
Definition: ComputeMgr.C:257
void recvAssignPatchesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1689
void doneUpdateLocalComputes()
Definition: ComputeMgr.C:346
void resize(int i)
Definition: ResizeArray.h:84
int gridsize_c(void) const
Definition: PatchMap.h:66
void recvCudaGlobalMasterRemoveMsg(std::vector< std::string > args)
Definition: ComputeMgr.C:1554
int getGlobalDevice() const
Definition: DeviceCUDA.h:171
char newNumPartitions(ComputeID cid)
Definition: ComputeMap.h:141
void recvComputeGlobalConfig(ComputeGlobalConfigMsg *)
int computeGlobalResultsMsgMasterSeq
Definition: ComputeMgr.h:157
CudaComputeNonbonded * c
Definition: ComputeMgr.C:1677
void sendComputeEwaldData(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1360
#define PRIORITY_SIZE
Definition: Priorities.h:13
void registerUserEventsForAllComputeObjs()
Definition: ComputeMgr.C:794
int gridsize_a(void) const
Definition: PatchMap.h:64
void sendLaunchWork(int pe, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1770
#define NAMD_EVENT_START(eon, id)
static NAMD_HOST_DEVICE int offset_b(int i)
Definition: Lattice.h:264
void recvYieldDevice(int pe)
Definition: ComputeMgr.C:1669
int getMasterPe()
Definition: DeviceCUDA.h:137
void NAMD_bug(const char *err_msg)
Definition: common.C:195
static NAMD_HOST_DEVICE int offset_c(int i)
Definition: Lattice.h:265
void sendComputeGlobalResults(ComputeGlobalResultsMsg *)
Definition: ComputeMgr.C:1275
ComputeType type(ComputeID cid)
Definition: ComputeMap.C:118
static ComputeCUDAMgr * getComputeCUDAMgr()
void removeUnusedProxies(void)
Definition: ProxyMgr.C:398
int index_b(int pid) const
Definition: PatchMap.h:87
void sendUnregisterBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1781
bool device_shared_with_pe(int pe)
Definition: DeviceCUDA.C:537
int numPartitions(ComputeID cid)
Definition: ComputeMap.C:133
std::shared_ptr< CudaGlobalMasterServer > createCudaGlobalMaster()
bool getIsGlobalDevice() const
Definition: DeviceCUDA.h:172
int numAtoms
Definition: Molecule.h:585
void sendFinishPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1707
void recvFinishReductions(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1754
void createProxy(PatchID pid)
Definition: ProxyMgr.C:492
void setNewNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:120
int partition(ComputeID cid)
Definition: ComputeMap.C:126
void updateLocalComputes4(CkQdMsg *)
Definition: ComputeMgr.C:296
void NAMD_die(const char *err_msg)
Definition: common.C:147
static LdbCoordinator * Object()
void recvFinishPatchesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1716
void enableComputeGlobalResults()
Definition: ComputeMgr.C:1313
void registerComputeSelf(ComputeID cid, PatchID pid)
void recvComputeGlobalResults(ComputeGlobalResultsMsg *)
Definition: ComputeMgr.C:1330
ResizeArray< ComputeGlobalResultsMsg * > computeGlobalResultsMsgs
Definition: ComputeMgr.h:155
void recvComputeDPMEData(ComputeDPMEDataMsg *)
Definition: ComputeMgr.C:1413
void splitComputes()
Definition: ComputeMgr.C:173
ComputeGlobal * computeGlobalObject
Definition: ComputeMgr.h:154
void recvData(ComputeGlobalDataMsg *)
void recvComputeEwaldResults(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1388
int index_c(int pid) const
Definition: PatchMap.h:88
void extendPtrs()
Definition: ComputeMap.C:87
void sendComputeDPMEData(ComputeDPMEDataMsg *)
Definition: ComputeMgr.C:1399
void saveComputeMapChanges(int, CkGroupID)
Definition: WorkDistrib.C:357
void recvComputeGlobalData(ComputeGlobalDataMsg *)
Definition: ComputeMgr.C:1263
void addClient(GlobalMaster *newClient)
void sendComputeDPMEResults(ComputeDPMEResultsMsg *, int)
Definition: ComputeMgr.C:1425
int myid()
Definition: Node.h:191
#define simParams
Definition: Output.C:129
int32 * consForceIndexes
Definition: Molecule.h:646
static NAMD_HOST_DEVICE int offset_a(int i)
Definition: Lattice.h:263
void checkMap()
Definition: PatchMap.C:274
void updateComputes2(CkQdMsg *)
Definition: ComputeMgr.C:154
void registerComputePair(ComputeID cid, PatchID *pid, int *trans)
Compute * compute(ComputeID cid)
Definition: ComputeMap.h:171
ComputeID cloneCompute(ComputeID src, int partition)
Definition: ComputeMap.C:183
static ComputeMap * Object()
Definition: ComputeMap.h:89
void recvOpenBoxesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1743
void sendSpanningTrees()
Definition: ProxyMgr.C:1106
void del(int index, int num=1)
Definition: ResizeArray.h:108
void recvUnregisterBoxesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1789
void sendComputeGlobalConfig(ComputeGlobalConfigMsg *)
void recvCudaGlobalMasterCreateMsg(std::vector< std::string > args)
Definition: ComputeMgr.C:1485
void assignPatches(ComputeMgr *computeMgrIn)
CudaComputeNonbonded * getCudaComputeNonbonded()
void recvSkipPatchesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1702
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
static int nodecount
Definition: ProxyMgr.h:398
int node(ComputeID cid)
Definition: ComputeMap.h:106
int numPids(ComputeID cid)
Definition: ComputeMap.C:101
int gridsize_b(void) const
Definition: PatchMap.h:65
Vector * consForce
Definition: Molecule.h:647
void sendComputeGlobalData(ComputeGlobalDataMsg *)
Definition: ComputeMgr.C:1200
int pid(ComputeID cid, int i)
Definition: ComputeMap.C:107
std::ostream & iERROR(std::ostream &s)
Definition: InfoStream.C:83
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
int trans(ComputeID cid, int i)
Definition: ComputeMap.C:112
void sendOpenBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1734
void updateComputes3()
Definition: ComputeMgr.C:163
void sendFinishPatchOnPe(int pe, CudaComputeNonbonded *c, int i, PatchID patchID)
Definition: ComputeMgr.C:1721
void recvCudaGlobalMasterUpdateMsg(std::vector< std::string > args)
Definition: ComputeMgr.C:1598
int32 PatchID
Definition: NamdTypes.h:277
Molecule * molecule
Definition: Node.h:179
void updateComputes(int, CkGroupID)
Definition: ComputeMgr.C:140
void sendComputeEwaldResults(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1383
colvarproxy_namd GlobalMasterColvars
void sendAssignPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1681
void splitComputes2(CkQdMsg *)
Definition: ComputeMgr.C:207
#define PATCH_PRIORITY(PID)
Definition: Priorities.h:25
for(int i=0;i< n1;++i)
void sendSkipPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1694
NodeID newNode(ComputeID cid)
Definition: ComputeMap.h:116
int proxySendSpanning
Definition: ProxyMgr.C:44