NAMD
ComputeMgr.C
Go to the documentation of this file.
1 
7 #include "InfoStream.h"
8 #include "ProcessorPrivate.h"
9 
10 //#define DEBUGM
11 #define MIN_DEBUG_LEVEL 1
12 #include "Debug.h"
13 
14 #include "BOCgroup.h"
15 #include "ComputeMgr.decl.h"
16 #include "ComputeMgr.h"
17 #include "ProxyMgr.decl.h"
18 #include "ProxyMgr.h"
19 
20 #include "Node.h"
21 #include "ComputeMap.h"
22 #include "PatchMap.h"
23 #include "PatchMap.inl"
24 
25 #include "Compute.h"
26 #include "ComputeNonbondedUtil.h"
27 #include "ComputeNonbondedSelf.h"
28 #include "ComputeNonbondedPair.h"
29 #include "ComputeNonbondedCUDA.h"
30 #include "ComputeNonbondedMIC.h"
31 #include "ComputeAngles.h"
32 #include "ComputeDihedrals.h"
33 #include "ComputeImpropers.h"
34 #include "ComputeThole.h"
35 #include "ComputeAniso.h"
36 #include "ComputeCrossterms.h"
37 // JLai
38 #include "ComputeGromacsPair.h"
39 #include "ComputeBonds.h"
41 #include "ComputeFullDirect.h"
42 #include "ComputeGlobal.h"
43 #include "ComputeGlobalMsgs.h"
44 #include "ComputeExt.h"
45 #include "ComputeQM.h"
46 #include "ComputeGBISser.h"
47 #include "ComputeLCPO.h"
48 #include "ComputeFmmSerial.h"
49 #include "ComputeMsmSerial.h"
50 #include "ComputeMsmMsa.h"
51 #include "ComputeMsm.h"
52 #include "ComputeDPMTA.h"
53 #include "ComputeDPME.h"
54 #include "ComputeDPMEMsgs.h"
55 #include "ComputePme.h"
56 // #ifdef NAMD_CUDA
57 #include "ComputePmeCUDA.h"
58 #include "ComputeCUDAMgr.h"
59 #include "CudaComputeNonbonded.h"
60 #include "ComputePmeCUDAMgr.h"
61 // #endif
62 #include "ComputeEwald.h"
63 #include "ComputeEField.h"
64 /* BEGIN gf */
65 #include "ComputeGridForce.h"
66 /* END gf */
67 #include "ComputeStir.h"
68 #include "ComputeSphericalBC.h"
69 #include "ComputeCylindricalBC.h"
70 #include "ComputeTclBC.h"
71 #include "ComputeRestraints.h"
72 #include "ComputeConsForce.h"
73 #include "ComputeConsForceMsgs.h"
74 #include "WorkDistrib.h"
75 
76 #include "LdbCoordinator.h"
77 
78 /* include all of the specific masters we need here */
79 #include "FreeEnergyEnums.h"
80 #include "FreeEnergyAssert.h"
81 #include "FreeEnergyGroup.h"
82 #include "FreeEnergyVector.h"
83 #include "FreeEnergyRestrain.h"
84 #include "FreeEnergyRMgr.h"
85 #include "FreeEnergyLambda.h"
86 #include "FreeEnergyLambdMgr.h"
87 
88 #include "GlobalMasterTest.h"
89 #include "GlobalMasterIMD.h"
90 #include "GlobalMasterTcl.h"
91 #include "GlobalMasterSMD.h"
92 #include "GlobalMasterTMD.h"
93 #include "GlobalMasterSymmetry.h"
94 #include "GlobalMasterEasy.h"
95 #include "GlobalMasterMisc.h"
96 #include "GlobalMasterFreeEnergy.h"
97 #include "GlobalMasterColvars.h"
98 
100 
101 #include "DeviceCUDA.h"
102 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
103 #ifdef WIN32
104 #define __thread __declspec(thread)
105 #endif
106 extern __thread DeviceCUDA *deviceCUDA;
107 #endif
108 
// NOTE(review): this brace opens what appears to be the ComputeMgr constructor
// (it registers this group in BOCclass_group and zero-initializes the compute
// bookkeeping members); the signature line was lost in extraction — confirm
// against the original ComputeMgr.C.
110 {
111  CkpvAccess(BOCclass_group).computeMgr = thisgroup;
// Pointers to optional compute objects start out null; they are filled in by
// createCompute() when the corresponding compute type is instantiated.
115  computeDPMEObject = 0;
116  computeEwaldObject = 0;
117  computeNonbondedCUDAObject = 0;
118  computeNonbondedMICObject = 0;
119  computeNonbondedWorkArrays = new ComputeNonbondedWorkArrays;
120  skipSplitting = 0;
121  masterServerObject = NULL;
122 
123  #if defined(NAMD_MIC)
124  // Create the micPEData flag array (1 bit per PE) and initially set each PE as "not driving
125  // a MIC card" (unset). PEs that are driving MIC card will identify themselves during startup.
126  int numPEs = CkNumPes();
127  int numInts = ((numPEs + (sizeof(int)*8-1)) & (~(sizeof(int)*8-1))) / (sizeof(int)*8); // Round up to sizeof(int) then divide by the size of an int
128  micPEData = new int[numInts];
129  if (micPEData == NULL) { NAMD_die("Unable to allocate memory for micPEData"); }
130  memset(micPEData, 0, sizeof(int) * numInts);
131  #else
132  micPEData = NULL;
133  #endif
134 }
135 
// NOTE(review): appears to be the ComputeMgr destructor body (signature line
// lost in extraction). Releases the work arrays allocated in the constructor
// and the master server created in createComputes() (if any).
// The NULL check before delete is redundant in C++ (delete of a null pointer
// is a no-op) but harmless.
137 {
138  delete computeNonbondedWorkArrays;
139  if (masterServerObject != NULL) delete masterServerObject;
140 }
141 
142 void ComputeMgr::updateComputes(int ep, CkGroupID chareID)
143 {
144  updateComputesReturnEP = ep;
145  updateComputesReturnChareID = chareID;
146  updateComputesCount = CkNumPes();
147 
148  if (CkMyPe())
149  {
150  NAMD_bug("updateComputes signaled on wrong Pe!");
151  }
152 
153  CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0),&thishandle);
154 }
155 
156 void ComputeMgr::updateComputes2(CkQdMsg *msg)
157 {
158  delete msg;
159 
160  CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
161  WorkDistrib *workDistrib = wd.ckLocalBranch();
162  workDistrib->saveComputeMapChanges(CkIndex_ComputeMgr::updateComputes3(),thisgroup);
163 }
164 
// NOTE(review): signature line lost in extraction; based on the callback
// registered in updateComputes2 above, this is ComputeMgr::updateComputes3().
// First pass splits computes; subsequent passes (skipSplitting set) go
// straight to updating local computes.
166 {
167  if ( skipSplitting ) {
168  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
169  } else {
170  CProxy_ComputeMgr(thisgroup).splitComputes();
171  skipSplitting = 1;
172  }
173 }
174 
// NOTE(review): signature line lost in extraction; the broadcast in
// updateComputes3 above indicates this is ComputeMgr::splitComputes().
// On rank 0 of each node, applies requested partition counts from the
// compute map: computes flagged with newNumPartitions > 0 are split by
// cloning, then map pointers are rebuilt.  PE 0 then waits for quiescence
// before moving on to splitComputes2.
176 {
177  if ( ! CkMyRank() ) {
178  ComputeMap *computeMap = ComputeMap::Object();
179  const int nc = computeMap->numComputes();
180 
181  for (int i=0; i<nc; i++) {
182  int nnp = computeMap->newNumPartitions(i);
183  if ( nnp > 0 ) {
// Only computes that currently have a single partition can be split further.
184  if ( computeMap->numPartitions(i) != 1 ) {
185  CkPrintf("Warning: unable to partition compute %d\n", i);
186  computeMap->setNewNumPartitions(i,0);
187  continue;
188  }
189  //CkPrintf("splitting compute %d by %d\n",i,nnp);
190  computeMap->setNumPartitions(i,nnp);
191  if (computeMap->newNode(i) == -1) {
192  computeMap->setNewNode(i,computeMap->node(i));
193  }
// Partition 0 keeps compute id i; partitions 1..nnp-1 get cloned entries.
194  for ( int j=1; j<nnp; ++j ) {
195  int newcid = computeMap->cloneCompute(i,j);
196  //CkPrintf("compute %d partition %d is %d\n",i,j,newcid);
197  }
198  }
199  }
200  computeMap->extendPtrs();
201  }
202 
203  if (!CkMyPe())
204  {
205  CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
206  }
207 }
208 
209 void ComputeMgr::splitComputes2(CkQdMsg *msg)
210 {
211  delete msg;
212  CProxy_ComputeMgr(thisgroup).updateLocalComputes();
213 }
214 
// NOTE(review): signature line lost in extraction; the broadcast in
// splitComputes2 above indicates this is ComputeMgr::updateLocalComputes().
// Each PE reconciles its local computes against the new compute map:
//  - locally-owned computes being split are destroyed (and migrated in the
//    load balancer's bookkeeping), to be recreated later if they stay here;
//  - computes moving onto this PE are flagged for creation and proxies for
//    their patches are requested;
//  - computes moving off this PE are destroyed.
216 {
217  ComputeMap *computeMap = ComputeMap::Object();
218  CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
219  ProxyMgr *proxyMgr = pm.ckLocalBranch();
220  LdbCoordinator *ldbCoordinator = LdbCoordinator::Object();
221 
// computeFlag accumulates the ids that updateLocalComputes3 must createCompute().
222  computeFlag.resize(0);
223 
224  const int nc = computeMap->numComputes();
225  for (int i=0; i<nc; i++) {
226 
227  if ( computeMap->node(i) == CkMyPe() &&
228  computeMap->newNumPartitions(i) > 1 ) {
229  Compute *c = computeMap->compute(i);
230  ldbCoordinator->Migrate(c->ldObjHandle,CkMyPe());
231  delete c;
232  computeMap->registerCompute(i,NULL);
233  if ( computeMap->newNode(i) == CkMyPe() ) computeFlag.add(i);
234  } else
235  if (computeMap->newNode(i) == CkMyPe() && computeMap->node(i) != CkMyPe())
236  {
237  computeFlag.add(i);
// Request proxies for every patch this incoming compute depends on.
238  for (int n=0; n < computeMap->numPids(i); n++)
239  {
240  proxyMgr->createProxy(computeMap->pid(i,n));
241  }
242  }
243  else if (computeMap->node(i) == CkMyPe() &&
244  (computeMap->newNode(i) != -1 && computeMap->newNode(i) != CkMyPe() ))
245  {
246  // CkPrintf("delete compute %d on pe %d\n",i,CkMyPe());
247  delete computeMap->compute(i);
248  computeMap->registerCompute(i,NULL);
249  }
250  }
251 
252  if (!CkMyPe())
253  {
254  CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
255  }
256 }
257 
258 void
// NOTE(review): the name line was lost in extraction; the CkStartQD callback
// registered in updateLocalComputes above indicates this is
// ComputeMgr::updateLocalComputes2(CkQdMsg*).  It simply advances the
// pipeline by broadcasting updateLocalComputes3 to the whole group.
260 {
261  delete msg;
262  CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
263 }
264 
265 void
// NOTE(review): name line lost in extraction; the broadcast in
// updateLocalComputes2 above indicates this is
// ComputeMgr::updateLocalComputes3().  Rank 0 commits the new node
// assignments into the map; every PE then instantiates the computes flagged
// earlier and drops proxies that are no longer referenced.
267 {
268  ComputeMap *computeMap = ComputeMap::Object();
269  CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
270  ProxyMgr *proxyMgr = pm.ckLocalBranch();
271 
273 
274  const int nc = computeMap->numComputes();
275 
// Only one rank per node mutates the shared compute map.
276  if ( ! CkMyRank() ) {
277  for (int i=0; i<nc; i++) {
278  computeMap->setNewNumPartitions(i,0);
279  if (computeMap->newNode(i) != -1) {
280  computeMap->setNode(i,computeMap->newNode(i));
281  computeMap->setNewNode(i,-1);
282  }
283  }
284  }
285 
// Instantiate the computes recorded by updateLocalComputes on this PE.
286  for(int i=0; i<computeFlag.size(); i++) createCompute(computeFlag[i], computeMap);
287  computeFlag.clear();
288 
289  proxyMgr->removeUnusedProxies();
290 
291  if (!CkMyPe())
292  {
293  CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
294  }
295 }
296 
297 void
// NOTE(review): name line lost in extraction; the CkStartQD callback in
// updateLocalComputes3 above indicates this is
// ComputeMgr::updateLocalComputes4(CkQdMsg*).  Advances to phase 5 and,
// if requested, writes the updated compute map to disk.
299 {
300  delete msg;
301  CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
302 
303  // store the latest compute map
// NOTE(review): a line declaring `simParams` (presumably from
// Node::Object()->simParameters, doxygen line 304) was lost in extraction.
305  if (simParams->storeComputeMap) {
306  ComputeMap *computeMap = ComputeMap::Object();
307  computeMap->saveComputeMap(simParams->computeMapFilename);
308  }
309 }
310 
// Disabled flag for the (also disabled) load-balancer-driven spanning-tree
// code below.
311 #if 0
312 int firstphase = 1;
313 #endif
314 
315 void
// NOTE(review): name line lost in extraction; the broadcast in
// updateLocalComputes4 above indicates this is
// ComputeMgr::updateLocalComputes5().  Several statements inside this
// function (e.g. the rank-0 work at doxygen lines 319-320 and the bodies in
// the #if 0 region) were also lost in extraction — consult the original
// source before editing.
317 {
318  if ( ! CkMyRank() ) {
321  }
322 
323  // we always use the centralized building of spanning tree
324  // distributed building of ST called in Node.C only
327 
328  // this code needs to be turned on if we want to
329  // shift the creation of ST to the load balancer
330 
331 #if 0
333  {
334  if (firstphase)
336  else
337  if (CkMyPe() == 0)
339 
340  firstphase = 0;
341  }
342 #endif
343 
344  if (!CkMyPe())
345  CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
346 }
347 
// NOTE(review): signature line lost in extraction; the CkStartQD callback in
// the previous function indicates this is
// ComputeMgr::doneUpdateLocalComputes().  Sends an empty message to the
// entry point / chare recorded by updateComputes(), signalling completion.
// The commented-out countdown suggests a per-PE completion count was once
// used here.
349 {
350 
351 // if (!--updateComputesCount) {
352  DebugM(4, "doneUpdateLocalComputes on Pe("<<CkMyPe()<<")\n");
353  void *msg = CkAllocMsg(0,0,0);
354  CkSendMsgBranch(updateComputesReturnEP,msg,0,updateComputesReturnChareID);
355 // }
356 }
357 
358 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
359 // Helper functions for creating and getting pointers to CUDA computes
// NOTE(review): the signatures and bodies of the first two helpers were lost
// in extraction; only their closing braces remain.  By analogy with the
// BONDED_CUDA helpers below and the calls to getCudaComputeNonbonded() in
// createCompute(), these are presumably getCudaComputeNonbonded() /
// createCudaComputeNonbonded() forwarding to ComputeCUDAMgr — confirm
// against the original source.
362 }
363 
366 }
367 
368 #ifdef BONDED_CUDA
// Fetch the node-wide bonded-CUDA compute from the CUDA manager singleton.
369 ComputeBondedCUDA* getComputeBondedCUDA() {
370  return ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
371 }
372 
// Create the bonded-CUDA compute via the CUDA manager singleton.
373 ComputeBondedCUDA* createComputeBondedCUDA(ComputeID c, ComputeMgr* computeMgr) {
374  return ComputeCUDAMgr::getComputeCUDAMgr()->createComputeBondedCUDA(c, computeMgr);
375 }
376 #endif
377 #endif
378 
379 //
// Instantiate the compute object with id `i` described by the compute map:
// dispatches on map->type(i), constructs the matching Compute subclass (or
// registers the work with a CUDA/MIC device compute), records it in the map,
// and calls initialize() on it.
// NOTE(review): many `case` labels in this switch (e.g. the nonbonded
// self/pair cases and several self-bonded cases) were hyperlinks in the
// doxygen page and were lost in extraction, as was the declaration of
// `simParams` (doxygen line 387).  The code below is preserved byte-for-byte;
// consult the original ComputeMgr.C before modifying.
380 void
381 ComputeMgr::createCompute(ComputeID i, ComputeMap *map)
382 {
383  Compute *c;
384  PatchID pid2[2];
385  PatchIDList pids;
386  int trans2[2];
388 
389  PatchID pid8[8];
390  int trans8[8];
391 
392  switch ( map->type(i) )
393  {
// NOTE(review): missing case label here (doxygen line 394) — presumably
// the nonbonded *self* case, given the registerComputeSelf calls below.
395 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
396  if (simParams->useCUDA2) {
397  getCudaComputeNonbonded()->registerComputeSelf(i, map->computeData[i].pids[0].pid);
398  } else {
399  register_cuda_compute_self(i,map->computeData[i].pids[0].pid);
400  }
401 #elif defined(NAMD_MIC)
402  if (map->directToDevice(i) == 0) {
403  c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
404  computeNonbondedWorkArrays,
405  map->partition(i),map->partition(i)+1,
406  map->numPartitions(i)); // unknown delete
407  map->registerCompute(i,c);
408  c->initialize();
409  } else {
410  register_mic_compute_self(i,map->computeData[i].pids[0].pid,map->partition(i),map->numPartitions(i));
411  }
412 #else
413  c = new ComputeNonbondedSelf(i,map->computeData[i].pids[0].pid,
414  computeNonbondedWorkArrays,
415  map->partition(i),map->partition(i)+1,
416  map->numPartitions(i)); // unknown delete
417  map->registerCompute(i,c);
418  c->initialize();
419 #endif
420  break;
421  case computeLCPOType:
// LCPO computes span an 8-patch stencil.
422  for (int j = 0; j < 8; j++) {
423  pid8[j] = map->computeData[i].pids[j].pid;
424  trans8[j] = map->computeData[i].pids[j].trans;
425  }
426  c = new ComputeLCPO(i,pid8,trans8,
427  computeNonbondedWorkArrays,
428  map->partition(i),map->partition(i)+1,
429  map->numPartitions(i), 8);
430  map->registerCompute(i,c);
431  c->initialize();
432 
433  break;
// NOTE(review): missing case label here (doxygen line 434) — presumably
// the nonbonded *pair* case, given the two-patch setup below.
435  pid2[0] = map->computeData[i].pids[0].pid;
436  trans2[0] = map->computeData[i].pids[0].trans;
437  pid2[1] = map->computeData[i].pids[1].pid;
438  trans2[1] = map->computeData[i].pids[1].trans;
439 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
440  if (simParams->useCUDA2) {
441  getCudaComputeNonbonded()->registerComputePair(i, pid2, trans2);
442  } else {
443  register_cuda_compute_pair(i,pid2,trans2);
444  }
445 #elif defined(NAMD_MIC)
446  if (map->directToDevice(i) == 0) {
447  c = new ComputeNonbondedPair(i,pid2,trans2,
448  computeNonbondedWorkArrays,
449  map->partition(i),map->partition(i)+1,
450  map->numPartitions(i)); // unknown delete
451  map->registerCompute(i,c);
452  c->initialize();
453  } else {
454  register_mic_compute_pair(i,pid2,trans2,map->partition(i),map->numPartitions(i));
455  }
456 #else
457  c = new ComputeNonbondedPair(i,pid2,trans2,
458  computeNonbondedWorkArrays,
459  map->partition(i),map->partition(i)+1,
460  map->numPartitions(i)); // unknown delete
461  map->registerCompute(i,c);
462  c->initialize();
463 #endif
464  break;
465 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
// NOTE(review): missing case label (doxygen line 466) — the assignment
// below suggests the ComputeNonbondedCUDA (pre-CUDA2) case.
467  c = computeNonbondedCUDAObject = new ComputeNonbondedCUDA(i,this); // unknown delete
468  map->registerCompute(i,c);
469  c->initialize();
470  break;
// NOTE(review): missing case label and construction statement here
// (doxygen lines 471-472) — apparently the CudaComputeNonbonded (CUDA2)
// case; its initialize() is deliberately deferred (see comment below).
473  map->registerCompute(i,c);
474  // NOTE: initialize() is called at the end of createComputes(),
475  // after all computes have been created
476  //c->initialize();
477  break;
478 #endif
479 #ifdef NAMD_MIC
// NOTE(review): missing case label (doxygen line 480) — the MIC nonbonded case.
481  c = computeNonbondedMICObject = new ComputeNonbondedMIC(i,this); // unknown delete
482  map->registerCompute(i,c);
483  c->initialize();
484  break;
485 #endif
486 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
487 #ifdef BONDED_CUDA
488  case computeBondedCUDAType:
489  c = createComputeBondedCUDA(i, this);
490  map->registerCompute(i,c);
491  break;
492 #endif
493 #endif
// For each bonded term below, the bondedCUDA bitmask selects whether the
// term is handled by the bonded-CUDA compute or by a host-side compute
// (bit 1=bonds, 2=angles, 4=dihedrals, 8=impropers, 16=exclusions,
// 32=crossterms, as used consistently in the cases below).
494  case computeExclsType:
495 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))
496  if (simParams->bondedCUDA & 16)
497  {
498  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
499  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
500  } else
501 #endif
502  {
503  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
504  c = new ComputeExcls(i,pids); // unknown delete
505  map->registerCompute(i,c);
506  c->initialize();
507  }
508  break;
509  case computeBondsType:
510 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
511  if (simParams->bondedCUDA & 1)
512  {
513  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
514  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
515  } else
516 #endif
517  {
518  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
519  c = new ComputeBonds(i,pids); // unknown delete
520  map->registerCompute(i,c);
521  c->initialize();
522  }
523  break;
524  case computeAnglesType:
525 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
526  if (simParams->bondedCUDA & 2)
527  {
528  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
529  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
530  } else
531 #endif
532  {
533  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
534  c = new ComputeAngles(i,pids); // unknown delete
535  map->registerCompute(i,c);
536  c->initialize();
537  }
538  break;
// NOTE(review): missing case label (doxygen line 539) — dihedrals, per the
// ComputeDihedrals construction below.
540 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
541  if (simParams->bondedCUDA & 4)
542  {
543  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
544  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
545  } else
546 #endif
547  {
548  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
549  c = new ComputeDihedrals(i,pids); // unknown delete
550  map->registerCompute(i,c);
551  c->initialize();
552  }
553  break;
// NOTE(review): missing case label (doxygen line 554) — impropers.
555 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
556  if (simParams->bondedCUDA & 8)
557  {
558  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
559  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
560  } else
561 #endif
562  {
563  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
564  c = new ComputeImpropers(i,pids); // unknown delete
565  map->registerCompute(i,c);
566  c->initialize();
567  }
568  break;
569  case computeTholeType:
570  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
571  c = new ComputeThole(i,pids); // unknown delete
572  map->registerCompute(i,c);
573  c->initialize();
574  break;
575  case computeAnisoType:
576  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
577  c = new ComputeAniso(i,pids); // unknown delete
578  map->registerCompute(i,c);
579  c->initialize();
580  break;
// NOTE(review): missing case label (doxygen line 581) — crossterms.
582 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
583  if (simParams->bondedCUDA & 32)
584  {
585  PatchMap::Object()->basePatchIDList(map->computeData[i].node, pids);
586  getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->type(i), pids);
587  } else
588 #endif
589  {
590  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
591  c = new ComputeCrossterms(i,pids); // unknown delete
592  map->registerCompute(i,c);
593  c->initialize();
594  }
595  break;
596  // JLai
// NOTE(review): missing case labels around the Gromacs-pair cases below.
598  PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
599  c = new ComputeGromacsPair(i,pids); // unknown delete
600  map->registerCompute(i,c);
601  c->initialize();
602  break;
604  c = new ComputeSelfGromacsPair(i,map->computeData[i].pids[0].pid); // unknown delete
605  map->registerCompute(i,c);
606  c->initialize();
607  break;
608  // End of JLai
// NOTE(review): the case labels for the "self" bonded cases below
// (self-excls, self-bonds, self-angles, self-dihedrals, self-impropers,
// self-crossterms) were lost in extraction; each is identifiable from the
// Compute class constructed in its host-side branch.
610 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
611  if (simParams->bondedCUDA & 16)
612  {
613  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
614  } else
615 #endif
616  {
617  c = new ComputeSelfExcls(i,map->computeData[i].pids[0].pid);
618  map->registerCompute(i,c);
619  c->initialize();
620  }
621  break;
623 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
624  if (simParams->bondedCUDA & 1)
625  {
626  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
627  } else
628 #endif
629  {
630  c = new ComputeSelfBonds(i,map->computeData[i].pids[0].pid);
631  map->registerCompute(i,c);
632  c->initialize();
633  }
634  break;
636 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
637  if (simParams->bondedCUDA & 2)
638  {
639  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
640  } else
641 #endif
642  {
643  c = new ComputeSelfAngles(i,map->computeData[i].pids[0].pid);
644  map->registerCompute(i,c);
645  c->initialize();
646  }
647  break;
649 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
650  if (simParams->bondedCUDA & 4)
651  {
652  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
653  } else
654 #endif
655  {
656  c = new ComputeSelfDihedrals(i,map->computeData[i].pids[0].pid);
657  map->registerCompute(i,c);
658  c->initialize();
659  }
660  break;
662 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
663  if (simParams->bondedCUDA & 8)
664  {
665  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
666  } else
667 #endif
668  {
669  c = new ComputeSelfImpropers(i,map->computeData[i].pids[0].pid);
670  map->registerCompute(i,c);
671  c->initialize();
672  }
673  break;
675  c = new ComputeSelfThole(i,map->computeData[i].pids[0].pid);
676  map->registerCompute(i,c);
677  c->initialize();
678  break;
680  c = new ComputeSelfAniso(i,map->computeData[i].pids[0].pid);
681  map->registerCompute(i,c);
682  c->initialize();
683  break;
685 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))
686  if (simParams->bondedCUDA & 32)
687  {
688  getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->type(i), map->computeData[i].pids[0].pid);
689  } else
690 #endif
691  {
692  c = new ComputeSelfCrossterms(i,map->computeData[i].pids[0].pid);
693  map->registerCompute(i,c);
694  c->initialize();
695  }
696  break;
697 #ifdef DPMTA
698  case computeDPMTAType:
699  c = new ComputeDPMTA(i); // unknown delete
700  map->registerCompute(i,c);
701  c->initialize();
702  break;
703 #endif
704 #ifdef DPME
705  case computeDPMEType:
706  c = computeDPMEObject = new ComputeDPME(i,this); // unknown delete
707  map->registerCompute(i,c);
708  c->initialize();
709  break;
710 #endif
711  case computePmeType:
712  c = new ComputePme(i,map->computeData[i].pids[0].pid); // unknown delete
713  map->registerCompute(i,c);
714  c->initialize();
715  break;
716 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
717  case computePmeCUDAType:
718  // PatchMap::Object()->basePatchIDList(CkMyPe(),pids);
719  // c = new ComputePmeCUDA(i, pids);
720  c = new ComputePmeCUDA(i, map->computeData[i].pids[0].pid);
721  map->registerCompute(i,c);
722  c->initialize();
723  break;
724 #endif
725  case computeEwaldType:
726  c = computeEwaldObject = new ComputeEwald(i,this); // unknown delete
727  map->registerCompute(i,c);
728  c->initialize();
729  break;
// NOTE(review): missing case label (doxygen line 730) — full direct.
731  c = new ComputeFullDirect(i); // unknown delete
732  map->registerCompute(i,c);
733  c->initialize();
734  break;
735  case computeGlobalType:
736  c = computeGlobalObject = new ComputeGlobal(i,this); // unknown delete
737  map->registerCompute(i,c);
738  c->initialize();
739  break;
740  case computeStirType:
741  c = new ComputeStir(i,map->computeData[i].pids[0].pid); // unknown delete
742  map->registerCompute(i,c);
743  c->initialize();
744  break;
745  case computeExtType:
746  c = new ComputeExt(i); // unknown delete
747  map->registerCompute(i,c);
748  c->initialize();
749  break;
750  case computeQMType:
751  c = new ComputeQM(i);
752  map->registerCompute(i,c);
753  c->initialize();
754  break;
755  case computeGBISserType: //gbis serial
756  c = new ComputeGBISser(i);
757  map->registerCompute(i,c);
758  c->initialize();
759  break;
760  case computeFmmType: // FMM serial
761  c = new ComputeFmmSerial(i);
762  map->registerCompute(i,c);
763  c->initialize();
764  break;
765  case computeMsmSerialType: // MSM serial
766  c = new ComputeMsmSerial(i);
767  map->registerCompute(i,c);
768  c->initialize();
769  break;
770 #ifdef CHARM_HAS_MSA
771  case computeMsmMsaType: // MSM parallel long-range part using MSA
772  c = new ComputeMsmMsa(i);
773  map->registerCompute(i,c);
774  c->initialize();
775  break;
776 #endif
777  case computeMsmType: // MSM parallel
778  c = new ComputeMsm(i);
779  map->registerCompute(i,c);
780  c->initialize();
781  break;
782  case computeEFieldType:
783  c = new ComputeEField(i,map->computeData[i].pids[0].pid); // unknown delete
784  map->registerCompute(i,c);
785  c->initialize();
786  break;
787  /* BEGIN gf */
// NOTE(review): missing case label (doxygen line 788) — grid force.
789  c = new ComputeGridForce(i,map->computeData[i].pids[0].pid);
790  map->registerCompute(i,c);
791  c->initialize();
792  break;
793  /* END gf */
// NOTE(review): missing case labels for the spherical/cylindrical BC,
// restraints, consforce, and constorque cases below.
795  c = new ComputeSphericalBC(i,map->computeData[i].pids[0].pid); // unknown delete
796  map->registerCompute(i,c);
797  c->initialize();
798  break;
800  c = new ComputeCylindricalBC(i,map->computeData[i].pids[0].pid); // unknown delete
801  map->registerCompute(i,c);
802  c->initialize();
803  break;
804  case computeTclBCType:
805  c = new ComputeTclBC(i); // unknown delete
806  map->registerCompute(i,c);
807  c->initialize();
808  break;
810  c = new ComputeRestraints(i,map->computeData[i].pids[0].pid); // unknown delete
811  map->registerCompute(i,c);
812  c->initialize();
813  break;
815  c = new ComputeConsForce(i,map->computeData[i].pids[0].pid);
816  map->registerCompute(i,c);
817  c->initialize();
818  break;
820  c = new ComputeConsTorque(i,map->computeData[i].pids[0].pid);
821  map->registerCompute(i,c);
822  c->initialize();
823  break;
824  default:
825  NAMD_bug("Unknown compute type in ComputeMgr::createCompute().");
826  break;
827  }
828 }
829 
// NOTE(review): signature line lost in extraction; from the NAMD_bug message
// in the default branch this is
// ComputeMgr::registerUserEventForAllComputeObjs(), active only when
// TRACE_COMPUTE_OBJECTS is defined.  For every compute in the map it builds
// a descriptive name and registers a Projections user event with id
// TRACE_COMPOBJ_IDOFFSET + compute id.  As in createCompute(), several
// `case` labels in the switch were lost in extraction; each stripped case is
// identifiable from the string it formats.
831 {
832 #ifdef TRACE_COMPUTE_OBJECTS
834  PatchMap *pmap = PatchMap::Object();
835  char user_des[50];
836  int p1, p2;
837  int adim, bdim, cdim;
838  int t1, t2;
839  int x1, y1, z1, x2, y2, z2;
840  int dx, dy, dz;
841  for (int i=0; i<map->numComputes(); i++)
842  {
843  memset(user_des, 0, 50);
844  switch ( map->type(i) )
845  {
847  sprintf(user_des, "computeNonBondedSelfType_%d_pid_%d", i, map->pid(i,0));
848  break;
849  case computeLCPOType:
850  sprintf(user_des, "computeLCPOType_%d_pid_%d", i, map->pid(i,0));
851  break;
// For nonbonded pairs, encode the patch-grid displacement (dx,dy,dz)
// between the two patches (with periodic image offsets applied) in the name.
853  adim = pmap->gridsize_a();
854  bdim = pmap->gridsize_b();
855  cdim = pmap->gridsize_c();
856  p1 = map->pid(i, 0);
857  t1 = map->trans(i, 0);
858  x1 = pmap->index_a(p1) + adim * Lattice::offset_a(t1);
859  y1 = pmap->index_b(p1) + bdim * Lattice::offset_b(t1);
860  z1 = pmap->index_c(p1) + cdim * Lattice::offset_c(t1);
861  p2 = map->pid(i, 1);
862  t2 = map->trans(i, 1);
863  x2 = pmap->index_a(p2) + adim * Lattice::offset_a(t2);
864  y2 = pmap->index_b(p2) + bdim * Lattice::offset_b(t2);
865  z2 = pmap->index_c(p2) + cdim * Lattice::offset_c(t2);
866  dx = abs(x1-x2);
867  dy = abs(y1-y2);
868  dz = abs(z1-z2);
869  sprintf(user_des, "computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
870  break;
871 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
872 #ifdef BONDED_CUDA
873  case computeBondedCUDAType:
874  sprintf(user_des, "computeBondedCUDAType_%d", i);
875  break;
876 #endif
877 #endif
878  case computeExclsType:
879  sprintf(user_des, "computeExclsType_%d", i);
880  break;
881  case computeBondsType:
882  sprintf(user_des, "computeBondsType_%d", i);
883  break;
884  case computeAnglesType:
885  sprintf(user_des, "computeAnglesType_%d", i);
886  break;
888  sprintf(user_des, "computeDihedralsType_%d", i);
889  break;
891  sprintf(user_des, "computeImpropersType_%d", i);
892  break;
893  case computeTholeType:
894  sprintf(user_des, "computeTholeType_%d", i);
895  break;
896  case computeAnisoType:
897  sprintf(user_des, "computeAnisoType_%d", i);
898  break;
900  sprintf(user_des, "computeCrosstermsType_%d", i);
901  break;
903  sprintf(user_des, "computeSelfExclsType_%d", i);
904  break;
906  sprintf(user_des, "computeSelfBondsType_%d", i);
907  break;
909  sprintf(user_des, "computeSelfAnglesType_%d", i);
910  break;
912  sprintf(user_des, "computeSelfDihedralsType_%d", i);
913  break;
915  sprintf(user_des, "computeSelfImpropersType_%d", i);
916  break;
918  sprintf(user_des, "computeSelfTholeType_%d", i);
919  break;
921  sprintf(user_des, "computeSelfAnisoType_%d", i);
922  break;
924  sprintf(user_des, "computeSelfCrosstermsType_%d", i);
925  break;
926 #ifdef DPMTA
927  case computeDPMTAType:
928  sprintf(user_des, "computeDPMTAType_%d", i);
929  break;
930 #endif
931 #ifdef DPME
932  case computeDPMEType:
933  sprintf(user_des, "computeDPMEType_%d", i);
934  break;
935 #endif
936  case computePmeType:
937  sprintf(user_des, "computePMEType_%d", i);
938  break;
939 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
940  case computePmeCUDAType:
941  sprintf(user_des, "computePMECUDAType_%d", i);
942  break;
943 #endif
944  case computeEwaldType:
945  sprintf(user_des, "computeEwaldType_%d", i);
946  break;
948  sprintf(user_des, "computeFullDirectType_%d", i);
949  break;
950  case computeGlobalType:
951  sprintf(user_des, "computeGlobalType_%d", i);
952  break;
953  case computeStirType:
954  sprintf(user_des, "computeStirType_%d", i);
955  break;
956  case computeExtType:
957  sprintf(user_des, "computeExtType_%d", i);
958  break;
959  case computeQMType:
960  sprintf(user_des, "computeQMType_%d", i);
961  break;
962  case computeEFieldType:
963  sprintf(user_des, "computeEFieldType_%d", i);
964  break;
965  /* BEGIN gf */
967  sprintf(user_des, "computeGridForceType_%d", i);
968  break;
969  /* END gf */
971  sprintf(user_des, "computeSphericalBCType_%d", i);
972  break;
974  sprintf(user_des, "computeCylindricalBCType_%d", i);
975  break;
976  case computeTclBCType:
977  sprintf(user_des, "computeTclBCType_%d", i);
978  break;
980  sprintf(user_des, "computeRestraintsType_%d", i);
981  break;
983  sprintf(user_des, "computeConsForceType_%d", i);
984  break;
986  sprintf(user_des, "computeConsTorqueType_%d", i);
987  break;
988  default:
989  NAMD_bug("Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
990  break;
991  }
// The heap copy below is intentional and intentionally not freed:
// traceRegisterUserEvent keeps the pointer (see original comment).
992  int user_des_len = strlen(user_des);
993  char *user_des_cst = new char[user_des_len+1];
994  memcpy(user_des_cst, user_des, user_des_len);
995  user_des_cst[user_des_len] = 0;
996  //Since the argument in traceRegisterUserEvent is supposed
997  //to be a const string which will not be copied inside the
998  //function when a new user event is created, user_des_cst
999  //has to be allocated in heap.
1000  int reEvenId = traceRegisterUserEvent(user_des_cst, TRACE_COMPOBJ_IDOFFSET+i);
1001  //printf("Register user event (%s) with id (%d)\n", user_des, reEvenId);
1002  }
1003 #else
1004  return;
1005 #endif
1006 }
1007 
1008 void
1010 {
1011 // #ifdef NAMD_CUDA
1012 // int ComputePmeCUDACounter = 0;
1013 // #endif
1014  Node *node = Node::Object();
1015  SimParameters *simParams = node->simParameters;
1016  int myNode = node->myid();
1017 
1018  if ( simParams->globalForcesOn && !myNode )
1019  {
1020  DebugM(4,"Mgr running on Node "<<CkMyPe()<<"\n");
1021  /* create a master server to allow multiple masters */
1022  masterServerObject = new GlobalMasterServer(this,
1023  PatchMap::Object()->numNodesWithPatches());
1024 
1025  /* create the individual global masters */
1026  // masterServerObject->addClient(new GlobalMasterTest());
1027  if (simParams->tclForcesOn)
1028  masterServerObject->addClient(new GlobalMasterTcl());
1029  if (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces) )
1030  masterServerObject->addClient(new GlobalMasterIMD());
1031 
1032  if (simParams->SMDOn)
1033  masterServerObject->addClient(
1034  new GlobalMasterSMD(simParams->SMDk, simParams->SMDk2,
1035  simParams->SMDVel,
1036  simParams->SMDDir, simParams->SMDOutputFreq,
1037  simParams->firstTimestep, simParams->SMDFile,
1038  node->molecule->numAtoms)
1039  );
1040 
1041  if (simParams->symmetryOn &&
1042  (simParams->firstTimestep < simParams->symmetryLastStep ||
1043  simParams->symmetryLastStep == -1))
1044  masterServerObject->addClient(new GlobalMasterSymmetry());
1045  if (simParams->TMDOn)
1046  masterServerObject->addClient(new GlobalMasterTMD());
1047  if (simParams->miscForcesOn)
1048  masterServerObject->addClient(new GlobalMasterMisc());
1049  if ( simParams->freeEnergyOn )
1050  masterServerObject->addClient(new GlobalMasterFreeEnergy());
1051  if ( simParams->colvarsOn )
1052  masterServerObject->addClient(new GlobalMasterColvars());
1053 
1054  }
1055 
1056  if ( !myNode && simParams->IMDon && (simParams->IMDignore || simParams->IMDignoreForces) ) {
1057  // GlobalMasterIMD constructor saves pointer to node->IMDOutput object
1058  new GlobalMasterIMD();
1059  }
1060 
1061 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1062  bool deviceIsMine = ( deviceCUDA->getMasterPe() == CkMyPe() );
1063 #ifdef BONDED_CUDA
1064  // Place bonded forces on Pe different from non-bonded forces
1065  int bondedMasterPe = deviceCUDA->getMasterPe();
1066  // for (int i=0;i < deviceCUDA->getNumPesSharingDevice();i++) {
1067  // int pe = deviceCUDA->getPesSharingDevice(i);
1068  // if (pe != deviceCUDA->getMasterPe()) {
1069  // bondedMasterPe = pe;
1070  // }
1071  // }
1072  bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
1073 #endif
1074 #endif
1075 
1076  #ifdef NAMD_MIC
1077  bool deviceIsMine = ( mic_device_pe() == CkMyPe() );
1078  #endif
1079 
1080  for (int i=0; i < map->nComputes; i++)
1081  {
1082  if ( ! ( i % 100 ) )
1083  {
1084  }
1085 
1086 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1087  switch ( map->type(i) )
1088  {
1089 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1090  // case computePmeCUDAType:
1091  // // Only create single ComputePmeCUDA object per Pe
1092  // if ( map->computeData[i].node != myNode ) continue;
1093  // if (ComputePmeCUDACounter > 0) continue;
1094  // ComputePmeCUDACounter++;
1095  // break;
1097  if ( ! deviceIsMine ) continue;
1098  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1099  break;
1100 
1102  if ( ! deviceIsMine ) continue;
1103  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1104  break;
1105 
1106 #ifdef BONDED_CUDA
1107  case computeSelfBondsType:
1108  case computeBondsType:
1109  if (simParams->bondedCUDA & 1) {
1110  if ( ! deviceIsMineBonded ) continue;
1111  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1112  } else {
1113  if ( map->computeData[i].node != myNode ) continue;
1114  }
1115  break;
1116 
1117  case computeSelfAnglesType:
1118  case computeAnglesType:
1119  if (simParams->bondedCUDA & 2) {
1120  if ( ! deviceIsMineBonded ) continue;
1121  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1122  } else {
1123  if ( map->computeData[i].node != myNode ) continue;
1124  }
1125  break;
1126 
1128  case computeDihedralsType:
1129  if (simParams->bondedCUDA & 4) {
1130  if ( ! deviceIsMineBonded ) continue;
1131  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1132  } else {
1133  if ( map->computeData[i].node != myNode ) continue;
1134  }
1135  break;
1136 
1138  case computeImpropersType:
1139  if (simParams->bondedCUDA & 8) {
1140  if ( ! deviceIsMineBonded ) continue;
1141  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1142  } else {
1143  if ( map->computeData[i].node != myNode ) continue;
1144  }
1145  break;
1146 
1147  case computeSelfExclsType:
1148  case computeExclsType:
1149  if (simParams->bondedCUDA & 16) {
1150  if ( ! deviceIsMineBonded ) continue;
1151  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1152  } else {
1153  if ( map->computeData[i].node != myNode ) continue;
1154  }
1155  break;
1156 
1158  case computeCrosstermsType:
1159  if (simParams->bondedCUDA & 32) {
1160  if ( ! deviceIsMineBonded ) continue;
1161  if ( ! deviceCUDA->device_shared_with_pe(map->computeData[i].node) ) continue;
1162  } else {
1163  if ( map->computeData[i].node != myNode ) continue;
1164  }
1165  break;
1166 
1167  case computeBondedCUDAType:
1168  if ( ! deviceIsMineBonded ) continue;
1169  if ( map->computeData[i].node != myNode ) continue;
1170  break;
1171 #endif
1172 
1173 #endif
1174 #ifdef NAMD_MIC
1175 
1177  if (map->directToDevice(i) != 0) { // If should be directed to the device...
1178  if ( ! deviceIsMine ) continue;
1179  if ( ! mic_device_shared_with_pe(map->computeData[i].node) ) continue;
1180  } else { // ... otherwise, direct to host...
1181  if (map->computeData[i].node != myNode) { continue; }
1182  }
1183  break;
1184 
1186  if (map->directToDevice(i)) { // If should be directed to the device...
1187  if ( ! deviceIsMine ) continue;
1188  if ( ! mic_device_shared_with_pe(map->computeData[i].node) ) continue;
1189  } else { // ... otherwise, direct to host...
1190  if (map->computeData[i].node != myNode) { continue; }
1191  }
1192  break;
1193 
1194 #endif
1196 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1198 // #ifdef BONDED_CUDA
1199 // case computeBondedCUDAType:
1200 // #endif
1201 #endif
1203  if ( ! deviceIsMine ) continue;
1204  default:
1205  if ( map->computeData[i].node != myNode ) continue;
1206  }
1207 #else // defined(NAMD_CUDA) || defined(NAMD_MIC)
1208  if ( map->computeData[i].node != myNode ) continue;
1209 #endif
1210  DebugM(1,"Compute " << i << '\n');
1211  DebugM(1," node = " << map->computeData[i].node << '\n');
1212  DebugM(1," type = " << map->computeData[i].type << '\n');
1213  DebugM(1," numPids = " << map->computeData[i].numPids << '\n');
1214 // DebugM(1," numPidsAllocated = " << map->computeData[i].numPidsAllocated << '\n');
1215  for (int j=0; j < map->computeData[i].numPids; j++)
1216  {
1217  DebugM(1," pid " << map->computeData[i].pids[j].pid << '\n');
1218  if (!((j+1) % 6))
1219  DebugM(1,'\n');
1220  }
1221  DebugM(1,"\n---------------------------------------");
1222  DebugM(1,"---------------------------------------\n");
1223 
1224  createCompute(i, map);
1225 
1226  }
1227 
1228 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1229  if (simParams->useCUDA2) {
1230  if (deviceIsMine) {
1233  }
1234  } else {
1235  if ( computeNonbondedCUDAObject ) {
1236  computeNonbondedCUDAObject->assignPatches();
1237  }
1238  }
1239 #ifdef BONDED_CUDA
1240  if (simParams->bondedCUDA) {
1241  if (deviceIsMineBonded) {
1242  getComputeBondedCUDA()->initialize();
1243  }
1244  }
1245 #endif
1246 #endif
1247 #ifdef NAMD_MIC
1248  if ( computeNonbondedMICObject ) {
1249  computeNonbondedMICObject->assignPatches();
1250  }
1251 #endif
1252 
1253 }
1254 
1255 #if 0
1256 void ComputeMgr:: sendComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
1257 {
1258  (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeGlobalConfig(msg);
1259 }
1260 
1261 void ComputeMgr:: recvComputeGlobalConfig(ComputeGlobalConfigMsg *msg)
1262 {
1263  if ( computeGlobalObject )
1264  {
1265  computeGlobalObject->recvConfig(msg);
1266  }
1267  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1268  else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
1269 }
1270 #endif
1271 
1273 {
1274  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1275  cm[0].recvComputeGlobalData(msg);
1276 }
1277 
1279 {
1280  if (masterServerObject) // make sure it has been initialized
1281  {
1282  masterServerObject->recvData(msg);
1283  }
1284  else NAMD_die("ComputeMgr::masterServerObject is NULL!");
1285 }
1286 
1288 {
1290  thisProxy.recvComputeGlobalResults(msg);
1291 }
1292 
1294 {
1296  for ( int i=0; i<computeGlobalResultsMsgs.size(); ++i ) {
1301  break;
1302  }
1303  }
1304 }
1305 
1307 {
1308  if ( computeGlobalObject )
1309  {
1310  if ( msg->seq == computeGlobalResultsMsgSeq ) {
1311  CmiEnableUrgentSend(1);
1313  CmiEnableUrgentSend(0);
1314  } else {
1316  }
1317  }
1318  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1319  else NAMD_die("ComputeMgr::computeGlobalObject is NULL!");
1320 }
1321 
1322 /*
1323  * Begin Ewald messages
1324  */
1326 {
1327  if (computeEwaldObject)
1328  {
1329  int node = computeEwaldObject->getMasterNode();
1330  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1331  cm[node].recvComputeEwaldData(msg);
1332  }
1333  else if (!PatchMap::Object()->numHomePatches())
1334  {
1335  CkPrintf("skipping message on Pe(%d)\n", CkMyPe());
1336  delete msg;
1337  }
1338  else NAMD_die("ComputeMgr::computeEwaldObject is NULL!");
1339 }
1340 
1342 {
1343  if (computeEwaldObject)
1344  computeEwaldObject->recvData(msg);
1345  else NAMD_die("ComputeMgr::computeEwaldObject in recvData is NULL!");
1346 }
1347 
1349 {
1350  (CProxy_ComputeMgr(CkpvAccess(BOCclass_group).computeMgr)).recvComputeEwaldResults(msg);
1351 }
1352 
1354 {
1355  if (computeEwaldObject) {
1356  CmiEnableUrgentSend(1);
1357  computeEwaldObject->recvResults(msg);
1358  CmiEnableUrgentSend(0);
1359  }
1360  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1361  else NAMD_die("ComputeMgr::computeEwaldObject in recvResults is NULL!");
1362 }
1363 
1365 {
1366  if ( computeDPMEObject )
1367  {
1368 #ifdef DPME
1369  int node = computeDPMEObject->getMasterNode();
1370  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1371  cm.recvComputeDPMEData(msg,node);
1372 #endif
1373  }
1374  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1375  else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
1376 }
1377 
1379 {
1380  if ( computeDPMEObject )
1381  {
1382 #ifdef DPME
1383  computeDPMEObject->recvData(msg);
1384 #endif
1385  }
1386  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1387  else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
1388 }
1389 
1391 {
1392  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1393  cm[node].recvComputeDPMEResults(msg);
1394 }
1395 
1397 {
1398  if ( computeDPMEObject )
1399  {
1400 #ifdef DPME
1401  computeDPMEObject->recvResults(msg);
1402 #endif
1403  }
1404  else if ( ! (PatchMap::Object())->numHomePatches() ) delete msg;
1405  else NAMD_die("ComputeMgr::computeDPMEObject is NULL!");
1406 }
1407 
1409 {
1410  Molecule *m = Node::Object()->molecule;
1411  delete [] m->consForceIndexes;
1412  delete [] m->consForce;
1413  int n = msg->aid.size();
1414  if (n > 0)
1415  {
1416  m->consForceIndexes = new int32[m->numAtoms];
1417  m->consForce = new Vector[n];
1418  int i;
1419  for (i=0; i<m->numAtoms; i++) m->consForceIndexes[i] = -1;
1420  for (i=0; i<msg->aid.size(); i++)
1421  {
1422  m->consForceIndexes[msg->aid[i]] = i;
1423  m->consForce[i] = msg->f[i];
1424  }
1425  }
1426  else
1427  {
1428  m->consForceIndexes = NULL;
1429  m->consForce = NULL;
1430  }
1431  delete msg;
1432 }
1433 
1435  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1436  cm[pe].recvYieldDevice(CkMyPe());
1437 }
1438 
1440 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1441  computeNonbondedCUDAObject->recvYieldDevice(pe);
1442 #endif
1443 #ifdef NAMD_MIC
1444  computeNonbondedMICObject->recvYieldDevice(pe);
1445 #endif
1446 }
1447 
1449  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1450  int pe = CkNodeFirst(CkMyNode());
1451  int end = pe + CkNodeSize(CkMyNode());
1452  for( ; pe != end; ++pe ) {
1453  cm[pe].recvBuildCudaExclusions();
1454  }
1455 }
1456 
1457 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1458  void build_cuda_exclusions();
1459 #endif
1460 
1462 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1464 #endif
1465 }
1466 
1468  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1469  int pe = CkNodeFirst(CkMyNode());
1470  int end = pe + CkNodeSize(CkMyNode());
1471  for( ; pe != end; ++pe ) {
1472  cm[pe].recvBuildCudaForceTable();
1473  }
1474 }
1475 
1476 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1477  void build_cuda_force_table();
1478 #endif
1479 
1481 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1483 #endif
1484 }
1485 
1487  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1488  int pe = CkNodeFirst(CkMyNode());
1489  int end = pe + CkNodeSize(CkMyNode());
1490  for( ; pe != end; ++pe ) {
1491  cm[pe].recvBuildMICForceTable();
1492  }
1493 }
1494 
1495 #ifdef NAMD_MIC
1496  void build_mic_force_table();
1497 #endif
1498 
1500  #ifdef NAMD_MIC
1501  build_mic_force_table();
1502  #endif
1503 }
1504 
1505 class NonbondedCUDASlaveMsg : public CMessage_NonbondedCUDASlaveMsg {
1506 public:
1507  int index;
1509 };
1510 
1513  msg->master = computeNonbondedCUDAObject;
1514  msg->index = index;
1515  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1516  cm[pe].recvCreateNonbondedCUDASlave(msg);
1517 }
1518 
1520 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1521  new ComputeNonbondedCUDA(msg->master->cid,this,msg->master,msg->index);
1522 #endif
1523 }
1524 
1525 void ComputeMgr::sendNonbondedCUDASlaveReady(int pe, int np, int ac, int seq) {
1526  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1527  cm[pe].recvNonbondedCUDASlaveReady(np,ac,seq);
1528 }
1529 
1530 void ComputeMgr::recvNonbondedCUDASlaveReady(int np, int ac, int seq) {
1531  for ( int i=0; i<np; ++i ) {
1532  computeNonbondedCUDAObject->patchReady(-1,ac,seq);
1533  }
1534 }
1535 
1536 class NonbondedCUDASkipMsg : public CMessage_NonbondedCUDASkipMsg {
1537 public:
1539 };
1540 
1543  msg->compute = c;
1544  thisProxy[pe].recvNonbondedCUDASlaveSkip(msg);
1545 }
1546 
1548 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1549  msg->compute->skip();
1550 #endif
1551  delete msg;
1552 }
1553 
1554 void ComputeMgr::sendNonbondedCUDASlaveEnqueue(ComputeNonbondedCUDA *c, int pe, int seq, int prio, int ws) {
1555  if ( ws == 2 && c->localHostedPatches.size() == 0 ) return;
1556  LocalWorkMsg *msg = ( ws == 1 ? c->localWorkMsg : c->localWorkMsg2 );
1557  msg->compute = c;
1558  int type = c->type();
1559  int cid = c->cid;
1560  SET_PRIORITY(msg,seq,prio);
1561  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
1562  wdProxy[pe].enqueueCUDA(msg);
1563 }
1564 
1565 void ComputeMgr::sendNonbondedCUDASlaveEnqueuePatch(ComputeNonbondedCUDA *c, int pe, int seq, int prio, int data, FinishWorkMsg *msg) {
1566  msg->compute = c;
1567  msg->data = data;
1568  SET_PRIORITY(msg,seq,prio);
1569  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
1570  wdProxy[pe].finishCUDAPatch(msg);
1571 }
1572 
1573 class NonbondedMICSlaveMsg : public CMessage_NonbondedMICSlaveMsg {
1574 public:
1575  int index;
1577 };
1578 
1579 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1580 class CudaComputeNonbondedMsg : public CMessage_CudaComputeNonbondedMsg {
1581 public:
1583  int i;
1584 };
1585 
1587  for (int i=0;i < pes.size();i++) {
1589  msg->c = c;
1590  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
1591  }
1592 }
1593 
1595  msg->c->assignPatchesOnPe();
1596  delete msg;
1597 }
1598 
1600  for (int i=0;i < pes.size();i++) {
1602  msg->c = c;
1603  thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
1604  }
1605 }
1606 
1608  msg->c->skipPatchesOnPe();
1609  delete msg;
1610 }
1611 
1613  for (int i=0;i < pes.size();i++) {
1616  msg->c = c;
1617  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
1618  }
1619 }
1620 
1622  msg->c->finishPatchesOnPe();
1623  delete msg;
1624 }
1625 
1629  msg->c = c;
1630  msg->i = i;
1631  thisProxy[pe].recvFinishPatchOnPe(msg);
1632 }
1633 
1635  msg->c->finishPatchOnPe(msg->i);
1636  delete msg;
1637 }
1638 
1639 void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, CudaComputeNonbonded* c) {
1640  for (int i=0;i < pes.size();i++) {
1642  SET_PRIORITY(msg, c->sequence(), PROXY_DATA_PRIORITY+1); // after bonded
1643  msg->c = c;
1644  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
1645  }
1646 }
1647 
1649  msg->c->openBoxesOnPe();
1650  delete msg;
1651 }
1652 
1655  msg->c = c;
1656  thisProxy[pe].recvFinishReductions(msg);
1657 }
1658 
1660  msg->c->finishReductions();
1661  delete msg;
1662 }
1663 
1666  msg->c = c;
1667  thisProxy[pe].recvMessageEnqueueWork(msg);
1668 }
1669 
1671  msg->c->messageEnqueueWork();
1672  delete msg;
1673 }
1674 
1677  msg->c = c;
1678  thisProxy[pe].recvLaunchWork(msg);
1679 }
1680 
1682  msg->c->launchWork();
1683  delete msg;
1684 }
1685 
1687  for (int i=0;i < pes.size();i++) {
1689  msg->c = c;
1690  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
1691  }
1692 }
1693 
1695  msg->c->unregisterBoxesOnPe();
1696  delete msg;
1697 }
1698 
1699 #ifdef BONDED_CUDA
1700 
1701 class ComputeBondedCUDAMsg : public CMessage_ComputeBondedCUDAMsg {
1702 public:
1703  ComputeBondedCUDA* c;
1704  int i;
1705 };
1706 
1707 void ComputeMgr::sendAssignPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1708  for (int i=0;i < pes.size();i++) {
1709  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1710  msg->c = c;
1711  thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
1712  }
1713 }
1714 
1715 void ComputeMgr::recvAssignPatchesOnPe(ComputeBondedCUDAMsg *msg) {
1716  msg->c->assignPatchesOnPe();
1717  delete msg;
1718 }
1719 
1720 void ComputeMgr::sendMessageEnqueueWork(int pe, ComputeBondedCUDA* c) {
1721  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1722  msg->c = c;
1723  thisProxy[pe].recvMessageEnqueueWork(msg);
1724 }
1725 
1726 void ComputeMgr::recvMessageEnqueueWork(ComputeBondedCUDAMsg *msg) {
1727  msg->c->messageEnqueueWork();
1728  delete msg;
1729 }
1730 
1731 void ComputeMgr::sendOpenBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1732  for (int i=0;i < pes.size();i++) {
1733  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
1734  SET_PRIORITY(msg, c->sequence(), PROXY_DATA_PRIORITY);
1735  msg->c = c;
1736  thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
1737  }
1738 }
1739 
1740 void ComputeMgr::recvOpenBoxesOnPe(ComputeBondedCUDAMsg *msg) {
1741  msg->c->openBoxesOnPe();
1742  delete msg;
1743 }
1744 
1745 void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1746  for (int i=0;i < pes.size();i++) {
1747  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1748  msg->c = c;
1749  thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
1750  }
1751 }
1752 
1753 void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
1754  msg->c->loadTuplesOnPe();
1755  delete msg;
1756 }
1757 
1758 void ComputeMgr::sendLaunchWork(int pe, ComputeBondedCUDA* c) {
1759  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1760  msg->c = c;
1761  thisProxy[pe].recvLaunchWork(msg);
1762 }
1763 
1764 void ComputeMgr::recvLaunchWork(ComputeBondedCUDAMsg *msg) {
1765  msg->c->launchWork();
1766  delete msg;
1767 }
1768 
1769 void ComputeMgr::sendFinishPatchesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1770  for (int i=0;i < pes.size();i++) {
1771  ComputeBondedCUDAMsg *msg = new (PRIORITY_SIZE) ComputeBondedCUDAMsg;
1772  SET_PRIORITY(msg, c->sequence(), COMPUTE_PROXY_PRIORITY);
1773  msg->c = c;
1774  thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
1775  }
1776 }
1777 
1778 void ComputeMgr::recvFinishPatchesOnPe(ComputeBondedCUDAMsg *msg) {
1779  msg->c->finishPatchesOnPe();
1780  delete msg;
1781 }
1782 
1783 void ComputeMgr::sendFinishReductions(int pe, ComputeBondedCUDA* c) {
1784  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1785  msg->c = c;
1786  thisProxy[pe].recvFinishReductions(msg);
1787 }
1788 
1789 void ComputeMgr::recvFinishReductions(ComputeBondedCUDAMsg *msg) {
1790  msg->c->finishReductions();
1791  delete msg;
1792 }
1793 
1794 void ComputeMgr::sendUnregisterBoxesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
1795  for (int i=0;i < pes.size();i++) {
1796  ComputeBondedCUDAMsg *msg = new ComputeBondedCUDAMsg;
1797  msg->c = c;
1798  thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
1799  }
1800 }
1801 
1802 void ComputeMgr::recvUnregisterBoxesOnPe(ComputeBondedCUDAMsg *msg) {
1803  msg->c->unregisterBoxesOnPe();
1804  delete msg;
1805 }
1806 
1807 #endif // BONDED_CUDA
1808 
1809 #endif // NAMD_CUDA
1810 
1813  msg->master = computeNonbondedMICObject;
1814  msg->index = index;
1815  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1816  cm[pe].recvCreateNonbondedMICSlave(msg);
1817 }
1818 
1820 #ifdef NAMD_MIC
1821  ComputeNonbondedMIC *c = new ComputeNonbondedMIC(msg->master->cid,this,msg->master,msg->index);
1822 #endif
1823 }
1824 
1825 void ComputeMgr::sendNonbondedMICSlaveReady(int pe, int np, int ac, int seq) {
1826  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1827  cm[pe].recvNonbondedMICSlaveReady(np,ac,seq);
1828 }
1829 
1830 void ComputeMgr::recvNonbondedMICSlaveReady(int np, int ac, int seq) {
1831  for ( int i=0; i<np; ++i ) {
1832  computeNonbondedMICObject->patchReady(-1,ac,seq);
1833  }
1834 }
1835 
1836 class NonbondedMICSkipMsg : public CMessage_NonbondedMICSkipMsg {
1837 public:
1839 };
1840 
1843  msg->compute = c;
1844  thisProxy[pe].recvNonbondedMICSlaveSkip(msg);
1845 }
1846 
1848 #ifdef NAMD_MIC
1849  msg->compute->skip();
1850 #endif
1851  delete msg;
1852 }
1853 
1854 void ComputeMgr::sendNonbondedMICSlaveEnqueue(ComputeNonbondedMIC *c, int pe, int seq, int prio, int ws) {
1855  if ( ws == 2 && c->localHostedPatches.size() == 0 ) return;
1856  LocalWorkMsg *msg = ( ws == 1 ? c->localWorkMsg : c->localWorkMsg2 );
1857  msg->compute = c;
1858  int type = c->type();
1859  int cid = c->cid;
1860  SET_PRIORITY(msg,seq,prio);
1861  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
1862  wdProxy[pe].enqueueMIC(msg);
1863 }
1864 
1865 void ComputeMgr::sendMICPEData(int pe, int data) {
1866  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
1867  cm.recvMICPEData(pe, data);
1868 }
1869 
1870 void ComputeMgr::recvMICPEData(int pe, int data) {
1871  if (pe < 0 || pe >= CkNumPes() || micPEData == NULL) { return; }
1872  int majorIndex = pe / (sizeof(int)*8);
1873  int minorIndex = pe % (sizeof(int)*8);
1874  if (data != 0) {
1875  micPEData[majorIndex] |= (0x01 << minorIndex);
1876  } else {
1877  micPEData[majorIndex] &= ((~0x01) << minorIndex);
1878  }
1879 }
1880 
1881 int isMICProcessor(int pe) {
1882  return CProxy_ComputeMgr::ckLocalBranch(CkpvAccess(BOCclass_group).computeMgr)->isMICProcessor(pe);
1883 }
1884 
1886  if (pe < 0 || pe >= CkNumPes() || micPEData == NULL) { return 0; }
1887  int majorIndex = pe / (sizeof(int)*8);
1888  int minorIndex = pe % (sizeof(int)*8);
1889  return ((micPEData[majorIndex] >> minorIndex) & 0x01);
1890 }
1891 
1892 #include "ComputeMgr.def.h"
1893 
static Node * Object()
Definition: Node.h:86
void sendNonbondedCUDASlaveEnqueuePatch(ComputeNonbondedCUDA *c, int, int, int, int, FinishWorkMsg *)
Definition: ComputeMgr.C:1565
static int offset_b(int i)
Definition: Lattice.h:248
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
Definition: ComputeMgr.C:364
void recvNonbondedMICSlaveReady(int, int, int)
Definition: ComputeMgr.C:1830
#define COMPUTE_PROXY_PRIORITY
Definition: Priorities.h:71
void recvComputeEwaldData(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1341
void updateLocalComputes()
Definition: ComputeMgr.C:215
void checkMap()
Definition: ComputeMap.C:48
void sendNonbondedCUDASlaveSkip(ComputeNonbondedCUDA *c, int)
Definition: ComputeMgr.C:1541
void sendBuildCudaForceTable()
Definition: ComputeMgr.C:1467
void build_cuda_exclusions()
void sendYieldDevice(int pe)
Definition: ComputeMgr.C:1434
void recvData(ComputeEwaldMsg *)
Definition: ComputeEwald.C:187
void recvCreateNonbondedCUDASlave(NonbondedCUDASlaveMsg *)
Definition: ComputeMgr.C:1519
int sequence(void)
Definition: Compute.h:64
void recvComputeDPMEResults(ComputeDPMEResultsMsg *)
Definition: ComputeMgr.C:1396
void recvResults(ComputeEwaldMsg *)
Definition: ComputeEwald.C:204
void setNewNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:144
void recvYieldDevice(int pe)
ComputeNonbondedMIC * master
Definition: ComputeMgr.C:1576
void build_cuda_force_table()
void recvResults(ComputeGlobalResultsMsg *)
static int offset_c(int i)
Definition: Lattice.h:249
int proxyRecvSpanning
Definition: ProxyMgr.C:46
int numComputes(void)
Definition: ComputeMap.h:101
void saveComputeMap(const char *fname)
Definition: ComputeMap.C:262
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
Definition: Node.h:78
short int32
Definition: dumpdcd.c:24
int ComputeID
Definition: NamdTypes.h:183
#define TRACE_COMPOBJ_IDOFFSET
Definition: Compute.h:77
void updateLocalComputes5()
Definition: ComputeMgr.C:316
CudaComputeNonbonded * getCudaComputeNonbonded()
Definition: ComputeMgr.C:360
void del(int index, int num=1)
Definition: ResizeArray.h:104
int gridsize_c(void) const
Definition: PatchMap.h:66
static PatchMap * Object()
Definition: PatchMap.h:27
void recvFinishPatchOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1634
void buildProxySpanningTree2()
Definition: ProxyMgr.C:577
void sendMessageEnqueueWork(int pe, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1664
static __thread ComputeMgr * computeMgr
Definition: Vector.h:64
SimParameters * simParameters
Definition: Node.h:178
void sendFinishReductions(int pe, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1653
void registerCompute(ComputeID cid, Compute *c)
Definition: ComputeMap.h:95
void basePatchIDList(int pe, PatchIDList &)
Definition: PatchMap.C:454
int index_a(int pid) const
Definition: PatchMap.h:86
void setNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:138
#define DebugM(x, y)
Definition: Debug.h:59
void recvComputeConsForceMsg(ComputeConsForceMsg *)
Definition: ComputeMgr.C:1408
virtual void initialize()
Definition: Compute.h:56
#define PROXY_DATA_PRIORITY
Definition: Priorities.h:40
void updateLocalComputes3()
Definition: ComputeMgr.C:266
void Migrate(LDObjHandle handle, int dest)
int isMICProcessor(int pe)
Definition: ComputeMgr.C:1881
LDObjHandle ldObjHandle
Definition: Compute.h:44
void recvMessageEnqueueWork(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1670
void sendNonbondedCUDASlaveReady(int, int, int, int)
Definition: ComputeMgr.C:1525
char computeMapFilename[NAMD_FILENAME_BUFFER_SIZE]
void recvMICPEData(int, int)
Definition: ComputeMgr.C:1870
LocalWorkMsg * localWorkMsg2
void createComputes(ComputeMap *map)
Definition: ComputeMgr.C:1009
void setNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:110
void recvLaunchWork(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1681
int computeGlobalResultsMsgSeq
Definition: ComputeMgr.h:99
void updateLocalComputes2(CkQdMsg *)
Definition: ComputeMgr.C:259
void recvAssignPatchesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1594
LocalWorkMsg *const localWorkMsg
Definition: Compute.h:46
void doneUpdateLocalComputes()
Definition: ComputeMgr.C:348
void register_cuda_compute_self(ComputeID c, PatchID pid)
Compute * compute
Definition: WorkDistrib.h:33
char newNumPartitions(ComputeID cid)
Definition: ComputeMap.h:141
void recvComputeGlobalConfig(ComputeGlobalConfigMsg *)
void sendMICPEData(int, int)
Definition: ComputeMgr.C:1865
int computeGlobalResultsMsgMasterSeq
Definition: ComputeMgr.h:100
CudaComputeNonbonded * c
Definition: ComputeMgr.C:1582
void sendComputeEwaldData(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1325
#define PRIORITY_SIZE
Definition: Priorities.h:13
void recvCreateNonbondedMICSlave(NonbondedMICSlaveMsg *)
Definition: ComputeMgr.C:1819
void registerUserEventsForAllComputeObjs()
Definition: ComputeMgr.C:830
void sendBuildCudaExclusions()
Definition: ComputeMgr.C:1448
void sendLaunchWork(int pe, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1675
bool mic_device_shared_with_pe(int pe)
int gridsize_a(void) const
Definition: PatchMap.h:64
void recvYieldDevice(int pe)
Definition: ComputeMgr.C:1439
int getMasterPe()
Definition: DeviceCUDA.h:105
void sendCreateNonbondedCUDASlave(int, int)
Definition: ComputeMgr.C:1511
Compute * compute
Definition: WorkDistrib.h:27
void NAMD_bug(const char *err_msg)
Definition: common.C:129
static int offset_a(int i)
Definition: Lattice.h:247
void sendComputeGlobalResults(ComputeGlobalResultsMsg *)
Definition: ComputeMgr.C:1287
ComputeType type(ComputeID cid)
Definition: ComputeMap.C:120
static ComputeCUDAMgr * getComputeCUDAMgr()
void removeUnusedProxies(void)
Definition: ProxyMgr.C:399
void sendUnregisterBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1686
ComputeNonbondedCUDA * master
Definition: ComputeMgr.C:1508
int index_b(int pid) const
Definition: PatchMap.h:87
bool device_shared_with_pe(int pe)
Definition: DeviceCUDA.C:392
void clear()
Definition: ResizeArray.h:87
int numPartitions(ComputeID cid)
Definition: ComputeMap.C:135
int numAtoms
Definition: Molecule.h:557
int PatchID
Definition: NamdTypes.h:182
void sendFinishPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1612
void recvFinishReductions(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1659
void createProxy(PatchID pid)
Definition: ProxyMgr.C:493
void setNewNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:120
int partition(ComputeID cid)
Definition: ComputeMap.C:128
void updateLocalComputes4(CkQdMsg *)
Definition: ComputeMgr.C:298
void NAMD_die(const char *err_msg)
Definition: common.C:85
static LdbCoordinator * Object()
void recvNonbondedMICSlaveSkip(NonbondedMICSkipMsg *)
Definition: ComputeMgr.C:1847
void recvFinishPatchesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1621
void enableComputeGlobalResults()
Definition: ComputeMgr.C:1293
void registerComputeSelf(ComputeID cid, PatchID pid)
LocalWorkMsg * localWorkMsg2
void recvComputeGlobalResults(ComputeGlobalResultsMsg *)
Definition: ComputeMgr.C:1306
ResizeArray< ComputeGlobalResultsMsg * > computeGlobalResultsMsgs
Definition: ComputeMgr.h:98
void recvComputeDPMEData(ComputeDPMEDataMsg *)
Definition: ComputeMgr.C:1378
void splitComputes()
Definition: ComputeMgr.C:175
ComputeGlobal * computeGlobalObject
Definition: ComputeMgr.h:97
void recvData(ComputeGlobalDataMsg *)
void recvComputeEwaldResults(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1353
int getMasterNode() const
Definition: ComputeEwald.h:86
int index_c(int pid) const
Definition: PatchMap.h:88
void extendPtrs()
Definition: ComputeMap.C:89
void sendComputeDPMEData(ComputeDPMEDataMsg *)
Definition: ComputeMgr.C:1364
void saveComputeMapChanges(int, CkGroupID)
Definition: WorkDistrib.C:356
ComputeNonbondedMIC * compute
Definition: ComputeMgr.C:1838
int add(const Elem &elem)
Definition: ResizeArray.h:97
void recvComputeGlobalData(ComputeGlobalDataMsg *)
Definition: ComputeMgr.C:1278
void addClient(GlobalMaster *newClient)
void sendComputeDPMEResults(ComputeDPMEResultsMsg *, int)
Definition: ComputeMgr.C:1390
void sendNonbondedMICSlaveEnqueue(ComputeNonbondedMIC *c, int, int, int, int)
Definition: ComputeMgr.C:1854
int myid()
Definition: Node.h:188
#define simParams
Definition: Output.C:127
int32 * consForceIndexes
Definition: Molecule.h:613
ComputeNonbondedCUDA * compute
Definition: ComputeMgr.C:1538
void resize(int i)
Definition: ResizeArray.h:84
void checkMap()
Definition: PatchMap.C:274
void sendNonbondedMICSlaveReady(int, int, int, int)
Definition: ComputeMgr.C:1825
void updateComputes2(CkQdMsg *)
Definition: ComputeMgr.C:156
ResizeArray< int > localHostedPatches
void sendNonbondedCUDASlaveEnqueue(ComputeNonbondedCUDA *c, int, int, int, int)
Definition: ComputeMgr.C:1554
void registerComputePair(ComputeID cid, PatchID *pid, int *trans)
Compute * compute(ComputeID cid)
Definition: ComputeMap.h:171
ComputeID cloneCompute(ComputeID src, int partition)
Definition: ComputeMap.C:185
static ComputeMap * Object()
Definition: ComputeMap.h:89
void recvOpenBoxesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1648
void sendSpanningTrees()
Definition: ProxyMgr.C:1107
void recvUnregisterBoxesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1694
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
void sendComputeGlobalConfig(ComputeGlobalConfigMsg *)
void assignPatches(ComputeMgr *computeMgrIn)
void recvNonbondedCUDASlaveReady(int, int, int)
Definition: ComputeMgr.C:1530
char SMDFile[NAMD_FILENAME_BUFFER_SIZE]
CudaComputeNonbonded * getCudaComputeNonbonded()
int type()
Definition: Compute.h:48
void recvSkipPatchesOnPe(CudaComputeNonbondedMsg *msg)
Definition: ComputeMgr.C:1607
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
static int nodecount
Definition: ProxyMgr.h:398
int node(ComputeID cid)
Definition: ComputeMap.h:106
int numPids(ComputeID cid)
Definition: ComputeMap.C:103
Vector * consForce
Definition: Molecule.h:614
void recvNonbondedCUDASlaveSkip(NonbondedCUDASkipMsg *)
Definition: ComputeMgr.C:1547
void sendComputeGlobalData(ComputeGlobalDataMsg *)
Definition: ComputeMgr.C:1272
int pid(ComputeID cid, int i)
Definition: ComputeMap.C:109
virtual void patchReady(PatchID, int doneMigration, int seq)
Definition: Compute.C:63
int size(void) const
Definition: ResizeArray.h:127
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
void recvBuildMICForceTable()
Definition: ComputeMgr.C:1499
int trans(ComputeID cid, int i)
Definition: ComputeMap.C:114
void sendOpenBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1639
void updateComputes3()
Definition: ComputeMgr.C:165
void sendFinishPatchOnPe(int pe, CudaComputeNonbonded *c, int i, PatchID patchID)
Definition: ComputeMgr.C:1626
void sendNonbondedMICSlaveSkip(ComputeNonbondedMIC *c, int)
Definition: ComputeMgr.C:1841
int isMICProcessor(int)
Definition: ComputeMgr.C:1885
void sendBuildMICForceTable()
Definition: ComputeMgr.C:1486
Molecule * molecule
Definition: Node.h:176
void updateComputes(int, CkGroupID)
Definition: ComputeMgr.C:142
int gridsize_b(void) const
Definition: PatchMap.h:65
const ComputeID cid
Definition: Compute.h:43
void recvBuildCudaForceTable()
Definition: ComputeMgr.C:1480
void sendComputeEwaldResults(ComputeEwaldMsg *)
Definition: ComputeMgr.C:1348
int mic_device_pe()
colvarproxy_namd GlobalMasterColvars
void sendAssignPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1586
void sendCreateNonbondedMICSlave(int, int)
Definition: ComputeMgr.C:1811
void recvBuildCudaExclusions()
Definition: ComputeMgr.C:1461
void splitComputes2(CkQdMsg *)
Definition: ComputeMgr.C:209
ResizeArray< int > localHostedPatches
#define PATCH_PRIORITY(PID)
Definition: Priorities.h:25
for(int i=0;i< n1;++i)
void sendSkipPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
Definition: ComputeMgr.C:1599
NodeID newNode(ComputeID cid)
Definition: ComputeMap.h:116
int proxySendSpanning
Definition: ProxyMgr.C:45
void register_cuda_compute_pair(ComputeID c, PatchID pid[], int t[])