NAMD
WorkDistrib.C
1 
7 /*****************************************************************************
8  * $Source: /home/cvs/namd/cvsroot/namd2/src/WorkDistrib.C,v $
9  * $Author: jim $
10  * $Date: 2017/03/30 20:06:17 $
11  * $Revision: 1.1291 $
12  *****************************************************************************/
13 
20 #include <stdio.h>
21 
22 #include "InfoStream.h"
23 #include "Communicate.h"
24 #include "ProcessorPrivate.h"
25 #include "BOCgroup.h"
26 #include "WorkDistrib.decl.h"
27 #include "WorkDistrib.h"
28 #include "Lattice.h"
29 #include "ComputeMsmMsa.h" // needed for MsmMsaData definition
30 #include "main.decl.h"
31 #include "main.h"
32 #include "Node.h"
33 #include "PatchMgr.h"
34 #include "PatchMap.inl"
35 #include "NamdTypes.h"
36 #include "PDB.h"
37 #include "SimParameters.h"
38 #include "Molecule.h"
39 #include "NamdOneTools.h"
40 #include "Compute.h"
41 #include "ComputeMap.h"
42 #include "RecBisection.h"
43 #include "Random.h"
44 #include "varsizemsg.h"
45 #include "ProxyMgr.h"
46 #include "Priorities.h"
47 #include "SortAtoms.h"
48 #include <algorithm>
49 #include "TopoManager.h"
50 #include "ComputePmeCUDAMgr.h"
51 #include "ConfigList.h"
52 
53 #include "DeviceCUDA.h"
54 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
55 #ifdef WIN32
56 #define __thread __declspec(thread)
57 #endif
58 extern __thread DeviceCUDA *deviceCUDA;
59 #endif
60 
61 //#define DEBUGM
62 #define MIN_DEBUG_LEVEL 2
63 #include "Debug.h"
64 #ifdef MEM_OPT_VERSION
65 extern int isOutputProcessor(int);
66 #endif
67 class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
68 {
69 public:
70 
73  int *newNodes;
75 
76 // VARSIZE_DECL(ComputeMapChangeMsg);
77 };
78 
79 /*
80 VARSIZE_MSG(ComputeMapChangeMsg,
81  VARSIZE_ARRAY(newNodes);
82 )
83 */
84 
85 static int randtopo;
86 
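// The +randtopo flag requests a randomized physical-node ordering (applied in
// buildNodeAwarePeOrdering() below); topo_getargs() records the flag and
// registers build_ordering() to run once Charm++ topology information is
// available.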
87 static void build_ordering(void *) {
89 }
90 
91 void topo_getargs(char **argv) {
92  randtopo = CmiGetArgFlag(argv, "+randtopo");
93  if ( CkMyPe() >= CkNumPes() ) return;
94 #if CCD_COND_FN_EXISTS
95  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
96 #else
97  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
98 #endif
99 }
100 
101 static int eventMachineProgress;
102 
103 //======================================================================
104 // Public functions
105 //----------------------------------------------------------------------
106 WorkDistrib::WorkDistrib(void)
107 {
108  CkpvAccess(BOCclass_group).workDistrib = thisgroup;
109  patchMapArrived = false;
110  computeMapArrived = false;
111 
112 #if CMK_SMP
113 #define MACHINE_PROGRESS
114 #else
115 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
116  if ( CkMyNodeSize() > 1 ) NAMD_bug("CkMyNodeSize() > 1 for non-smp build");
117  eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
118 #endif
119 }
120 
121 //----------------------------------------------------------------------
122 WorkDistrib::~WorkDistrib(void)
123 { }
124 
125 static int compare_bit_reversed(int a, int b) {
126  int d = a ^ b;
127  int c = 1;
128  if ( d ) while ( ! (d & c) ) {
129  c = c << 1;
130  }
131  return (a & c) - (b & c);
132 }
133 
134 static bool less_than_bit_reversed(int a, int b) {
135  int d = a ^ b;
136  int c = 1;
137  if ( d ) while ( ! (d & c) ) {
138  c = c << 1;
139  }
140  return d && (b & c);
141 }
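// Both helpers above compare integers by their lowest differing bit, which is
// the same as comparing their bit-reversed binary representations.  For
// example, compare_bit_reversed(1, 2) examines bit 0, the lowest bit in which
// 01 and 10 differ: it is set in 1 but not in 2, so 2 orders before 1.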
142 
143 struct pe_sortop_bit_reversed {
144  int *rankInPhysOfNode;
145  pe_sortop_bit_reversed(int *r) : rankInPhysOfNode(r) { ; }
146  inline bool operator() (int a, int b) const {
147  int c = compare_bit_reversed(CmiRankOf(a),CmiRankOf(b));
148  if ( c < 0 ) return true;
149  if ( c > 0 ) return false;
150  c = compare_bit_reversed(
151  rankInPhysOfNode[CmiNodeOf(a)],rankInPhysOfNode[CmiNodeOf(b)]);
152  if ( c < 0 ) return true;
153  if ( c > 0 ) return false;
154  c = compare_bit_reversed(CmiPhysicalNodeID(a),CmiPhysicalNodeID(b));
155  return ( c < 0 );
156  }
157 };
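// pe_sortop_bit_reversed orders PEs by bit-reversed rank within their node,
// then by bit-reversed rank of the node within its physical node, then by
// bit-reversed physical-node ID, yielding the "diffuse" ordering built below.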
158 
159 int WorkDistrib::peOrderingInit;
160 int* WorkDistrib::peDiffuseOrdering;
161 int* WorkDistrib::peDiffuseOrderingIndex;
162 int* WorkDistrib::peCompactOrdering;
163 int* WorkDistrib::peCompactOrderingIndex;
164 
165 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
166 extern void cuda_initialize();
167 #endif
168 
169 void mic_initialize();
170 
171 void WorkDistrib::peOrderingReady() {
172  //CkPrintf("WorkDistrib::peOrderingReady on %d\n", CkMyPe());
173 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
174  cuda_initialize();
175 #endif
176 #ifdef NAMD_MIC
177  mic_initialize();
178 #endif
179 }
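// peOrderingReady() runs once the PE orderings are final; on CUDA/HIP and MIC
// builds this is the point at which device initialization is triggered.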
180 
181 void WorkDistrib::buildNodeAwarePeOrdering(void) {
182 
183  CmiMemLock();
184  if ( ! peOrderingInit ) {
185  //CkPrintf("WorkDistrib::buildNodeAwarePeOrdering on pe %d\n", CkMyPe());
186 
187  const int numPhys = CmiNumPhysicalNodes();
188  const int numNode = CmiNumNodes();
189  const int numPe = CmiNumPes();
190  ResizeArray<int> numNodeInPhys(numPhys);
191  ResizeArray<int> rankInPhysOfNode(numNode);
192 
193  peDiffuseOrdering = new int[numPe];
194  peDiffuseOrderingIndex = new int[numPe];
195  peCompactOrdering = new int[numPe];
196  peCompactOrderingIndex = new int[numPe];
197 
198  int k = 0;
199  for ( int ph=0; ph<numPhys; ++ph ) {
200  int *pes, npes;
201  CmiGetPesOnPhysicalNode(ph, &pes, &npes);
202  for ( int i=0; i<npes; ++i, ++k ) {
203  peCompactOrdering[k] = pes[i];
204  }
205  numNodeInPhys[ph] = 0;
206  for ( int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
207  rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
208  numNodeInPhys[ph] += 1;
209  }
210  }
211 
212  if ( randtopo && numPhys > 2 ) {
213  if ( ! CkMyNode() ) {
214  iout << iWARN << "RANDOMIZING PHYSICAL NODE ORDERING\n" << endi;
215  }
216  ResizeArray<int> randPhysOrder(numPhys);
217  for ( int j=0; j<numPhys; ++j ) {
218  randPhysOrder[j] = j;
219  }
220  Random(314159265).reorder(randPhysOrder.begin()+2, numPhys-2);
221  for ( int j=0, k=0; j<numPhys; ++j ) {
222  const int ph = randPhysOrder[j];
223  int *pes, npes;
224  CmiGetPesOnPhysicalNode(ph, &pes, &npes);
225  for ( int i=0; i<npes; ++i, ++k ) {
226  peCompactOrdering[k] = pes[i];
227  }
228  }
229  }
230 
231  for ( int i=0; i<numPe; ++i ) {
232  peDiffuseOrdering[i] = i;
233  }
234  std::sort(peDiffuseOrdering, peDiffuseOrdering+numPe,
235  pe_sortop_bit_reversed(rankInPhysOfNode.begin()));
236 
237  for ( int i=0; i<numPe; ++i ) {
238  peDiffuseOrderingIndex[peDiffuseOrdering[i]] = i;
239  peCompactOrderingIndex[peCompactOrdering[i]] = i;
240  }
241 
242  if ( 0 && CmiMyNode() == 0 ) for ( int i=0; i<numPe; ++i ) {
243  CkPrintf("order %5d %5d %5d %5d %5d\n", i,
244  peDiffuseOrdering[i],
245  peDiffuseOrderingIndex[i],
246  peCompactOrdering[i],
247  peCompactOrderingIndex[i]);
248  }
249 
250  peOrderingInit = 1;
251  }
252  CmiMemUnlock();
253  peOrderingReady();
254 
255 }
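// The routine above builds two PE permutations plus their inverses:
// peCompactOrdering packs PEs physical node by physical node, while
// peDiffuseOrdering spreads neighboring indices across the machine via the
// bit-reversed comparator.  An illustrative (not original) lookup pattern,
// assuming the static arrays are visible to the caller as elsewhere in NAMD:
//
//   int pe   = WorkDistrib::peDiffuseOrdering[i];       // i-th PE, diffuse order
//   int rank = WorkDistrib::peDiffuseOrderingIndex[pe]; // inverse: back to i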
256 
257 struct pe_sortop_coord_x {
258  ScaledPosition *spos;
259  pe_sortop_coord_x(ScaledPosition *s) : spos(s) { ; }
260  inline bool operator() (int a, int b) const {
261  return ( spos[a].x < spos[b].x );
262  }
263 };
264 
265 struct pe_sortop_coord_y {
266  ScaledPosition *spos;
267  pe_sortop_coord_y(ScaledPosition *s) : spos(s) { ; }
268  inline bool operator() (int a, int b) const {
269  return ( spos[a].y < spos[b].y );
270  }
271 };
272 
273 static void recursive_bisect_coord(
274  int x_begin, int x_end, int y_begin, int y_end,
275  int *pe_begin, ScaledPosition *coord,
276  int *result, int ydim
277  ) {
278  int x_len = x_end - x_begin;
279  int y_len = y_end - y_begin;
280  if ( x_len == 1 && y_len == 1 ) {
281  // done, now put this pe in the right place
282  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
283  coord[*pe_begin].x, coord[*pe_begin].y);
284  result[x_begin*ydim + y_begin] = *pe_begin;
285  return;
286  }
287  int *pe_end = pe_begin + x_len * y_len;
288  if ( x_len >= y_len ) {
289  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
290  int x_split = x_begin + x_len / 2;
291  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
292  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
293  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
294  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
295  } else {
296  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
297  int y_split = y_begin + y_len / 2;
298  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
299  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
300  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
301  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
302  }
303 }
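// recursive_bisect_coord() distributes the xdim*ydim PME grid slots over the
// given PEs by repeatedly halving the longer grid dimension, each time sorting
// the candidate PEs by the corresponding coordinate of their average patch
// position, so neighboring grid slots land on PEs holding nearby patches.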
304 
305 void WorkDistrib::sortPmePes(int *pmepes, int xdim, int ydim) {
306  int numpes = CkNumPes();
307  ResizeArray<int> count(numpes);
308  ResizeArray<ScaledPosition> sumPos(numpes);
309  ResizeArray<ScaledPosition> avgPos(numpes);
310  for ( int i=0; i<numpes; ++i ) {
311  count[i] = 0;
312  sumPos[i] = 0;
313  avgPos[i] = 0;
314  }
315  PatchMap *patchMap = PatchMap::Object();
316  for ( int i=0, npatches=patchMap->numPatches(); i<npatches; ++i ) {
317  int pe = patchMap->node(i);
318  count[pe] += 1;
319  sumPos[pe] += patchMap->center(i);
320  }
321  const int npmepes = xdim*ydim;
322  ResizeArray<int> sortpes(npmepes);
323  for ( int i=0; i<npmepes; ++i ) {
324  int pe = sortpes[i] = pmepes[i];
325  int cnt = count[pe];
326  ScaledPosition sum = sumPos[pe];
327  if ( cnt == 0 ) {
328  // average over node
329  int node = CkNodeOf(pe);
330  int nsize = CkNodeSize(node);
331  int pe2 = CkNodeFirst(node);
332  for ( int j=0; j<nsize; ++j, ++pe2 ) {
333  cnt += count[pe2];
334  sum += sumPos[pe2];
335  }
336  }
337  if ( cnt == 0 ) {
338  // average over physical node
339  int node = CmiPhysicalNodeID(pe);
340  int nsize, *nlist;
341  CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
342  for ( int j=0; j<nsize; ++j ) {
343  int pe2 = nlist[j];
344  cnt += count[pe2];
345  sum += sumPos[pe2];
346  }
347  }
348  if ( cnt ) {
349  avgPos[pe] = sum / cnt;
350  }
351  }
352  recursive_bisect_coord(0, xdim, 0, ydim, sortpes.begin(), avgPos.begin(), pmepes, ydim);
353 }
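// sortPmePes() averages the positions of the patches owned by each PME PE; a
// PE with no patches falls back to the average over its SMP node and then over
// its physical node before the spatial bisection above is applied.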
354 
355 
356 //----------------------------------------------------------------------
357 void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
358 {
359  saveComputeMapReturnEP = ep;
360  saveComputeMapReturnChareID = chareID;
361 
362  ComputeMapChangeMsg *mapMsg = new (0, 0, 0) ComputeMapChangeMsg;
363  CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
364 
365 /*
366  // store the latest compute map
367  SimParameters *simParams = Node::Object()->simParameters;
368  if (simParams->storeComputeMap) {
369  computeMap->saveComputeMap(simParams->computeMapFilename);
370  CkPrintf("ComputeMap has been stored in %s.\n", simParams->computeMapFilename);
371  }
372 */
373 }
374 
375 void WorkDistrib::recvComputeMapChanges(ComputeMapChangeMsg *msg) {
376 
377  delete msg;
378 
379  ComputeMap *computeMap = ComputeMap::Object();
380 
381  int i;
382  int nc = computeMap->numComputes();
383 
384  if ( ! CkMyPe() ) { // send
385  // CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
386  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
387  msg->put(nc);
388  for (i=0; i<nc; i++) {
389  int data = computeMap->newNode(i);
390  msg->put(data);
391  }
392  msg->put(nc);
393  for (i=0; i<nc; i++) {
394  char data = computeMap->newNumPartitions(i);
395  msg->put(data);
396  }
397  msg->put(nc);
398  msg->end();
399  delete msg;
400  // CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
401  } else if ( ! CkMyRank() ) { // receive
402  // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
403  MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
404  msg->get(i);
405  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 1 failed\n");
406  for (i=0; i<nc; i++) {
407  int data;
408  msg->get(data);
409  computeMap->setNewNode(i,data);
410  }
411  msg->get(i);
412  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 2 failed\n");
413  for (i=0; i<nc; i++) {
414  char data;
415  msg->get(data);
416  computeMap->setNewNumPartitions(i,data);
417  }
418  msg->get(i);
419  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 3 failed\n");
420  delete msg;
421  // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
422  }
423 
424  CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
425  contribute(0, NULL, CkReduction::random, cb);
426 }
427 
428 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
429  delete msg;
430 
431  CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
432 }
433 
434 #ifdef MEM_OPT_VERSION
435 //All basic info already exists for each atom inside the FullAtomList because
436 //it is loaded when reading the binary per-atom file. This function fills in
437 //the remaining fields (transform, nonbondedGroupSize, etc.). Refer to
438 //WorkDistrib::createAtomLists.
439 void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
440  PatchMap *patchMap = PatchMap::Object();
441 
442  ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
443  0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
444  0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));
445 
446  int n = alist.size();
447  FullAtom *a = alist.begin();
448 /*
449  //Those options are not supported in MEM_OPT_VERSION -Chao Mei
450 //Modifications for alchemical fep
451  Bool alchFepOn = params->alchFepOn;
452  Bool alchThermIntOn = params->alchThermIntOn;
453 //fepe
454  Bool lesOn = params->lesOn;
455  Bool pairInteractionOn = params->pairInteractionOn;
456 
457  Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
458 */
459  SimParameters *params = Node::Object()->simParameters;
460  const Lattice lattice = params->lattice;
461  Transform mother_transform;
462  for(int j=0; j < n; j++)
463  {
464  int aid = a[j].id;
465  a[j].nonbondedGroupSize = 0; // must be set based on coordinates
466 
467  // a[j].fixedPosition = a[j].position; ParallelIOMgr stores ref coord here.
468 
469  if ( a[j].migrationGroupSize ) {
470  if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
471  Position pos = a[j].position;
472  int mgs = a[j].migrationGroupSize;
473  int c = 1;
474 
475  for ( int k=a[j].hydrogenGroupSize; k<mgs;
476  k+=a[j+k].hydrogenGroupSize ) {
477  pos += a[j+k].position;
478  ++c;
479  }
480 
481  pos *= 1./c;
482  mother_transform = a[j].transform; // should be 0,0,0
483  pos = lattice.nearest(pos,center,&mother_transform);
484  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
485  a[j].transform = mother_transform;
486  } else {
487  a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
488  mother_transform = a[j].transform;
489  }
490  } else {
491  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
492  a[j].transform = mother_transform;
493  }
494 
495 /*
496  //Those options are not supported in MEM_OPT_VERSION -Chao Mei
497 //Modifications for alchemical fep
498  if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
499  a[j].partition = molecule->get_fep_type(aid);
500  }
501  else {
502  a[j].partition = 0;
503  }
504 //fepe
505 */
506  a[j].partition = 0;
507 
508  //set langevinParams based on atom status
509  if(params->langevinOn) {
510  BigReal bval = params->langevinDamping;
511  if(!params->langevinHydrogen &&
512  ((a[j].status & HydrogenAtom)!=0)) {
513  bval = 0;
514  }else if ((a[j].status & LonepairAtom)!=0) {
515  bval = 0;
516  }else if ((a[j].status & DrudeAtom)!=0) {
517  bval = params->langevinDamping;
518  }
519  a[j].langevinParam = bval;
520  }
521 
522  }
523 
524  // DH - promote water checking to take place before simulation
525  // Flag the Settle water molecules.
526  int size, allfixed;
527  const WaterModel watmodel = params->watmodel;
528  const int wathgsize = getWaterModelGroupSize(watmodel);
529  const int fixedAtomsOn = params->fixedAtomsOn;
530  const int useSettle = params->useSettle;
531  for(int j=0; j < n; j+=size) {
532  size = a[j].hydrogenGroupSize;
533  if ( ! size ) {
534  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
535  }
536  allfixed = 1;
537  for (int k = 0; k < size; ++k ) {
538  allfixed = ( allfixed && (a[j+k].atomFixed) );
539  }
540  for (int k = 0; k < size; ++k ) {
541  a[j+k].groupFixed = allfixed ? 1 : 0;
542  }
543  // DH - Set isWater flag for SETTLE water molecules,
544  // based on same condition as used in HomePatch::buildRattleList():
545  // The rigidBondLength of the FIRST water atom is set greater than 0.
546  if (a[j].rigidBondLength > 0) {
547  if (size != wathgsize) {
548  char errmsg[256];
549  sprintf(errmsg,
550  "Water molecule starting with atom %d contains %d atoms "
551  "but the specified water model requires %d atoms.\n",
552  a[j].id+1, size, wathgsize
553  );
554  NAMD_die(errmsg);
555  }
556  int anyfixed = 0;
557  for (int k = 0; k < size; k++) {
558  anyfixed += ( fixedAtomsOn && a[j+k].atomFixed );
559  }
560  if (useSettle && !anyfixed) {
561  for (int k = 0; k < size; k++) {
562  a[j+k].isWater = 1;
563  }
564  }
565  }
566  }
567 
568  if ( params->outputPatchDetails ) {
569  int patchId = pid;
570  int numAtomsInPatch = n;
571  int numFixedAtomsInPatch = 0;
572  int numAtomsInFixedGroupsInPatch = 0;
573  for(int j=0; j < n; j++) {
574  numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
575  numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
576  }
577  iout << "PATCH_DETAILS:"
578  << " on proc " << CkMyPe()
579  << " patch " << patchId
580  << " atoms " << numAtomsInPatch
581  << " fixed_atoms " << numFixedAtomsInPatch
582  << " fixed_groups " << numAtomsInFixedGroupsInPatch
583  << "\n" << endi;
584  }
585 
586 }
587 
588 void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
589 {
590  int i, j; // Loop counter
591  BigReal kbT; // Boltzmann constant * Temp
592  BigReal randnum; // Random number from -6.0 to 6.0
593  BigReal kbToverM; // sqrt(Kb*Temp/Mass)
594  SimParameters *simParams = Node::Object()->simParameters;
595  Bool lesOn = simParams->lesOn;
596  Random vel_random(simParams->randomSeed);
597  int lesReduceTemp = lesOn && simParams->lesReduceTemp;
598  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
599 
600  kbT = Temp*BOLTZMANN;
601  int count=0;
602  int totalAtoms = inAtoms.size();
603  for(i=0;i<totalAtoms;i++)
604  {
605  Real atomMs=inAtoms[i].mass;
606 
607  if (atomMs <= 0.) {
608  kbToverM = 0.;
609  } else {
610  /*
611  * lesOn is not supported in MEM_OPT_VERSION, so the original assignment
612  * is simplified. --Chao Mei
613  */
614  //kbToverM = sqrt(kbT *
615  //( lesOn && structure->get_fep_type(aid) ? tempFactor : 1.0 ) /
616  // atomMs );
617  kbToverM = sqrt(kbT * 1.0 / atomMs);
618  }
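  // Each velocity component below is generated by summing twelve uniform(0,1)
  // deviates and subtracting 6, giving an approximately standard normal sample
  // (mean 0, variance 1); scaling by sqrt(kB*T/m) then produces the
  // Maxwell-Boltzmann distribution for that component.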
619  for (randnum=0.0, j=0; j<12; j++)
620  {
621  randnum += vel_random.uniform();
622  }
623 
624  randnum -= 6.0;
625 
626  inAtoms[i].velocity.x = randnum*kbToverM;
627 
628  for (randnum=0.0, j=0; j<12; j++)
629  {
630  randnum += vel_random.uniform();
631  }
632 
633  randnum -= 6.0;
634 
635  inAtoms[i].velocity.y = randnum*kbToverM;
636 
637  for (randnum=0.0, j=0; j<12; j++)
638  {
639  randnum += vel_random.uniform();
640  }
641 
642  randnum -= 6.0;
643 
644  inAtoms[i].velocity.z = randnum*kbToverM;
645  }
646 }
647 #endif
648 
649 //----------------------------------------------------------------------
650 // This should only be called on node 0.
651 //----------------------------------------------------------------------
652 FullAtomList *WorkDistrib::createAtomLists(const char *basename)
653 {
654  int i;
655  StringList *current; // Pointer used to retrieve configuration items
656  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
657  Node *node = nd.ckLocalBranch();
658  PatchMap *patchMap = PatchMap::Object();
659  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
660  PatchMgr *patchMgr = pm.ckLocalBranch();
661  SimParameters *params = node->simParameters;
662  Molecule *molecule = node->molecule;
663  PDB *pdb = node->pdb;
664 
665  int numPatches = patchMap->numPatches();
666  int numAtoms = pdb->num_atoms();
667 
668  Vector *positions = new Position[numAtoms];
669  Vector *velocities = new Velocity[numAtoms];
670 
671  if ( basename ) {
672  if ( params->binaryOutput ) {
673  read_binary_file((std::string(basename)+".coor").c_str(), positions, numAtoms);
674  read_binary_file((std::string(basename)+".vel").c_str(), velocities, numAtoms);
675  } else {
676  PDB coorpdb((std::string(basename)+".coor").c_str());
677  if ( coorpdb.num_atoms() != numAtoms ) {
678  NAMD_die("Incorrect atom count in coordinate pdb file");
679  }
680  coorpdb.get_all_positions(positions);
681  velocities_from_PDB((std::string(basename)+".vel").c_str(), velocities, numAtoms);
682  }
683  } else {
684  pdb->get_all_positions(positions);
685 
686  if ( params->initialTemp < 0.0 ) {
687  Bool binvels=FALSE;
688 
689  // Reading the velocities from a PDB
690  current = node->configList->find("velocities");
691 
692  if (current == NULL) {
693  current = node->configList->find("binvelocities");
694  binvels = TRUE;
695  }
696 
697  if (!binvels) {
698  velocities_from_PDB(current->data, velocities, numAtoms);
699  }
700  else {
701  velocities_from_binfile(current->data, velocities, numAtoms);
702  }
703  }
704  else {
705  // Random velocities for a given temperature
706  random_velocities(params->initialTemp, molecule, velocities, numAtoms);
707  }
708  }
709 
710  // If COMMotion == no, remove center of mass motion
711  if (!(params->comMove)) {
712  remove_com_motion(velocities, molecule, numAtoms);
713  }
714 
715  FullAtomList *atoms = new FullAtomList[numPatches];
716 
717  const Lattice lattice = params->lattice;
718 
719  if ( params->staticAtomAssignment ) {
720  FullAtomList sortAtoms;
721  for ( i=0; i < numAtoms; i++ ) {
722  HydrogenGroupID &h = molecule->hydrogenGroup[i];
723  if ( ! h.isMP ) continue;
724  FullAtom a;
725  a.id = i;
726  a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
727  a.position = positions[h.atomID];
728  sortAtoms.add(a);
729  }
730  int *order = new int[sortAtoms.size()];
731  for ( i=0; i < sortAtoms.size(); i++ ) {
732  order[i] = i;
733  }
734  int *breaks = new int[numPatches];
735  sortAtomsForPatches(order,breaks,sortAtoms.begin(),
736  sortAtoms.size(),numAtoms,
737  patchMap->gridsize_c(),
738  patchMap->gridsize_b(),
739  patchMap->gridsize_a());
740 
741  i = 0;
742  for ( int pid = 0; pid < numPatches; ++pid ) {
743  int iend = breaks[pid];
744  for ( ; i<iend; ++i ) {
745  FullAtom &sa = sortAtoms[order[i]];
746  int mgs = sa.migrationGroupSize;
747 /*
748 CkPrintf("patch %d (%d %d %d) has group %d atom %d size %d at %.2f %.2f %.2f\n",
749  pid, patchMap->index_a(pid), patchMap->index_b(pid),
750  patchMap->index_c(pid), order[i], sa.id, mgs,
751  sa.position.x, sa.position.y, sa.position.z);
752 */
753  for ( int k=0; k<mgs; ++k ) {
754  HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
755  int aid = h.atomID;
756  FullAtom a;
757  a.id = aid;
758  a.position = positions[aid];
759  a.velocity = velocities[aid];
760  a.vdwType = molecule->atomvdwtype(aid);
761  a.status = molecule->getAtoms()[aid].status;
762  a.langevinParam = molecule->langevin_param(aid);
763  a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
764  a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
765  if(params->rigidBonds != RIGID_NONE) {
766  a.rigidBondLength = molecule->rigid_bond_length(aid);
767  }else{
768  a.rigidBondLength = 0.0;
769  }
770  atoms[pid].add(a);
771  }
772  }
773 CkPrintf("patch %d (%d %d %d) has %d atoms\n",
774  pid, patchMap->index_a(pid), patchMap->index_b(pid),
775  patchMap->index_c(pid), atoms[pid].size());
776  }
777  delete [] order;
778  delete [] breaks;
779  } else
780  {
781  // split atoms into patches based on migration group and position
782  int aid, pid=0;
783  for(i=0; i < numAtoms; i++)
784  {
785  // Assign atoms to patches without splitting hydrogen groups.
786  // We know that the hydrogenGroup array is sorted with group parents
787  // listed first. Thus, only change the pid if an atom is a group parent.
788  HydrogenGroupID &h = molecule->hydrogenGroup[i];
789  aid = h.atomID;
790  FullAtom a;
791  a.id = aid;
792  a.position = positions[aid];
793  a.velocity = velocities[aid];
794  a.vdwType = molecule->atomvdwtype(aid);
795  a.status = molecule->getAtoms()[aid].status;
796  a.langevinParam = molecule->langevin_param(aid);
797  a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
798  a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
799  if(params->rigidBonds != RIGID_NONE) {
800  a.rigidBondLength = molecule->rigid_bond_length(aid);
801  }else{
802  a.rigidBondLength = 0.0;
803  }
804  if (h.isMP) {
805  pid = patchMap->assignToPatch(positions[aid],lattice);
806  } // else: don't change pid
807  atoms[pid].add(a);
808  }
809  }
810 
811  delete [] positions;
812  delete [] velocities;
813 
814  for(i=0; i < numPatches; i++)
815  {
816  ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
817  0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
818  0.5*(patchMap->min_c(i)+patchMap->max_c(i)));
819 
820  int n = atoms[i].size();
821  FullAtom *a = atoms[i].begin();
822  int j;
823 //Modifications for alchemical fep
824  Bool alchOn = params->alchOn;
825 //fepe
826  Bool lesOn = params->lesOn;
827 
828  Bool pairInteractionOn = params->pairInteractionOn;
829 
830  Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
831 
832  Transform mother_transform;
833  for(j=0; j < n; j++)
834  {
835  int aid = a[j].id;
836 
837  a[j].nonbondedGroupSize = 0; // must be set based on coordinates
838 
839  a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
840  a[j].fixedPosition = a[j].position;
841 
842  if ( a[j].migrationGroupSize ) {
843  if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
844  Position pos = a[j].position;
845  int mgs = a[j].migrationGroupSize;
846  int c = 1;
847  for ( int k=a[j].hydrogenGroupSize; k<mgs;
848  k+=a[j+k].hydrogenGroupSize ) {
849  pos += a[j+k].position;
850  ++c;
851  }
852  pos *= 1./c;
853  mother_transform = a[j].transform; // should be 0,0,0
854  pos = lattice.nearest(pos,center,&mother_transform);
855  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
856  a[j].transform = mother_transform;
857  } else {
858  a[j].position = lattice.nearest(
859  a[j].position, center, &(a[j].transform));
860  mother_transform = a[j].transform;
861  }
862  } else {
863  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
864  a[j].transform = mother_transform;
865  }
866 
867  a[j].mass = molecule->atommass(aid);
868  // Using double precision division for reciprocal mass.
869  a[j].recipMass = ( a[j].mass > 0 ? (1. / a[j].mass) : 0 );
870  a[j].charge = molecule->atomcharge(aid);
871 
872 //Modifications for alchemical fep
873  if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
874  a[j].partition = molecule->get_fep_type(aid);
875  }
876  else {
877  a[j].partition = 0;
878  }
879 //fepe
880 
881  }
882 
883 #if 0
884  int size, allfixed, k;
885  for(j=0; j < n; j+=size) {
886  size = a[j].hydrogenGroupSize;
887  if ( ! size ) {
888  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
889  }
890  allfixed = 1;
891  for ( k = 0; k < size; ++k ) {
892  allfixed = ( allfixed && (a[j+k].atomFixed) );
893  }
894  for ( k = 0; k < size; ++k ) {
895  a[j+k].groupFixed = allfixed ? 1 : 0;
896  }
897  // DH - set isWater flag
898  // based on same condition as used for determining Settle:
899  // The rigidBondLength of the FIRST water atom is set greater than 0.
900  if (a[j].rigidBondLength > 0) {
901  for (k = 0; k < size; k++) {
902  a[j+k].isWater = 1;
903  }
904  }
905  }
906 #else
907  // DH - promote water checking to take place before simulation
908  // Flag the Settle water molecules.
909  int size, allfixed;
910  const WaterModel watmodel = params->watmodel;
911  const int wathgsize = getWaterModelGroupSize(watmodel);
912  const int fixedAtomsOn = params->fixedAtomsOn;
913  const int useSettle = params->useSettle;
914  for(int j=0; j < n; j+=size) {
915  size = a[j].hydrogenGroupSize;
916  if ( ! size ) {
917  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
918  }
919  allfixed = 1;
920  for (int k = 0; k < size; ++k ) {
921  allfixed = ( allfixed && (a[j+k].atomFixed) );
922  }
923  for (int k = 0; k < size; ++k ) {
924  a[j+k].groupFixed = allfixed ? 1 : 0;
925  }
926  // DH - Set isWater flag for SETTLE water molecules,
927  // based on same condition as used in HomePatch::buildRattleList():
928  // The rigidBondLength of the FIRST water atom is set greater than 0.
929  if (a[j].rigidBondLength > 0) {
930  if (size != wathgsize) {
931  char errmsg[256];
932  sprintf(errmsg,
933  "Water molecule starting with atom %d contains %d atoms "
934  "but the specified water model requires %d atoms.\n",
935  a[j].id+1, size, wathgsize
936  );
937  NAMD_die(errmsg);
938  }
939  int anyfixed = 0;
940  for (int k = 0; k < size; k++) {
941  anyfixed += ( fixedAtomsOn && a[j+k].atomFixed );
942  }
943  if (useSettle && !anyfixed) {
944  for (int k = 0; k < size; k++) {
945  a[j+k].isWater = 1;
946  }
947  }
948  }
949  }
950 #endif
951 
952  if ( params->outputPatchDetails ) {
953  int patchId = i;
954  int numAtomsInPatch = n;
955  int numFixedAtomsInPatch = 0;
956  int numAtomsInFixedGroupsInPatch = 0;
957  for(j=0; j < n; j++) {
958  numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
959  numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
960  }
961  iout << "PATCH_DETAILS:"
962  << " patch " << patchId
963  << " atoms " << numAtomsInPatch
964  << " fixed_atoms " << numFixedAtomsInPatch
965  << " fixed_groups " << numAtomsInFixedGroupsInPatch
966  << "\n" << endi;
967  }
968  }
969 
970  return atoms;
971 
972 }
973 
974 //----------------------------------------------------------------------
975 // This should only be called on node 0.
976 //----------------------------------------------------------------------
977 void WorkDistrib::createHomePatches(void)
978 {
979  int i;
980  PatchMap *patchMap = PatchMap::Object();
981  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
982  PatchMgr *patchMgr = pm.ckLocalBranch();
983 
984  int numPatches = patchMap->numPatches();
985 
986  FullAtomList *atoms = createAtomLists();
987 
988 #ifdef MEM_OPT_VERSION
989 /* CProxy_Node nd(CkpvAccess(BOCclass_group).node);
990  Node *node = nd.ckLocalBranch();
991  node->molecule->delEachAtomSigs();
992  node->molecule->delMassChargeSpace();
993 */
994 #endif
995 
996  int maxAtoms = -1;
997  int maxPatch = -1;
998  for(i=0; i < numPatches; i++) {
999  int numAtoms = atoms[i].size();
1000  if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
1001  }
1002  iout << iINFO << "LARGEST PATCH (" << maxPatch <<
1003  ") HAS " << maxAtoms << " ATOMS\n" << endi;
1004 
1005 #ifdef SHOW_HISTOGRAM_HGROUP_SIZES
1006  // histogram hydrogen group sizes
1007  int hgroupsize[9] = { 0 };
1008  int numhgroups = 0;
1009  int numwaters = 0;
1010  int maxhgroupsize = 0;
1011  for (i = 0; i < numPatches; i++) {
1012  const FullAtomList& a = atoms[i];
1013  int numAtoms = a.size();
1014  int hgs = 1; // init to something sane
1015  for (int j = 0; j < numAtoms; j += hgs) {
1016  hgs = a[j].hydrogenGroupSize;
1017  int histndx = ( hgs > 8 ? 8 : hgs );
1018  hgroupsize[ histndx ]++;
1019  numhgroups++;
1020  if (a[j].isWater) numwaters++;
1021  if (maxhgroupsize < hgs) maxhgroupsize = hgs;
1022  }
1023  }
1024  int hgslast = ( maxhgroupsize > 8 ? 8 : maxhgroupsize );
1025  printf("Number of hydrogen groups: %7d\n", numhgroups);
1026  printf("Number of settle water molecules: %7d\n", numwaters);
1027  printf("Number of remaining hydrogen groups: %7d\n", numhgroups - numwaters);
1028  printf("Largest hydrogen group size: %7d\n", maxhgroupsize);
1029  printf("Histogram of hydrogen group sizes:\n");
1030  int hgstotal = 0;
1031  for (i = 0; i <= hgslast; i++) {
1032  printf(" size %d count %d\n", i, hgroupsize[i]);
1033  hgstotal += hgroupsize[i];
1034  }
1035  printf("Checksum over hydrogen group sizes: %7d\n", hgstotal);
1036 #endif
1037 
1038  for(i=0; i < numPatches; i++)
1039  {
1040  if ( ! ( i % 100 ) )
1041  {
1042  DebugM(3,"Created " << i << " patches so far.\n");
1043  }
1044 
1045  patchMgr->createHomePatch(i,atoms[i]);
1046  }
1047 
1048  delete [] atoms;
1049 }
1050 
1051 void WorkDistrib::distributeHomePatches(void) {
1052  // ref BOC
1053  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1054  Node *node = nd.ckLocalBranch();
1055  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1056  PatchMgr *patchMgr = pm.ckLocalBranch();
1057  // ref singleton
1058  PatchMap *patchMap = PatchMap::Object();
1059 
1060  // Move patches to the proper node
1061  for(int i=0;i < patchMap->numPatches(); i++)
1062  {
1063  if (patchMap->node(i) != node->myid() )
1064  {
1065  DebugM(3,"patchMgr->movePatch("
1066  << i << "," << patchMap->node(i) << ")\n");
1067  patchMgr->movePatch(i,patchMap->node(i));
1068  }
1069  }
1070  patchMgr->sendMovePatches();
1071 }
1072 
1073 void WorkDistrib::reinitAtoms(const char *basename) {
1074 
1075  PatchMap *patchMap = PatchMap::Object();
1076  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1077  PatchMgr *patchMgr = pm.ckLocalBranch();
1078 
1079  int numPatches = patchMap->numPatches();
1080 
1081  FullAtomList *atoms = createAtomLists(basename);
1082 
1083  for(int i=0; i < numPatches; i++) {
1084  patchMgr->sendAtoms(i,atoms[i]);
1085  }
1086 
1087  delete [] atoms;
1088 
1089 }
1090 
1091 
1092 //----------------------------------------------------------------------
1093 
1094 class PatchMapMsg : public CMessage_PatchMapMsg {
1095  public:
1096  char *patchMapData;
1097 };
1098 
1099 void WorkDistrib::sendPatchMap(void)
1100 {
1101  if ( CkNumPes() == 1 ) {
1102  patchMapArrived = true;
1103  return;
1104  }
1105 
1106  //Automatically enable spanning tree
1107  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1108  Node *node = nd.ckLocalBranch();
1109  SimParameters *params = node->simParameters;
1110  if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
1111 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1112  || CkNumPes() > CkNumNodes()
1113  ) && ( CkNumNodes() > 1
1114 #endif
1115  ) && params->isSendSpanningTreeUnset() )
1116  ProxyMgr::Object()->setSendSpanning();
1117 
1118 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1119  if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1120  && params->isRecvSpanningTreeUnset() )
1121  ProxyMgr::Object()->setRecvSpanning();
1122 #endif
1123 
1124  int size = PatchMap::Object()->packSize();
1125 
1126  PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;
1127 
1128  PatchMap::Object()->pack(mapMsg->patchMapData, size);
1129 
1130  CProxy_WorkDistrib workProxy(thisgroup);
1131  workProxy[0].savePatchMap(mapMsg);
1132 }
1133 
1134 // saveMaps() is called when the map message is received
1135 void WorkDistrib::savePatchMap(PatchMapMsg *msg)
1136 {
1137  // Use a resend to forward messages before processing. Otherwise the
1138  // map distribution is slow on many CPUs. We need to use a tree
1139  // rather than a broadcast because some implementations of broadcast
1140  // generate a copy of the message on the sender for each recipient.
1141  // This is because MPI doesn't allow re-use of an outstanding buffer.
1142 
1143  if ( CkMyRank() ) patchMapArrived = true;
1144 
1145  if ( patchMapArrived && CkMyPe() ) {
1147 
1148  //Automatically enable spanning tree
1149  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1150  Node *node = nd.ckLocalBranch();
1151  SimParameters *params = node->simParameters;
1152  if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
1153 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1154  || CkNumPes() > CkNumNodes()
1155  ) && ( CkNumNodes() > 1
1156 #endif
1157  ) && params->isSendSpanningTreeUnset() )
1158  ProxyMgr::Object()->setSendSpanning();
1159 
1160 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1161  if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1162  && params->isRecvSpanningTreeUnset() )
1163  ProxyMgr::Object()->setRecvSpanning();
1164 #endif
1165  }
1166 
1167  if ( patchMapArrived ) {
1168  if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
1169  ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
1170  } else {
1171  delete msg;
1172  }
1173  return;
1174  }
1175 
1176  patchMapArrived = true;
1177 
1178  int self = CkMyNode();
1179  int range_begin = 0;
1180  int range_end = CkNumNodes();
1181  while ( self != range_begin ) {
1182  ++range_begin;
1183  int split = range_begin + ( range_end - range_begin ) / 2;
1184  if ( self < split ) { range_end = split; }
1185  else { range_begin = split; }
1186  }
1187  int send_near = self + 1;
1188  int send_far = send_near + ( range_end - send_near ) / 2;
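  // Each node narrows [range_begin,range_end) by repeated halving until it
  // owns the range starting at itself, then forwards the map to the first
  // node of each half of the remainder.  For example, with 8 nodes the
  // forwarding tree is 0 -> {1,4}, 1 -> {2,3}, 4 -> {5,6}, 6 -> {7}.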
1189 
1190  int pids[3];
1191  int npid = 0;
1192  if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
1193  if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
1194  pids[npid++] = CkMyPe(); // always send the message to ourselves
1195  CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
1196 }
1197 
1198 
1199 void WorkDistrib::sendComputeMap(void)
1200 {
1201  if ( CkMyRank() ) return;
1202 
1203  if ( CkNumNodes() == 1 ) {
1204  computeMapArrived = true;
1206  return;
1207  }
1208 
1209  if ( ! CkMyPe() ) { // send
1210  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
1211  ComputeMap::Object()->pack(msg);
1212  msg->end();
1213  delete msg;
1214  } else if ( ! CkMyRank() ) { // receive
1215  MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
1216  ComputeMap::Object()->unpack(msg);
1217  delete msg;
1218  }
1219 
1220  computeMapArrived = true;
1222 }
1223 
1224 
1225 //----------------------------------------------------------------------
1226 void WorkDistrib::patchMapInit(void)
1227 {
1228  PatchMap *patchMap = PatchMap::Object();
1229  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1230  Node *node = nd.ckLocalBranch();
1231  SimParameters *params = node->simParameters;
1232  Lattice lattice = params->lattice;
1233 
1234  BigReal patchSize = params->patchDimension;
1235 
1236 #ifndef MEM_OPT_VERSION
1237  const int totalAtoms = node->pdb->num_atoms();
1238 #else
1239  const int totalAtoms = node->molecule->numAtoms;
1240 #endif
1241 
1242  ScaledPosition xmin, xmax;
1243 
1244  double maxNumPatches = 1.e9; // need to adjust fractional values
1245  if ( params->minAtomsPerPatch > 0 )
1246  maxNumPatches = totalAtoms / params->minAtomsPerPatch;
1247 
1248  DebugM(3,"Mapping patches\n");
1249  if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
1250  xmin = 0.; xmax = 0.;
1251  }
1252  else if ( params->FMAOn || params->MSMOn || params->FMMOn ) {
1253  // Need to use full box for FMA to match NAMD 1.X results.
1254 #if 0
1255  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
1256  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
1257  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
1258 #endif
1259  node->pdb->find_extremes(lattice);
1260  node->pdb->get_extremes(xmin, xmax);
1261 #if 0
1262  printf("+++ center=%.4f %.4f %.4f\n",
1263  lattice.origin().x, lattice.origin().y, lattice.origin().z);
1264  printf("+++ xmin=%.4f xmax=%.4f\n", xmin.x, xmax.x);
1265  printf("+++ ymin=%.4f ymax=%.4f\n", xmin.y, xmax.y);
1266  printf("+++ zmin=%.4f zmax=%.4f\n", xmin.z, xmax.z);
1267 #endif
1268  // Otherwise, this allows a small number of stray atoms.
1269  }
1270  else {
1271 #if 0
1272  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
1273  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
1274  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
1275 #endif
1276  node->pdb->find_extremes(lattice, 1.0);
1277  node->pdb->get_extremes(xmin, xmax);
1278  iout << iINFO << "ORIGINAL ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
1279  double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
1280  if ( frac < 0.9 ) { frac = 0.9; }
1281  node->pdb->find_extremes(lattice, frac);
1282  node->pdb->get_extremes(xmin, xmax);
1283  iout << iINFO << "ADJUSTED ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
1284  }
1285 
1286 #if 0
1287  BigReal origin_shift;
1288  origin_shift = lattice.a_r() * lattice.origin();
1289  xmin.x -= origin_shift;
1290  xmax.x -= origin_shift;
1291  origin_shift = lattice.b_r() * lattice.origin();
1292  xmin.y -= origin_shift;
1293  xmax.y -= origin_shift;
1294  origin_shift = lattice.c_r() * lattice.origin();
1295  xmin.z -= origin_shift;
1296  xmax.z -= origin_shift;
1297 #endif
1298 
1299  // SimParameters default is -1 for unset
1300  int twoAwayX = params->twoAwayX;
1301  int twoAwayY = params->twoAwayY;
1302  int twoAwayZ = params->twoAwayZ;
1303 
1304  // SASA implementation is not compatible with twoAway patches
1305  if (params->LCPOOn && patchSize < 32.4) {
1306  if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
1307  iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
1308  }
1309  twoAwayX = twoAwayY = twoAwayZ = 0;
1310  }
1311 
1312  // if you think you know what you're doing go right ahead
1313  if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
1314  if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
1315  if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
1316  if ( params->maxPatches > 0 ) {
1317  maxNumPatches = params->maxPatches;
1318  iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
1319  maxNumPatches << "\n" << endi;
1320  }
1321 
1322  int numpes = CkNumPes();
1323  SimParameters *simparam = Node::Object()->simParameters;
1324  if(simparam->simulateInitialMapping) {
1325  numpes = simparam->simulatedPEs;
1326  delete [] patchMap->nPatchesOnNode;
1327  patchMap->nPatchesOnNode = new int[numpes];
1328  memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));
1329  }
1330 
1331 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1332  // for CUDA be sure there are more patches than pes
1333 
1334  int numPatches = patchMap->sizeGrid(
1335  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1336  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1337  if ( numPatches < numpes && twoAwayX < 0 ) {
1338  twoAwayX = 1;
1339  numPatches = patchMap->sizeGrid(
1340  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1341  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1342  }
1343  if ( numPatches < numpes && twoAwayY < 0 ) {
1344  twoAwayY = 1;
1345  numPatches = patchMap->sizeGrid(
1346  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1347  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1348  }
1349  if ( numPatches < numpes && twoAwayZ < 0 ) {
1350  twoAwayZ = 1;
1351  numPatches = patchMap->sizeGrid(
1352  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1353  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1354  }
1355  if ( numPatches < numpes ) {
1356  #if defined(NAMD_MIC)
1357  NAMD_die("MIC-enabled NAMD requires at least one patch per thread.");
1358  #else
1359  if (simparam->CUDASOAintegrateMode) {
1360  NAMD_die("GPU-resident NAMD requires at least one patch per thread.");
1361  }
1362  #endif
1363  }
1364  if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
1365  int exactFit = numPatches - numPatches % numpes;
1366  int newNumPatches = patchMap->sizeGrid(
1367  xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
1368  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1369  if ( newNumPatches == exactFit ) {
1370  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1371  maxNumPatches = exactFit;
1372  }
1373  }
1374 
1375  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1376  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1377  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1378 
1379 #else
1380 
1381  int availPes = numpes;
1382  if ( params->noPatchesOnZero && numpes > 1 ) {
1383  availPes -= 1;
1384  if(params->noPatchesOnOne && numpes > 2)
1385  availPes -= 1;
1386  }
1387 #ifdef MEM_OPT_VERSION
1388  if(params->noPatchesOnOutputPEs && numpes - params->numoutputprocs >2)
1389  {
1390  availPes -= params->numoutputprocs;
1391  if ( params->noPatchesOnZero && numpes > 1 && isOutputProcessor(0)){
1392  availPes++;
1393  }
1394  if ( params->noPatchesOnOne && numpes > 2 && isOutputProcessor(1)){
1395  availPes++;
1396  }
1397  }
1398 #endif
1399 
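 // Without a coprocessor the grid is first sized with twoAway splitting
 // enabled in every dimension, then the Z, Y, and X splittings are turned off
 // in turn while the patch count still exceeds the available PEs (or
 // maxNumPatches) by the thresholds below.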
1400  int numPatches = patchMap->sizeGrid(
1401  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1402  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1403  if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
1404  ) && twoAwayZ < 0 ) {
1405  twoAwayZ = 0;
1406  numPatches = patchMap->sizeGrid(
1407  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1408  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1409  }
1410  if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
1411  ) && twoAwayY < 0 ) {
1412  twoAwayY = 0;
1413  numPatches = patchMap->sizeGrid(
1414  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1415  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1416  }
1417  if ( ( numPatches > availPes || numPatches > maxNumPatches
1418  ) && twoAwayX < 0 ) {
1419  twoAwayX = 0;
1420  numPatches = patchMap->sizeGrid(
1421  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1422  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1423  }
1424  if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
1425  int newNumPatches = patchMap->sizeGrid(
1426  xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
1427  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1428  if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
1429  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1430  maxNumPatches = availPes;
1431  }
1432  }
1433 
1434  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1435  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1436  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1437 
1438 #endif
1439 
1440 }
1441 
1442 
1443 //----------------------------------------------------------------------
1444 void WorkDistrib::assignNodeToPatch(void)
1445 {
1446  PatchMap *patchMap = PatchMap::Object();
1447  int nNodes = Node::Object()->numNodes();
1448  SimParameters *simparam = Node::Object()->simParameters;
1449  if(simparam->simulateInitialMapping) {
1450  nNodes = simparam->simulatedPEs;
1451  }
1452 
1453 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP
1454  TopoManager tmgr;
1455  int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
1456  if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
1457  CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
1458  }
1459  else
1460 #endif
1461  assignPatchesSpaceFillingCurve();
1462 
1463  int *nAtoms = new int[nNodes];
1464  int numAtoms=0;
1465  int i;
1466  for(i=0; i < nNodes; i++)
1467  nAtoms[i] = 0;
1468 
1469  for(i=0; i < patchMap->numPatches(); i++)
1470  {
1471  // iout << iINFO << "Patch " << i << " has "
1472  // << patchMap->patch(i)->getNumAtoms() << " atoms and "
1473  // << patchMap->patch(i)->getNumAtoms() *
1474  // patchMap->patch(i)->getNumAtoms()
1475  // << " pairs.\n" << endi;
1476 #ifdef MEM_OPT_VERSION
1477  numAtoms += patchMap->numAtoms(i);
1478  nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);
1479 #else
1480  if (patchMap->patch(i)) {
1481  numAtoms += patchMap->patch(i)->getNumAtoms();
1482  nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();
1483  }
1484 #endif
1485  }
1486 
1487  if ( numAtoms != Node::Object()->molecule->numAtoms ) {
1488  for(i=0; i < nNodes; i++)
1489  iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
1490  iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
1491  NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
1492  }
1493 
1494  delete [] nAtoms;
1495 
1496  // PatchMap::Object()->printPatchMap();
1497 }
1498 
1499 //----------------------------------------------------------------------
1500 // void WorkDistrib::assignPatchesSlices()
1501 // {
1502 // int pid;
1503 // int assignedNode = 0;
1504 // PatchMap *patchMap = PatchMap::Object();
1505 // Node *node = CLocalBranch(Node, CkpvAccess(BOCclass_group).node);
1506 
1507 // int *numAtoms = new int[node->numNodes()];
1508 // for (int i=0; i<node->numNodes(); i++) {
1509 // numAtoms[i] = 0;
1510 // }
1511 
1512 // // Assign patch to node with least atoms assigned.
1513 // for(pid=0; pid < patchMap->numPatches(); pid++) {
1514 // assignedNode = 0;
1515 // for (i=1; i < node->numNodes(); i++) {
1516 // if (numAtoms[i] < numAtoms[assignedNode]) assignedNode = i;
1517 // }
1518 // patchMap->assignNode(pid, assignedNode);
1519 // numAtoms[assignedNode] += patchMap->patch(pid)->getNumAtoms();
1520 
1521 // /*
1522 // iout << iINFO << "Patch (" << pid << ") has "
1523 // << patchMap->patch(pid)->getNumAtoms()
1524 // << " atoms: Assigned to Node(" << assignedNode << ")\n"
1525 // << endi;
1526 // */
1527 // }
1528 
1529 // delete[] numAtoms;
1530 // }
1531 
1532 //----------------------------------------------------------------------
1533 void WorkDistrib::assignPatchesToLowestLoadNode()
1534 {
1535  int pid;
1536  int assignedNode = 0;
1537  PatchMap *patchMap = PatchMap::Object();
1538  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1539  Node *node = nd.ckLocalBranch();
1540  SimParameters *simParams = node->simParameters;
1541  int ncpus = node->numNodes();
1542  if(simParams->simulateInitialMapping) {
1543  ncpus = simParams->simulatedPEs;
1544  }
1545 
1546  int *load = new int[ncpus];
1547  int *assignedNodes = new int[patchMap->numPatches()];
1548  for (int i=0; i<ncpus; i++) {
1549  load[i] = 0;
1550  }
1551  CkPrintf("assignPatchesToLowestLoadNode\n");
1552  int defaultNode = 0;
1553  if ( simParams->noPatchesOnZero && ncpus > 1 ){
1554  defaultNode = 1;
1555  if( simParams->noPatchesOnOne && ncpus > 2)
1556  defaultNode = 2;
1557  }
1558  // Assign patch to node with least atoms assigned.
1559  for(pid=0; pid < patchMap->numPatches(); pid++) {
1560  assignedNode = defaultNode;
1561  for (int i=assignedNode + 1; i < ncpus; i++) {
1562  if (load[i] < load[assignedNode]) assignedNode = i;
1563  }
1564  assignedNodes[pid] = assignedNode;
1565 #ifdef MEM_OPT_VERSION
1566  load[assignedNode] += patchMap->numAtoms(pid) + 1;
1567 #else
1568  load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
1569 #endif
1570  }
1571 
1572  delete[] load;
1573  sortNodesAndAssign(assignedNodes);
1574  delete[] assignedNodes;
1575 }
1576 
1577 //----------------------------------------------------------------------
1578 void WorkDistrib::assignPatchesBitReversal()
1579 {
1580  int pid;
1581  PatchMap *patchMap = PatchMap::Object();
1582  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1583  Node *node = nd.ckLocalBranch();
1584  SimParameters *simparam = node->simParameters;
1585 
1586  int ncpus = node->numNodes();
1587  if(simparam->simulateInitialMapping) {
1588  ncpus = simparam->simulatedPEs;
1589  }
1590  int npatches = patchMap->numPatches();
1591  if ( ncpus <= npatches )
1592  NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");
1593 
1594  SortableResizeArray<int> seq(ncpus);
1595  // avoid using node 0 (reverse of 0 is 0 so start at 1)
1596  for ( int i = 1; i < ncpus; ++i ) {
1597  seq[i-1] = peDiffuseOrdering[i];
1598  }
1599 
1600  // extract and sort patch locations
1601  sortNodesAndAssign(seq.begin());
1602  if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
1603 }
1604 
1605 //----------------------------------------------------------------------
1606 struct nodesort {
1607  int node;
1608  int a_total;
1609  int b_total;
1610  int c_total;
1611  int npatches;
1612  nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }
1613  int operator==(const nodesort &o) const {
1614  float a1 = ((float)a_total)/((float)npatches);
1615  float a2 = ((float)o.a_total)/((float)o.npatches);
1616  float b1 = ((float)b_total)/((float)npatches);
1617  float b2 = ((float)o.b_total)/((float)o.npatches);
1618  float c1 = ((float)c_total)/((float)npatches);
1619  float c2 = ((float)o.c_total)/((float)o.npatches);
1620  return ((a1 == a2) && (b1 == b2) && (c1 == c2));
1621  }
1622  int operator<(const nodesort &o) const {
1623  float a1 = ((float)a_total)/((float)npatches);
1624  float a2 = ((float)o.a_total)/((float)o.npatches);
1625  float b1 = ((float)b_total)/((float)npatches);
1626  float b2 = ((float)o.b_total)/((float)o.npatches);
1627  float c1 = ((float)c_total)/((float)npatches);
1628  float c2 = ((float)o.c_total)/((float)o.npatches);
1629  return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
1630  ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
1631  }
1632 };
1633 
1634 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
1635  // if baseNodes is zero (default) then set both nodes and basenodes
1636  // if baseNodes is nonzero then this is a second call to set basenodes only
1637  int i, pid;
1638  PatchMap *patchMap = PatchMap::Object();
1639  int npatches = patchMap->numPatches();
1640  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1641  Node *node = nd.ckLocalBranch();
1642  int nnodes = node->numNodes();
1643  SimParameters *simparam = node->simParameters;
1644  if(simparam->simulateInitialMapping) {
1645  nnodes = simparam->simulatedPEs;
1646  }
1647 
1648  ResizeArray<nodesort> allnodes(nnodes);
1649  for ( i=0; i < nnodes; ++i ) {
1650  allnodes[i].node = i;
1651  }
1652  for ( pid=0; pid<npatches; ++pid ) {
1653  // iout << pid << " " << assignedNode[pid] << "\n" << endi;
1654  allnodes[assignedNode[pid]].npatches++;
1655  allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
1656  allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
1657  allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
1658  }
1659  SortableResizeArray<nodesort> usednodes(nnodes);
1660  usednodes.resize(0);
1661  for ( i=0; i < nnodes; ++i ) {
1662  if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
1663  }
1664  usednodes.sort();
1665  int i2 = 0;
1666  for ( i=0; i < nnodes; ++i ) {
1667  int pe = peCompactOrdering[i];
1668  if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
1669  }
1670 
1671  for ( pid=0; pid<npatches; ++pid ) {
1672  // iout << pid << " " << allnodes[assignedNode[pid]].node << "\n" << endi;
1673  if ( ! baseNodes ) {
1674  patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);
1675  }
1676  patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);
1677  }
1678 }
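// sortNodesAndAssign() ranks the PEs that received patches by the average
// patch index they hold (see nodesort above) and relabels them onto
// peCompactOrdering, so groups of nearby patches land on physically nearby
// PEs; with baseNodes nonzero only the base-node assignment is rewritten.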
1679 
1680 //----------------------------------------------------------------------
1681 void WorkDistrib::assignPatchesRoundRobin()
1682 {
1683  int pid;
1684  PatchMap *patchMap = PatchMap::Object();
1685  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1686  Node *node = nd.ckLocalBranch();
1687  SimParameters *simparam = node->simParameters;
1688  int ncpus = node->numNodes();
1689  if(simparam->simulateInitialMapping) {
1690  ncpus = simparam->simulatedPEs;
1691  }
1692  int *assignedNode = new int[patchMap->numPatches()];
1693 
1694  for(pid=0; pid < patchMap->numPatches(); pid++) {
1695  assignedNode[pid] = pid % ncpus;
1696  }
1697 
1698  sortNodesAndAssign(assignedNode);
1699  delete [] assignedNode;
1700 }
1701 
1702 //----------------------------------------------------------------------
1703 void WorkDistrib::assignPatchesRecursiveBisection()
1704 {
1705  PatchMap *patchMap = PatchMap::Object();
1706  int *assignedNode = new int[patchMap->numPatches()];
1707  SimParameters *simParams = Node::Object()->simParameters;
1708  int numNodes = Node::Object()->numNodes();
1709  if(simParams->simulateInitialMapping) {
1710  numNodes = simParams->simulatedPEs;
1711  }
1712 
1713  int usedNodes = numNodes;
1714  int unusedNodes = 0;
1715  CkPrintf("assignPatchesRecursiveBisection\n");
1716  if ( simParams->noPatchesOnZero && numNodes > 1 ){
1717  usedNodes -= 1;
1718  if(simParams->noPatchesOnOne && numNodes > 2)
1719  usedNodes -= 1;
1720  }
1721  unusedNodes = numNodes - usedNodes;
1722  RecBisection recBisec(usedNodes,PatchMap::Object());
1723  if ( recBisec.partition(assignedNode) ) {
1724  if ( unusedNodes !=0 ) {
1725  for ( int i=0; i<patchMap->numPatches(); ++i ) {
1726  assignedNode[i] += unusedNodes;
1727  }
1728  }
1729  sortNodesAndAssign(assignedNode);
1730  delete [] assignedNode;
1731  } else {
1732  //free the array here since the same array will be allocated
1733  //in the assignPatchesToLowestLoadNode function, thus reducing
1734  //temporary memory usage
1735  delete [] assignedNode;
1736 
1737  iout << iWARN
1738  << "WorkDistrib: Recursive bisection fails, "
1739  << "invoking space-filling curve algorithm\n";
1740  assignPatchesSpaceFillingCurve();
1741  }
1742 }
1743 
1744 // class to re-order dimensions in decreasing size
1745 struct TopoManagerWrapper {
1746  TopoManager tmgr;
1747  int a_dim, b_dim, c_dim, d_dim, e_dim; // dimensions in decreasing size
1748  int a_rot, b_rot, c_rot, d_rot, e_rot; // rotation of coordinates
1749  int a_mod, b_mod, c_mod, d_mod, e_mod; // modulo of coordinates
1750  int fixpe(int pe) { // compensate for lame fallback topology information
1751  return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
1752  }
1753  TopoManagerWrapper() {
1754 #if CMK_BLUEGENEQ
1755  int na=tmgr.getDimNA();
1756  int nb=tmgr.getDimNB();
1757  int nc=tmgr.getDimNC();
1758  int nd=tmgr.getDimND();
1759  int ne=tmgr.getDimNE();
1760 #else
1761  int na=tmgr.getDimNX();
1762  int nb=tmgr.getDimNY();
1763  int nc=tmgr.getDimNZ();
1764  int nd=1;
1765  int ne=1;
1766 #endif
1767  ResizeArray<int> a_flags(na);
1768  ResizeArray<int> b_flags(nb);
1769  ResizeArray<int> c_flags(nc);
1770  ResizeArray<int> d_flags(nd);
1771  ResizeArray<int> e_flags(ne);
1772  for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
1773  for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
1774  for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
1775  for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
1776  for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
1777  int npes = CkNumPes();
1778  for ( int pe=0; pe<npes; ++pe ) {
1779  int a,b,c,d,e,t;
1780 #if CMK_BLUEGENEQ
1781  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1782 #else
1783  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1784  d=0; e=0;
1785 #endif
1786  if ( a < 0 || a >= na ) NAMD_bug("inconsistent torus topology!");
1787  if ( b < 0 || b >= nb ) NAMD_bug("inconsistent torus topology!");
1788  if ( c < 0 || c >= nc ) NAMD_bug("inconsistent torus topology!");
1789  if ( d < 0 || d >= nd ) NAMD_bug("inconsistent torus topology!");
1790  if ( e < 0 || e >= ne ) NAMD_bug("inconsistent torus topology!");
1791  a_flags[a] = 1;
1792  b_flags[b] = 1;
1793  c_flags[c] = 1;
1794  d_flags[d] = 1;
1795  e_flags[e] = 1;
1796  }
1797  iout << iINFO << "TORUS A SIZE " << na << " USING";
1798  for ( int i=0; i<na; ++i ) { if ( a_flags[i] ) iout << " " << i; }
1799  iout << "\n" << endi;
1800  iout << iINFO << "TORUS B SIZE " << nb << " USING";
1801  for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
1802  iout << "\n" << endi;
1803  iout << iINFO << "TORUS C SIZE " << nc << " USING";
1804  for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
1805  iout << "\n" << endi;
1806 #if CMK_BLUEGENEQ
1807  iout << iINFO << "TORUS D SIZE " << nd << " USING";
1808  for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
1809  iout << "\n" << endi;
1810  iout << iINFO << "TORUS E SIZE " << ne << " USING";
1811  for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
1812  iout << "\n" << endi;
1813 #endif
1814  // find most compact representation of our subset
1815  a_rot = b_rot = c_rot = d_rot = e_rot = 0;
1816  a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
1817 #if CMK_BLUEGENEQ
1818  if ( tmgr.absA(na) == 0 ) // torus
1819 #else
1820  if ( tmgr.absX(na) == 0 ) // torus
1821 #endif
1822  for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
1823  if ( a_flags[i%na] ) gapstart = i+1;
1824  else if ( i - gapstart >= gaplen ) {
1825  a_rot = 2*na-i-1; gaplen = i - gapstart;
1826  }
1827  }
1828 #if CMK_BLUEGENEQ
1829  if ( tmgr.absB(nb) == 0 ) // torus
1830 #else
1831  if ( tmgr.absY(nb) == 0 ) // torus
1832 #endif
1833  for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
1834  if ( b_flags[i%nb] ) gapstart = i+1;
1835  else if ( i - gapstart >= gaplen ) {
1836  b_rot = 2*nb-i-1; gaplen = i - gapstart;
1837  }
1838  }
1839 #if CMK_BLUEGENEQ
1840  if ( tmgr.absC(nc) == 0 ) // torus
1841 #else
1842  if ( tmgr.absZ(nc) == 0 ) // torus
1843 #endif
1844  for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
1845  if ( c_flags[i%nc] ) gapstart = i+1;
1846  else if ( i - gapstart >= gaplen ) {
1847  c_rot = 2*nc-i-1; gaplen = i - gapstart;
1848  }
1849  }
1850 #if CMK_BLUEGENEQ
1851  if ( tmgr.absD(nd) == 0 ) // torus
1852  for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
1853  if ( d_flags[i%nd] ) gapstart = i+1;
1854  else if ( i - gapstart >= gaplen ) {
1855  d_rot = 2*nd-i-1; gaplen = i - gapstart;
1856  }
1857  }
1858  if ( tmgr.absE(ne) == 0 ) // torus
1859  for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
1860  if ( e_flags[i%ne] ) gapstart = i+1;
1861  else if ( i - gapstart >= gaplen ) {
1862  e_rot = 2*ne-i-1; gaplen = i - gapstart;
1863  }
1864  }
1865 #endif
1866  // order dimensions by length
1867  int a_min=na, a_max=-1;
1868  int b_min=nb, b_max=-1;
1869  int c_min=nc, c_max=-1;
1870  int d_min=nd, d_max=-1;
1871  int e_min=ne, e_max=-1;
1872  for ( int pe=0; pe<npes; ++pe ) {
1873  int a,b,c,d,e,t;
1874 #if CMK_BLUEGENEQ
1875  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1876 #else
1877  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1878  d=0; e=0;
1879 #endif
1880  a = (a+a_rot)%a_mod;
1881  b = (b+b_rot)%b_mod;
1882  c = (c+c_rot)%c_mod;
1883  d = (d+d_rot)%d_mod;
1884  e = (e+e_rot)%e_mod;
1885  if ( a < a_min ) a_min = a;
1886  if ( b < b_min ) b_min = b;
1887  if ( c < c_min ) c_min = c;
1888  if ( d < d_min ) d_min = d;
1889  if ( e < e_min ) e_min = e;
1890  if ( a > a_max ) a_max = a;
1891  if ( b > b_max ) b_max = b;
1892  if ( c > c_max ) c_max = c;
1893  if ( d > d_max ) d_max = d;
1894  if ( e > e_max ) e_max = e;
1895  }
1896  int a_len = a_max - a_min + 1;
1897  int b_len = b_max - b_min + 1;
1898  int c_len = c_max - c_min + 1;
1899  int d_len = d_max - d_min + 1;
1900  int e_len = e_max - e_min + 1;
1901  int lensort[5];
1902  lensort[0] = (a_len << 3) + 0;
1903  lensort[1] = (b_len << 3) + 1;
1904  lensort[2] = (c_len << 3) + 2;
1905  lensort[3] = (d_len << 3) + 3;
1906  lensort[4] = (e_len << 3) + 4;
1907  // CkPrintf("TopoManagerWrapper lensort before %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1908  std::sort(lensort, lensort+5);
1909  // CkPrintf("TopoManagerWrapper lensort after %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1910  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) a_dim = 4-i; }
1911  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) b_dim = 4-i; }
1912  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
1913  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) d_dim = 4-i; }
1914  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) e_dim = 4-i; }
1915 #if 0
1916  if ( a_len >= b_len && a_len >= c_len ) {
1917  a_dim = 0;
1918  if ( b_len >= c_len ) {
1919  b_dim = 1; c_dim = 2;
1920  } else {
1921  b_dim = 2; c_dim = 1;
1922  }
1923  } else if ( b_len >= a_len && b_len >= c_len ) {
1924  b_dim = 0;
1925  if ( a_len >= c_len ) {
1926  a_dim = 1; c_dim = 2;
1927  } else {
1928  a_dim = 2; c_dim = 1;
1929  }
1930  } else { // c is longest
1931  c_dim = 0;
1932  if ( a_len >= b_len ) {
1933  a_dim = 1; b_dim = 2;
1934  } else {
1935  a_dim = 2; b_dim = 1;
1936  }
1937  }
1938 #endif
1939  iout << iINFO << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
1940 #if CMK_BLUEGENEQ
1941  << " BY " << d_len << " BY " << e_len
1942 #endif
1943  << "\n" << endi;
1944  // CkPrintf("TopoManagerWrapper dims %d %d %d %d %d\n", a_dim, b_dim, c_dim, d_dim, e_dim);
1945  }
1946  void coords(int pe, int *crds) {
1947  int a,b,c,d,e,t;
1948 #if CMK_BLUEGENEQ
1949  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1950 #else
1951  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1952  d=0; e=0;
1953 #endif
1954  if ( a_dim < 3 ) crds[a_dim] = (a+a_rot)%a_mod;
1955  if ( b_dim < 3 ) crds[b_dim] = (b+b_rot)%b_mod;
1956  if ( c_dim < 3 ) crds[c_dim] = (c+c_rot)%c_mod;
1957  if ( d_dim < 3 ) crds[d_dim] = (d+d_rot)%d_mod;
1958  if ( e_dim < 3 ) crds[e_dim] = (e+e_rot)%e_mod;
1959  }
1960  int coord(int pe, int dim) {
1961  int crds[3];
1962  coords(pe,crds);
1963  return crds[dim];
1964  }
1967  const int *sortdims;
1969  bool operator() (int pe1, int pe2) const {
1970  int crds1[3], crds2[3];
1971  tmgr.coords(pe1,crds1);
1972  tmgr.coords(pe2,crds2);
1973  for ( int i=0; i<3; ++i ) {
1974  int d = sortdims[i];
1975  if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
1976  }
1977  const int *index = WorkDistrib::peCompactOrderingIndex;
1978  return ( index[pe1] < index[pe2] );
1979  }
1980  };
1981  int* sortAndSplit(int *node_begin, int *node_end, int splitdim) {
1982  if ( node_begin == node_end ) return node_begin;
1983  int tmins[3], tmaxs[3], tlens[3], sortdims[3];
1984  coords(*node_begin, tmins);
1985  coords(*node_begin, tmaxs);
1986  for ( int *peitr = node_begin; peitr != node_end; ++peitr ) {
1987  int tvals[3];
1988  coords(*peitr, tvals);
1989  for ( int i=0; i<3; ++i ) {
1990  if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
1991  if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
1992  }
1993  }
1994  for ( int i=0; i<3; ++i ) {
1995  tlens[i] = tmaxs[i] - tmins[i];
1996  }
1997  sortdims[0] = splitdim;
1998  for ( int i=0, j=0; i<3; ++i ) {
1999  if ( i != splitdim ) sortdims[++j] = i;
2000  }
2001  if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
2002  int tmp = sortdims[1];
2003  sortdims[1] = sortdims[2];
2004  sortdims[2] = tmp;
2005  }
2006  std::sort(node_begin,node_end,pe_sortop_topo(*this,sortdims));
2007  int *nodes = node_begin;
2008  int nnodes = node_end - node_begin;
2009  int i_split = 0;
2010 #if 0
2011  int c_split = coord(nodes[0],splitdim);
2012  for ( int i=0; i<nnodes; ++i ) {
2013  if ( coord(nodes[i],splitdim) != c_split ) {
2014  int mid = (nnodes+1)/2;
2015  if ( abs(i-mid) < abs(i_split-mid) ) {
2016  i_split = i;
2017  c_split = coord(i,splitdim);
2018  }
2019  else break;
2020  }
2021  }
2022 #endif
2023  for ( int i=0; i<nnodes; ++i ) {
2024  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2025  int mid = (nnodes+1)/2;
2026  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2027  else break;
2028  }
2029  }
2030  return ( node_begin + i_split );
2031  }
2032 };
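The gap-scan loops above ("find most compact representation of our subset") choose, for each torus dimension, a rotation that pushes the largest run of unused coordinates past the top of the range, so the min/max pass that follows sees the smallest possible extent. Below is a minimal standalone sketch of that rotation on a toy 8-coordinate dimension; it is hypothetical illustration only, not part of WorkDistrib.C.

// Illustration only: assumes an 8-wide torus dimension in which only
// coordinates {1,2,6,7} are occupied by allocated PEs.
#include <cstdio>
#include <vector>

int main() {
  const int na = 8;
  std::vector<int> used(na, 0);
  used[1] = used[2] = used[6] = used[7] = 1;

  int rot = 0;
  // Same scan as the a_rot/b_rot/c_rot loops: walk the dimension twice so a
  // gap that wraps around is seen as one run; remember the rotation that
  // places the end of the largest unused run at coordinate na-1.
  for (int i = 0, gaplen = 0, gapstart = 0; i < 2 * na; ++i) {
    if (used[i % na]) gapstart = i + 1;
    else if (i - gapstart >= gaplen) { rot = 2 * na - i - 1; gaplen = i - gapstart; }
  }

  // With rot == 2 the occupied set {1,2,6,7} maps to {3,4,0,1}: its extent
  // shrinks from 1..7 to 0..4 because the 3-wide gap now sits at the top.
  for (int c = 0; c < na; ++c)
    if (used[c]) printf("coord %d -> %d\n", c, (c + rot) % na);
  return 0;
}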
2033 
2037  inline bool operator() (int p1, int p2) const {
2038  int a1 = pmap->index_a(p1);
2039  int a2 = pmap->index_a(p2);
2040  if ( a1 < a2 ) return true;
2041  if ( a1 > a2 ) return false;
2042  int dir = ( (a1 & 1) ? -1 : 1 );
2043  int b1 = pmap->index_b(p1);
2044  int b2 = pmap->index_b(p2);
2045  if ( b1 * dir < b2 * dir ) return true;
2046  if ( b1 * dir > b2 * dir ) return false;
2047  dir *= ( (b1 & 1) ? -1 : 1 );
2048  int c1 = pmap->index_c(p1);
2049  int c2 = pmap->index_c(p2);
2050  if ( c1 * dir < c2 * dir ) return true;
2051  return false;
2052  }
2053 };
2054 
2058  inline bool operator() (int p1, int p2) const {
2059  int a1 = pmap->index_b(p1);
2060  int a2 = pmap->index_b(p2);
2061  if ( a1 < a2 ) return true;
2062  if ( a1 > a2 ) return false;
2063  int dir = ( (a1 & 1) ? -1 : 1 );
2064  int b1 = pmap->index_a(p1);
2065  int b2 = pmap->index_a(p2);
2066  if ( b1 * dir < b2 * dir ) return true;
2067  if ( b1 * dir > b2 * dir ) return false;
2068  dir *= ( (b1 & 1) ? -1 : 1 );
2069  int c1 = pmap->index_c(p1);
2070  int c2 = pmap->index_c(p2);
2071  if ( c1 * dir < c2 * dir ) return true;
2072  return false;
2073  }
2074 };
2075 
2079  inline bool operator() (int p1, int p2) const {
2080  int a1 = pmap->index_c(p1);
2081  int a2 = pmap->index_c(p2);
2082  if ( a1 < a2 ) return true;
2083  if ( a1 > a2 ) return false;
2084  int dir = ( (a1 & 1) ? -1 : 1 );
2085  int b1 = pmap->index_a(p1);
2086  int b2 = pmap->index_a(p2);
2087  if ( b1 * dir < b2 * dir ) return true;
2088  if ( b1 * dir > b2 * dir ) return false;
2089  dir *= ( (b1 & 1) ? -1 : 1 );
2090  int c1 = pmap->index_b(p1);
2091  int c2 = pmap->index_b(p2);
2092  if ( c1 * dir < c2 * dir ) return true;
2093  return false;
2094  }
2095 };
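The three comparators above (patch_sortop_curve_a/b/c) order patches along a serpentine curve over the patch grid: the leading index increases monotonically while the next index alternates direction on odd rows, so patches that are adjacent in the sorted order are also adjacent in space. The following small sketch of the same parity trick in two dimensions is hypothetical illustration only, not NAMD code.

// Illustration only: snake-order a 3x3 grid of (a,b) indices the way
// patch_sortop_curve_a orders a before b, reversing b on odd a values.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int,int> > idx;
  for (int a = 0; a < 3; ++a)
    for (int b = 0; b < 3; ++b) idx.push_back(std::make_pair(a, b));

  std::sort(idx.begin(), idx.end(),
            [](const std::pair<int,int> &p1, const std::pair<int,int> &p2) {
    if (p1.first != p2.first) return p1.first < p2.first;
    int dir = (p1.first & 1) ? -1 : 1;     // walk b backward on odd a rows
    return p1.second * dir < p2.second * dir;
  });

  for (size_t i = 0; i < idx.size(); ++i)
    printf("(%d,%d) ", idx[i].first, idx[i].second);
  printf("\n");   // (0,0) (0,1) (0,2) (1,2) (1,1) (1,0) (2,0) (2,1) (2,2)
  return 0;
}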
2096 
2098  int *patch_begin, int *patch_end,
2099  int *node_begin, int *node_end,
2100  double *patchLoads,
2101  double *sortedLoads,
2102  int *assignedNode,
2103  TopoManagerWrapper &tmgr
2104  ) {
2105 
2107  PatchMap *patchMap = PatchMap::Object();
2108  int *patches = patch_begin;
2109  int npatches = patch_end - patch_begin;
2110  int *nodes = node_begin;
2111  int nnodes = node_end - node_begin;
2112 
2113  // assign patch loads
2114  const int emptyPatchLoad = simParams->emptyPatchLoad;
2115  double totalRawLoad = 0;
2116  for ( int i=0; i<npatches; ++i ) {
2117  int pid=patches[i];
2118 #ifdef MEM_OPT_VERSION
2119  double load = patchMap->numAtoms(pid) + emptyPatchLoad;
2120 #else
2121  double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
2122 #endif
2123  patchLoads[pid] = load;
2124  sortedLoads[i] = load;
2125  totalRawLoad += load;
2126  }
2127  std::sort(sortedLoads,sortedLoads+npatches);
2128 
2129  // limit maxPatchLoad to adjusted average load per node
2130  double sumLoad = 0;
2131  double maxPatchLoad = 1;
2132  for ( int i=0; i<npatches; ++i ) {
2133  double load = sortedLoads[i];
2134  double total = sumLoad + (npatches-i) * load;
2135  if ( nnodes * load > total ) break;
2136  sumLoad += load;
2137  maxPatchLoad = load;
2138  }
2139  double totalLoad = 0;
2140  for ( int i=0; i<npatches; ++i ) {
2141  int pid=patches[i];
2142  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2143  totalLoad += patchLoads[pid];
2144  }
2145  if ( nnodes * maxPatchLoad > totalLoad )
2146  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2147 
2148  int a_len, b_len, c_len;
2149  int a_min, b_min, c_min;
2150  { // find dimensions
2151  a_min = patchMap->index_a(patches[0]);
2152  b_min = patchMap->index_b(patches[0]);
2153  c_min = patchMap->index_c(patches[0]);
2154  int a_max = a_min;
2155  int b_max = b_min;
2156  int c_max = c_min;
2157  for ( int i=1; i<npatches; ++i ) {
2158  int a = patchMap->index_a(patches[i]);
2159  int b = patchMap->index_b(patches[i]);
2160  int c = patchMap->index_c(patches[i]);
2161  if ( a < a_min ) a_min = a;
2162  if ( b < b_min ) b_min = b;
2163  if ( c < c_min ) c_min = c;
2164  if ( a > a_max ) a_max = a;
2165  if ( b > b_max ) b_max = b;
2166  if ( c > c_max ) c_max = c;
2167  }
2168  a_len = a_max - a_min;
2169  b_len = b_max - b_min;
2170  c_len = c_max - c_min;
2171  }
2172 
2173  int *node_split = node_begin;
2174 
2175  if ( simParams->disableTopology ) ; else
2176  if ( a_len >= b_len && a_len >= c_len ) {
2177  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2178  } else if ( b_len >= a_len && b_len >= c_len ) {
2179  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2180  } else if ( c_len >= a_len && c_len >= b_len ) {
2181  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2182  }
2183 
2184  if ( node_split == node_begin ) { // unable to split torus
2185  // make sure physical nodes are together
2186  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2187  // find physical node boundary to split on
2188  int i_split = 0;
2189  for ( int i=0; i<nnodes; ++i ) {
2190  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2191  int mid = (nnodes+1)/2;
2192  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2193  else break;
2194  }
2195  }
2196  node_split = node_begin + i_split;
2197  }
2198 
2199  bool final_patch_sort = false;
2200 
2201  if ( node_split == node_begin ) { // all on same physical node
2202  if ( ( simParams->verboseTopology ) &&
2203  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2204  int crds[3];
2205  tmgr.coords(*node_begin, crds);
2206  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2207  CmiPhysicalNodeID(*node_begin), *node_begin,
2208  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2209  a_min, b_min, c_min, npatches,
2210  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2211  }
2212 
2213  // final sort along a to minimize pme message count
2214  final_patch_sort = true;
2215 
2216  // find node (process) boundary to split on
2217  int i_split = 0;
2218  for ( int i=0; i<nnodes; ++i ) {
2219  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2220  int mid = (nnodes+1)/2;
2221  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2222  else break;
2223  }
2224  }
2225  node_split = node_begin + i_split;
2226  }
2227 
2228  if ( node_split == node_begin ) { // all on same node (process)
2229  if ( ( simParams->verboseTopology ) &&
2230  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2231  int crds[3];
2232  tmgr.coords(*node_begin, crds);
2233  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2234  CmiNodeOf(*node_begin), *node_begin, npatches,
2235  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2236  }
2237 
2238  // no natural divisions so just split at midpoint
2239  node_split = node_begin + nnodes/2;
2240  }
2241 
2242  if ( nnodes == 1 ) { // down to a single pe
2243  // assign all patches
2244  int *node = node_begin;
2245  sumLoad = 0;
2246  for ( int i=0; i < npatches; ++i ) {
2247  int pid = patches[i];
2248  assignedNode[pid] = *node;
2249  sumLoad += patchLoads[pid];
2250  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2251  i, *node,
2252  patchMap->index_a(pid),
2253  patchMap->index_b(pid),
2254  patchMap->index_c(pid),
2255  patchLoads[pid], sumLoad);
2256  }
2257 
2258  return;
2259  }
2260 
2261  if ( final_patch_sort ) {
2262  // final sort along a to minimize pme message count
2263  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2264  } else if ( a_len >= b_len && a_len >= c_len ) {
2265  if ( 0 ) CkPrintf("sort a\n");
2266  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2267  } else if ( b_len >= a_len && b_len >= c_len ) {
2268  if ( 0 ) CkPrintf("sort b\n");
2269  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2270  } else if ( c_len >= a_len && c_len >= b_len ) {
2271  if ( 0 ) CkPrintf("sort c\n");
2272  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2273  }
2274 
2275  int *patch_split;
2276  { // walk through patches in sorted order
2277  int *node = node_begin;
2278  sumLoad = 0;
2279  for ( patch_split = patch_begin;
2280  patch_split != patch_end && node != node_split;
2281  ++patch_split ) {
2282  sumLoad += patchLoads[*patch_split];
2283  double targetLoad = totalLoad *
2284  ((double)(node-node_begin+1) / (double)nnodes);
2285  if ( 0 ) CkPrintf("test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
2286  patch_split - patch_begin, *node,
2287  patchMap->index_a(*patch_split),
2288  patchMap->index_b(*patch_split),
2289  patchMap->index_c(*patch_split),
2290  sumLoad, targetLoad);
2291  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2292  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2293  }
2294  double targetLoad = totalLoad *
2295  ((double)(node_split-node_begin) / (double)nnodes);
2296  if ( 0 ) CkPrintf("split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
2297  node_split-node_begin, nnodes,
2298  patch_split-patch_begin, npatches,
2299  sumLoad, targetLoad);
2300  }
2301 
2302  // recurse
2304  patch_begin, patch_split, node_begin, node_split,
2305  patchLoads, sortedLoads, assignedNode, tmgr);
2307  patch_split, patch_end, node_split, node_end,
2308  patchLoads, sortedLoads, assignedNode, tmgr);
2309 }
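The capping pass near the top of recursive_bisect_with_curve() limits each patch load to the largest value that still fits the average load per node, so one very heavy patch cannot distort where the split point falls. The numeric sketch below is a hypothetical standalone example of that loop, not NAMD code.

#include <algorithm>
#include <cstdio>

int main() {
  // Four patch loads, one of which dominates, split across two nodes.
  double loads[] = {10, 20, 30, 1000};
  const int npatches = 4, nnodes = 2;
  std::sort(loads, loads + npatches);

  double sumLoad = 0, maxPatchLoad = 1;
  for (int i = 0; i < npatches; ++i) {
    double load = loads[i];
    double total = sumLoad + (npatches - i) * load;  // total if remaining loads were capped here
    if (nnodes * load > total) break;                // this load alone would exceed the per-node average
    sumLoad += load;
    maxPatchLoad = load;
  }
  // Prints 30: in the real code the 1000-unit patch would then be counted as 30
  // when accumulating toward each node's target load.
  printf("maxPatchLoad = %g\n", maxPatchLoad);
  return 0;
}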
2310 
2311 //----------------------------------------------------------------------
2312 void WorkDistrib::assignPatchesSpaceFillingCurve()
2313 {
2314  TopoManagerWrapper tmgr;
2315  PatchMap *patchMap = PatchMap::Object();
2316  const int numPatches = patchMap->numPatches();
2317  int *assignedNode = new int[numPatches];
2318  ResizeArray<double> patchLoads(numPatches);
2319  SortableResizeArray<double> sortedLoads(numPatches);
2320  int numNodes = Node::Object()->numNodes();
2322  if(simParams->simulateInitialMapping) {
2323  NAMD_die("simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
2324  numNodes = simParams->simulatedPEs;
2325  }
2326 
2327  ResizeArray<int> patchOrdering(numPatches);
2328  for ( int i=0; i<numPatches; ++i ) {
2329  patchOrdering[i] = i;
2330  }
2331 
2332  ResizeArray<int> nodeOrdering(numNodes);
2333  nodeOrdering.resize(0);
2334  for ( int i=0; i<numNodes; ++i ) {
2335  int pe = peDiffuseOrdering[(i+1)%numNodes]; // avoid 0 if possible
2336  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
2337  if ( pe == 0 ) continue;
2338  if(simParams->noPatchesOnOne && numNodes > 2) {
2339  if ( pe == 1 ) continue;
2340  }
2341  }
2342 #ifdef MEM_OPT_VERSION
2343  if(simParams->noPatchesOnOutputPEs && numNodes-simParams->numoutputprocs >2) {
2344  if ( isOutputProcessor(pe) ) continue;
2345  }
2346 #endif
2347  nodeOrdering.add(pe);
2348  if ( 0 ) CkPrintf("using pe %5d\n", pe);
2349  }
2350 
2351  int *node_begin = nodeOrdering.begin();
2352  int *node_end = nodeOrdering.end();
2353  if ( nodeOrdering.size() > numPatches ) {
2354  node_end = node_begin + numPatches;
2355  }
2356  std::sort(node_begin, node_end, pe_sortop_compact());
2357 
2358  int *basenode_begin = node_begin;
2359  int *basenode_end = node_end;
2360  if ( nodeOrdering.size() > 2*numPatches ) {
2361  basenode_begin = node_end;
2362  basenode_end = basenode_begin + numPatches;
2363  std::sort(basenode_begin, basenode_end, pe_sortop_compact());
2364  }
2365 
2366  if ( simParams->disableTopology ) {
2367  iout << iWARN << "IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << endi;
2368  }
2369 
2371  patchOrdering.begin(), patchOrdering.end(),
2372  node_begin, node_end,
2373  patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
2374 
2375  std::sort(node_begin, node_end, pe_sortop_compact());
2376 
2377  int samenodecount = 0;
2378 
2379  for ( int pid=0; pid<numPatches; ++pid ) {
2380  int node = assignedNode[pid];
2381  patchMap->assignNode(pid, node);
2382  int nodeidx = std::lower_bound(node_begin, node_end, node,
2383  pe_sortop_compact()) - node_begin;
2384  int basenode = basenode_begin[nodeidx];
2385  patchMap->assignBaseNode(pid, basenode);
2386  if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
2387  }
2388 
2389  iout << iINFO << "Placed " << (samenodecount*100./numPatches) << "% of base nodes on same physical node as patch\n" << endi;
2390 
2391  delete [] assignedNode;
2392 }
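The base-node assignment above relies on a rank-pairing idiom: both PE ranges are sorted with the same comparator, lower_bound recovers the rank of a patch's assigned PE in the first range, and that rank selects its partner from the second range. Below is a hypothetical standalone sketch of the idiom using plain integer ordering instead of pe_sortop_compact; it is not NAMD code.

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> nodes     = {4, 9, 2, 7};   // e.g. PEs receiving patches
  std::vector<int> basenodes = {5, 1, 8, 3};   // e.g. candidate base PEs
  std::sort(nodes.begin(), nodes.end());
  std::sort(basenodes.begin(), basenodes.end());

  int assigned = 7;                                      // node chosen for some patch
  int rank = std::lower_bound(nodes.begin(), nodes.end(),
                              assigned) - nodes.begin(); // rank of 7 in {2,4,7,9} is 2
  // Pairs node 7 with base node 5 (the element of the same rank).
  printf("node %d pairs with base node %d\n", assigned, basenodes[rank]);
  return 0;
}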
2393 
2394 //----------------------------------------------------------------------
2396 {
2397  PatchMap *patchMap = PatchMap::Object();
2398  ComputeMap *computeMap = ComputeMap::Object();
2399  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2400  Node *node = nd.ckLocalBranch();
2401 
2402  DebugM(3,"Mapping computes\n");
2403 
2404  computeMap->allocateCids();
2405 
2406  // Handle full electrostatics
2407  if ( node->simParameters->fullDirectOn )
2408  mapComputeHomePatches(computeFullDirectType);
2409  if ( node->simParameters->FMAOn )
2410 #ifdef DPMTA
2411  mapComputeHomePatches(computeDPMTAType);
2412 #else
2413  NAMD_die("This binary does not include DPMTA (FMA).");
2414 #endif
2415  if ( node->simParameters->PMEOn ) {
2416 #ifdef DPME
2417  if ( node->simParameters->useDPME )
2418  mapComputeHomePatches(computeDPMEType);
2419  else {
2420  mapComputeHomePatches(computePmeType);
2422  mapComputeHomePatches(computeEwaldType);
2423  }
2424 #else
2425 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2426  if (node->simParameters->usePMECUDA) {
2427  mapComputePatch(computePmeCUDAType);
2428  } else
2429 #endif
2430  {
2431  mapComputePatch(computePmeType);
2432  }
2434  mapComputeHomePatches(computeEwaldType);
2435 #endif
2436  }
2437 
2438  if ( node->simParameters->globalForcesOn ) {
2439  DebugM(2,"adding ComputeGlobal\n");
2440  mapComputeHomePatches(computeGlobalType);
2441  }
2442 
2443  if ( node->simParameters->extForcesOn )
2444  mapComputeHomePatches(computeExtType);
2445 
2446  if ( node->simParameters->qmForcesOn )
2447  mapComputeHomePatches(computeQMType);
2448 
2449  if ( node->simParameters->GBISserOn )
2450  mapComputeHomePatches(computeGBISserType);
2451 
2452  if ( node->simParameters->MsmSerialOn )
2453  mapComputeHomePatches(computeMsmSerialType);
2454 
2455  if ( node->simParameters->LJPMEOn )
2456  mapComputeHomePatches(computeLjPmeSerialType);
2457 #ifdef CHARM_HAS_MSA
2458  else if ( node->simParameters->MSMOn )
2459  mapComputeHomePatches(computeMsmMsaType);
2460 #else
2461  else if ( node->simParameters->MSMOn )
2462  mapComputeHomePatches(computeMsmType);
2463 #endif
2464 
2465  if ( node->simParameters->FMMOn )
2466  mapComputeHomePatches(computeFmmType);
2467 
2468 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2469 #ifdef BONDED_CUDA
2470  if (node->simParameters->bondedCUDA) {
2471  mapComputeNode(computeBondedCUDAType);
2472  }
2473 #endif
2474 #endif
2475 
2476 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2477  mapComputeNode(computeNonbondedCUDA2Type);
2478  mapComputeHomeTuples(computeExclsType);
2479  mapComputePatch(computeSelfExclsType);
2480 #endif
2481 
2482 #ifdef NAMD_MIC
2483  mapComputeNode(computeNonbondedMICType);
2484 #endif
2485 
2486  mapComputeNonbonded();
2487 
2488  if ( node->simParameters->LCPOOn ) {
2489  mapComputeLCPO();
2490  }
2491 
2492  // If we're doing true pair interactions, no need for bonded terms.
2493  // But if we're doing within-group interactions, we do need them.
2494  if ( !node->simParameters->pairInteractionOn ||
2496  mapComputeHomeTuples(computeBondsType);
2497  mapComputeHomeTuples(computeAnglesType);
2498  mapComputeHomeTuples(computeDihedralsType);
2499  mapComputeHomeTuples(computeImpropersType);
2500  mapComputeHomeTuples(computeCrosstermsType);
2501  mapComputePatch(computeSelfBondsType);
2502  mapComputePatch(computeSelfAnglesType);
2503  mapComputePatch(computeSelfDihedralsType);
2504  mapComputePatch(computeSelfImpropersType);
2505  mapComputePatch(computeSelfCrosstermsType);
2506  }
2507 
2508  if ( node->simParameters->goGroPair ) {
2509  // JLai
2510  mapComputeHomeTuples(computeGromacsPairType);
2511  mapComputePatch(computeSelfGromacsPairType);
2512  // End of JLai
2513  }
2514 
2515  if ( node->simParameters->drudeOn ) {
2516  mapComputeHomeTuples(computeTholeType);
2517  mapComputePatch(computeSelfTholeType);
2518  mapComputeHomeTuples(computeAnisoType);
2519  mapComputePatch(computeSelfAnisoType);
2520  }
2521 
2522  if ( node->simParameters->eFieldOn )
2523  mapComputePatch(computeEFieldType);
2524  /* BEGIN gf */
2525  if ( node->simParameters->mgridforceOn )
2526  mapComputePatch(computeGridForceType);
2527  /* END gf */
2528  if ( node->simParameters->stirOn )
2529  mapComputePatch(computeStirType);
2530  if ( node->simParameters->sphericalBCOn )
2531  mapComputePatch(computeSphericalBCType);
2532  if ( node->simParameters->cylindricalBCOn )
2533  mapComputePatch(computeCylindricalBCType);
2534  if ( node->simParameters->tclBCOn ) {
2535  mapComputeHomePatches(computeTclBCType);
2536  }
2537  if ( node->simParameters->constraintsOn )
2538  mapComputePatch(computeRestraintsType);
2539  if ( node->simParameters->consForceOn )
2540  mapComputePatch(computeConsForceType);
2541  if ( node->simParameters->consTorqueOn )
2542  mapComputePatch(computeConsTorqueType);
2543 
2544  // store the latest compute map
2546  if (simParams->storeComputeMap) {
2547  computeMap->saveComputeMap(simParams->computeMapFilename);
2548  }
2549  // override mapping decision
2550  if (simParams->loadComputeMap) {
2551  computeMap->loadComputeMap(simParams->computeMapFilename);
2552  CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
2553  }
2554 }
2555 
2556 //----------------------------------------------------------------------
2557 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
2558 {
2559  PatchMap *patchMap = PatchMap::Object();
2560  ComputeMap *computeMap = ComputeMap::Object();
2561  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2562  Node *node = nd.ckLocalBranch();
2563 
2564  int numNodes = node->numNodes();
2565  SimParameters *simparam = node->simParameters;
2566  if(simparam->simulateInitialMapping) {
2567  numNodes = simparam->simulatedPEs;
2568  }
2569 
2570  char *isBaseNode = new char[numNodes];
2571  memset(isBaseNode,0,numNodes*sizeof(char));
2572 
2573  int numPatches = patchMap->numPatches();
2574  for(int j=0; j<numPatches; j++) {
2575  isBaseNode[patchMap->basenode(j)] = 1;
2576  }
2577 
2578  for(int i=0; i<numNodes; i++) {
2579  if ( isBaseNode[i] ) {
2580  computeMap->storeCompute(i,0,type);
2581  }
2582  }
2583 
2584  delete [] isBaseNode;
2585 }
2586 
2587 //----------------------------------------------------------------------
2588 void WorkDistrib::mapComputeHomePatches(ComputeType type)
2589 {
2590  PatchMap *patchMap = PatchMap::Object();
2591  ComputeMap *computeMap = ComputeMap::Object();
2592  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2593  Node *node = nd.ckLocalBranch();
2594 
2595  int numNodes = node->numNodes();
2596  SimParameters *simparam = node->simParameters;
2597  if(simparam->simulateInitialMapping) {
2598  numNodes = simparam->simulatedPEs;
2599  }
2600 
2601  for(int i=0; i<numNodes; i++) {
2602  if ( patchMap->numPatchesOnNode(i) ) {
2603  computeMap->storeCompute(i,0,type);
2604  }
2605  }
2606 }
2607 
2608 //----------------------------------------------------------------------
2609 void WorkDistrib::mapComputePatch(ComputeType type)
2610 {
2611  PatchMap *patchMap = PatchMap::Object();
2612  ComputeMap *computeMap = ComputeMap::Object();
2613 
2614  PatchID i;
2615  ComputeID cid;
2616 
2617  for(i=0; i<patchMap->numPatches(); i++)
2618  {
2619  cid=computeMap->storeCompute(patchMap->node(i),1,type);
2620  computeMap->newPid(cid,i);
2621  patchMap->newCid(i,cid);
2622  }
2623 
2624 }
2625 
2626 //----------------------------------------------------------------------
2627 void WorkDistrib::mapComputeNode(ComputeType type)
2628 {
2629  PatchMap *patchMap = PatchMap::Object();
2630  ComputeMap *computeMap = ComputeMap::Object();
2631 
2632  PatchID i;
2633  ComputeID cid;
2634 
2635  int ncpus = CkNumPes();
2636  SimParameters *simparam = Node::Object()->simParameters;
2637  if(simparam->simulateInitialMapping) {
2638  ncpus = simparam->simulatedPEs;
2639  }
2640 
2641  for(int i=0; i<ncpus; i++) {
2642  computeMap->storeCompute(i,0,type);
2643  }
2644 
2645 }
2646 
2647 //----------------------------------------------------------------------
2648 void WorkDistrib::mapComputeNonbonded(void)
2649 {
2650  // For each patch, create 1 electrostatic object for self-interaction.
2651  // Then create 1 for each 1-away and 2-away neighbor which has a larger
2652  // pid.
2653 
2654  PatchMap *patchMap = PatchMap::Object();
2655  ComputeMap *computeMap = ComputeMap::Object();
2656  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2657  Node *node = nd.ckLocalBranch();
2659  int ncpus = CkNumPes();
2660  int nodesize = CkMyNodeSize();
2661  if(simParams->simulateInitialMapping) {
2662  ncpus = simParams->simulatedPEs;
2663  nodesize = simParams->simulatedNodeSize;
2664  }
2665 
2667  PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
2668  int oneAwayTrans[PatchMap::MaxOneOrTwoAway];
2669 
2670  PatchID i;
2671  ComputeID cid;
2672  int numNeighbors;
2673  int j;
2674  double partScaling = 1.0;
2675  if ( ncpus < patchMap->numPatches() ) {
2676  partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
2677  }
2678 
2679  for(i=0; i<patchMap->numPatches(); i++) // do the self
2680  {
2681 
2682  int numPartitions = 1;
2683 #if 0
2684  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2685 #ifdef MEM_OPT_VERSION
2686  int64 numFixed = patchMap->numFixedAtoms(i);
2687  int64 numAtoms = patchMap->numAtoms(i);
2688 #else
2689  int64 numFixed = patchMap->patch(i)->getNumFixedAtoms(); // avoid overflow
2690  int64 numAtoms = patchMap->patch(i)->getNumAtoms();
2691 #endif
2692 
2693  int divide = node->simParameters->numAtomsSelf;
2694  if (divide > 0) {
2695  numPartitions = (int) ( partScaling * ( 0.5 +
2696  (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
2697  }
2698  if (numPartitions < 1) numPartitions = 1;
2699  if ( numPartitions > node->simParameters->maxSelfPart )
2700  numPartitions = node->simParameters->maxSelfPart;
2701  // self-interaction
2702  DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
2703 // iout <<"Self numPartitions = " <<numPartitions <<" numAtoms " <<numAtoms <<std::endl;
2704  }
2705 #endif
2706 
2707  // DMK - NOTE - For MIC builds (i.e. NAMD_MIC is defined), it is assumed that self computes are
2708  // mapped to the PE their associated patch is on. If the code below should change, making that
2709  // untrue, MIC builds should be special cased so that assumption still holds (or the host vs
2710  // device load balancing scheme should be modified). (See the comment in the function
2711  // mic_assignComputes() in ComputeNonbondedMIC.C for more details.)
2712  for(int partition=0; partition < numPartitions; partition++)
2713  {
2714  cid=computeMap->storeCompute(patchMap->node(i),1,
2716  partition,numPartitions);
2717  computeMap->newPid(cid,i);
2718  patchMap->newCid(i,cid);
2719  }
2720  }
2721 
2722  for(int p1=0; p1 <patchMap->numPatches(); p1++) // do the pairs
2723  {
2724  // this only returns half of neighbors, which is what we want
2725  numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
2726  for(j=0;j<numNeighbors;j++)
2727  {
2728  int p2 = oneAway[j];
2729  int dsp = oneAwayDownstream[j];
2730 
2731  int numPartitions = 1;
2732 #if 0
2733  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2734 #ifdef MEM_OPT_VERSION
2735  int64 numAtoms1 = patchMap->numAtoms(p1);
2736  int64 numAtoms2 = patchMap->numAtoms(p2);
2737  int64 numFixed1 = patchMap->numFixedAtoms(p1);
2738  int64 numFixed2 = patchMap->numFixedAtoms(p2);
2739 #else
2740  int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
2741  int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
2742  int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
2743  int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
2744 #endif
2745 
2746 
2747  const int t2 = oneAwayTrans[j];
2748  const int adim = patchMap->gridsize_a();
2749  const int bdim = patchMap->gridsize_b();
2750  const int cdim = patchMap->gridsize_c();
2751  const int nax = patchMap->numaway_a(); // 1 or 2
2752  const int nay = patchMap->numaway_b(); // 1 or 2
2753  const int naz = patchMap->numaway_c(); // 1 or 2
2754  const int ia1 = patchMap->index_a(p1);
2755  const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
2756  const int ib1 = patchMap->index_b(p1);
2757  const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
2758  const int ic1 = patchMap->index_c(p1);
2759  const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);
2760 
2761  if ( abs(ia2-ia1) > nax ||
2762  abs(ib2-ib1) > nay ||
2763  abs(ic2-ic1) > naz )
2764  NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");
2765 
2766  int distance = 3;
2767  if ( ia1 == ia2 ) --distance;
2768  else if ( ia1 == ia2 + nax - 1 ) --distance;
2769  else if ( ia1 + nax - 1 == ia2 ) --distance;
2770  if ( ib1 == ib2 ) --distance;
2771  else if ( ib1 == ib2 + nay - 1 ) --distance;
2772  else if ( ib1 + nay - 1 == ib2 ) --distance;
2773  if ( ic1 == ic2 ) --distance;
2774  else if ( ic1 == ic2 + naz - 1 ) --distance;
2775  else if ( ic1 + naz - 1 == ic2 ) --distance;
2776  int divide = 0;
2777  if ( distance == 0 ) {
2778  divide = node->simParameters->numAtomsSelf2;
2779  } else if (distance == 1) {
2780  divide = node->simParameters->numAtomsPair;
2781  } else {
2782  divide = node->simParameters->numAtomsPair2;
2783  }
2784  if (divide > 0) {
2785  numPartitions = (int) ( partScaling * ( 0.5 +
2786  (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
2787  }
2788  if ( numPartitions < 1 ) numPartitions = 1;
2789  if ( numPartitions > node->simParameters->maxPairPart )
2790  numPartitions = node->simParameters->maxPairPart;
2791 // if ( numPartitions > 1 ) iout << "Mapping " << numPartitions << " ComputeNonbondedPair objects for patches " << p1 << "(" << numAtoms1 << ") and " << p2 << "(" << numAtoms2 << ")\n" << endi;
2792  }
2793 #endif
2794  for(int partition=0; partition < numPartitions; partition++)
2795  {
2796  cid=computeMap->storeCompute( patchMap->basenode(dsp),
2797  2,computeNonbondedPairType,partition,numPartitions);
2798  computeMap->newPid(cid,p1);
2799  computeMap->newPid(cid,p2,oneAwayTrans[j]);
2800  patchMap->newCid(p1,cid);
2801  patchMap->newCid(p2,cid);
2802  }
2803  }
2804  }
2805 }
2806 
2807 //----------------------------------------------------------------------
2808 void WorkDistrib::mapComputeLCPO(void) {
2809  //iterate over all needed objects
2810 
2811  PatchMap *patchMap = PatchMap::Object();
2812  ComputeMap *computeMap = ComputeMap::Object();
2813  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2814  Node *node = nd.ckLocalBranch();
2816  int ncpus = CkNumPes();
2817  int nodesize = CkMyNodeSize();
2818  const int maxPatches = 8;
2819 
2820  int numPatchesInOctet;
2821  PatchID patchesInOctet[maxPatches];
2822  int oneAwayTrans[maxPatches];
2823 
2824  //partitioned after 1st timestep
2825  int numPartitions = 1;
2826 
2827  PatchID i;
2828  ComputeID cid;
2829 
2830  // one octet per patch
2831  for(i=0; i<patchMap->numPatches(); i++) {
2832  numPatchesInOctet =
2833  patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);
2834 
2835  for(int partition=0; partition < numPartitions; partition++) {
2836  cid=computeMap->storeCompute(patchMap->node(i),
2837  numPatchesInOctet,
2839  partition,
2840  numPartitions);
2841  for (int p = 0; p < numPatchesInOctet; p++) {
2842  computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
2843  }
2844  for (int p = 0; p < numPatchesInOctet; p++) {
2845  patchMap->newCid(patchesInOctet[p],cid);
2846  }
2847  } // for partitions
2848  } // for patches
2849 } // mapComputeLCPO
2850 
2851 //----------------------------------------------------------------------
2853  LocalWorkMsg *msg = compute->localWorkMsg;
2854  int seq = compute->sequence();
2855  int gbisPhase = compute->getGBISPhase();
2856 
2857  if ( seq < 0 ) {
2858  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2859  } else {
2860  SET_PRIORITY(msg,seq,compute->priority());
2861  }
2862 
2863  msg->compute = compute; // pointer is valid since send is to local Pe
2864  int type = compute->type();
2865  int cid = compute->cid;
2866 
2867  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2868  switch ( type ) {
2869  case computeExclsType:
2870  case computeSelfExclsType:
2871  wdProxy[CkMyPe()].enqueueExcls(msg);
2872  break;
2873  case computeBondsType:
2874  case computeSelfBondsType:
2875  wdProxy[CkMyPe()].enqueueBonds(msg);
2876  break;
2877  case computeAnglesType:
2878  case computeSelfAnglesType:
2879  wdProxy[CkMyPe()].enqueueAngles(msg);
2880  break;
2881  case computeDihedralsType:
2883  wdProxy[CkMyPe()].enqueueDihedrals(msg);
2884  break;
2885  case computeImpropersType:
2887  wdProxy[CkMyPe()].enqueueImpropers(msg);
2888  break;
2889  case computeTholeType:
2890  case computeSelfTholeType:
2891  wdProxy[CkMyPe()].enqueueThole(msg);
2892  break;
2893  case computeAnisoType:
2894  case computeSelfAnisoType:
2895  wdProxy[CkMyPe()].enqueueAniso(msg);
2896  break;
2897  case computeCrosstermsType:
2899  wdProxy[CkMyPe()].enqueueCrossterms(msg);
2900  break;
2901  // JLai
2904  wdProxy[CkMyPe()].enqueueGromacsPair(msg);
2905  break;
2906  // End of JLai
2907  case computeLCPOType:
2908  wdProxy[CkMyPe()].enqueueLCPO(msg);
2909  break;
2911  switch ( seq % 2 ) {
2912  case 0:
2913  //wdProxy[CkMyPe()].enqueueSelfA(msg);
2914  switch ( gbisPhase ) {
2915  case 1:
2916  wdProxy[CkMyPe()].enqueueSelfA1(msg);
2917  break;
2918  case 2:
2919  wdProxy[CkMyPe()].enqueueSelfA2(msg);
2920  break;
2921  case 3:
2922  wdProxy[CkMyPe()].enqueueSelfA3(msg);
2923  break;
2924  }
2925  break;
2926  case 1:
2927  //wdProxy[CkMyPe()].enqueueSelfB(msg);
2928  switch ( gbisPhase ) {
2929  case 1:
2930  wdProxy[CkMyPe()].enqueueSelfB1(msg);
2931  break;
2932  case 2:
2933  wdProxy[CkMyPe()].enqueueSelfB2(msg);
2934  break;
2935  case 3:
2936  wdProxy[CkMyPe()].enqueueSelfB3(msg);
2937  break;
2938  }
2939  break;
2940  default:
2941  NAMD_bug("WorkDistrib::messageEnqueueSelf case statement error!");
2942  }
2943  break;
2945  switch ( seq % 2 ) {
2946  case 0:
2947  //wdProxy[CkMyPe()].enqueueWorkA(msg);
2948  switch ( gbisPhase ) {
2949  case 1:
2950  wdProxy[CkMyPe()].enqueueWorkA1(msg);
2951  break;
2952  case 2:
2953  wdProxy[CkMyPe()].enqueueWorkA2(msg);
2954  break;
2955  case 3:
2956  wdProxy[CkMyPe()].enqueueWorkA3(msg);
2957  break;
2958  }
2959  break;
2960  case 1:
2961  //wdProxy[CkMyPe()].enqueueWorkB(msg);
2962  switch ( gbisPhase ) {
2963  case 1:
2964  wdProxy[CkMyPe()].enqueueWorkB1(msg);
2965  break;
2966  case 2:
2967  wdProxy[CkMyPe()].enqueueWorkB2(msg);
2968  break;
2969  case 3:
2970  wdProxy[CkMyPe()].enqueueWorkB3(msg);
2971  break;
2972  }
2973  break;
2974  case 2:
2975  wdProxy[CkMyPe()].enqueueWorkC(msg);
2976  break;
2977  default:
2978  NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
2979  }
2980  break;
2981 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2983 // CkPrintf("WorkDistrib[%d]::CUDA seq=%d phase=%d\n", CkMyPe(), seq, gbisPhase);
2984  //wdProxy[CkMyPe()].enqueueCUDA(msg);
2985  switch ( gbisPhase ) {
2986  case 1:
2987  wdProxy[CkMyPe()].enqueueCUDA(msg);
2988  break;
2989  case 2:
2990  wdProxy[CkMyPe()].enqueueCUDAP2(msg);
2991  break;
2992  case 3:
2993  wdProxy[CkMyPe()].enqueueCUDAP3(msg);
2994  break;
2995  }
2996 #else
2998 #endif
2999  break;
3001 #ifdef NAMD_MIC
3002  wdProxy[CkMyPe()].enqueueMIC(msg);
3003 #endif
3004  break;
3005  case computePmeType:
3006  // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
3007  wdProxy[CkMyPe()].enqueuePme(msg);
3008  break;
3009 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3010  case computePmeCUDAType:
3011  wdProxy[CkMyPe()].enqueuePme(msg);
3012  break;
3013 #endif
3014  default:
3015  wdProxy[CkMyPe()].enqueueWork(msg);
3016  }
3017 }
3018 
3019 //----------------------------------------------------------------------
3021  LocalWorkMsg *msg = compute->localWorkMsg;
3022  int seq = compute->sequence();
3023  int gbisPhase = compute->getGBISPhase();
3024 
3025  if ( seq < 0 ) {
3026  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
3027  } else {
3028  SET_PRIORITY(msg,seq,compute->priority());
3029  }
3030 
3031  msg->compute = compute; // pointer is valid since send is to local Pe
3032  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3033 
3034 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3035  //wdProxy[CkMyPe()].finishCUDA(msg);
3036  switch ( gbisPhase ) {
3037  case 1:
3038  wdProxy[CkMyPe()].finishCUDA(msg);
3039  break;
3040  case 2:
3041  wdProxy[CkMyPe()].finishCUDAP2(msg);
3042  break;
3043  case 3:
3044  wdProxy[CkMyPe()].finishCUDAP3(msg);
3045  break;
3046  }
3047 #else
3049 #endif
3050 }
3051 
3052 //----------------------------------------------------------------------
3054  LocalWorkMsg *msg = compute->localWorkMsg;
3055  int seq = compute->sequence();
3056  int gbisPhase = compute->getGBISPhase();
3057 
3058  if ( seq < 0 ) {
3059  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
3060  } else {
3061  SET_PRIORITY(msg,seq,compute->priority());
3062  }
3063 
3064  msg->compute = compute; // pointer is valid since send is to local Pe
3065  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3066 
3067 #ifdef NAMD_MIC
3068  wdProxy[CkMyPe()].finishMIC(msg);
3069 #else
3071 #endif
3072 }
3073 
3076  if ( msg->compute->localWorkMsg != msg )
3077  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3078 }
3079 
3082  if ( msg->compute->localWorkMsg != msg )
3083  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3084 }
3085 
3088  if ( msg->compute->localWorkMsg != msg )
3089  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3090 }
3091 
3094  if ( msg->compute->localWorkMsg != msg )
3095  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3096 }
3097 
3100  if ( msg->compute->localWorkMsg != msg )
3101  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3102 }
3103 
3106  if ( msg->compute->localWorkMsg != msg )
3107  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3108 }
3109 
3112  if ( msg->compute->localWorkMsg != msg )
3113  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3114 }
3115 
3118  if ( msg->compute->localWorkMsg != msg )
3119  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3120 }
3121 
3124  if ( msg->compute->localWorkMsg != msg )
3125  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3126 }
3127 
3128 // JLai
3130  msg->compute->doWork();
3132  if ( msg->compute->localWorkMsg != msg )
3133  NAMD_bug("\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
3134 }
3135 // End of JLai
3136 
3139  if ( msg->compute->localWorkMsg != msg )
3140  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3141 }
3142 
3144  msg->compute->doWork();
3145  if ( msg->compute->localWorkMsg != msg )
3146  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3147 }
3150  if ( msg->compute->localWorkMsg != msg )
3151  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3152 }
3155  if ( msg->compute->localWorkMsg != msg )
3156  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3157 }
3160  if ( msg->compute->localWorkMsg != msg )
3161  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3162 }
3163 
3166  if ( msg->compute->localWorkMsg != msg )
3167  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3168 }
3171  if ( msg->compute->localWorkMsg != msg )
3172  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3173 }
3176  if ( msg->compute->localWorkMsg != msg )
3177  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3178 }
3179 
3182  if ( msg->compute->localWorkMsg != msg )
3183  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3184 }
3187  if ( msg->compute->localWorkMsg != msg )
3188  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3189 }
3192  if ( msg->compute->localWorkMsg != msg )
3193  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3194 }
3195 
3198  if ( msg->compute->localWorkMsg != msg )
3199  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3200 }
3203  if ( msg->compute->localWorkMsg != msg )
3204  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3205 }
3208  if ( msg->compute->localWorkMsg != msg )
3209  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3210 }
3211 
3212 
3213 
3216  if ( msg->compute->localWorkMsg != msg )
3217  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3218 }
3219 
3222 }
3225 }
3228 }
3229 
3231  msg->compute->finishPatch(msg->data);
3232 }
3233 
3236 }
3239 }
3242 }
3243 
3246 }
3249 }
3250 
3251 
3252 //**********************************************************************
3253 //
3254 // FUNCTION velocities_from_PDB
3255 //
3256 // INPUTS:
3257 // v - Array of vectors to populate
3258 // filename - name of the PDB file to read in
3259 //
3260 // This function reads in a set of initial velocities from a
3261 // PDB file. It places the velocities into the array of Vectors
3262 // passed to it.
3263 //
3264 //***********************************************************************/
3265 
3266 void WorkDistrib::velocities_from_PDB(const char *filename,
3267  Vector *v, int totalAtoms)
3268 {
3269  PDB *v_pdb; // PDB info from velocity PDB
3270  int i;
3271 
3272  // Read the PDB
3273  v_pdb = new PDB(filename);
3274  if ( v_pdb == NULL )
3275  {
3276  NAMD_die("memory allocation failed in WorkDistrib::velocities_from_PDB");
3277  }
3278 
3279  // Make sure the number of velocities read in matches
3280  // the number of atoms we have
3281  if (v_pdb->num_atoms() != totalAtoms)
3282  {
3283  char err_msg[129];
3284 
3285  sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
3286  v_pdb->num_atoms());
3287 
3288  NAMD_die(err_msg);
3289  }
3290 
3291  // Get the entire list of atom info and loop through
3292  // them assigning the velocity vector for each one
3293  v_pdb->get_all_positions(v);
3294 
3295  for (i=0; i<totalAtoms; i++)
3296  {
3297  v[i].x *= PDBVELINVFACTOR;
3298  v[i].y *= PDBVELINVFACTOR;
3299  v[i].z *= PDBVELINVFACTOR;
3300  }
3301 
3302  delete v_pdb;
3303 }
3304 // END OF FUNCTION velocities_from_PDB
3305 
3306 //**********************************************************************
3307 //
3308 // FUNCTION velocities_from_binfile
3309 //
3310 // INPUTS:
3311 // fname - File name to read velocities from
3312 // n - Number of atoms in system
3313 // vels - Array of velocity vectors to populate
3314 //
3315 // This function reads in the velocities in binary format. This is
3316 // done to preserve accuracy between restarts of namd.
3317 //
3318 //**********************************************************************
3319 
3320 void WorkDistrib::velocities_from_binfile(const char *fname, Vector *vels, int n)
3321 {
3322  read_binary_file(fname,vels,n);
3323 }
3324 // END OF FUNCTION velocities_from_binfile
3325 
3326 //**********************************************************************
3327 //
3328 // FUNCTION random_velocities
3329 //
3330 // INPUTS:
3331 // v - array of vectors to populate
3332 // Temp - Temperature to achieve
3333 //
3334 // This function assigns a random velocity distribution to a
3335 // simulation to achieve a desired initial temperature. The method
3336 // used here was stolen from the program X-PLOR.
3337 //
3338 //**********************************************************************
3339 
3340 void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
3341  Vector *v, int totalAtoms)
3342 {
3343  int i, j; // Loop counter
3344  BigReal kbT; // Boltzmann constant * Temp
3345  BigReal randnum; // Random number from -6.0 to 6.0
3346  BigReal kbToverM; // sqrt(Kb*Temp/Mass)
3348  Bool lesOn = simParams->lesOn;
3349  Random vel_random(simParams->randomSeed);
3350 
3351  int lesReduceTemp = lesOn && simParams->lesReduceTemp;
3352  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
3353 
3354  kbT = Temp*BOLTZMANN;
3355 
3356  // Loop through all the atoms and assign velocities in
3357  // the x, y and z directions for each one
3358  for (i=0; i<totalAtoms; i++)
3359  {
3360  if (structure->atommass(i) <= 0.) {
3361  kbToverM = 0.;
3362  } else {
3363  kbToverM = sqrt(kbT *
3364  ( lesOn && structure->get_fep_type(i) ? tempFactor : 1.0 ) /
3365  structure->atommass(i) );
3366  }
3367 
3368  // The following comment was stolen from X-PLOR, from which
3369  // this section of code was adapted.
3370 
3371  // This section generates a Gaussian random
3372  // deviate of 0.0 mean and standard deviation RFD for
3373  // each of the three spatial dimensions.
3374  // The algorithm is a "sum of uniform deviates algorithm"
3375  // which may be found in Abramowitz and Stegun,
3376  // "Handbook of Mathematical Functions", pg 952.
3377  for (randnum=0.0, j=0; j<12; j++)
3378  {
3379  randnum += vel_random.uniform();
3380  }
3381 
3382  randnum -= 6.0;
3383 
3384  v[i].x = randnum*kbToverM;
3385 
3386  for (randnum=0.0, j=0; j<12; j++)
3387  {
3388  randnum += vel_random.uniform();
3389  }
3390 
3391  randnum -= 6.0;
3392 
3393  v[i].y = randnum*kbToverM;
3394 
3395  for (randnum=0.0, j=0; j<12; j++)
3396  {
3397  randnum += vel_random.uniform();
3398  }
3399 
3400  randnum -= 6.0;
3401 
3402  v[i].z = randnum*kbToverM;
3403  }
3404 
3405  if ( simParams->drudeOn ) for (i=0; i<totalAtoms; i++) {
3406  if ( structure->is_drude(i) ) {
3407  v[i] = v[structure->get_mother_atom(i)]; // zero is good enough
3408  }
3409  }
3410 }
3411 /* END OF FUNCTION random_velocities */
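The per-component sampling above uses the classic sum-of-twelve-uniforms approximation noted in the comment: twelve U(0,1) draws sum to mean 6 and variance 1, so subtracting 6 yields an approximately standard normal deviate, which the loop then scales by sqrt(kB*T/m) to obtain a Maxwell-Boltzmann velocity component. The following small standalone check of that approximation is hypothetical illustration only, not NAMD code.

#include <cstdio>
#include <random>

int main() {
  std::mt19937 gen(12345);
  std::uniform_real_distribution<double> uni(0.0, 1.0);

  const int N = 100000;
  double sum = 0, sumsq = 0;
  for (int i = 0; i < N; ++i) {
    double r = -6.0;
    for (int j = 0; j < 12; ++j) r += uni(gen);   // sum of 12 uniforms, shifted
    sum += r; sumsq += r * r;
  }
  double mean = sum / N;
  double var = sumsq / N - mean * mean;
  // Both values come out close to 0 and 1, i.e. approximately standard normal.
  printf("mean ~ %.3f  variance ~ %.3f\n", mean, var);
  return 0;
}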
3412 
3413 //**********************************************************************
3414 //
3415 // FUNCTION remove_com_motion
3416 //
3417 // INPUTS:
3418 // vel - Array of initial velocity vectors
3419 //
3420 // This function removes the center of mass motion from a molecule.
3421 //
3422 //**********************************************************************
3423 
3424 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
3425 {
3426  Vector mv(0,0,0); // Sum of (mv)_i
3427  BigReal totalMass=0; // Total mass of system
3428  int i; // Loop counter
3429 
3430  // Loop through and compute the net momentum
3431  for (i=0; i<n; i++)
3432  {
3433  BigReal mass = structure->atommass(i);
3434  mv += mass * vel[i];
3435  totalMass += mass;
3436  }
3437 
3438  mv /= totalMass;
3439 
3440  iout << iINFO << "REMOVING COM VELOCITY "
3441  << ( PDBVELFACTOR * mv ) << "\n" << endi;
3442 
3443  for (i=0; i<n; i++) { vel[i] -= mv; }
3444 
3445 }
3446 /* END OF FUNCTION remove_com_motion */
3447 
3448 #if USE_TOPOMAP
3449 
3450 //Specifically designed for BGL and other 3D torus architectures
3451 //Partition Torus and Patch grid together using recursive bisection.
3452 int WorkDistrib::assignPatchesTopoGridRecBisection() {
3453 
3454  PatchMap *patchMap = PatchMap::Object();
3455  int *assignedNode = new int[patchMap->numPatches()];
3456  int numNodes = Node::Object()->numNodes();
3458  if(simParams->simulateInitialMapping) {
3459  numNodes = simParams->simulatedPEs;
3460  }
3461 
3462  int usedNodes = numNodes;
3463  CkPrintf("assignPatchesTopoGridRecBisection\n");
3464  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
3465  usedNodes -= 1;
3466  if ( simParams->noPatchesOnOne && numNodes > 2 )
3467  usedNodes -= 1;
3468  }
3469  RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());
3470 
3471  int xsize = 0, ysize = 0, zsize = 0;
3472 
3473  // Right now assumes a T*** (e.g. TXYZ) mapping
3474  TopoManager tmgr;
3475  xsize = tmgr.getDimNX();
3476  ysize = tmgr.getDimNY();
3477  zsize = tmgr.getDimNZ();
3478 
3479  //Fix to not assign patches to processor 0
3480  int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
3481 
3482  delete [] assignedNode;
3483 
3484  return rc;
3485 }
3486 #endif
3487 
3488 
3489 #if defined(NAMD_MIC)
3490  extern void mic_hostDeviceLDB();
3491  extern void mic_contributeHostDeviceLDB(int idLen, int * id);
3492  extern void mic_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2);
3493 #endif
3494 
3496  #if defined(NAMD_MIC)
3497  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3498  wdProxy.initHostDeviceLDB();
3499  #endif
3500 }
3501 
3503  #if defined(NAMD_MIC)
3504  mic_hostDeviceLDB();
3505  #endif
3506 }
3507 
3508 void WorkDistrib::send_contributeHostDeviceLDB(int peSetLen, int * peSet) {
3509  #if defined(NAMD_MIC)
3510  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3511  wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
3512  #endif
3513 }
3514 
3515 void WorkDistrib::contributeHostDeviceLDB(int peSetLen, int * peSet) {
3516  #if defined(NAMD_MIC)
3517  mic_contributeHostDeviceLDB(peSetLen, peSet);
3518  #endif
3519 }
3520 
3521 void WorkDistrib::send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
3522  #if defined(NAMD_MIC)
3523  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3524  wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3525  #endif
3526 }
3527 
3528 void WorkDistrib::setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
3529  #if defined(NAMD_MIC)
3530  mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3531  #endif
3532 }
3533 
3534 
3535 #include "WorkDistrib.def.h"
3536 
Molecule stores the structural information for the system.
Definition: Molecule.h:175
NAMD_HOST_DEVICE int b_p() const
Definition: Lattice.h:290
void movePatch(PatchID, NodeID)
Definition: PatchMgr.C:84
int getGBISPhase(void)
Definition: Compute.h:66
LocalWorkMsg *const localWorkMsg
Definition: Compute.h:46
void patchMapInit(void)
Definition: WorkDistrib.C:1226
void recvComputeMapChanges(ComputeMapChangeMsg *)
Definition: WorkDistrib.C:375
int allocateCids()
Definition: ComputeMap.C:141
int atomsInGroup
Definition: Hydrogen.h:19
WaterModel watmodel
#define MACHINE_PROGRESS
int gridsize_c(void) const
Definition: PatchMap.h:66
uint32 id
Definition: NamdTypes.h:156
Compute * compute
Definition: WorkDistrib.h:33
char * patchMapData
Definition: WorkDistrib.C:1096
char newNumPartitions(ComputeID cid)
Definition: ComputeMap.h:141
void unpack(char *buf)
Definition: PatchMap.C:365
Charge charge
Definition: NamdTypes.h:78
virtual void doWork()
Definition: Compute.C:120
void reorder(Elem *a, int n)
Definition: Random.h:231
HydrogenGroup hydrogenGroup
Definition: Molecule.h:673
void enqueueCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3220
void sendComputeMap(void)
Definition: WorkDistrib.C:1199
void enqueueWorkB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3201
void read_binary_file(const char *fname, Vector *data, int n)
Definition: NamdOneTools.C:52
int numNodes()
Definition: Node.h:192
void enqueueCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3223
void assignBaseNode(PatchID, NodeID)
Definition: PatchMap.C:472
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:273
void newCid(int pid, int cid)
Definition: PatchMap.C:512
constexpr int getWaterModelGroupSize(const WaterModel &watmodel)
Definition: common.h:228
void enqueueSelfB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3174
int coord(int pe, int dim)
Definition: WorkDistrib.C:1960
int gridsize_a(void) const
Definition: PatchMap.h:64
#define order
Definition: PmeRealSpace.C:235
Definition: Random.h:37
int numPatches(void) const
Definition: PatchMap.h:59
static NAMD_HOST_DEVICE int offset_b(int i)
Definition: Lattice.h:264
void enqueueWorkC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3214
pe_sortop_bit_reversed(int *r)
Definition: WorkDistrib.C:145
Bool is_drude(int)
void reinitAtoms(const char *basename=0)
Definition: WorkDistrib.C:1073
int operator<(const nodesort &o) const
Definition: WorkDistrib.C:1622
void enqueueThole(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3110
void enqueueWorkA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3185
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:137
void createHomePatches(void)
Definition: WorkDistrib.C:977
Compute * compute
Definition: WorkDistrib.h:27
void NAMD_bug(const char *err_msg)
Definition: common.C:195
static NAMD_HOST_DEVICE int offset_c(int i)
Definition: Lattice.h:265
void enqueueImpropers(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3104
BigReal min_c(int pid) const
Definition: PatchMap.h:95
static int eventMachineProgress
Definition: WorkDistrib.C:101
int32 migrationGroupSize
Definition: NamdTypes.h:220
AtomID atomID
Definition: Hydrogen.h:16
Real langevin_param(int atomnum) const
Definition: Molecule.h:1383
Index atomvdwtype(int anum) const
Definition: Molecule.h:1127
BigReal langevinDamping
int numaway_c(void) const
Definition: PatchMap.h:70
void enqueueLCPO(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3143
int16 vdwType
Definition: NamdTypes.h:79
void sendMovePatches()
Definition: PatchMgr.C:110
void pack(MOStream *msg)
Definition: ComputeMap.C:61
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
Definition: PatchMap.C:579
int index_b(int pid) const
Definition: PatchMap.h:87
Bool staticAtomAssignment
pe_sortop_coord_y(ScaledPosition *s)
Definition: WorkDistrib.C:267
int Bool
Definition: common.h:142
Bool replicaUniformPatchGrids
bool operator()(int a, int b) const
Definition: WorkDistrib.C:260
#define COMPUTEMAPTAG
Definition: common.h:184
int priority(void)
Definition: Compute.h:65
void finishCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3234
uint8 partition
Definition: NamdTypes.h:80
~WorkDistrib(void)
Definition: WorkDistrib.C:122
BigReal x
Definition: Vector.h:74
uint8 hydrogenGroupSize
Definition: NamdTypes.h:88
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:104
int numaway_a(void) const
Definition: PatchMap.h:68
NAMD_HOST_DEVICE int a_p() const
Definition: Lattice.h:289
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285
int numAtoms
Definition: Molecule.h:585
void setNewNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:120
virtual void finishPatch(int)
Definition: Compute.C:124
NAMD_HOST_DEVICE Position nearest(Position data, ScaledPosition ref) const
Definition: Lattice.h:95
void NAMD_die(const char *err_msg)
Definition: common.C:147
PDB * pdb
Definition: Node.h:183
void enqueueCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3226
static int * peDiffuseOrderingIndex
Definition: WorkDistrib.h:116
BigReal min_a(int pid) const
Definition: PatchMap.h:91
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286
Real atommass(int anum) const
Definition: Molecule.h:1107
static int compare_bit_reversed(int a, int b)
Definition: WorkDistrib.C:125
ConfigList * configList
Definition: Node.h:182
void enqueueWorkA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3180
#define BUFSIZE
Definition: Communicate.h:15
Bool pressureProfileEwaldOn
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:74
static int * peDiffuseOrdering
Definition: WorkDistrib.h:115
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:171
int basenode(int pid) const
Definition: PatchMap.h:117
int index_c(int pid) const
Definition: PatchMap.h:88
unsigned char get_fep_type(int anum) const
Definition: Molecule.h:1432
static int peOrderingInit
Definition: WorkDistrib.h:114
void sendPatchMap(void)
Definition: WorkDistrib.C:1099
void find_extremes(const Lattice &, BigReal frac=1.0)
Definition: PDB.C:437
void saveComputeMapChanges(int, CkGroupID)
Definition: WorkDistrib.C:357
int32 status
Atom status bit fields defined in structures.h.
Definition: NamdTypes.h:217
void finishCUDAPatch(FinishWorkMsg *msg)
Definition: WorkDistrib.C:3230
void savePatchMap(PatchMapMsg *msg)
Definition: WorkDistrib.C:1135
void cuda_initialize()
Definition: DeviceCUDA.C:27
void topo_getargs(char **argv)
Definition: WorkDistrib.C:91
static int * peCompactOrderingIndex
Definition: WorkDistrib.h:118
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:181
uint8 nonbondedGroupSize
Definition: NamdTypes.h:81
patch_sortop_curve_a(PatchMap *m)
Definition: WorkDistrib.C:2036
Atom * getAtoms() const
Definition: Molecule.h:519
int myid()
Definition: Node.h:191
BigReal initialTemp
Real rigidBondLength
Definition: NamdTypes.h:221
int pressureProfileAtomTypes
#define simParams
Definition: Output.C:129
int atomsInMigrationGroup
Definition: Hydrogen.h:29
static int randtopo
Definition: WorkDistrib.C:85
void newPid(ComputeID cid, int pid, int trans=13)
Definition: ComputeMap.C:196
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3521
ScaledPosition * spos
Definition: WorkDistrib.C:258
static NAMD_HOST_DEVICE int offset_a(int i)
Definition: Lattice.h:263
iterator begin(void)
Definition: ResizeArray.h:36
BigReal max_b(int pid) const
Definition: PatchMap.h:94
void mapComputes(void)
Definition: WorkDistrib.C:2395
char * data
Definition: ConfigList.h:48
void enqueueSelfA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3153
static ComputeMap * Object()
Definition: ComputeMap.h:89
uint8 isWater
Definition: NamdTypes.h:89
static void build_ordering(void *)
Definition: WorkDistrib.C:87
BigReal y
Definition: Vector.h:74
int numaway_b(void) const
Definition: PatchMap.h:69
Mass mass
Definition: NamdTypes.h:208
void distributeHomePatches(void)
Definition: WorkDistrib.C:1051
void assignNode(PatchID, NodeID)
Definition: PatchMap.C:465
patch_sortop_curve_c(PatchMap *m)
Definition: WorkDistrib.C:2078
void mic_initialize()
#define PDBVELFACTOR
Definition: common.h:57
Bool pairInteractionSelf
BigReal max_c(int pid) const
Definition: PatchMap.h:96
int type()
Definition: Compute.h:48
void enqueueSelfB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3169
BigReal patchDimension
int gridsize_b(void) const
Definition: PatchMap.h:65
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
void initHostDeviceLDB()
Definition: WorkDistrib.C:3502
MOStream * put(char data)
Definition: MStream.h:112
static void send_initHostDeviceLDB()
Definition: WorkDistrib.C:3495
FullAtomList * createAtomLists(const char *basename=0)
Definition: WorkDistrib.C:652
#define LonepairAtom
Definition: structures.h:22
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
pe_sortop_coord_x(ScaledPosition *s)
Definition: WorkDistrib.C:259
void enqueueDihedrals(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3098
int node(int pid) const
Definition: PatchMap.h:114
Bool is_atom_fixed(int atomnum) const
Definition: Molecule.h:1499
void finishMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3247
StringList * find(const char *name) const
Definition: ConfigList.C:341
void contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3515
void pack(char *buf, int size)
Definition: PatchMap.C:328
#define RIGID_NONE
Definition: SimParameters.h:79
int isOutputProcessor(int pe)
uint32 atomFixed
Definition: NamdTypes.h:158
void doneSaveComputeMap(CkReductionMsg *)
Definition: WorkDistrib.C:428
void unpack(MIStream *msg)
Definition: ComputeMap.C:68
Real rigid_bond_length(int atomnum) const
Definition: Molecule.h:1545
double recipMass
Definition: NamdTypes.h:203
int64_t int64
Definition: common.h:39
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
void get_all_positions(Vector *)
Definition: PDB.C:365
BigReal min_b(int pid) const
Definition: PatchMap.h:93
int32 PatchID
Definition: NamdTypes.h:277
pe_sortop_topo(TopoManagerWrapper &t, int *d)
Definition: WorkDistrib.C:1968
WaterModel
Definition: common.h:221
Molecule * molecule
Definition: Node.h:179
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1946
void enqueueWorkB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3206
#define TRUE
Definition: common.h:128
int get_mother_atom(int)
const ComputeID cid
Definition: Compute.h:43
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
bool operator()(int a, int b) const
Definition: WorkDistrib.C:268
Bool noPatchesOnOutputPEs
int fixpe(int pe)
Definition: WorkDistrib.C:1750
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1981
ScaledPosition * spos
Definition: WorkDistrib.C:266
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
Definition: SortAtoms.C:135
double BigReal
Definition: common.h:123
Transform transform
Definition: NamdTypes.h:219
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:2079
#define PDBVELINVFACTOR
Definition: common.h:58
void assignNodeToPatch(void)
Definition: WorkDistrib.C:1444
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
Definition: PatchMap.C:634
NodeID newNode(ComputeID cid)
Definition: ComputeMap.h:116