NAMD
WorkDistrib.C
Go to the documentation of this file.
1 
7 /*****************************************************************************
8  * $Source: /home/cvs/namd/cvsroot/namd2/src/WorkDistrib.C,v $
9  * $Author: jim $
10  * $Date: 2017/03/30 20:06:17 $
11  * $Revision: 1.1291 $
12  *****************************************************************************/
13 
20 #include <stdio.h>
21 
22 #include "ComputeOneFourNbTholes.h"
23 #include "InfoStream.h"
24 #include "Communicate.h"
25 #include "ProcessorPrivate.h"
26 #include "BOCgroup.h"
27 #include "WorkDistrib.decl.h"
28 #include "WorkDistrib.h"
29 #include "Lattice.h"
30 #include "ComputeMsmMsa.h" // needed for MsmMsaData definition
31 #include "main.decl.h"
32 #include "main.h"
33 #include "Node.h"
34 #include "PatchMgr.h"
35 #include "PatchMap.inl"
36 #include "NamdTypes.h"
37 #include "PDB.h"
38 #include "SimParameters.h"
39 #include "Parameters.h"
40 #include "Molecule.h"
41 #include "NamdOneTools.h"
42 #include "Compute.h"
43 #include "ComputeMap.h"
44 #include "RecBisection.h"
45 #include "Random.h"
46 #include "varsizemsg.h"
47 #include "ProxyMgr.h"
48 #include "Priorities.h"
49 #include "SortAtoms.h"
50 #include <algorithm>
51 #include "TopoManager.h"
52 #include "ComputePmeCUDAMgr.h"
53 #include "ConfigList.h"
54 
55 #include "DeviceCUDA.h"
56 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
57 #ifdef WIN32
58 #define __thread __declspec(thread)
59 #endif
60 extern __thread DeviceCUDA *deviceCUDA;
61 #endif
62 
63 //#define DEBUGM
64 #define MIN_DEBUG_LEVEL 2
65 #include "Debug.h"
66 #ifdef MEM_OPT_VERSION
67 extern int isOutputProcessor(int);
68 #endif
69 class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
70 {
71 public:
72 
75  int *newNodes;
77 
78 // VARSIZE_DECL(ComputeMapChangeMsg);
79 };
80 
81 /*
82 VARSIZE_MSG(ComputeMapChangeMsg,
83  VARSIZE_ARRAY(newNodes);
84 )
85 */
86 
87 static int randtopo;
88 
89 static void build_ordering(void *) {
91 }
92 
93 void topo_getargs(char **argv) {
94  randtopo = CmiGetArgFlag(argv, "+randtopo");
95  if ( CkMyPe() >= CkNumPes() ) return;
96 #if CCD_COND_FN_EXISTS
97  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
98 #else
99  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
100 #endif
101 }
102 
104 
105 //======================================================================
106 // Public functions
107 //----------------------------------------------------------------------
109 {
110  CkpvAccess(BOCclass_group).workDistrib = thisgroup;
111  patchMapArrived = false;
112  computeMapArrived = false;
113 
114 #if CMK_SMP
115 #define MACHINE_PROGRESS
116 #else
117 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
118  if ( CkMyNodeSize() > 1 ) NAMD_bug("CkMyNodeSize() > 1 for non-smp build");
119  eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
120 #endif
121 }
122 
123 //----------------------------------------------------------------------
125 { }
126 
// Three-way comparison of two integers by their bit-reversed value:
// returns a negative, zero, or positive result as a orders before,
// equal to, or after b when bits are read least-significant first.
static int compare_bit_reversed(int a, int b) {
  const int diff = a ^ b;
  int bit = 1;
  // Scan upward from the low end for the first bit where a and b differ.
  while ( diff && ! (diff & bit) ) {
    bit <<= 1;
  }
  // Whichever argument has that lowest differing bit set orders later.
  return (a & bit) - (b & bit);
}
135 
// Strict-weak-ordering predicate matching compare_bit_reversed():
// true iff a orders strictly before b under bit-reversed comparison.
static bool less_than_bit_reversed(int a, int b) {
  const int diff = a ^ b;
  int bit = 1;
  // Locate the lowest bit where the two values disagree.
  while ( diff && ! (diff & bit) ) {
    bit <<= 1;
  }
  // a < b exactly when they differ and b owns the differing bit.
  return diff && (b & bit);
}
144 
148  inline bool operator() (int a, int b) const {
149  int c = compare_bit_reversed(CmiRankOf(a),CmiRankOf(b));
150  if ( c < 0 ) return true;
151  if ( c > 0 ) return false;
153  rankInPhysOfNode[CmiNodeOf(a)],rankInPhysOfNode[CmiNodeOf(b)]);
154  if ( c < 0 ) return true;
155  if ( c > 0 ) return false;
156  c = compare_bit_reversed(CmiPhysicalNodeID(a),CmiPhysicalNodeID(b));
157  return ( c < 0 );
158  }
159 };
160 
166 
167 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
168 extern void cuda_initialize();
169 #endif
170 
171 void mic_initialize();
172 
174  //CkPrintf("WorkDistrib::peOrderingReady on %d\n", CkMyPe());
175 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
176  cuda_initialize();
177 #endif
178 #ifdef NAMD_MIC
179  mic_initialize();
180 #endif
181 }
182 
184 
185  CmiMemLock();
186  if ( ! peOrderingInit ) {
187  //CkPrintf("WorkDistrib::buildNodeAwarePeOrdering on pe %d\n", CkMyPe());
188 
189  const int numPhys = CmiNumPhysicalNodes();
190  const int numNode = CmiNumNodes();
191  const int numPe = CmiNumPes();
192  ResizeArray<int> numNodeInPhys(numPhys);
193  ResizeArray<int> rankInPhysOfNode(numNode);
194 
195  peDiffuseOrdering = new int[numPe];
196  peDiffuseOrderingIndex = new int[numPe];
197  peCompactOrdering = new int[numPe];
198  peCompactOrderingIndex = new int[numPe];
199 
200  int k = 0;
201  for ( int ph=0; ph<numPhys; ++ph ) {
202  int *pes, npes;
203  CmiGetPesOnPhysicalNode(ph, &pes, &npes);
204  for ( int i=0; i<npes; ++i, ++k ) {
205  peCompactOrdering[k] = pes[i];
206  }
207  numNodeInPhys[ph] = 0;
208  for ( int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
209  rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
210  numNodeInPhys[ph] += 1;
211  }
212  }
213 
214  if ( randtopo && numPhys > 2 ) {
215  if ( ! CkMyNode() ) {
216  iout << iWARN << "RANDOMIZING PHYSICAL NODE ORDERING\n" << endi;
217  }
218  ResizeArray<int> randPhysOrder(numPhys);
219  for ( int j=0; j<numPhys; ++j ) {
220  randPhysOrder[j] = j;
221  }
222  Random(314159265).reorder(randPhysOrder.begin()+2, numPhys-2);
223  for ( int j=0, k=0; j<numPhys; ++j ) {
224  const int ph = randPhysOrder[j];
225  int *pes, npes;
226  CmiGetPesOnPhysicalNode(ph, &pes, &npes);
227  for ( int i=0; i<npes; ++i, ++k ) {
228  peCompactOrdering[k] = pes[i];
229  }
230  }
231  }
232 
233  for ( int i=0; i<numPe; ++i ) {
234  peDiffuseOrdering[i] = i;
235  }
236  std::sort(peDiffuseOrdering, peDiffuseOrdering+numPe,
237  pe_sortop_bit_reversed(rankInPhysOfNode.begin()));
238 
239  for ( int i=0; i<numPe; ++i ) {
242  }
243 
244  if ( 0 && CmiMyNode() == 0 ) for ( int i=0; i<numPe; ++i ) {
245  CkPrintf("order %5d %5d %5d %5d %5d\n", i,
250  }
251 
252  peOrderingInit = 1;
253  }
254  CmiMemUnlock();
255  peOrderingReady();
256 
257 }
258 
262  inline bool operator() (int a, int b) const {
263  return ( spos[a].x < spos[b].x );
264  }
265 };
266 
270  inline bool operator() (int a, int b) const {
271  return ( spos[a].y < spos[b].y );
272  }
273 };
274 
// NOTE(review): the "static void recursive_bisect_coord(" opener is on a
// line elided from this extraction; the parameter list below belongs to it.
// Recursively assigns the pes in pe_begin[0 .. x_len*y_len-1] to the cells
// of the [x_begin,x_end) x [y_begin,y_end) sub-grid: the longer axis is
// halved each step and the pes are partitioned by coordinate, so that
// result[x*ydim + y] ends up holding a spatially nearby pe.
276  int x_begin, int x_end, int y_begin, int y_end,
277  int *pe_begin, ScaledPosition *coord,
278  int *result, int ydim
279  ) {
280  int x_len = x_end - x_begin;
281  int y_len = y_end - y_begin;
282  if ( x_len == 1 && y_len == 1 ) {
283  // done, now put this pe in the right place
284  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
285  coord[*pe_begin].x, coord[*pe_begin].y);
286  result[x_begin*ydim + y_begin] = *pe_begin;
287  return;
288  }
289  int *pe_end = pe_begin + x_len * y_len;
290  if ( x_len >= y_len ) {
// Split along x: sort pes by x coordinate so the lower half of the pes
// recurses into the lower half of the grid cells.
291  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
292  int x_split = x_begin + x_len / 2;
293  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
294  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
295  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
296  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
297  } else {
// Split along y; symmetric to the x case above.
298  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
299  int y_split = y_begin + y_len / 2;
300  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
301  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
302  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
303  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
304  }
305 }
306 
307 void WorkDistrib::sortPmePes(int *pmepes, int xdim, int ydim) {
308  int numpes = CkNumPes();
309  ResizeArray<int> count(numpes);
310  ResizeArray<ScaledPosition> sumPos(numpes);
311  ResizeArray<ScaledPosition> avgPos(numpes);
312  for ( int i=0; i<numpes; ++i ) {
313  count[i] = 0;
314  sumPos[i] = 0;
315  avgPos[i] = 0;
316  }
317  PatchMap *patchMap = PatchMap::Object();
318  for ( int i=0, npatches=patchMap->numPatches(); i<npatches; ++i ) {
319  int pe = patchMap->node(i);
320  count[pe] += 1;
321  sumPos[pe] += patchMap->center(i);
322  }
323  const int npmepes = xdim*ydim;
324  ResizeArray<int> sortpes(npmepes);
325  for ( int i=0; i<npmepes; ++i ) {
326  int pe = sortpes[i] = pmepes[i];
327  int cnt = count[pe];
328  ScaledPosition sum = sumPos[pe];
329  if ( cnt == 0 ) {
330  // average over node
331  int node = CkNodeOf(pe);
332  int nsize = CkNodeSize(node);
333  int pe2 = CkNodeFirst(node);
334  for ( int j=0; j<nsize; ++j, ++pe2 ) {
335  cnt += count[pe2];
336  sum += sumPos[pe2];
337  }
338  }
339  if ( cnt == 0 ) {
340  // average over physical node
341  int node = CmiPhysicalNodeID(pe);
342  int nsize, *nlist;
343  CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
344  for ( int j=0; j<nsize; ++j ) {
345  int pe2 = nlist[j];
346  cnt += count[pe2];
347  sum += sumPos[pe2];
348  }
349  }
350  if ( cnt ) {
351  avgPos[pe] = sum / cnt;
352  }
353  }
354  recursive_bisect_coord(0, xdim, 0, ydim, sortpes.begin(), avgPos.begin(), pmepes, ydim);
355 }
356 
357 
358 //----------------------------------------------------------------------
359 void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
360 {
361  saveComputeMapReturnEP = ep;
362  saveComputeMapReturnChareID = chareID;
363 
364  ComputeMapChangeMsg *mapMsg = new (0, 0, 0) ComputeMapChangeMsg;
365  CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
366 
367 /*
368  // store the latest compute map
369  SimParameters *simParams = Node::Object()->simParameters;
370  if (simParams->storeComputeMap) {
371  computeMap->saveComputeMap(simParams->computeMapFilename);
372  CkPrintf("ComputeMap has been stored in %s.\n", simParams->computeMapFilename);
373  }
374 */
375 }
376 
378 
// NOTE(review): the recvComputeMapChanges(ComputeMapChangeMsg *) signature
// is on a line elided from this extraction; the body below belongs to it.
// Pe 0 streams the new per-compute node assignments and partition counts
// to all other nodes; the first rank of each receiving node applies them
// to its local ComputeMap.  Every pe then joins a reduction that fires
// doneSaveComputeMap() on pe 0.
379  delete msg;
380 
381  ComputeMap *computeMap = ComputeMap::Object();
382 
383  int i;
384  int nc = computeMap->numComputes();
385 
386  if ( ! CkMyPe() ) { // send
387  // CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
// Local MOStream reuses the name of the (already deleted) msg parameter.
388  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
// The compute count nc is written before each array and once at the end
// so the receiver can verify stream framing (checks 1-3 below).
389  msg->put(nc);
390  for (i=0; i<nc; i++) {
391  int data = computeMap->newNode(i);
392  msg->put(data);
393  }
394  msg->put(nc);
395  for (i=0; i<nc; i++) {
396  char data = computeMap->newNumPartitions(i);
397  msg->put(data);
398  }
399  msg->put(nc);
400  msg->end();
401  delete msg;
402  // CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
403  } else if ( ! CkMyRank() ) { // receive
404  // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
405  MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
406  msg->get(i);
407  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 1 failed\n");
408  for (i=0; i<nc; i++) {
409  int data;
410  msg->get(data);
411  computeMap->setNewNode(i,data);
412  }
413  msg->get(i);
414  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 2 failed\n");
415  for (i=0; i<nc; i++) {
416  char data;
417  msg->get(data);
418  computeMap->setNewNumPartitions(i,data);
419  }
420  msg->get(i);
421  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 3 failed\n");
422  delete msg;
423  // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
424  }
425 
// All pes (including non-rank-0 SMP ranks that skipped both branches)
// contribute to the reduction that signals completion on pe 0.
426  CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
427  contribute(0, NULL, CkReduction::random, cb);
428 }
429 
430 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
431  delete msg;
432 
433  CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
434 }
435 
436 #ifdef MEM_OPT_VERSION
437 //All basic info already exists for each atom inside the FullAtomList because
438 //it is loaded when reading the binary per-atom file. This function will fill
439 //the info regarding transform, nonbondedGroupSize etc. Refer to
440 //WorkDistrib::createAtomLists
441 void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
442  PatchMap *patchMap = PatchMap::Object();
443 
444  ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
445  0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
446  0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));
447 
448  int n = alist.size();
449  FullAtom *a = alist.begin();
450 /*
451  //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei
452 //Modifications for alchemical fep
453  Bool alchFepOn = params->alchFepOn;
454  Bool alchThermIntOn = params->alchThermIntOn;
455 //fepe
456  Bool lesOn = params->lesOn;
457  Bool pairInteractionOn = params->pairInteractionOn;
458 
459  Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
460 */
462  const Lattice lattice = params->lattice;
463  Transform mother_transform;
464  for(int j=0; j < n; j++)
465  {
466  int aid = a[j].id;
467  a[j].nonbondedGroupSize = 0; // must be set based on coordinates
468 
469  // a[j].fixedPosition = a[j].position; ParallelIOMgr stores ref coord here.
470 
471  if ( a[j].migrationGroupSize ) {
472  if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
473  Position pos = a[j].position;
474  int mgs = a[j].migrationGroupSize;
475  int c = 1;
476 
477  for ( int k=a[j].hydrogenGroupSize; k<mgs;
478  k+=a[j+k].hydrogenGroupSize ) {
479  pos += a[j+k].position;
480  ++c;
481  }
482 
483  pos *= 1./c;
484  mother_transform = a[j].transform; // should be 0,0,0
485  pos = lattice.nearest(pos,center,&mother_transform);
486  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
487  a[j].transform = mother_transform;
488  } else {
489  a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
490  mother_transform = a[j].transform;
491  }
492  } else {
493  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
494  a[j].transform = mother_transform;
495  }
496 
497 /*
498  //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei
499 //Modifications for alchemical fep
500  if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
501  a[j].partition = molecule->get_fep_type(aid);
502  }
503  else {
504  a[j].partition = 0;
505  }
506 //fepe
507 */
508  a[j].partition = 0;
509 
510  //set langevinParams based on atom status
511  if(params->langevinOn) {
512  BigReal bval = params->langevinDamping;
513  if(!params->langevinHydrogen &&
514  ((a[j].status & HydrogenAtom)!=0)) {
515  bval = 0;
516  }else if ((a[j].status & LonepairAtom)!=0) {
517  bval = 0;
518  }else if ((a[j].status & DrudeAtom)!=0) {
519  bval = params->langevinDamping;
520  }
521  a[j].langevinParam = bval;
522  }
523 
524  }
525 
526  // DH - promote water checking to take place before simulation
527  // Flag the Settle water molecules.
528  int size, allfixed;
529  const WaterModel watmodel = params->watmodel;
530  const int wathgsize = getWaterModelGroupSize(watmodel);
531  const int fixedAtomsOn = params->fixedAtomsOn;
532  const int useSettle = params->useSettle;
533  for(int j=0; j < n; j+=size) {
534  size = a[j].hydrogenGroupSize;
535  if ( ! size ) {
536  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
537  }
538  allfixed = 1;
539  for (int k = 0; k < size; ++k ) {
540  allfixed = ( allfixed && (a[j+k].atomFixed) );
541  }
542  for (int k = 0; k < size; ++k ) {
543  a[j+k].groupFixed = allfixed ? 1 : 0;
544  }
545  // DH - Set isWater flag for SETTLE water molecules,
546  // based on same condition as used in HomePatch::buildRattleList():
547  // The rigidBondLength of the FIRST water atom is set greater than 0.
548  if (a[j].rigidBondLength > 0) {
549  if (size != wathgsize) {
550  char errmsg[256];
551  sprintf(errmsg,
552  "Water molecule starting with atom %d contains %d atoms "
553  "but the specified water model requires %d atoms.\n",
554  a[j].id+1, size, wathgsize
555  );
556  NAMD_die(errmsg);
557  }
558  int anyfixed = 0;
559  for (int k = 0; k < size; k++) {
560  anyfixed += ( fixedAtomsOn && a[j+k].atomFixed );
561  }
562  if (useSettle && !anyfixed) {
563  for (int k = 0; k < size; k++) {
564  a[j+k].isWater = 1;
565  }
566  }
567  }
568  }
569 
570  if ( params->outputPatchDetails ) {
571  int patchId = pid;
572  int numAtomsInPatch = n;
573  int numFixedAtomsInPatch = 0;
574  int numAtomsInFixedGroupsInPatch = 0;
575  for(int j=0; j < n; j++) {
576  numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
577  numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
578  }
579  iout << "PATCH_DETAILS:"
580  << " on proc " << CkMyPe()
581  << " patch " << patchId
582  << " atoms " << numAtomsInPatch
583  << " fixed_atoms " << numFixedAtomsInPatch
584  << " fixed_groups " << numAtomsInFixedGroupsInPatch
585  << "\n" << endi;
586  }
587 
588 }
589 
590 void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
591 {
592  int i, j; // Loop counter
593  BigReal kbT; // Boltzman constant * Temp
594  BigReal randnum; // Random number from -6.0 to 6.0
595  BigReal kbToverM; // sqrt(Kb*Temp/Mass)
597  Bool lesOn = simParams->lesOn;
598  Random vel_random(simParams->randomSeed);
599  int lesReduceTemp = lesOn && simParams->lesReduceTemp;
600  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
601 
602  kbT = Temp*BOLTZMANN;
603  int count=0;
604  int totalAtoms = inAtoms.size();
605  for(i=0;i<totalAtoms;i++)
606  {
607  Real atomMs=inAtoms[i].mass;
608 
609  if (atomMs <= 0.) {
610  kbToverM = 0.;
611  } else {
612  /*
613  * lesOn is not supported in MEM_OPT_VERSION, so the original assignment
614  * is simplified. --Chao Mei
615  */
616  //kbToverM = sqrt(kbT *
617  //( lesOn && structure->get_fep_type(aid) ? tempFactor : 1.0 ) /
618  // atomMs );
619  kbToverM = sqrt(kbT * 1.0 / atomMs);
620  }
621  for (randnum=0.0, j=0; j<12; j++)
622  {
623  randnum += vel_random.uniform();
624  }
625 
626  randnum -= 6.0;
627 
628  inAtoms[i].velocity.x = randnum*kbToverM;
629 
630  for (randnum=0.0, j=0; j<12; j++)
631  {
632  randnum += vel_random.uniform();
633  }
634 
635  randnum -= 6.0;
636 
637  inAtoms[i].velocity.y = randnum*kbToverM;
638 
639  for (randnum=0.0, j=0; j<12; j++)
640  {
641  randnum += vel_random.uniform();
642  }
643 
644  randnum -= 6.0;
645 
646  inAtoms[i].velocity.z = randnum*kbToverM;
647  }
648 }
649 #endif
650 
651 //----------------------------------------------------------------------
652 // This should only be called on node 0.
653 //----------------------------------------------------------------------
655 {
656  int i;
657  StringList *current; // Pointer used to retrieve configuration items
658  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
659  Node *node = nd.ckLocalBranch();
660  PatchMap *patchMap = PatchMap::Object();
661  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
662  PatchMgr *patchMgr = pm.ckLocalBranch();
663  SimParameters *params = node->simParameters;
664  Molecule *molecule = node->molecule;
665  PDB *pdb = node->pdb;
666 
667  int numPatches = patchMap->numPatches();
668  int numAtoms = pdb->num_atoms();
669 
670  Vector *positions = new Position[numAtoms];
671  Vector *velocities = new Velocity[numAtoms];
672 
673  if ( basename ) {
674  if ( params->binaryOutput ) {
675  read_binary_file((std::string(basename)+".coor").c_str(), positions, numAtoms);
676  read_binary_file((std::string(basename)+".vel").c_str(), velocities, numAtoms);
677  } else {
678  PDB coorpdb((std::string(basename)+".coor").c_str());
679  if ( coorpdb.num_atoms() != numAtoms ) {
680  NAMD_die("Incorrect atom count in coordinate pdb file");
681  }
682  coorpdb.get_all_positions(positions);
683  velocities_from_PDB((std::string(basename)+".vel").c_str(), velocities, numAtoms);
684  }
685  } else {
686  pdb->get_all_positions(positions);
687 
688  if ( params->initialTemp < 0.0 ) {
689  Bool binvels=FALSE;
690 
691  // Reading the veolcities from a PDB
692  current = node->configList->find("velocities");
693 
694  if (current == NULL) {
695  current = node->configList->find("binvelocities");
696  binvels = TRUE;
697  }
698 
699  if (!binvels) {
700  velocities_from_PDB(current->data, velocities, numAtoms);
701  }
702  else {
703  velocities_from_binfile(current->data, velocities, numAtoms);
704  }
705  }
706  else {
707  // Random velocities for a given temperature
708  random_velocities(params->initialTemp, molecule, velocities, numAtoms);
709  }
710  }
711 
712  // If COMMotion == no, remove center of mass motion
713  if (!(params->comMove)) {
714  remove_com_motion(velocities, molecule, numAtoms);
715  }
716 
717  FullAtomList *atoms = new FullAtomList[numPatches];
718 
719  const Lattice lattice = params->lattice;
720 
721  if ( params->staticAtomAssignment ) {
722  FullAtomList sortAtoms;
723  for ( i=0; i < numAtoms; i++ ) {
724  HydrogenGroupID &h = molecule->hydrogenGroup[i];
725  if ( ! h.isMP ) continue;
726  FullAtom a;
727  a.id = i;
729  a.position = positions[h.atomID];
730  sortAtoms.add(a);
731  }
732  int *order = new int[sortAtoms.size()];
733  for ( i=0; i < sortAtoms.size(); i++ ) {
734  order[i] = i;
735  }
736  int *breaks = new int[numPatches];
737  sortAtomsForPatches(order,breaks,sortAtoms.begin(),
738  sortAtoms.size(),numAtoms,
739  patchMap->gridsize_c(),
740  patchMap->gridsize_b(),
741  patchMap->gridsize_a());
742 
743  i = 0;
744  for ( int pid = 0; pid < numPatches; ++pid ) {
745  int iend = breaks[pid];
746  for ( ; i<iend; ++i ) {
747  FullAtom &sa = sortAtoms[order[i]];
748  int mgs = sa.migrationGroupSize;
749 /*
750 CkPrintf("patch %d (%d %d %d) has group %d atom %d size %d at %.2f %.2f %.2f\n",
751  pid, patchMap->index_a(pid), patchMap->index_b(pid),
752  patchMap->index_c(pid), order[i], sa.id, mgs,
753  sa.position.x, sa.position.y, sa.position.z);
754 */
755  for ( int k=0; k<mgs; ++k ) {
756  HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
757  int aid = h.atomID;
758  FullAtom a;
759  a.id = aid;
760  a.position = positions[aid];
761  a.velocity = velocities[aid];
762  a.vdwType = molecule->atomvdwtype(aid);
763  a.status = molecule->getAtoms()[aid].status;
764  a.langevinParam = molecule->langevin_param(aid);
765  a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
767  if(params->rigidBonds != RIGID_NONE) {
768  a.rigidBondLength = molecule->rigid_bond_length(aid);
769  }else{
770  a.rigidBondLength = 0.0;
771  }
772  atoms[pid].add(a);
773  }
774  }
775 CkPrintf("patch %d (%d %d %d) has %d atoms\n",
776  pid, patchMap->index_a(pid), patchMap->index_b(pid),
777  patchMap->index_c(pid), atoms[pid].size());
778  }
779  delete [] order;
780  delete [] breaks;
781  } else
782  {
783  // split atoms into patches based on migration group and position
784  int aid, pid=0;
785  for(i=0; i < numAtoms; i++)
786  {
787  // Assign atoms to patches without splitting hydrogen groups.
788  // We know that the hydrogenGroup array is sorted with group parents
789  // listed first. Thus, only change the pid if an atom is a group parent.
790  HydrogenGroupID &h = molecule->hydrogenGroup[i];
791  aid = h.atomID;
792  FullAtom a;
793  a.id = aid;
794  a.position = positions[aid];
795  a.velocity = velocities[aid];
796  a.vdwType = molecule->atomvdwtype(aid);
797  a.status = molecule->getAtoms()[aid].status;
798  a.langevinParam = molecule->langevin_param(aid);
799  a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
801  if(params->rigidBonds != RIGID_NONE) {
802  a.rigidBondLength = molecule->rigid_bond_length(aid);
803  }else{
804  a.rigidBondLength = 0.0;
805  }
806  if (h.isMP) {
807  pid = patchMap->assignToPatch(positions[aid],lattice);
808  } // else: don't change pid
809  atoms[pid].add(a);
810  }
811  }
812 
813  delete [] positions;
814  delete [] velocities;
815 
816  for(i=0; i < numPatches; i++)
817  {
818  ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
819  0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
820  0.5*(patchMap->min_c(i)+patchMap->max_c(i)));
821 
822  int n = atoms[i].size();
823  FullAtom *a = atoms[i].begin();
824  int j;
825 //Modifications for alchemical fep
826  Bool alchOn = params->alchOn;
827 //fepe
828  Bool lesOn = params->lesOn;
829 
830  Bool pairInteractionOn = params->pairInteractionOn;
831 
832  Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
833 
834  Transform mother_transform;
835  for(j=0; j < n; j++)
836  {
837  int aid = a[j].id;
838 
839  a[j].nonbondedGroupSize = 0; // must be set based on coordinates
840 
841  a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
842  a[j].fixedPosition = a[j].position;
843 
844  if ( a[j].migrationGroupSize ) {
845  if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
846  Position pos = a[j].position;
847  int mgs = a[j].migrationGroupSize;
848  int c = 1;
849  for ( int k=a[j].hydrogenGroupSize; k<mgs;
850  k+=a[j+k].hydrogenGroupSize ) {
851  pos += a[j+k].position;
852  ++c;
853  }
854  pos *= 1./c;
855  mother_transform = a[j].transform; // should be 0,0,0
856  pos = lattice.nearest(pos,center,&mother_transform);
857  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
858  a[j].transform = mother_transform;
859  } else {
860  a[j].position = lattice.nearest(
861  a[j].position, center, &(a[j].transform));
862  mother_transform = a[j].transform;
863  }
864  } else {
865  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
866  a[j].transform = mother_transform;
867  }
868 
869  a[j].mass = molecule->atommass(aid);
870  // Using double precision division for reciprocal mass.
871  a[j].recipMass = ( a[j].mass > 0 ? (1. / a[j].mass) : 0 );
872  a[j].charge = molecule->atomcharge(aid);
873  if ( params->LJPMEOn ) {
874  const int index = a[j].vdwType;
875  const float scaling = params->nonbondedScaling;
876  float sigma, epsilon, sigma14, epsilon14;
877  molecule->params->get_vdw_params(&sigma, &epsilon, &sigma14, &epsilon14, index);
878  a[j].dispcoef = 2*sigma*sigma*sigma*sqrt(scaling * epsilon);
879  }
880  else {
881  a[j].dispcoef = 0;
882  }
883 
884 //Modifications for alchemical fep
885  if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
886  a[j].partition = molecule->get_fep_type(aid);
887  }
888  else {
889  a[j].partition = 0;
890  }
891 //fepe
892 
893  }
894 
895 #if 0
896  int size, allfixed, k;
897  for(j=0; j < n; j+=size) {
898  size = a[j].hydrogenGroupSize;
899  if ( ! size ) {
900  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
901  }
902  allfixed = 1;
903  for ( k = 0; k < size; ++k ) {
904  allfixed = ( allfixed && (a[j+k].atomFixed) );
905  }
906  for ( k = 0; k < size; ++k ) {
907  a[j+k].groupFixed = allfixed ? 1 : 0;
908  }
909  // DH - set isWater flag
910  // based on same condition as used for determining Settle:
911  // The rigidBondLength of the FIRST water atom is set greater than 0.
912  if (a[j].rigidBondLength > 0) {
913  for (k = 0; k < size; k++) {
914  a[j+k].isWater = 1;
915  }
916  }
917  }
918 #else
919  // DH - promote water checking to take place before simulation
920  // Flag the Settle water molecules.
921  int size, allfixed;
922  const WaterModel watmodel = params->watmodel;
923  const int wathgsize = getWaterModelGroupSize(watmodel);
924  const int fixedAtomsOn = params->fixedAtomsOn;
925  const int useSettle = params->useSettle;
926  for(int j=0; j < n; j+=size) {
927  size = a[j].hydrogenGroupSize;
928  if ( ! size ) {
929  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
930  }
931  allfixed = 1;
932  for (int k = 0; k < size; ++k ) {
933  allfixed = ( allfixed && (a[j+k].atomFixed) );
934  }
935  for (int k = 0; k < size; ++k ) {
936  a[j+k].groupFixed = allfixed ? 1 : 0;
937  }
938  // DH - Set isWater flag for SETTLE water molecules,
939  // based on same condition as used in HomePatch::buildRattleList():
940  // The rigidBondLength of the FIRST water atom is set greater than 0.
941  if (a[j].rigidBondLength > 0) {
942  if (size != wathgsize) {
943  char errmsg[256];
944  sprintf(errmsg,
945  "Water molecule starting with atom %d contains %d atoms "
946  "but the specified water model requires %d atoms.\n",
947  a[j].id+1, size, wathgsize
948  );
949  NAMD_die(errmsg);
950  }
951  int anyfixed = 0;
952  for (int k = 0; k < size; k++) {
953  anyfixed += ( fixedAtomsOn && a[j+k].atomFixed );
954  }
955  if (useSettle && !anyfixed) {
956  for (int k = 0; k < size; k++) {
957  a[j+k].isWater = 1;
958  }
959  }
960  }
961  }
962 #endif
963 
964  if ( params->outputPatchDetails ) {
965  int patchId = i;
966  int numAtomsInPatch = n;
967  int numFixedAtomsInPatch = 0;
968  int numAtomsInFixedGroupsInPatch = 0;
969  for(j=0; j < n; j++) {
970  numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
971  numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
972  }
973  iout << "PATCH_DETAILS:"
974  << " patch " << patchId
975  << " atoms " << numAtomsInPatch
976  << " fixed_atoms " << numFixedAtomsInPatch
977  << " fixed_groups " << numAtomsInFixedGroupsInPatch
978  << "\n" << endi;
979  }
980  }
981 
982  return atoms;
983 
984 }
985 
986 //----------------------------------------------------------------------
987 // This should only be called on node 0.
988 //----------------------------------------------------------------------
990 {
991  int i;
992  PatchMap *patchMap = PatchMap::Object();
993  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
994  PatchMgr *patchMgr = pm.ckLocalBranch();
995 
996  int numPatches = patchMap->numPatches();
997 
998  FullAtomList *atoms = createAtomLists();
999 
1000 #ifdef MEM_OPT_VERSION
1001 /* CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1002  Node *node = nd.ckLocalBranch();
1003  node->molecule->delEachAtomSigs();
1004  node->molecule->delMassChargeSpace();
1005 */
1006 #endif
1007 
1008  int maxAtoms = -1;
1009  int maxPatch = -1;
1010  for(i=0; i < numPatches; i++) {
1011  int numAtoms = atoms[i].size();
1012  if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
1013  }
1014  iout << iINFO << "LARGEST PATCH (" << maxPatch <<
1015  ") HAS " << maxAtoms << " ATOMS\n" << endi;
1016 
1017 #ifdef SHOW_HISTOGRAM_HGROUP_SIZES
1018  // histogram hydrogen group sizes
1019  int hgroupsize[9] = { 0 };
1020  int numhgroups = 0;
1021  int numwaters = 0;
1022  int maxhgroupsize = 0;
1023  for (i = 0; i < numPatches; i++) {
1024  const FullAtomList& a = atoms[i];
1025  int numAtoms = a.size();
1026  int hgs = 1; // init to something sane
1027  for (int j = 0; j < numAtoms; j += hgs) {
1028  hgs = a[j].hydrogenGroupSize;
1029  int histndx = ( hgs > 8 ? 8 : hgs );
1030  hgroupsize[ histndx ]++;
1031  numhgroups++;
1032  if (a[j].isWater) numwaters++;
1033  if (maxhgroupsize < hgs) maxhgroupsize = hgs;
1034  }
1035  }
1036  int hgslast = ( maxhgroupsize > 8 ? 8 : maxhgroupsize );
1037  printf("Number of hydrogen groups: %7d\n", numhgroups);
1038  printf("Number of settle water molecules: %7d\n", numwaters);
1039  printf("Number of remaining hydrogen groups: %7d\n", numhgroups - numwaters);
1040  printf("Largest hydrogen group size: %7d\n", maxhgroupsize);
1041  printf("Histogram of hydrogen group sizes:\n");
1042  int hgstotal = 0;
1043  for (i = 0; i <= hgslast; i++) {
1044  printf(" size %d count %d\n", i, hgroupsize[i]);
1045  hgstotal += hgroupsize[i];
1046  }
1047  printf("Checksum over hydrogen group sizes: %7d\n", hgstotal);
1048 #endif
1049 
1050  for(i=0; i < numPatches; i++)
1051  {
1052  if ( ! ( i % 100 ) )
1053  {
1054  DebugM(3,"Created " << i << " patches so far.\n");
1055  }
1056 
1057  patchMgr->createHomePatch(i,atoms[i]);
1058  }
1059 
1060  delete [] atoms;
1061 }
1062 
1064  // ref BOC
1065  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1066  Node *node = nd.ckLocalBranch();
1067  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1068  PatchMgr *patchMgr = pm.ckLocalBranch();
1069  // ref singleton
1070  PatchMap *patchMap = PatchMap::Object();
1071 
1072  // Move patches to the proper node
1073  for(int i=0;i < patchMap->numPatches(); i++)
1074  {
1075  if (patchMap->node(i) != node->myid() )
1076  {
1077  DebugM(3,"patchMgr->movePatch("
1078  << i << "," << patchMap->node(i) << ")\n");
1079  patchMgr->movePatch(i,patchMap->node(i));
1080  }
1081  }
1082  patchMgr->sendMovePatches();
1083 }
1084 
1085 void WorkDistrib::reinitAtoms(const char *basename) {
1086 
1087  PatchMap *patchMap = PatchMap::Object();
1088  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
1089  PatchMgr *patchMgr = pm.ckLocalBranch();
1090 
1091  int numPatches = patchMap->numPatches();
1092 
1093  FullAtomList *atoms = createAtomLists(basename);
1094 
1095  for(int i=0; i < numPatches; i++) {
1096  patchMgr->sendAtoms(i,atoms[i]);
1097  }
1098 
1099  delete [] atoms;
1100 
1101 }
1102 
1103 
1104 //----------------------------------------------------------------------
1105 
1106 class PatchMapMsg : public CMessage_PatchMapMsg {
1107  public:
1109 };
1110 
1112 {
1113  if ( CkNumPes() == 1 ) {
1114  patchMapArrived = true;
1115  return;
1116  }
1117 
1118  //Automatically enable spanning tree
1119  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1120  Node *node = nd.ckLocalBranch();
1121  SimParameters *params = node->simParameters;
1122  if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
1123 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1124  || CkNumPes() > CkNumNodes()
1125  ) && ( CkNumNodes() > 1
1126 #endif
1127  ) && params->isSendSpanningTreeUnset() )
1129 
1130 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1131  if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1132  && params->isRecvSpanningTreeUnset() )
1134 #endif
1135 
1136  int size = PatchMap::Object()->packSize();
1137 
1138  PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;
1139 
1140  PatchMap::Object()->pack(mapMsg->patchMapData, size);
1141 
1142  CProxy_WorkDistrib workProxy(thisgroup);
1143  workProxy[0].savePatchMap(mapMsg);
1144 }
1145 
1146 // saveMaps() is called when the map message is received
1148 {
1149  // Use a resend to forward messages before processing. Otherwise the
1150  // map distribution is slow on many CPUs. We need to use a tree
1151  // rather than a broadcast because some implementations of broadcast
1152  // generate a copy of the message on the sender for each recipient.
1153  // This is because MPI doesn't allow re-use of an outstanding buffer.
1154 
1155  if ( CkMyRank() ) patchMapArrived = true;
1156 
1157  if ( patchMapArrived && CkMyPe() ) {
1159 
1160  //Automatically enable spanning tree
1161  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1162  Node *node = nd.ckLocalBranch();
1163  SimParameters *params = node->simParameters;
1164  if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
1165 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1166  || CkNumPes() > CkNumNodes()
1167  ) && ( CkNumNodes() > 1
1168 #endif
1169  ) && params->isSendSpanningTreeUnset() )
1171 
1172 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1173  if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1174  && params->isRecvSpanningTreeUnset() )
1176 #endif
1177  }
1178 
1179  if ( patchMapArrived ) {
1180  if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
1181  ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
1182  } else {
1183  delete msg;
1184  }
1185  return;
1186  }
1187 
1188  patchMapArrived = true;
1189 
1190  int self = CkMyNode();
1191  int range_begin = 0;
1192  int range_end = CkNumNodes();
1193  while ( self != range_begin ) {
1194  ++range_begin;
1195  int split = range_begin + ( range_end - range_begin ) / 2;
1196  if ( self < split ) { range_end = split; }
1197  else { range_begin = split; }
1198  }
1199  int send_near = self + 1;
1200  int send_far = send_near + ( range_end - send_near ) / 2;
1201 
1202  int pids[3];
1203  int npid = 0;
1204  if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
1205  if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
1206  pids[npid++] = CkMyPe(); // always send the message to ourselves
1207  CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
1208 }
1209 
1210 
1212 {
1213  if ( CkMyRank() ) return;
1214 
1215  if ( CkNumNodes() == 1 ) {
1216  computeMapArrived = true;
1218  return;
1219  }
1220 
1221  if ( ! CkMyPe() ) { // send
1222  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
1223  ComputeMap::Object()->pack(msg);
1224  msg->end();
1225  delete msg;
1226  } else if ( ! CkMyRank() ) { // receive
1227  MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
1228  ComputeMap::Object()->unpack(msg);
1229  delete msg;
1230  }
1231 
1232  computeMapArrived = true;
1234 }
1235 
1236 
1237 //----------------------------------------------------------------------
1239 {
1240  PatchMap *patchMap = PatchMap::Object();
1241  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1242  Node *node = nd.ckLocalBranch();
1243  SimParameters *params = node->simParameters;
1244  Lattice lattice = params->lattice;
1245 
1246  BigReal patchSize = params->patchDimension;
1247 
1248 #ifndef MEM_OPT_VERSION
1249  const int totalAtoms = node->pdb->num_atoms();
1250 #else
1251  const int totalAtoms = node->molecule->numAtoms;
1252 #endif
1253 
1254  ScaledPosition xmin, xmax;
1255 
1256  double maxNumPatches = 1.e9; // need to adjust fractional values
1257  if ( params->minAtomsPerPatch > 0 )
1258  maxNumPatches = totalAtoms / params->minAtomsPerPatch;
1259 
1260  DebugM(3,"Mapping patches\n");
1261  if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
1262  xmin = 0.; xmax = 0.;
1263  }
1264  else if ( params->FMAOn || params->MSMOn || params->FMMOn ) {
1265  // Need to use full box for FMA to match NAMD 1.X results.
1266 #if 0
1267  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
1268  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
1269  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
1270 #endif
1271  node->pdb->find_extremes(lattice);
1272  node->pdb->get_extremes(xmin, xmax);
1273 #if 0
1274  printf("+++ center=%.4f %.4f %.4f\n",
1275  lattice.origin().x, lattice.origin().y, lattice.origin().z);
1276  printf("+++ xmin=%.4f xmax=%.4f\n", xmin.x, xmax.x);
1277  printf("+++ ymin=%.4f ymax=%.4f\n", xmin.y, xmax.y);
1278  printf("+++ zmin=%.4f zmax=%.4f\n", xmin.z, xmax.z);
1279 #endif
1280  // Otherwise, this allows a small number of stray atoms.
1281  }
1282  else {
1283 #if 0
1284  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
1285  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
1286  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
1287 #endif
1288  node->pdb->find_extremes(lattice, 1.0);
1289  node->pdb->get_extremes(xmin, xmax);
1290  iout << iINFO << "ORIGINAL ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
1291  double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
1292  if ( frac < 0.9 ) { frac = 0.9; }
1293  node->pdb->find_extremes(lattice, frac);
1294  node->pdb->get_extremes(xmin, xmax);
1295  iout << iINFO << "ADJUSTED ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
1296  }
1297 
1298 #if 0
1299  BigReal origin_shift;
1300  origin_shift = lattice.a_r() * lattice.origin();
1301  xmin.x -= origin_shift;
1302  xmax.x -= origin_shift;
1303  origin_shift = lattice.b_r() * lattice.origin();
1304  xmin.y -= origin_shift;
1305  xmax.y -= origin_shift;
1306  origin_shift = lattice.c_r() * lattice.origin();
1307  xmin.z -= origin_shift;
1308  xmax.z -= origin_shift;
1309 #endif
1310 
1311  // SimParameters default is -1 for unset
1312  int twoAwayX = params->twoAwayX;
1313  int twoAwayY = params->twoAwayY;
1314  int twoAwayZ = params->twoAwayZ;
1315 
1316  // SASA implementation is not compatible with twoAway patches
1317  if (params->LCPOOn && patchSize < 32.4) {
1318  if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
1319  iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
1320  }
1321  twoAwayX = twoAwayY = twoAwayZ = 0;
1322  }
1323 
1324  // if you think you know what you're doing go right ahead
1325  if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
1326  if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
1327  if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
1328  if ( params->maxPatches > 0 ) {
1329  maxNumPatches = params->maxPatches;
1330  iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
1331  maxNumPatches << "\n" << endi;
1332  }
1333 
1334  int numpes = CkNumPes();
1335  SimParameters *simparam = Node::Object()->simParameters;
1336  if(simparam->simulateInitialMapping) {
1337  numpes = simparam->simulatedPEs;
1338  delete [] patchMap->nPatchesOnNode;
1339  patchMap->nPatchesOnNode = new int[numpes];
1340  memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));
1341  }
1342 
1343 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1344  // for CUDA be sure there are more patches than pes
1345 
1346  int numPatches = patchMap->sizeGrid(
1347  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1348  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1349  if ( numPatches < numpes && twoAwayX < 0 ) {
1350  twoAwayX = 1;
1351  numPatches = patchMap->sizeGrid(
1352  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1353  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1354  }
1355  if ( numPatches < numpes && twoAwayY < 0 ) {
1356  twoAwayY = 1;
1357  numPatches = patchMap->sizeGrid(
1358  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1359  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1360  }
1361  if ( numPatches < numpes && twoAwayZ < 0 ) {
1362  twoAwayZ = 1;
1363  numPatches = patchMap->sizeGrid(
1364  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1365  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1366  }
1367  if ( numPatches < numpes ) {
1368  #if defined(NAMD_MIC)
1369  NAMD_die("MIC-enabled NAMD requires at least one patch per thread.");
1370  #else
1371  if (simparam->CUDASOAintegrateMode) {
1372  NAMD_die("GPU-resident NAMD requires at least one patch per thread.");
1373  }
1374  #endif
1375  }
1376  if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
1377  int exactFit = numPatches - numPatches % numpes;
1378  int newNumPatches = patchMap->sizeGrid(
1379  xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
1380  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1381  if ( newNumPatches == exactFit ) {
1382  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1383  maxNumPatches = exactFit;
1384  }
1385  }
1386 
1387  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1388  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1389  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1390 
1391 #else
1392 
1393  int availPes = numpes;
1394  if ( params->noPatchesOnZero && numpes > 1 ) {
1395  availPes -= 1;
1396  if(params->noPatchesOnOne && numpes > 2)
1397  availPes -= 1;
1398  }
1399 #ifdef MEM_OPT_VERSION
1400  if(params->noPatchesOnOutputPEs && numpes - params->numoutputprocs >2)
1401  {
1402  availPes -= params->numoutputprocs;
1403  if ( params->noPatchesOnZero && numpes > 1 && isOutputProcessor(0)){
1404  availPes++;
1405  }
1406  if ( params->noPatchesOnOne && numpes > 2 && isOutputProcessor(1)){
1407  availPes++;
1408  }
1409  }
1410 #endif
1411 
1412  int numPatches = patchMap->sizeGrid(
1413  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1414  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1415  if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
1416  ) && twoAwayZ < 0 ) {
1417  twoAwayZ = 0;
1418  numPatches = patchMap->sizeGrid(
1419  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1420  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1421  }
1422  if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
1423  ) && twoAwayY < 0 ) {
1424  twoAwayY = 0;
1425  numPatches = patchMap->sizeGrid(
1426  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1427  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1428  }
1429  if ( ( numPatches > availPes || numPatches > maxNumPatches
1430  ) && twoAwayX < 0 ) {
1431  twoAwayX = 0;
1432  numPatches = patchMap->sizeGrid(
1433  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1434  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1435  }
1436  if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
1437  int newNumPatches = patchMap->sizeGrid(
1438  xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
1439  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1440  if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
1441  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1442  maxNumPatches = availPes;
1443  }
1444  }
1445 
1446  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1447  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1448  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1449 
1450 #endif
1451 
1452 }
1453 
1454 
1455 //----------------------------------------------------------------------
1457 {
1458  PatchMap *patchMap = PatchMap::Object();
1459  int nNodes = Node::Object()->numNodes();
1460  SimParameters *simparam = Node::Object()->simParameters;
1461  if(simparam->simulateInitialMapping) {
1462  nNodes = simparam->simulatedPEs;
1463  }
1464 
1465 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP
1466  TopoManager tmgr;
1467  int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
1468  if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
1469  CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
1470  }
1471  else
1472 #endif
1473  assignPatchesSpaceFillingCurve();
1474 
1475  int *nAtoms = new int[nNodes];
1476  int numAtoms=0;
1477  int i;
1478  for(i=0; i < nNodes; i++)
1479  nAtoms[i] = 0;
1480 
1481  for(i=0; i < patchMap->numPatches(); i++)
1482  {
1483  // iout << iINFO << "Patch " << i << " has "
1484  // << patchMap->patch(i)->getNumAtoms() << " atoms and "
1485  // << patchMap->patch(i)->getNumAtoms() *
1486  // patchMap->patch(i)->getNumAtoms()
1487  // << " pairs.\n" << endi;
1488 #ifdef MEM_OPT_VERSION
1489  numAtoms += patchMap->numAtoms(i);
1490  nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);
1491 #else
1492  if (patchMap->patch(i)) {
1493  numAtoms += patchMap->patch(i)->getNumAtoms();
1494  nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();
1495  }
1496 #endif
1497  }
1498 
1499  if ( numAtoms != Node::Object()->molecule->numAtoms ) {
1500  for(i=0; i < nNodes; i++)
1501  iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
1502  iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
1503  NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
1504  }
1505 
1506  delete [] nAtoms;
1507 
1508  // PatchMap::Object()->printPatchMap();
1509 }
1510 
1511 //----------------------------------------------------------------------
1512 // void WorkDistrib::assignPatchesSlices()
1513 // {
1514 // int pid;
1515 // int assignedNode = 0;
1516 // PatchMap *patchMap = PatchMap::Object();
1517 // Node *node = CLocalBranch(Node, CkpvAccess(BOCclass_group).node);
1518 
1519 // int *numAtoms = new int[node->numNodes()];
1520 // for (int i=0; i<node->numNodes(); i++) {
1521 // numAtoms[i] = 0;
1522 // }
1523 
1524 // // Assign patch to node with least atoms assigned.
1525 // for(pid=0; pid < patchMap->numPatches(); pid++) {
1526 // assignedNode = 0;
1527 // for (i=1; i < node->numNodes(); i++) {
1528 // if (numAtoms[i] < numAtoms[assignedNode]) assignedNode = i;
1529 // }
1530 // patchMap->assignNode(pid, assignedNode);
1531 // numAtoms[assignedNode] += patchMap->patch(pid)->getNumAtoms();
1532 
1533 // /*
1534 // iout << iINFO << "Patch (" << pid << ") has "
1535 // << patchMap->patch(pid)->getNumAtoms()
1536 // << " atoms: Assigned to Node(" << assignedNode << ")\n"
1537 // << endi;
1538 // */
1539 // }
1540 
1541 // delete[] numAtoms;
1542 // }
1543 
1544 //----------------------------------------------------------------------
1545 void WorkDistrib::assignPatchesToLowestLoadNode()
1546 {
1547  int pid;
1548  int assignedNode = 0;
1549  PatchMap *patchMap = PatchMap::Object();
1550  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1551  Node *node = nd.ckLocalBranch();
1553  int ncpus = node->numNodes();
1554  if(simParams->simulateInitialMapping) {
1555  ncpus = simParams->simulatedPEs;
1556  }
1557 
1558  int *load = new int[ncpus];
1559  int *assignedNodes = new int[patchMap->numPatches()];
1560  for (int i=0; i<ncpus; i++) {
1561  load[i] = 0;
1562  }
1563  CkPrintf("assignPatchesToLowestLoadNode\n");
1564  int defaultNode = 0;
1565  if ( simParams->noPatchesOnZero && ncpus > 1 ){
1566  defaultNode = 1;
1567  if( simParams->noPatchesOnOne && ncpus > 2)
1568  defaultNode = 2;
1569  }
1570  // Assign patch to node with least atoms assigned.
1571  for(pid=0; pid < patchMap->numPatches(); pid++) {
1572  assignedNode = defaultNode;
1573  for (int i=assignedNode + 1; i < ncpus; i++) {
1574  if (load[i] < load[assignedNode]) assignedNode = i;
1575  }
1576  assignedNodes[pid] = assignedNode;
1577 #ifdef MEM_OPT_VERSION
1578  load[assignedNode] += patchMap->numAtoms(pid) + 1;
1579 #else
1580  load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
1581 #endif
1582  }
1583 
1584  delete[] load;
1585  sortNodesAndAssign(assignedNodes);
1586  delete[] assignedNodes;
1587 }
1588 
1589 //----------------------------------------------------------------------
1590 void WorkDistrib::assignPatchesBitReversal()
1591 {
1592  int pid;
1593  PatchMap *patchMap = PatchMap::Object();
1594  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1595  Node *node = nd.ckLocalBranch();
1596  SimParameters *simparam = node->simParameters;
1597 
1598  int ncpus = node->numNodes();
1599  if(simparam->simulateInitialMapping) {
1600  ncpus = simparam->simulatedPEs;
1601  }
1602  int npatches = patchMap->numPatches();
1603  if ( ncpus <= npatches )
1604  NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");
1605 
1606  SortableResizeArray<int> seq(ncpus);
1607  // avoid using node 0 (reverse of 0 is 0 so start at 1)
1608  for ( int i = 1; i < ncpus; ++i ) {
1609  seq[i-1] = peDiffuseOrdering[i];
1610  }
1611 
1612  // extract and sort patch locations
1613  sortNodesAndAssign(seq.begin());
1614  if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
1615 }
1616 
1617 //----------------------------------------------------------------------
1618 struct nodesort {
1619  int node;
1620  int a_total;
1621  int b_total;
1622  int c_total;
1624  nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }
1625  int operator==(const nodesort &o) const {
1626  float a1 = ((float)a_total)/((float)npatches);
1627  float a2 = ((float)o.a_total)/((float)o.npatches);
1628  float b1 = ((float)b_total)/((float)npatches);
1629  float b2 = ((float)o.b_total)/((float)o.npatches);
1630  float c1 = ((float)c_total)/((float)npatches);
1631  float c2 = ((float)o.c_total)/((float)o.npatches);
1632  return ((a1 == a2) && (b1 == b2) && (c1 == c2));
1633  }
1634  int operator<(const nodesort &o) const {
1635  float a1 = ((float)a_total)/((float)npatches);
1636  float a2 = ((float)o.a_total)/((float)o.npatches);
1637  float b1 = ((float)b_total)/((float)npatches);
1638  float b2 = ((float)o.b_total)/((float)o.npatches);
1639  float c1 = ((float)c_total)/((float)npatches);
1640  float c2 = ((float)o.c_total)/((float)o.npatches);
1641  return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
1642  ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
1643  }
1644 };
1645 
1646 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
1647  // if baseNodes is zero (default) then set both nodes and basenodes
1648  // if baseNodes is nonzero then this is a second call to set basenodes only
1649  int i, pid;
1650  PatchMap *patchMap = PatchMap::Object();
1651  int npatches = patchMap->numPatches();
1652  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1653  Node *node = nd.ckLocalBranch();
1654  int nnodes = node->numNodes();
1655  SimParameters *simparam = node->simParameters;
1656  if(simparam->simulateInitialMapping) {
1657  nnodes = simparam->simulatedPEs;
1658  }
1659 
1660  ResizeArray<nodesort> allnodes(nnodes);
1661  for ( i=0; i < nnodes; ++i ) {
1662  allnodes[i].node = i;
1663  }
1664  for ( pid=0; pid<npatches; ++pid ) {
1665  // iout << pid << " " << assignedNode[pid] << "\n" << endi;
1666  allnodes[assignedNode[pid]].npatches++;
1667  allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
1668  allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
1669  allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
1670  }
1671  SortableResizeArray<nodesort> usednodes(nnodes);
1672  usednodes.resize(0);
1673  for ( i=0; i < nnodes; ++i ) {
1674  if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
1675  }
1676  usednodes.sort();
1677  int i2 = 0;
1678  for ( i=0; i < nnodes; ++i ) {
1679  int pe = peCompactOrdering[i];
1680  if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
1681  }
1682 
1683  for ( pid=0; pid<npatches; ++pid ) {
1684  // iout << pid << " " << allnodes[assignedNode[pid]].node << "\n" << endi;
1685  if ( ! baseNodes ) {
1686  patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);
1687  }
1688  patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);
1689  }
1690 }
1691 
1692 //----------------------------------------------------------------------
1693 void WorkDistrib::assignPatchesRoundRobin()
1694 {
1695  int pid;
1696  PatchMap *patchMap = PatchMap::Object();
1697  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1698  Node *node = nd.ckLocalBranch();
1699  SimParameters *simparam = node->simParameters;
1700  int ncpus = node->numNodes();
1701  if(simparam->simulateInitialMapping) {
1702  ncpus = simparam->simulatedPEs;
1703  }
1704  int *assignedNode = new int[patchMap->numPatches()];
1705 
1706  for(pid=0; pid < patchMap->numPatches(); pid++) {
1707  assignedNode[pid] = pid % ncpus;
1708  }
1709 
1710  sortNodesAndAssign(assignedNode);
1711  delete [] assignedNode;
1712 }
1713 
1714 //----------------------------------------------------------------------
1715 void WorkDistrib::assignPatchesRecursiveBisection()
1716 {
1717  PatchMap *patchMap = PatchMap::Object();
1718  int *assignedNode = new int[patchMap->numPatches()];
1720  int numNodes = Node::Object()->numNodes();
1721  if(simParams->simulateInitialMapping) {
1722  numNodes = simParams->simulatedPEs;
1723  }
1724 
1725  int usedNodes = numNodes;
1726  int unusedNodes = 0;
1727  CkPrintf("assignPatchesRecursiveBisection\n");
1728  if ( simParams->noPatchesOnZero && numNodes > 1 ){
1729  usedNodes -= 1;
1730  if(simParams->noPatchesOnOne && numNodes > 2)
1731  usedNodes -= 1;
1732  }
1733  unusedNodes = numNodes - usedNodes;
1734  RecBisection recBisec(usedNodes,PatchMap::Object());
1735  if ( recBisec.partition(assignedNode) ) {
1736  if ( unusedNodes !=0 ) {
1737  for ( int i=0; i<patchMap->numPatches(); ++i ) {
1738  assignedNode[i] += unusedNodes;
1739  }
1740  }
1741  sortNodesAndAssign(assignedNode);
1742  delete [] assignedNode;
1743  } else {
1744  //free the array here since a same array will be allocated
1745  //in assignPatchesToLowestLoadNode function, thus reducting
1746  //temporary memory usage
1747  delete [] assignedNode;
1748 
1749  iout << iWARN
1750  << "WorkDistrib: Recursive bisection fails, "
1751  << "invoking space-filling curve algorithm\n";
1752  assignPatchesSpaceFillingCurve();
1753  }
1754 }
1755 
1756 // class to re-order dimensions in decreasing size
1758  TopoManager tmgr;
1762  int fixpe(int pe) { // compensate for lame fallback topology information
1763  return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
1764  }
1766 #if CMK_BLUEGENEQ
1767  int na=tmgr.getDimNA();
1768  int nb=tmgr.getDimNB();
1769  int nc=tmgr.getDimNC();
1770  int nd=tmgr.getDimND();
1771  int ne=tmgr.getDimNE();
1772 #else
1773  int na=tmgr.getDimNX();
1774  int nb=tmgr.getDimNY();
1775  int nc=tmgr.getDimNZ();
1776  int nd=1;
1777  int ne=1;
1778 #endif
1779  ResizeArray<int> a_flags(na);
1780  ResizeArray<int> b_flags(nb);
1781  ResizeArray<int> c_flags(nc);
1782  ResizeArray<int> d_flags(nd);
1783  ResizeArray<int> e_flags(ne);
1784  for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
1785  for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
1786  for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
1787  for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
1788  for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
1789  int npes = CkNumPes();
1790  for ( int pe=0; pe<npes; ++pe ) {
1791  int a,b,c,d,e,t;
1792 #if CMK_BLUEGENEQ
1793  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1794 #else
1795  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1796  d=0; e=0;
1797 #endif
1798  if ( a < 0 || a >= na ) NAMD_bug("inconsistent torus topology!");
1799  if ( b < 0 || b >= nb ) NAMD_bug("inconsistent torus topology!");
1800  if ( c < 0 || c >= nc ) NAMD_bug("inconsistent torus topology!");
1801  if ( d < 0 || d >= nd ) NAMD_bug("inconsistent torus topology!");
1802  if ( e < 0 || e >= ne ) NAMD_bug("inconsistent torus topology!");
1803  a_flags[a] = 1;
1804  b_flags[b] = 1;
1805  c_flags[c] = 1;
1806  d_flags[d] = 1;
1807  e_flags[e] = 1;
1808  }
1809  iout << iINFO << "TORUS A SIZE " << na << " USING";
1810  for ( int i=0; i<na; ++i ) { if ( a_flags[i] ) iout << " " << i; }
1811  iout << "\n" << endi;
1812  iout << iINFO << "TORUS B SIZE " << nb << " USING";
1813  for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
1814  iout << "\n" << endi;
1815  iout << iINFO << "TORUS C SIZE " << nc << " USING";
1816  for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
1817  iout << "\n" << endi;
1818 #if CMK_BLUEGENEQ
1819  iout << iINFO << "TORUS D SIZE " << nd << " USING";
1820  for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
1821  iout << "\n" << endi;
1822  iout << iINFO << "TORUS E SIZE " << ne << " USING";
1823  for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
1824  iout << "\n" << endi;
1825 #endif
1826  // find most compact representation of our subset
1827  a_rot = b_rot = c_rot = d_rot = e_rot = 0;
1828  a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
1829 #if CMK_BLUEGENEQ
1830  if ( tmgr.absA(na) == 0 ) // torus
1831 #else
1832  if ( tmgr.absX(na) == 0 ) // torus
1833 #endif
1834  for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
1835  if ( a_flags[i%na] ) gapstart = i+1;
1836  else if ( i - gapstart >= gaplen ) {
1837  a_rot = 2*na-i-1; gaplen = i - gapstart;
1838  }
1839  }
1840 #if CMK_BLUEGENEQ
1841  if ( tmgr.absB(nb) == 0 ) // torus
1842 #else
1843  if ( tmgr.absY(nb) == 0 ) // torus
1844 #endif
1845  for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
1846  if ( b_flags[i%nb] ) gapstart = i+1;
1847  else if ( i - gapstart >= gaplen ) {
1848  b_rot = 2*nb-i-1; gaplen = i - gapstart;
1849  }
1850  }
1851 #if CMK_BLUEGENEQ
1852  if ( tmgr.absC(nc) == 0 ) // torus
1853 #else
1854  if ( tmgr.absZ(nc) == 0 ) // torus
1855 #endif
1856  for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
1857  if ( c_flags[i%nc] ) gapstart = i+1;
1858  else if ( i - gapstart >= gaplen ) {
1859  c_rot = 2*nc-i-1; gaplen = i - gapstart;
1860  }
1861  }
1862 #if CMK_BLUEGENEQ
1863  if ( tmgr.absD(nd) == 0 ) // torus
1864  for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
1865  if ( d_flags[i%nd] ) gapstart = i+1;
1866  else if ( i - gapstart >= gaplen ) {
1867  d_rot = 2*nd-i-1; gaplen = i - gapstart;
1868  }
1869  }
1870  if ( tmgr.absE(ne) == 0 ) // torus
1871  for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
1872  if ( e_flags[i%ne] ) gapstart = i+1;
1873  else if ( i - gapstart >= gaplen ) {
1874  e_rot = 2*ne-i-1; gaplen = i - gapstart;
1875  }
1876  }
1877 #endif
1878  // order dimensions by length
1879  int a_min=na, a_max=-1;
1880  int b_min=nb, b_max=-1;
1881  int c_min=nc, c_max=-1;
1882  int d_min=nd, d_max=-1;
1883  int e_min=ne, e_max=-1;
1884  for ( int pe=0; pe<npes; ++pe ) {
1885  int a,b,c,d,e,t;
1886 #if CMK_BLUEGENEQ
1887  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1888 #else
1889  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1890  d=0; e=0;
1891 #endif
1892  a = (a+a_rot)%a_mod;
1893  b = (b+b_rot)%b_mod;
1894  c = (c+c_rot)%c_mod;
1895  d = (d+d_rot)%d_mod;
1896  e = (e+e_rot)%e_mod;
1897  if ( a < a_min ) a_min = a;
1898  if ( b < b_min ) b_min = b;
1899  if ( c < c_min ) c_min = c;
1900  if ( d < d_min ) d_min = d;
1901  if ( e < e_min ) e_min = e;
1902  if ( a > a_max ) a_max = a;
1903  if ( b > b_max ) b_max = b;
1904  if ( c > c_max ) c_max = c;
1905  if ( d > d_max ) d_max = d;
1906  if ( e > e_max ) e_max = e;
1907  }
1908  int a_len = a_max - a_min + 1;
1909  int b_len = b_max - b_min + 1;
1910  int c_len = c_max - c_min + 1;
1911  int d_len = d_max - d_min + 1;
1912  int e_len = e_max - e_min + 1;
1913  int lensort[5];
1914  lensort[0] = (a_len << 3) + 0;
1915  lensort[1] = (b_len << 3) + 1;
1916  lensort[2] = (c_len << 3) + 2;
1917  lensort[3] = (d_len << 3) + 3;
1918  lensort[4] = (e_len << 3) + 4;
1919  // CkPrintf("TopoManagerWrapper lensort before %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1920  std::sort(lensort, lensort+5);
1921  // CkPrintf("TopoManagerWrapper lensort after %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1922  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) a_dim = 4-i; }
1923  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) b_dim = 4-i; }
1924  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
1925  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) d_dim = 4-i; }
1926  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) e_dim = 4-i; }
1927 #if 0
1928  if ( a_len >= b_len && a_len >= c_len ) {
1929  a_dim = 0;
1930  if ( b_len >= c_len ) {
1931  b_dim = 1; c_dim = 2;
1932  } else {
1933  b_dim = 2; c_dim = 1;
1934  }
1935  } else if ( b_len >= a_len && b_len >= c_len ) {
1936  b_dim = 0;
1937  if ( a_len >= c_len ) {
1938  a_dim = 1; c_dim = 2;
1939  } else {
1940  a_dim = 2; c_dim = 1;
1941  }
1942  } else { // c is longest
1943  c_dim = 0;
1944  if ( a_len >= b_len ) {
1945  a_dim = 1; b_dim = 2;
1946  } else {
1947  a_dim = 2; b_dim = 1;
1948  }
1949  }
1950 #endif
1951  iout << iINFO << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
1952 #if CMK_BLUEGENEQ
1953  << " BY " << d_len << " BY " << e_len
1954 #endif
1955  << "\n" << endi;
1956  // CkPrintf("TopoManagerWrapper dims %d %d %d %d %d\n", a_dim, b_dim, c_dim, d_dim, e_dim);
1957  }
1958  void coords(int pe, int *crds) {
1959  int a,b,c,d,e,t;
1960 #if CMK_BLUEGENEQ
1961  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1962 #else
1963  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1964  d=0; e=0;
1965 #endif
1966  if ( a_dim < 3 ) crds[a_dim] = (a+a_rot)%a_mod;
1967  if ( b_dim < 3 ) crds[b_dim] = (b+b_rot)%b_mod;
1968  if ( c_dim < 3 ) crds[c_dim] = (c+c_rot)%c_mod;
1969  if ( d_dim < 3 ) crds[d_dim] = (d+d_rot)%d_mod;
1970  if ( e_dim < 3 ) crds[e_dim] = (e+e_rot)%e_mod;
1971  }
// Convenience accessor: the single torus coordinate of pe along dim (0..2).
1972  int coord(int pe, int dim) {
1973  int crds[3];
1974  coords(pe,crds);
1975  return crds[dim];
1976  }
// NOTE(review): the struct header and constructor of this comparator
// (pe_sortop_topo, named by the std::sort call in sortAndSplit below) were
// dropped by the doc extraction -- verify against the original source.
1979  const int *sortdims;
// Strict weak ordering of PEs by torus coordinates, comparing the three
// dimensions in the priority order given by sortdims; ties are broken by
// the global compact ordering index so the sort is total and deterministic.
1981  bool operator() (int pe1, int pe2) const {
1982  int crds1[3], crds2[3];
1983  tmgr.coords(pe1,crds1);
1984  tmgr.coords(pe2,crds2);
1985  for ( int i=0; i<3; ++i ) {
1986  int d = sortdims[i];
1987  if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
1988  }
// Tie-break: stable, machine-wide ordering shared by all callers.
1989  const int *index = WorkDistrib::peCompactOrderingIndex;
1990  return ( index[pe1] < index[pe2] );
1991  }
1992  };
// Sort the PE range [node_begin,node_end) along the torus, with splitdim as
// the primary key, and return a pointer near the middle of the range at a
// physical-node boundary, suitable for recursive bisection. Returns
// node_begin when no physical-node boundary exists (range unsplittable).
1993  int* sortAndSplit(int *node_begin, int *node_end, int splitdim) {
1994  if ( node_begin == node_end ) return node_begin;
1995  int tmins[3], tmaxs[3], tlens[3], sortdims[3];
// Compute the bounding box (per-dimension min/max) of the PEs' coordinates.
1996  coords(*node_begin, tmins);
1997  coords(*node_begin, tmaxs);
1998  for ( int *peitr = node_begin; peitr != node_end; ++peitr ) {
1999  int tvals[3];
2000  coords(*peitr, tvals);
2001  for ( int i=0; i<3; ++i ) {
2002  if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
2003  if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
2004  }
2005  }
2006  for ( int i=0; i<3; ++i ) {
2007  tlens[i] = tmaxs[i] - tmins[i];
2008  }
// Sort keys: splitdim first, then the remaining two dimensions with the
// longer extent ahead of the shorter one.
2009  sortdims[0] = splitdim;
2010  for ( int i=0, j=0; i<3; ++i ) {
2011  if ( i != splitdim ) sortdims[++j] = i;
2012  }
2013  if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
2014  int tmp = sortdims[1];
2015  sortdims[1] = sortdims[2];
2016  sortdims[2] = tmp;
2017  }
2018  std::sort(node_begin,node_end,pe_sortop_topo(*this,sortdims));
2019  int *nodes = node_begin;
2020  int nnodes = node_end - node_begin;
2021  int i_split = 0;
// Dead alternative kept for reference: split at a coordinate change rather
// than a physical-node change.
2022 #if 0
2023  int c_split = coord(nodes[0],splitdim);
2024  for ( int i=0; i<nnodes; ++i ) {
2025  if ( coord(nodes[i],splitdim) != c_split ) {
2026  int mid = (nnodes+1)/2;
2027  if ( abs(i-mid) < abs(i_split-mid) ) {
2028  i_split = i;
2029  c_split = coord(i,splitdim);
2030  }
2031  else break;
2032  }
2033  }
2034 #endif
// Walk the sorted PEs and pick the physical-node boundary closest to the
// midpoint; stop as soon as a candidate boundary moves away from it.
2035  for ( int i=0; i<nnodes; ++i ) {
2036  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2037  int mid = (nnodes+1)/2;
2038  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2039  else break;
2040  }
2041  }
2042  return ( node_begin + i_split );
2043  }
2044 };
2045 
// NOTE(review): this struct's header and pmap member declaration were dropped
// by the doc extraction (comparator is used as patch_sortop_curve_a(patchMap)
// below) -- verify against the original source.
// Orders patches along a serpentine (boustrophedon) curve with the a index
// as the major key: b is traversed forward on even a rows and backward on
// odd a rows, and likewise c within b, so consecutive patches stay adjacent.
2049  inline bool operator() (int p1, int p2) const {
2050  int a1 = pmap->index_a(p1);
2051  int a2 = pmap->index_a(p2);
2052  if ( a1 < a2 ) return true;
2053  if ( a1 > a2 ) return false;
// Flip traversal direction on odd a rows.
2054  int dir = ( (a1 & 1) ? -1 : 1 );
2055  int b1 = pmap->index_b(p1);
2056  int b2 = pmap->index_b(p2);
2057  if ( b1 * dir < b2 * dir ) return true;
2058  if ( b1 * dir > b2 * dir ) return false;
// Flip again on odd b rows.
2059  dir *= ( (b1 & 1) ? -1 : 1 );
2060  int c1 = pmap->index_c(p1);
2061  int c2 = pmap->index_c(p2);
2062  if ( c1 * dir < c2 * dir ) return true;
2063  return false;
2064  }
2065 };
2066 
// NOTE(review): struct header dropped by the doc extraction (comparator is
// used as patch_sortop_curve_b(patchMap) below) -- verify against source.
// Serpentine patch ordering with the b index as the major key, then a,
// then c, alternating direction on odd rows (same scheme as curve_a).
2070  inline bool operator() (int p1, int p2) const {
2071  int a1 = pmap->index_b(p1);
2072  int a2 = pmap->index_b(p2);
2073  if ( a1 < a2 ) return true;
2074  if ( a1 > a2 ) return false;
2075  int dir = ( (a1 & 1) ? -1 : 1 );
2076  int b1 = pmap->index_a(p1);
2077  int b2 = pmap->index_a(p2);
2078  if ( b1 * dir < b2 * dir ) return true;
2079  if ( b1 * dir > b2 * dir ) return false;
2080  dir *= ( (b1 & 1) ? -1 : 1 );
2081  int c1 = pmap->index_c(p1);
2082  int c2 = pmap->index_c(p2);
2083  if ( c1 * dir < c2 * dir ) return true;
2084  return false;
2085  }
2086 };
2087 
// NOTE(review): struct header dropped by the doc extraction (comparator is
// used as patch_sortop_curve_c(patchMap) below) -- verify against source.
// Serpentine patch ordering with the c index as the major key, then a,
// then b, alternating direction on odd rows (same scheme as curve_a).
2091  inline bool operator() (int p1, int p2) const {
2092  int a1 = pmap->index_c(p1);
2093  int a2 = pmap->index_c(p2);
2094  if ( a1 < a2 ) return true;
2095  if ( a1 > a2 ) return false;
2096  int dir = ( (a1 & 1) ? -1 : 1 );
2097  int b1 = pmap->index_a(p1);
2098  int b2 = pmap->index_a(p2);
2099  if ( b1 * dir < b2 * dir ) return true;
2100  if ( b1 * dir > b2 * dir ) return false;
2101  dir *= ( (b1 & 1) ? -1 : 1 );
2102  int c1 = pmap->index_b(p1);
2103  int c2 = pmap->index_b(p2);
2104  if ( c1 * dir < c2 * dir ) return true;
2105  return false;
2106  }
2107 };
2108 
2110  int *patch_begin, int *patch_end,
2111  int *node_begin, int *node_end,
2112  double *patchLoads,
2113  double *sortedLoads,
2114  int *assignedNode,
2115  TopoManagerWrapper &tmgr
2116  ) {
2117 
2119  PatchMap *patchMap = PatchMap::Object();
2120  int *patches = patch_begin;
2121  int npatches = patch_end - patch_begin;
2122  int *nodes = node_begin;
2123  int nnodes = node_end - node_begin;
2124 
2125  // assign patch loads
2126  const int emptyPatchLoad = simParams->emptyPatchLoad;
2127  double totalRawLoad = 0;
2128  for ( int i=0; i<npatches; ++i ) {
2129  int pid=patches[i];
2130 #ifdef MEM_OPT_VERSION
2131  double load = patchMap->numAtoms(pid) + emptyPatchLoad;
2132 #else
2133  double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
2134 #endif
2135  patchLoads[pid] = load;
2136  sortedLoads[i] = load;
2137  totalRawLoad += load;
2138  }
2139  std::sort(sortedLoads,sortedLoads+npatches);
2140 
2141  // limit maxPatchLoad to adjusted average load per node
2142  double sumLoad = 0;
2143  double maxPatchLoad = 1;
2144  for ( int i=0; i<npatches; ++i ) {
2145  double load = sortedLoads[i];
2146  double total = sumLoad + (npatches-i) * load;
2147  if ( nnodes * load > total ) break;
2148  sumLoad += load;
2149  maxPatchLoad = load;
2150  }
2151  double totalLoad = 0;
2152  for ( int i=0; i<npatches; ++i ) {
2153  int pid=patches[i];
2154  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2155  totalLoad += patchLoads[pid];
2156  }
2157  if ( nnodes * maxPatchLoad > totalLoad )
2158  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2159 
2160  int a_len, b_len, c_len;
2161  int a_min, b_min, c_min;
2162  { // find dimensions
2163  a_min = patchMap->index_a(patches[0]);
2164  b_min = patchMap->index_b(patches[0]);
2165  c_min = patchMap->index_c(patches[0]);
2166  int a_max = a_min;
2167  int b_max = b_min;
2168  int c_max = c_min;
2169  for ( int i=1; i<npatches; ++i ) {
2170  int a = patchMap->index_a(patches[i]);
2171  int b = patchMap->index_b(patches[i]);
2172  int c = patchMap->index_c(patches[i]);
2173  if ( a < a_min ) a_min = a;
2174  if ( b < b_min ) b_min = b;
2175  if ( c < c_min ) c_min = c;
2176  if ( a > a_max ) a_max = a;
2177  if ( b > b_max ) b_max = b;
2178  if ( c > c_max ) c_max = c;
2179  }
2180  a_len = a_max - a_min;
2181  b_len = b_max - b_min;
2182  c_len = c_max - c_min;
2183  }
2184 
2185  int *node_split = node_begin;
2186 
2187  if ( simParams->disableTopology ) ; else
2188  if ( a_len >= b_len && a_len >= c_len ) {
2189  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2190  } else if ( b_len >= a_len && b_len >= c_len ) {
2191  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2192  } else if ( c_len >= a_len && c_len >= b_len ) {
2193  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2194  }
2195 
2196  if ( node_split == node_begin ) { // unable to split torus
2197  // make sure physical nodes are together
2198  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2199  // find physical node boundary to split on
2200  int i_split = 0;
2201  for ( int i=0; i<nnodes; ++i ) {
2202  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2203  int mid = (nnodes+1)/2;
2204  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2205  else break;
2206  }
2207  }
2208  node_split = node_begin + i_split;
2209  }
2210 
2211  bool final_patch_sort = false;
2212 
2213  if ( node_split == node_begin ) { // all on same physical node
2214  if ( ( simParams->verboseTopology ) &&
2215  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2216  int crds[3];
2217  tmgr.coords(*node_begin, crds);
2218  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2219  CmiPhysicalNodeID(*node_begin), *node_begin,
2220  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2221  a_min, b_min, c_min, npatches,
2222  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2223  }
2224 
2225  // final sort along a to minimize pme message count
2226  final_patch_sort = true;
2227 
2228  // find node (process) boundary to split on
2229  int i_split = 0;
2230  for ( int i=0; i<nnodes; ++i ) {
2231  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2232  int mid = (nnodes+1)/2;
2233  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2234  else break;
2235  }
2236  }
2237  node_split = node_begin + i_split;
2238  }
2239 
2240  if ( node_split == node_begin ) { // all on same node (process)
2241  if ( ( simParams->verboseTopology ) &&
2242  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2243  int crds[3];
2244  tmgr.coords(*node_begin, crds);
2245  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2246  CmiNodeOf(*node_begin), *node_begin, npatches,
2247  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2248  }
2249 
2250  // no natural divisions so just split at midpoint
2251  node_split = node_begin + nnodes/2;
2252  }
2253 
2254  if ( nnodes == 1 ) { // down to a single pe
2255  // assign all patches
2256  int *node = node_begin;
2257  sumLoad = 0;
2258  for ( int i=0; i < npatches; ++i ) {
2259  int pid = patches[i];
2260  assignedNode[pid] = *node;
2261  sumLoad += patchLoads[pid];
2262  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2263  i, *node,
2264  patchMap->index_a(pid),
2265  patchMap->index_b(pid),
2266  patchMap->index_c(pid),
2267  patchLoads[pid], sumLoad);
2268  }
2269 
2270  return;
2271  }
2272 
2273  if ( final_patch_sort ) {
2274  // final sort along a to minimize pme message count
2275  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2276  } else if ( a_len >= b_len && a_len >= c_len ) {
2277  if ( 0 ) CkPrintf("sort a\n");
2278  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2279  } else if ( b_len >= a_len && b_len >= c_len ) {
2280  if ( 0 ) CkPrintf("sort b\n");
2281  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2282  } else if ( c_len >= a_len && c_len >= b_len ) {
2283  if ( 0 ) CkPrintf("sort c\n");
2284  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2285  }
2286 
2287  int *patch_split;
2288  { // walk through patches in sorted order
2289  int *node = node_begin;
2290  sumLoad = 0;
2291  for ( patch_split = patch_begin;
2292  patch_split != patch_end && node != node_split;
2293  ++patch_split ) {
2294  sumLoad += patchLoads[*patch_split];
2295  double targetLoad = totalLoad *
2296  ((double)(node-node_begin+1) / (double)nnodes);
2297  if ( 0 ) CkPrintf("test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
2298  patch_split - patch_begin, *node,
2299  patchMap->index_a(*patch_split),
2300  patchMap->index_b(*patch_split),
2301  patchMap->index_c(*patch_split),
2302  sumLoad, targetLoad);
2303  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2304  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2305  }
2306  double targetLoad = totalLoad *
2307  ((double)(node_split-node_begin) / (double)nnodes);
2308  if ( 0 ) CkPrintf("split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
2309  node_split-node_begin, nnodes,
2310  patch_split-patch_begin, npatches,
2311  sumLoad, targetLoad);
2312  }
2313 
2314  // recurse
2316  patch_begin, patch_split, node_begin, node_split,
2317  patchLoads, sortedLoads, assignedNode, tmgr);
2319  patch_split, patch_end, node_split, node_end,
2320  patchLoads, sortedLoads, assignedNode, tmgr);
2321 }
2322 
2323 //----------------------------------------------------------------------
// Assign every patch to a PE (and a base node for proxy trees) by walking
// patches along a space-filling curve and recursively bisecting both the
// patch list and the PE list along the machine torus.
// NOTE(review): doc lines 2333 (the SimParameters *simParams declaration)
// and 2382 (the call to recursive_bisect_with_curve) were dropped by the
// doc extraction -- verify against the original source.
2324 void WorkDistrib::assignPatchesSpaceFillingCurve()
2325 {
2326  TopoManagerWrapper tmgr;
2327  PatchMap *patchMap = PatchMap::Object();
2328  const int numPatches = patchMap->numPatches();
2329  int *assignedNode = new int[numPatches];
2330  ResizeArray<double> patchLoads(numPatches);
2331  SortableResizeArray<double> sortedLoads(numPatches);
2332  int numNodes = Node::Object()->numNodes();
// simulateInitialMapping is rejected outright; the assignment after
// NAMD_die is unreachable in practice.
2334  if(simParams->simulateInitialMapping) {
2335  NAMD_die("simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
2336  numNodes = simParams->simulatedPEs;
2337  }
2338 
// Identity ordering of patches; the bisection reorders this array.
2339  ResizeArray<int> patchOrdering(numPatches);
2340  for ( int i=0; i<numPatches; ++i ) {
2341  patchOrdering[i] = i;
2342  }
2343 
// Build the list of PEs eligible to host patches, honoring the
// noPatchesOnZero/One and output-PE exclusions.
2344  ResizeArray<int> nodeOrdering(numNodes);
2345  nodeOrdering.resize(0);
2346  for ( int i=0; i<numNodes; ++i ) {
2347  int pe = peDiffuseOrdering[(i+1)%numNodes]; // avoid 0 if possible
2348  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
2349  if ( pe == 0 ) continue;
2350  if(simParams->noPatchesOnOne && numNodes > 2) {
2351  if ( pe == 1 ) continue;
2352  }
2353  }
2354 #ifdef MEM_OPT_VERSION
2355  if(simParams->noPatchesOnOutputPEs && numNodes-simParams->numoutputprocs >2) {
2356  if ( isOutputProcessor(pe) ) continue;
2357  }
2358 #endif
2359  nodeOrdering.add(pe);
2360  if ( 0 ) CkPrintf("using pe %5d\n", pe);
2361  }
2362 
// With more PEs than patches, only the first numPatches PEs host patches.
2363  int *node_begin = nodeOrdering.begin();
2364  int *node_end = nodeOrdering.end();
2365  if ( nodeOrdering.size() > numPatches ) {
2366  node_end = node_begin + numPatches;
2367  }
2368  std::sort(node_begin, node_end, pe_sortop_compact());
2369 
// With more than 2x PEs than patches, base nodes come from a disjoint
// second slab of PEs; otherwise they coincide with the patch PEs.
2370  int *basenode_begin = node_begin;
2371  int *basenode_end = node_end;
2372  if ( nodeOrdering.size() > 2*numPatches ) {
2373  basenode_begin = node_end;
2374  basenode_end = basenode_begin + numPatches;
2375  std::sort(basenode_begin, basenode_end, pe_sortop_compact());
2376  }
2377 
2378  if ( simParams->disableTopology ) {
2379  iout << iWARN << "IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << endi;
2380  }
2381 
// (call to recursive_bisect_with_curve -- leading line stripped, see NOTE)
2383  patchOrdering.begin(), patchOrdering.end(),
2384  node_begin, node_end,
2385  patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
2386 
2387  std::sort(node_begin, node_end, pe_sortop_compact());
2388 
2389  int samenodecount = 0;
2390 
// Commit the assignment: each patch gets its PE, and the base node at the
// same rank in the (possibly disjoint) base-node slab.
2391  for ( int pid=0; pid<numPatches; ++pid ) {
2392  int node = assignedNode[pid];
2393  patchMap->assignNode(pid, node);
2394  int nodeidx = std::lower_bound(node_begin, node_end, node,
2395  pe_sortop_compact()) - node_begin;
2396  int basenode = basenode_begin[nodeidx];
2397  patchMap->assignBaseNode(pid, basenode);
2398  if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
2399  }
2400 
2401  iout << iINFO << "Placed " << (samenodecount*100./numPatches) << "% of base nodes on same physical node as patch\n" << endi;
2402 
2403  delete [] assignedNode;
2404 }
2405 
2406 //----------------------------------------------------------------------
2408 {
2409  PatchMap *patchMap = PatchMap::Object();
2410  ComputeMap *computeMap = ComputeMap::Object();
2411  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2412  Node *node = nd.ckLocalBranch();
2413 
2414  DebugM(3,"Mapping computes\n");
2415 
2416  computeMap->allocateCids();
2417 
2418  // Handle full electrostatics
2419  if ( node->simParameters->fullDirectOn )
2420  mapComputeHomePatches(computeFullDirectType);
2421  if ( node->simParameters->FMAOn )
2422 #ifdef DPMTA
2423  mapComputeHomePatches(computeDPMTAType);
2424 #else
2425  NAMD_die("This binary does not include DPMTA (FMA).");
2426 #endif
2427  if ( node->simParameters->PMEOn ) {
2428 #ifdef DPME
2429  if ( node->simParameters->useDPME )
2430  mapComputeHomePatches(computeDPMEType);
2431  else {
2432  mapComputeHomePatches(computePmeType);
2434  mapComputeHomePatches(computeEwaldType);
2435  }
2436 #else
2437 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2438  if (node->simParameters->usePMECUDA) {
2439  mapComputePatch(computePmeCUDAType);
2440  } else
2441 #endif
2442  {
2443  mapComputePatch(computePmeType);
2444  }
2446  mapComputeHomePatches(computeEwaldType);
2447 #endif
2448  }
2449 
2450  if ( node->simParameters->globalForcesOn ) {
2451  DebugM(2,"adding ComputeGlobal\n");
2452  mapComputeHomePatches(computeGlobalType);
2453  }
2454 
2455  if ( node->simParameters->extForcesOn )
2456  mapComputeHomePatches(computeExtType);
2457 
2458  if ( node->simParameters->qmForcesOn )
2459  mapComputeHomePatches(computeQMType);
2460 
2461  if ( node->simParameters->GBISserOn )
2462  mapComputeHomePatches(computeGBISserType);
2463 
2464  if ( node->simParameters->MsmSerialOn )
2465  mapComputeHomePatches(computeMsmSerialType);
2466 
2467  if ( node->simParameters->LJPMEOn )
2468  mapComputeHomePatches(computeLjPmeSerialType);
2469 #ifdef CHARM_HAS_MSA
2470  else if ( node->simParameters->MSMOn )
2471  mapComputeHomePatches(computeMsmMsaType);
2472 #else
2473  else if ( node->simParameters->MSMOn )
2474  mapComputeHomePatches(computeMsmType);
2475 #endif
2476 
2477  if ( node->simParameters->FMMOn )
2478  mapComputeHomePatches(computeFmmType);
2479 
2480 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2481 #ifdef BONDED_CUDA
2482  if (node->simParameters->bondedCUDA) {
2483  mapComputeNode(computeBondedCUDAType);
2484  }
2485 #endif
2486 #endif
2487 
2488 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2489  mapComputeNode(computeNonbondedCUDA2Type);
2490  mapComputeHomeTuples(computeExclsType);
2491  mapComputePatch(computeSelfExclsType);
2492 #endif
2493 
2494 #ifdef NAMD_MIC
2495  mapComputeNode(computeNonbondedMICType);
2496 #endif
2497 
2498  mapComputeNonbonded();
2499 
2500  if ( node->simParameters->LCPOOn ) {
2501  mapComputeLCPO();
2502  }
2503 
2504  // If we're doing true pair interactions, no need for bonded terms.
2505  // But if we're doing within-group interactions, we do need them.
2506  if ( !node->simParameters->pairInteractionOn ||
2508  mapComputeHomeTuples(computeBondsType);
2509  mapComputeHomeTuples(computeAnglesType);
2510  mapComputeHomeTuples(computeDihedralsType);
2511  mapComputeHomeTuples(computeImpropersType);
2512  mapComputeHomeTuples(computeCrosstermsType);
2513  mapComputePatch(computeSelfBondsType);
2514  mapComputePatch(computeSelfAnglesType);
2515  mapComputePatch(computeSelfDihedralsType);
2516  mapComputePatch(computeSelfImpropersType);
2517  mapComputePatch(computeSelfCrosstermsType);
2518  }
2519 
2520  if ( node->simParameters->goGroPair ) {
2521  // JLai
2522  mapComputeHomeTuples(computeGromacsPairType);
2523  mapComputePatch(computeSelfGromacsPairType);
2524  // End of JLai
2525  }
2526 
2527  if ( node->simParameters->drudeOn ) {
2528  mapComputeHomeTuples(computeTholeType);
2529  mapComputePatch(computeSelfTholeType);
2530  mapComputeHomeTuples(computeAnisoType);
2531  mapComputePatch(computeSelfAnisoType);
2532  mapComputeHomeTuples(computeOneFourNbTholeType);
2533  mapComputePatch(computeSelfOneFourNbTholeType);
2534  }
2535 
2536  if ( node->simParameters->eFieldOn )
2537  mapComputePatch(computeEFieldType);
2538  /* BEGIN gf */
2539  if ( node->simParameters->mgridforceOn )
2540  mapComputePatch(computeGridForceType);
2541  /* END gf */
2542  if ( node->simParameters->stirOn )
2543  mapComputePatch(computeStirType);
2544  if ( node->simParameters->sphericalBCOn )
2545  mapComputePatch(computeSphericalBCType);
2546  if ( node->simParameters->cylindricalBCOn )
2547  mapComputePatch(computeCylindricalBCType);
2548  if ( node->simParameters->tclBCOn ) {
2549  mapComputeHomePatches(computeTclBCType);
2550  }
2551  if ( node->simParameters->constraintsOn )
2552  mapComputePatch(computeRestraintsType);
2553  if ( node->simParameters->consForceOn )
2554  mapComputePatch(computeConsForceType);
2555  if ( node->simParameters->consTorqueOn )
2556  mapComputePatch(computeConsTorqueType);
2557 
2558  // store the latest compute map
2560  if (simParams->storeComputeMap) {
2561  computeMap->saveComputeMap(simParams->computeMapFilename);
2562  }
2563  // override mapping decision
2564  if (simParams->loadComputeMap) {
2565  computeMap->loadComputeMap(simParams->computeMapFilename);
2566  CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
2567  }
2568 }
2569 
2570 //----------------------------------------------------------------------
2571 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
2572 {
2573  PatchMap *patchMap = PatchMap::Object();
2574  ComputeMap *computeMap = ComputeMap::Object();
2575  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2576  Node *node = nd.ckLocalBranch();
2577 
2578  int numNodes = node->numNodes();
2579  SimParameters *simparam = node->simParameters;
2580  if(simparam->simulateInitialMapping) {
2581  numNodes = simparam->simulatedPEs;
2582  }
2583 
2584  char *isBaseNode = new char[numNodes];
2585  memset(isBaseNode,0,numNodes*sizeof(char));
2586 
2587  int numPatches = patchMap->numPatches();
2588  for(int j=0; j<numPatches; j++) {
2589  isBaseNode[patchMap->basenode(j)] = 1;
2590  }
2591 
2592  for(int i=0; i<numNodes; i++) {
2593  if ( isBaseNode[i] ) {
2594  computeMap->storeCompute(i,0,type);
2595  }
2596  }
2597 
2598  delete [] isBaseNode;
2599 }
2600 
2601 //----------------------------------------------------------------------
2602 void WorkDistrib::mapComputeHomePatches(ComputeType type)
2603 {
2604  PatchMap *patchMap = PatchMap::Object();
2605  ComputeMap *computeMap = ComputeMap::Object();
2606  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2607  Node *node = nd.ckLocalBranch();
2608 
2609  int numNodes = node->numNodes();
2610  SimParameters *simparam = node->simParameters;
2611  if(simparam->simulateInitialMapping) {
2612  numNodes = simparam->simulatedPEs;
2613  }
2614 
2615  for(int i=0; i<numNodes; i++) {
2616  if ( patchMap->numPatchesOnNode(i) ) {
2617  computeMap->storeCompute(i,0,type);
2618  }
2619  }
2620 }
2621 
2622 //----------------------------------------------------------------------
2623 void WorkDistrib::mapComputePatch(ComputeType type)
2624 {
2625  PatchMap *patchMap = PatchMap::Object();
2626  ComputeMap *computeMap = ComputeMap::Object();
2627 
2628  PatchID i;
2629  ComputeID cid;
2630 
2631  for(i=0; i<patchMap->numPatches(); i++)
2632  {
2633  cid=computeMap->storeCompute(patchMap->node(i),1,type);
2634  computeMap->newPid(cid,i);
2635  patchMap->newCid(i,cid);
2636  }
2637 
2638 }
2639 
2640 //----------------------------------------------------------------------
2641 void WorkDistrib::mapComputeNode(ComputeType type)
2642 {
2643  PatchMap *patchMap = PatchMap::Object();
2644  ComputeMap *computeMap = ComputeMap::Object();
2645 
2646  PatchID i;
2647  ComputeID cid;
2648 
2649  int ncpus = CkNumPes();
2650  SimParameters *simparam = Node::Object()->simParameters;
2651  if(simparam->simulateInitialMapping) {
2652  ncpus = simparam->simulatedPEs;
2653  }
2654 
2655  for(int i=0; i<ncpus; i++) {
2656  computeMap->storeCompute(i,0,type);
2657  }
2658 
2659 }
2660 
2661 //----------------------------------------------------------------------
// Create the nonbonded compute objects: one self compute per patch (on the
// patch's own PE) and one pair compute per one-or-two-away neighbor pair
// (on the downstream patch's base node).
// NOTE(review): doc lines 2672 (simParams declaration), 2680 (the
// `PatchID oneAway[...]` declaration) and 2729 (the self compute-type
// argument to storeCompute) were dropped by the doc extraction -- verify
// against the original source.
2662 void WorkDistrib::mapComputeNonbonded(void)
2663 {
2664  // For each patch, create 1 electrostatic object for self-interaction.
2665  // Then create 1 for each 1-away and 2-away neighbor which has a larger
2666  // pid.
2667 
2668  PatchMap *patchMap = PatchMap::Object();
2669  ComputeMap *computeMap = ComputeMap::Object();
2670  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2671  Node *node = nd.ckLocalBranch();
2673  int ncpus = CkNumPes();
2674  int nodesize = CkMyNodeSize();
2675  if(simParams->simulateInitialMapping) {
2676  ncpus = simParams->simulatedPEs;
2677  nodesize = simParams->simulatedNodeSize;
2678  }
2679 
2681  PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
2682  int oneAwayTrans[PatchMap::MaxOneOrTwoAway];
2683 
2684  PatchID i;
2685  ComputeID cid;
2686  int numNeighbors;
2687  int j;
// When PEs are scarcer than patches, scale partition counts down
// proportionally (used only by the disabled LDBAL_HYBRID code below).
2688  double partScaling = 1.0;
2689  if ( ncpus < patchMap->numPatches() ) {
2690  partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
2691  }
2692 
2693  for(i=0; i<patchMap->numPatches(); i++) // do the self
2694  {
2695 
2696  int numPartitions = 1;
// Disabled hybrid-balancer partition sizing, kept for reference.
2697 #if 0
2698  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2699 #ifdef MEM_OPT_VERSION
2700  int64 numFixed = patchMap->numFixedAtoms(i);
2701  int64 numAtoms = patchMap->numAtoms(i);
2702 #else
2703  int64 numFixed = patchMap->patch(i)->getNumFixedAtoms(); // avoid overflow
2704  int64 numAtoms = patchMap->patch(i)->getNumAtoms();
2705 #endif
2706 
2707  int divide = node->simParameters->numAtomsSelf;
2708  if (divide > 0) {
2709  numPartitions = (int) ( partScaling * ( 0.5 +
2710  (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
2711  }
2712  if (numPartitions < 1) numPartitions = 1;
2713  if ( numPartitions > node->simParameters->maxSelfPart )
2714  numPartitions = node->simParameters->maxSelfPart;
2715  // self-interaction
2716  DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
2717 // iout <<"Self numPartitions = " <<numPartitions <<" numAtoms " <<numAtoms <<std::endl;
2718  }
2719 #endif
2720 
2721  // DMK - NOTE - For MIC builds (i.e. NAMD_MIC is defined), it is assumed that self computes are
2722  // mapped to the PE their associated patch is on. If the code below should change, making that
2723  // untrue, MIC builds should be special cased so that assumption still holds (or the host vs
2724  // device load balancing scheme should be modified). (See the comment in the function
2725  // mic_assignComputes() in ComputeNonbondedMIC.C for more details.)
// Self compute on the patch's own PE; compute-type argument line stripped
// by the extraction (see NOTE above).
2726  for(int partition=0; partition < numPartitions; partition++)
2727  {
2728  cid=computeMap->storeCompute(patchMap->node(i),1,
2730  partition,numPartitions);
2731  computeMap->newPid(cid,i);
2732  patchMap->newCid(i,cid);
2733  }
2734  }
2735 
2736  for(int p1=0; p1 <patchMap->numPatches(); p1++) // do the pairs
2737  {
2738  // this only returns half of neighbors, which is what we want
2739  numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
2740  for(j=0;j<numNeighbors;j++)
2741  {
2742  int p2 = oneAway[j];
2743  int dsp = oneAwayDownstream[j];
2744 
2745  int numPartitions = 1;
// Disabled hybrid-balancer pair partition sizing, kept for reference.
2746 #if 0
2747  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2748 #ifdef MEM_OPT_VERSION
2749  int64 numAtoms1 = patchMap->numAtoms(p1);
2750  int64 numAtoms2 = patchMap->numAtoms(p2);
2751  int64 numFixed1 = patchMap->numFixedAtoms(p1);
2752  int64 numFixed2 = patchMap->numFixedAtoms(p2);
2753 #else
2754  int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
2755  int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
2756  int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
2757  int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
2758 #endif
2759 
2760 
// Neighbor p2's indices are unwrapped by the periodic image offset t2 so
// per-dimension distances can be compared against the numaway limits.
2761  const int t2 = oneAwayTrans[j];
2762  const int adim = patchMap->gridsize_a();
2763  const int bdim = patchMap->gridsize_b();
2764  const int cdim = patchMap->gridsize_c();
2765  const int nax = patchMap->numaway_a(); // 1 or 2
2766  const int nay = patchMap->numaway_b(); // 1 or 2
2767  const int naz = patchMap->numaway_c(); // 1 or 2
2768  const int ia1 = patchMap->index_a(p1);
2769  const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
2770  const int ib1 = patchMap->index_b(p1);
2771  const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
2772  const int ic1 = patchMap->index_c(p1);
2773  const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);
2774 
2775  if ( abs(ia2-ia1) > nax ||
2776  abs(ib2-ib1) > nay ||
2777  abs(ic2-ic1) > naz )
2778  NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");
2779 
// Classify pair separation (0..3 dimensions apart) to pick the divisor.
2780  int distance = 3;
2781  if ( ia1 == ia2 ) --distance;
2782  else if ( ia1 == ia2 + nax - 1 ) --distance;
2783  else if ( ia1 + nax - 1 == ia2 ) --distance;
2784  if ( ib1 == ib2 ) --distance;
2785  else if ( ib1 == ib2 + nay - 1 ) --distance;
2786  else if ( ib1 + nay - 1 == ib2 ) --distance;
2787  if ( ic1 == ic2 ) --distance;
2788  else if ( ic1 == ic2 + naz - 1 ) --distance;
2789  else if ( ic1 + naz - 1 == ic2 ) --distance;
2790  int divide = 0;
2791  if ( distance == 0 ) {
2792  divide = node->simParameters->numAtomsSelf2;
2793  } else if (distance == 1) {
2794  divide = node->simParameters->numAtomsPair;
2795  } else {
2796  divide = node->simParameters->numAtomsPair2;
2797  }
2798  if (divide > 0) {
2799  numPartitions = (int) ( partScaling * ( 0.5 +
2800  (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
2801  }
2802  if ( numPartitions < 1 ) numPartitions = 1;
2803  if ( numPartitions > node->simParameters->maxPairPart )
2804  numPartitions = node->simParameters->maxPairPart;
2805 // if ( numPartitions > 1 ) iout << "Mapping " << numPartitions << " ComputeNonbondedPair objects for patches " << p1 << "(" << numAtoms1 << ") and " << p2 << "(" << numAtoms2 << ")\n" << endi;
2806  }
2807 #endif
// Pair compute lives on the downstream patch's base node; the second
// patch is registered together with its periodic image transform.
2808  for(int partition=0; partition < numPartitions; partition++)
2809  {
2810  cid=computeMap->storeCompute( patchMap->basenode(dsp),
2811  2,computeNonbondedPairType,partition,numPartitions);
2812  computeMap->newPid(cid,p1);
2813  computeMap->newPid(cid,p2,oneAwayTrans[j]);
2814  patchMap->newCid(p1,cid);
2815  patchMap->newCid(p2,cid);
2816  }
2817  }
2818  }
2819 }
2820 
2821 //----------------------------------------------------------------------
// Create LCPO (SASA) compute objects: one compute per patch, covering that
// patch's octet of up to 8 patches, placed on the patch's own PE.
// NOTE(review): doc lines 2829 (presumably a simParams declaration) and
// 2852 (the compute-type argument, presumably computeLCPOType) were dropped
// by the doc extraction -- verify against the original source.
2822 void WorkDistrib::mapComputeLCPO(void) {
2823  //iterate over all needed objects
2824 
2825  PatchMap *patchMap = PatchMap::Object();
2826  ComputeMap *computeMap = ComputeMap::Object();
2827  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2828  Node *node = nd.ckLocalBranch();
2830  int ncpus = CkNumPes();
2831  int nodesize = CkMyNodeSize();
// An octet is at most 2x2x2 = 8 patches.
2832  const int maxPatches = 8;
2833 
2834  int numPatchesInOctet;
2835  PatchID patchesInOctet[maxPatches];
2836  int oneAwayTrans[maxPatches];
2837 
2838  //partitioned after 1st timestep
2839  int numPartitions = 1;
2840 
2841  PatchID i;
2842  ComputeID cid;
2843 
2844  // one octet per patch
2845  for(i=0; i<patchMap->numPatches(); i++) {
2846  numPatchesInOctet =
2847  patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);
2848 
2849  for(int partition=0; partition < numPartitions; partition++) {
2850  cid=computeMap->storeCompute(patchMap->node(i),
2851  numPatchesInOctet,
2853  partition,
2854  numPartitions);
// Register every patch of the octet (with its image transform) on the
// compute, then the compute on every patch.
2855  for (int p = 0; p < numPatchesInOctet; p++) {
2856  computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
2857  }
2858  for (int p = 0; p < numPatchesInOctet; p++) {
2859  patchMap->newCid(patchesInOctet[p],cid);
2860  }
2861  } // for partitions
2862  } // for patches
2863 } // mapComputeLCPO
2864 
2865 //----------------------------------------------------------------------
2867  LocalWorkMsg *msg = compute->localWorkMsg;
2868  int seq = compute->sequence();
2869  int gbisPhase = compute->getGBISPhase();
2870 
2871  if ( seq < 0 ) {
2872  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2873  } else {
2874  SET_PRIORITY(msg,seq,compute->priority());
2875  }
2876 
2877  msg->compute = compute; // pointer is valid since send is to local Pe
2878  int type = compute->type();
2879  int cid = compute->cid;
2880 
2881  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2882  switch ( type ) {
2883  case computeExclsType:
2884  case computeSelfExclsType:
2885  wdProxy[CkMyPe()].enqueueExcls(msg);
2886  break;
2887  case computeBondsType:
2888  case computeSelfBondsType:
2889  wdProxy[CkMyPe()].enqueueBonds(msg);
2890  break;
2891  case computeAnglesType:
2892  case computeSelfAnglesType:
2893  wdProxy[CkMyPe()].enqueueAngles(msg);
2894  break;
2895  case computeDihedralsType:
2897  wdProxy[CkMyPe()].enqueueDihedrals(msg);
2898  break;
2899  case computeImpropersType:
2901  wdProxy[CkMyPe()].enqueueImpropers(msg);
2902  break;
2903  case computeTholeType:
2904  case computeSelfTholeType:
2905  wdProxy[CkMyPe()].enqueueThole(msg);
2906  break;
2907  case computeAnisoType:
2908  case computeSelfAnisoType:
2909  wdProxy[CkMyPe()].enqueueAniso(msg);
2910  break;
2911  case computeCrosstermsType:
2913  wdProxy[CkMyPe()].enqueueCrossterms(msg);
2914  break;
2917  wdProxy[CkMyPe()].enqueueOneFourNbThole(msg);
2918  break;
2919  // JLai
2922  wdProxy[CkMyPe()].enqueueGromacsPair(msg);
2923  break;
2924  // End of JLai
2925  case computeLCPOType:
2926  wdProxy[CkMyPe()].enqueueLCPO(msg);
2927  break;
2929  switch ( seq % 2 ) {
2930  case 0:
2931  //wdProxy[CkMyPe()].enqueueSelfA(msg);
2932  switch ( gbisPhase ) {
2933  case 1:
2934  wdProxy[CkMyPe()].enqueueSelfA1(msg);
2935  break;
2936  case 2:
2937  wdProxy[CkMyPe()].enqueueSelfA2(msg);
2938  break;
2939  case 3:
2940  wdProxy[CkMyPe()].enqueueSelfA3(msg);
2941  break;
2942  }
2943  break;
2944  case 1:
2945  //wdProxy[CkMyPe()].enqueueSelfB(msg);
2946  switch ( gbisPhase ) {
2947  case 1:
2948  wdProxy[CkMyPe()].enqueueSelfB1(msg);
2949  break;
2950  case 2:
2951  wdProxy[CkMyPe()].enqueueSelfB2(msg);
2952  break;
2953  case 3:
2954  wdProxy[CkMyPe()].enqueueSelfB3(msg);
2955  break;
2956  }
2957  break;
2958  default:
2959  NAMD_bug("WorkDistrib::messageEnqueueSelf case statement error!");
2960  }
2961  break;
2963  switch ( seq % 2 ) {
2964  case 0:
2965  //wdProxy[CkMyPe()].enqueueWorkA(msg);
2966  switch ( gbisPhase ) {
2967  case 1:
2968  wdProxy[CkMyPe()].enqueueWorkA1(msg);
2969  break;
2970  case 2:
2971  wdProxy[CkMyPe()].enqueueWorkA2(msg);
2972  break;
2973  case 3:
2974  wdProxy[CkMyPe()].enqueueWorkA3(msg);
2975  break;
2976  }
2977  break;
2978  case 1:
2979  //wdProxy[CkMyPe()].enqueueWorkB(msg);
2980  switch ( gbisPhase ) {
2981  case 1:
2982  wdProxy[CkMyPe()].enqueueWorkB1(msg);
2983  break;
2984  case 2:
2985  wdProxy[CkMyPe()].enqueueWorkB2(msg);
2986  break;
2987  case 3:
2988  wdProxy[CkMyPe()].enqueueWorkB3(msg);
2989  break;
2990  }
2991  break;
2992  case 2:
2993  wdProxy[CkMyPe()].enqueueWorkC(msg);
2994  break;
2995  default:
2996  NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
2997  }
2998  break;
2999 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3001 // CkPrintf("WorkDistrib[%d]::CUDA seq=%d phase=%d\n", CkMyPe(), seq, gbisPhase);
3002  //wdProxy[CkMyPe()].enqueueCUDA(msg);
3003  switch ( gbisPhase ) {
3004  case 1:
3005  wdProxy[CkMyPe()].enqueueCUDA(msg);
3006  break;
3007  case 2:
3008  wdProxy[CkMyPe()].enqueueCUDAP2(msg);
3009  break;
3010  case 3:
3011  wdProxy[CkMyPe()].enqueueCUDAP3(msg);
3012  break;
3013  }
3014 #else
3016 #endif
3017  break;
3019 #ifdef NAMD_MIC
3020  wdProxy[CkMyPe()].enqueueMIC(msg);
3021 #endif
3022  break;
3023  case computePmeType:
3024  // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
3025  wdProxy[CkMyPe()].enqueuePme(msg);
3026  break;
3027 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3028  case computePmeCUDAType:
3029  wdProxy[CkMyPe()].enqueuePme(msg);
3030  break;
3031 #endif
3032  default:
3033  wdProxy[CkMyPe()].enqueueWork(msg);
3034  }
3035 }
3036 
3037 //----------------------------------------------------------------------
3039  LocalWorkMsg *msg = compute->localWorkMsg;
3040  int seq = compute->sequence();
3041  int gbisPhase = compute->getGBISPhase();
3042 
3043  if ( seq < 0 ) {
3044  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
3045  } else {
3046  SET_PRIORITY(msg,seq,compute->priority());
3047  }
3048 
3049  msg->compute = compute; // pointer is valid since send is to local Pe
3050  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3051 
3052 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3053  //wdProxy[CkMyPe()].finishCUDA(msg);
3054  switch ( gbisPhase ) {
3055  case 1:
3056  wdProxy[CkMyPe()].finishCUDA(msg);
3057  break;
3058  case 2:
3059  wdProxy[CkMyPe()].finishCUDAP2(msg);
3060  break;
3061  case 3:
3062  wdProxy[CkMyPe()].finishCUDAP3(msg);
3063  break;
3064  }
3065 #else
3067 #endif
3068 }
3069 
3070 //----------------------------------------------------------------------
3072  LocalWorkMsg *msg = compute->localWorkMsg;
3073  int seq = compute->sequence();
3074  int gbisPhase = compute->getGBISPhase();
3075 
3076  if ( seq < 0 ) {
3077  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
3078  } else {
3079  SET_PRIORITY(msg,seq,compute->priority());
3080  }
3081 
3082  msg->compute = compute; // pointer is valid since send is to local Pe
3083  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3084 
3085 #ifdef NAMD_MIC
3086  wdProxy[CkMyPe()].finishMIC(msg);
3087 #else
3089 #endif
3090 }
3091 
3094  if ( msg->compute->localWorkMsg != msg )
3095  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3096 }
3097 
3100  if ( msg->compute->localWorkMsg != msg )
3101  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3102 }
3103 
3106  if ( msg->compute->localWorkMsg != msg )
3107  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3108 }
3109 
3112  if ( msg->compute->localWorkMsg != msg )
3113  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3114 }
3115 
3118  if ( msg->compute->localWorkMsg != msg )
3119  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3120 }
3121 
3124  if ( msg->compute->localWorkMsg != msg )
3125  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3126 }
3127 
3130  if ( msg->compute->localWorkMsg != msg )
3131  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3132 }
3133 
3136  if ( msg->compute->localWorkMsg != msg )
3137  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3138 }
3139 
3142  if ( msg->compute->localWorkMsg != msg )
3143  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3144 }
3145 
3148  if ( msg->compute->localWorkMsg != msg )
3149  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3150 }
3151 
3152 // JLai
3154  msg->compute->doWork();
3156  if ( msg->compute->localWorkMsg != msg )
3157  NAMD_bug("\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
3158 }
3159 // End of JLai
3160 
3163  if ( msg->compute->localWorkMsg != msg )
3164  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3165 }
3166 
3168  msg->compute->doWork();
3169  if ( msg->compute->localWorkMsg != msg )
3170  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3171 }
3174  if ( msg->compute->localWorkMsg != msg )
3175  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3176 }
3179  if ( msg->compute->localWorkMsg != msg )
3180  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3181 }
3184  if ( msg->compute->localWorkMsg != msg )
3185  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3186 }
3187 
3190  if ( msg->compute->localWorkMsg != msg )
3191  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3192 }
3195  if ( msg->compute->localWorkMsg != msg )
3196  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3197 }
3200  if ( msg->compute->localWorkMsg != msg )
3201  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3202 }
3203 
3206  if ( msg->compute->localWorkMsg != msg )
3207  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3208 }
3211  if ( msg->compute->localWorkMsg != msg )
3212  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3213 }
3216  if ( msg->compute->localWorkMsg != msg )
3217  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3218 }
3219 
3222  if ( msg->compute->localWorkMsg != msg )
3223  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3224 }
3227  if ( msg->compute->localWorkMsg != msg )
3228  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3229 }
3232  if ( msg->compute->localWorkMsg != msg )
3233  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3234 }
3235 
3236 
3237 
3240  if ( msg->compute->localWorkMsg != msg )
3241  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3242 }
3243 
3246 }
3249 }
3252 }
3253 
3255  msg->compute->finishPatch(msg->data);
3256 }
3257 
3260 }
3263 }
3266 }
3267 
3270 }
3273 }
3274 
3275 
3276 //**********************************************************************
3277 //
3278 // FUNCTION velocities_from_PDB
3279 //
3280 // INPUTS:
3281 // v - Array of vectors to populate
3282 // filename - name of the PDB filename to read in
3283 //
3284 // This function reads in a set of initial velocities from a
3285 // PDB file. It places the velocities into the array of Vectors
3286 // passed to it.
3287 //
3288 //***********************************************************************/
3289 
3290 void WorkDistrib::velocities_from_PDB(const char *filename,
3291  Vector *v, int totalAtoms)
3292 {
3293  PDB *v_pdb; // PDB info from velocity PDB
3294  int i;
3295 
3296  // Read the PDB
3297  v_pdb = new PDB(filename);
3298  if ( v_pdb == NULL )
3299  {
3300  NAMD_die("memory allocation failed in Node::velocities_from_PDB");
3301  }
3302 
3303  // Make sure the number of velocities read in matches
3304  // the number of atoms we have
3305  if (v_pdb->num_atoms() != totalAtoms)
3306  {
3307  char err_msg[129];
3308 
3309  sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
3310  v_pdb->num_atoms());
3311 
3312  NAMD_die(err_msg);
3313  }
3314 
3315  // Get the entire list of atom info and loop through
3316  // them assigning the velocity vector for each one
3317  v_pdb->get_all_positions(v);
3318 
3319  for (i=0; i<totalAtoms; i++)
3320  {
3321  v[i].x *= PDBVELINVFACTOR;
3322  v[i].y *= PDBVELINVFACTOR;
3323  v[i].z *= PDBVELINVFACTOR;
3324  }
3325 
3326  delete v_pdb;
3327 }
3328 // END OF FUNCTION velocities_from_PDB
3329 
3330 //**********************************************************************
3331 //
3332 // FUNCTION velocities_from_binfile
3333 //
3334 // INPUTS:
3335 // fname - File name to write velocities to
3336 // n - Number of atoms in system
3337 // vels - Array of velocity vectors
3338 //
3339 // This function writes out the velocities in binary format. This is
3340 // done to preserve accuracy between restarts of namd.
3341 //
3342 //**********************************************************************
3343 
3344 void WorkDistrib::velocities_from_binfile(const char *fname, Vector *vels, int n)
3345 {
3346  read_binary_file(fname,vels,n);
3347 }
3348 // END OF FUNCTION velocities_from_binfile
3349 
3350 //**********************************************************************
3351 //
3352 // FUNCTION random_velocities
3353 //
3354 // INPUTS:
3355 // v - array of vectors to populate
3356 // Temp - Temperature to acheive
3357 //
3358 // This function assigns a random velocity distribution to a
3359 // simulation to achieve a desired initial temperature. The method
3360 // used here was stolen from the program X-PLOR.
3361 //
3362 //**********************************************************************
3363 
3364 void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
3365  Vector *v, int totalAtoms)
3366 {
3367  int i, j; // Loop counter
3368  BigReal kbT; // Boltzman constant * Temp
3369  BigReal randnum; // Random number from -6.0 to 6.0
3370  BigReal kbToverM; // sqrt(Kb*Temp/Mass)
3372  Bool lesOn = simParams->lesOn;
3373  Random vel_random(simParams->randomSeed);
3374 
3375  int lesReduceTemp = lesOn && simParams->lesReduceTemp;
3376  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
3377 
3378  kbT = Temp*BOLTZMANN;
3379 
3380  // Loop through all the atoms and assign velocities in
3381  // the x, y and z directions for each one
3382  for (i=0; i<totalAtoms; i++)
3383  {
3384  if (structure->atommass(i) <= 0.) {
3385  kbToverM = 0.;
3386  } else {
3387  kbToverM = sqrt(kbT *
3388  ( lesOn && structure->get_fep_type(i) ? tempFactor : 1.0 ) /
3389  structure->atommass(i) );
3390  }
3391 
3392  // The following comment was stolen from X-PLOR where
3393  // the following section of code was adapted from.
3394 
3395  // This section generates a Gaussian random
3396  // deviate of 0.0 mean and standard deviation RFD for
3397  // each of the three spatial dimensions.
3398  // The algorithm is a "sum of uniform deviates algorithm"
3399  // which may be found in Abramowitz and Stegun,
3400  // "Handbook of Mathematical Functions", pg 952.
3401  for (randnum=0.0, j=0; j<12; j++)
3402  {
3403  randnum += vel_random.uniform();
3404  }
3405 
3406  randnum -= 6.0;
3407 
3408  v[i].x = randnum*kbToverM;
3409 
3410  for (randnum=0.0, j=0; j<12; j++)
3411  {
3412  randnum += vel_random.uniform();
3413  }
3414 
3415  randnum -= 6.0;
3416 
3417  v[i].y = randnum*kbToverM;
3418 
3419  for (randnum=0.0, j=0; j<12; j++)
3420  {
3421  randnum += vel_random.uniform();
3422  }
3423 
3424  randnum -= 6.0;
3425 
3426  v[i].z = randnum*kbToverM;
3427  }
3428 
3429  if ( simParams->drudeOn ) for (i=0; i<totalAtoms; i++) {
3430  if ( structure->is_drude(i) ) {
3431  v[i] = v[structure->get_mother_atom(i)]; // zero is good enough
3432  }
3433  }
3434 }
3435 /* END OF FUNCTION random_velocities */
3436 
3437 //**********************************************************************
3438 //
3439 // FUNCTION remove_com_motion
3440 //
3441 // INPUTS:
3442 // vel - Array of initial velocity vectors
3443 //
3444 // This function removes the center of mass motion from a molecule.
3445 //
3446 //**********************************************************************
3447 
3448 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
3449 {
3450  Vector mv(0,0,0); // Sum of (mv)_i
3451  BigReal totalMass=0; // Total mass of system
3452  int i; // Loop counter
3453 
3454  // Loop through and compute the net momentum
3455  for (i=0; i<n; i++)
3456  {
3457  BigReal mass = structure->atommass(i);
3458  mv += mass * vel[i];
3459  totalMass += mass;
3460  }
3461 
3462  mv /= totalMass;
3463 
3464  iout << iINFO << "REMOVING COM VELOCITY "
3465  << ( PDBVELFACTOR * mv ) << "\n" << endi;
3466 
3467  for (i=0; i<n; i++) { vel[i] -= mv; }
3468 
3469 }
3470 /* END OF FUNCTION remove_com_motion */
3471 
3472 #if USE_TOPOMAP
3473 
3474 //Specifically designed for BGL and other 3d Tori architectures
3475 //Partition Torus and Patch grid together using recursive bisection.
3476 int WorkDistrib::assignPatchesTopoGridRecBisection() {
3477 
3478  PatchMap *patchMap = PatchMap::Object();
3479  int *assignedNode = new int[patchMap->numPatches()];
3480  int numNodes = Node::Object()->numNodes();
3482  if(simParams->simulateInitialMapping) {
3483  numNodes = simParams->simulatedPEs;
3484  }
3485 
3486  int usedNodes = numNodes;
3487  CkPrintf("assignPatchesTopoGridRecBisection\n");
3488  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
3489  usedNodes -= 1;
3490  if ( simParams->noPatchesOnOne && numNodes > 2 )
3491  usedNodes -= 1;
3492  }
3493  RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());
3494 
3495  int xsize = 0, ysize = 0, zsize = 0;
3496 
3497  // Right now assumes a T*** (e.g. TXYZ) mapping
3498  TopoManager tmgr;
3499  xsize = tmgr.getDimNX();
3500  ysize = tmgr.getDimNY();
3501  zsize = tmgr.getDimNZ();
3502 
3503  //Fix to not assign patches to processor 0
3504  int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
3505 
3506  delete [] assignedNode;
3507 
3508  return rc;
3509 }
3510 #endif
3511 
3512 
3513 #if defined(NAMD_MIC)
3514  extern void mic_hostDeviceLDB();
3515  extern void mic_contributeHostDeviceLDB(int idLen, int * id);
3516  extern void mic_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2);
3517 #endif
3518 
3520  #if defined(NAMD_MIC)
3521  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3522  wdProxy.initHostDeviceLDB();
3523  #endif
3524 }
3525 
3527  #if defined(NAMD_MIC)
3528  mic_hostDeviceLDB();
3529  #endif
3530 }
3531 
3532 void WorkDistrib::send_contributeHostDeviceLDB(int peSetLen, int * peSet) {
3533  #if defined(NAMD_MIC)
3534  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3535  wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
3536  #endif
3537 }
3538 
3539 void WorkDistrib::contributeHostDeviceLDB(int peSetLen, int * peSet) {
3540  #if defined(NAMD_MIC)
3541  mic_contributeHostDeviceLDB(peSetLen, peSet);
3542  #endif
3543 }
3544 
3545 void WorkDistrib::send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
3546  #if defined(NAMD_MIC)
3547  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3548  wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3549  #endif
3550 }
3551 
3552 void WorkDistrib::setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
3553  #if defined(NAMD_MIC)
3554  mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
3555  #endif
3556 }
3557 
3558 
3559 #include "WorkDistrib.def.h"
3560 
static Node * Object()
Definition: Node.h:86
Real atomcharge(int anum) const
Definition: Molecule.h:1124
#define LDBAL_HYBRID
Definition: SimParameters.h:66
int npatches
Definition: WorkDistrib.C:1623
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:2049
void setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3552
void enqueueMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3268
Real langevinParam
Definition: NamdTypes.h:220
void enqueueOneFourNbThole(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3146
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
static void sortPmePes(int *pmepes, int xdim, int ydim)
Definition: WorkDistrib.C:307
int get_mother_atom(int) const
ScaledPosition center(int pid) const
Definition: PatchMap.h:99
Bool simulateInitialMapping
static void messageFinishMIC(Compute *)
Definition: WorkDistrib.C:3071
bool operator()(int a, int b) const
Definition: WorkDistrib.C:148
int getNumAtoms() const
Definition: Patch.h:105
int isSendSpanningTreeUnset()
patch_sortop_curve_b(PatchMap *m)
Definition: WorkDistrib.C:2069
void enqueueAngles(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3110
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:3038
void end(void)
Definition: MStream.C:176
int getNumFixedAtoms() const
Definition: Patch.h:112
Definition: PDB.h:36
int sequence(void)
Definition: Compute.h:64
int size(void) const
Definition: ResizeArray.h:131
PatchID assignToPatch(Position p, const Lattice &l)
Definition: PatchMap.inl:14
void setNewNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:146
void setRecvSpanning()
Definition: ProxyMgr.C:370
static bool less_than_bit_reversed(int a, int b)
Definition: WorkDistrib.C:136
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:2109
int numComputes(void)
Definition: ComputeMap.h:103
void saveComputeMap(const char *fname)
Definition: ComputeMap.C:260
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
NAMD_HOST_DEVICE int c_p() const
Definition: Lattice.h:291
#define BOLTZMANN
Definition: common.h:54
Definition: Node.h:78
static int * peCompactOrdering
Definition: WorkDistrib.h:118
int32 ComputeID
Definition: NamdTypes.h:288
BigReal max_a(int pid) const
Definition: PatchMap.h:92
void finishCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3264
void initPtrs()
Definition: ComputeMap.C:80
void enqueueCrossterms(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3140
Position fixedPosition
Definition: NamdTypes.h:212
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:2070
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
int isRecvSpanningTreeUnset()
void enqueuePme(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3161
static PatchMap * Object()
Definition: PatchMap.h:27
void enqueueWorkA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3214
ComputeType
Definition: ComputeMap.h:20
void enqueueWork(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3092
void enqueueGromacsPair(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3153
void enqueueSelfA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3172
void finishCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3261
Definition: Vector.h:72
#define DrudeAtom
Definition: structures.h:23
static void send_contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3532
#define HydrogenAtom
Definition: structures.h:16
SimParameters * simParameters
Definition: Node.h:181
BigReal nonbondedScaling
void loadComputeMap(const char *fname)
Definition: ComputeMap.C:276
Bool CUDASOAintegrateMode
DispCoef dispcoef
Definition: NamdTypes.h:151
void createHomePatch(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:74
void setSendSpanning()
Definition: ProxyMgr.C:361
void sendAtoms(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:157
TopoManager tmgr
Definition: WorkDistrib.C:1758
float Real
Definition: common.h:118
void enqueueExcls(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3098
#define DebugM(x, y)
Definition: Debug.h:75
void enqueueBonds(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3104
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
#define ALLBUTME
Definition: Communicate.h:14
BigReal z
Definition: Vector.h:74
void enqueueAniso(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3134
int packSize(void)
Definition: PatchMap.C:314
void enqueueSelfB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3188
#define FALSE
Definition: common.h:127
Position position
Definition: NamdTypes.h:78
void enqueueWorkB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3220
static void messageEnqueueWork(Compute *)
Definition: WorkDistrib.C:2866
static void peOrderingReady()
Definition: WorkDistrib.C:173
std::ostream & iWARN(std::ostream &s)
Definition: InfoStream.C:82
int operator==(const nodesort &o) const
Definition: WorkDistrib.C:1625
MIStream * get(char &data)
Definition: MStream.h:29
int index_a(int pid) const
Definition: PatchMap.h:86
#define iout
Definition: InfoStream.h:51
int sizeGrid(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:62
Velocity velocity
Definition: NamdTypes.h:211
ComputeID storeCompute(int node, int maxPids, ComputeType type, int partition=-1, int numPartitions=0)
Definition: ComputeMap.C:151
int num_atoms(void)
Definition: PDB.C:323
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
void enqueueSelfA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3182
uint32 groupFixed
Definition: NamdTypes.h:163
int add(const Elem &elem)
Definition: ResizeArray.h:101
bool operator()(int pe1, int pe2) const
Definition: WorkDistrib.C:1981
Bool pairInteractionOn
Molecule stores the structural information for the system.
Definition: Molecule.h:174
NAMD_HOST_DEVICE int b_p() const
Definition: Lattice.h:290
void movePatch(PatchID, NodeID)
Definition: PatchMgr.C:84
int getGBISPhase(void)
Definition: Compute.h:66
LocalWorkMsg *const localWorkMsg
Definition: Compute.h:46
void patchMapInit(void)
Definition: WorkDistrib.C:1238
void recvComputeMapChanges(ComputeMapChangeMsg *)
Definition: WorkDistrib.C:377
int allocateCids()
Definition: ComputeMap.C:141
int atomsInGroup
Definition: Hydrogen.h:19
WaterModel watmodel
#define MACHINE_PROGRESS
int gridsize_c(void) const
Definition: PatchMap.h:66
uint32 id
Definition: NamdTypes.h:160
Compute * compute
Definition: WorkDistrib.h:33
char * patchMapData
Definition: WorkDistrib.C:1108
char newNumPartitions(ComputeID cid)
Definition: ComputeMap.h:143
void unpack(char *buf)
Definition: PatchMap.C:365
Charge charge
Definition: NamdTypes.h:79
virtual void doWork()
Definition: Compute.C:120
void reorder(Elem *a, int n)
Definition: Random.h:234
HydrogenGroup hydrogenGroup
Definition: Molecule.h:676
void enqueueCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3244
void sendComputeMap(void)
Definition: WorkDistrib.C:1211
void enqueueWorkB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3225
void read_binary_file(const char *fname, Vector *data, int n)
Definition: NamdOneTools.C:52
int numNodes()
Definition: Node.h:192
void enqueueCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3247
void assignBaseNode(PatchID, NodeID)
Definition: PatchMap.C:472
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:275
void newCid(int pid, int cid)
Definition: PatchMap.C:512
constexpr int getWaterModelGroupSize(const WaterModel &watmodel)
Definition: common.h:228
void enqueueSelfB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3198
int coord(int pe, int dim)
Definition: WorkDistrib.C:1972
int gridsize_a(void) const
Definition: PatchMap.h:64
#define order
Definition: PmeRealSpace.C:235
Definition: Random.h:37
int numPatches(void) const
Definition: PatchMap.h:59
static NAMD_HOST_DEVICE int offset_b(int i)
Definition: Lattice.h:264
void enqueueWorkC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3238
pe_sortop_bit_reversed(int *r)
Definition: WorkDistrib.C:147
void reinitAtoms(const char *basename=0)
Definition: WorkDistrib.C:1085
int operator<(const nodesort &o) const
Definition: WorkDistrib.C:1634
void enqueueThole(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3128
void enqueueWorkA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3209
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:137
void createHomePatches(void)
Definition: WorkDistrib.C:989
Compute * compute
Definition: WorkDistrib.h:27
void NAMD_bug(const char *err_msg)
Definition: common.C:195
static NAMD_HOST_DEVICE int offset_c(int i)
Definition: Lattice.h:265
void enqueueImpropers(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3122
BigReal min_c(int pid) const
Definition: PatchMap.h:95
static int eventMachineProgress
Definition: WorkDistrib.C:103
int32 migrationGroupSize
Definition: NamdTypes.h:230
AtomID atomID
Definition: Hydrogen.h:16
Real langevin_param(int atomnum) const
Definition: Molecule.h:1388
Index atomvdwtype(int anum) const
Definition: Molecule.h:1134
BigReal langevinDamping
int numaway_c(void) const
Definition: PatchMap.h:70
void enqueueLCPO(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3167
int16 vdwType
Definition: NamdTypes.h:80
void sendMovePatches()
Definition: PatchMgr.C:110
void pack(MOStream *msg)
Definition: ComputeMap.C:61
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
Definition: PatchMap.C:579
int index_b(int pid) const
Definition: PatchMap.h:87
Bool staticAtomAssignment
pe_sortop_coord_y(ScaledPosition *s)
Definition: WorkDistrib.C:269
int Bool
Definition: common.h:142
Bool replicaUniformPatchGrids
bool operator()(int a, int b) const
Definition: WorkDistrib.C:262
#define COMPUTEMAPTAG
Definition: common.h:184
int priority(void)
Definition: Compute.h:65
void finishCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3258
uint8 partition
Definition: NamdTypes.h:81
~WorkDistrib(void)
Definition: WorkDistrib.C:124
BigReal x
Definition: Vector.h:74
uint8 hydrogenGroupSize
Definition: NamdTypes.h:89
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:104
int numaway_a(void) const
Definition: PatchMap.h:68
NAMD_HOST_DEVICE int a_p() const
Definition: Lattice.h:289
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285
int numAtoms
Definition: Molecule.h:586
void setNewNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:122
virtual void finishPatch(int)
Definition: Compute.C:124
NAMD_HOST_DEVICE Position nearest(Position data, ScaledPosition ref) const
Definition: Lattice.h:95
void NAMD_die(const char *err_msg)
Definition: common.C:147
PDB * pdb
Definition: Node.h:183
void enqueueCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3250
static int * peDiffuseOrderingIndex
Definition: WorkDistrib.h:117
BigReal min_a(int pid) const
Definition: PatchMap.h:91
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286
Real atommass(int anum) const
Definition: Molecule.h:1114
static int compare_bit_reversed(int a, int b)
Definition: WorkDistrib.C:127
ConfigList * configList
Definition: Node.h:182
void enqueueWorkA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3204
#define BUFSIZE
Definition: Communicate.h:15
Bool pressureProfileEwaldOn
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:74
static int * peDiffuseOrdering
Definition: WorkDistrib.h:116
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:171
int basenode(int pid) const
Definition: PatchMap.h:117
int index_c(int pid) const
Definition: PatchMap.h:88
unsigned char get_fep_type(int anum) const
Definition: Molecule.h:1437
static int peOrderingInit
Definition: WorkDistrib.h:115
void sendPatchMap(void)
Definition: WorkDistrib.C:1111
void find_extremes(const Lattice &, BigReal frac=1.0)
Definition: PDB.C:437
void saveComputeMapChanges(int, CkGroupID)
Definition: WorkDistrib.C:359
int32 status
Atom status bit fields defined in structures.h.
Definition: NamdTypes.h:227
void finishCUDAPatch(FinishWorkMsg *msg)
Definition: WorkDistrib.C:3254
void savePatchMap(PatchMapMsg *msg)
Definition: WorkDistrib.C:1147
void cuda_initialize()
Definition: DeviceCUDA.C:27
void topo_getargs(char **argv)
Definition: WorkDistrib.C:93
static int * peCompactOrderingIndex
Definition: WorkDistrib.h:119
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:183
uint8 nonbondedGroupSize
Definition: NamdTypes.h:82
patch_sortop_curve_a(PatchMap *m)
Definition: WorkDistrib.C:2048
Atom * getAtoms() const
Definition: Molecule.h:520
int myid()
Definition: Node.h:191
BigReal initialTemp
Real rigidBondLength
Definition: NamdTypes.h:231
int pressureProfileAtomTypes
#define simParams
Definition: Output.C:131
int atomsInMigrationGroup
Definition: Hydrogen.h:29
static int randtopo
Definition: WorkDistrib.C:87
void newPid(ComputeID cid, int pid, int trans=13)
Definition: ComputeMap.C:196
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3545
ScaledPosition * spos
Definition: WorkDistrib.C:260
static NAMD_HOST_DEVICE int offset_a(int i)
Definition: Lattice.h:263
iterator begin(void)
Definition: ResizeArray.h:36
BigReal max_b(int pid) const
Definition: PatchMap.h:94
void mapComputes(void)
Definition: WorkDistrib.C:2407
char * data
Definition: ConfigList.h:48
void enqueueSelfA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3177
static ComputeMap * Object()
Definition: ComputeMap.h:91
uint8 isWater
Definition: NamdTypes.h:90
static void build_ordering(void *)
Definition: WorkDistrib.C:89
BigReal y
Definition: Vector.h:74
int numaway_b(void) const
Definition: PatchMap.h:69
Mass mass
Definition: NamdTypes.h:218
void distributeHomePatches(void)
Definition: WorkDistrib.C:1063
void assignNode(PatchID, NodeID)
Definition: PatchMap.C:465
patch_sortop_curve_c(PatchMap *m)
Definition: WorkDistrib.C:2090
void mic_initialize()
#define PDBVELFACTOR
Definition: common.h:57
Bool pairInteractionSelf
BigReal max_c(int pid) const
Definition: PatchMap.h:96
int type()
Definition: Compute.h:48
Bool is_drude(int) const
void enqueueSelfB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3193
BigReal patchDimension
int gridsize_b(void) const
Definition: PatchMap.h:65
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
void initHostDeviceLDB()
Definition: WorkDistrib.C:3526
MOStream * put(char data)
Definition: MStream.h:112
static void send_initHostDeviceLDB()
Definition: WorkDistrib.C:3519
FullAtomList * createAtomLists(const char *basename=0)
Definition: WorkDistrib.C:654
#define LonepairAtom
Definition: structures.h:22
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
pe_sortop_coord_x(ScaledPosition *s)
Definition: WorkDistrib.C:261
void enqueueDihedrals(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3116
int node(int pid) const
Definition: PatchMap.h:114
Bool is_atom_fixed(int atomnum) const
Definition: Molecule.h:1504
void finishMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3271
StringList * find(const char *name) const
Definition: ConfigList.C:341
void contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3539
void pack(char *buf, int size)
Definition: PatchMap.C:328
#define RIGID_NONE
Definition: SimParameters.h:80
int isOutputProcessor(int pe)
uint32 atomFixed
Definition: NamdTypes.h:162
void get_vdw_params(Real *sigma, Real *epsilon, Real *sigma14, Real *epsilon14, Index index)
Definition: Parameters.h:568
void doneSaveComputeMap(CkReductionMsg *)
Definition: WorkDistrib.C:430
void unpack(MIStream *msg)
Definition: ComputeMap.C:68
Real rigid_bond_length(int atomnum) const
Definition: Molecule.h:1550
double recipMass
Definition: NamdTypes.h:213
int64_t int64
Definition: common.h:39
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
void get_all_positions(Vector *)
Definition: PDB.C:365
BigReal min_b(int pid) const
Definition: PatchMap.h:93
int32 PatchID
Definition: NamdTypes.h:287
pe_sortop_topo(TopoManagerWrapper &t, int *d)
Definition: WorkDistrib.C:1980
WaterModel
Definition: common.h:221
Molecule * molecule
Definition: Node.h:179
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1958
void enqueueWorkB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3230
#define TRUE
Definition: common.h:128
const ComputeID cid
Definition: Compute.h:43
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
bool operator()(int a, int b) const
Definition: WorkDistrib.C:270
Bool noPatchesOnOutputPEs
int fixpe(int pe)
Definition: WorkDistrib.C:1762
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1993
ScaledPosition * spos
Definition: WorkDistrib.C:268
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
Definition: SortAtoms.C:135
double BigReal
Definition: common.h:123
Transform transform
Definition: NamdTypes.h:229
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:2091
#define PDBVELINVFACTOR
Definition: common.h:58
void assignNodeToPatch(void)
Definition: WorkDistrib.C:1456
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
Definition: PatchMap.C:634
NodeID newNode(ComputeID cid)
Definition: ComputeMap.h:118