NAMD
WorkDistrib.C
Go to the documentation of this file.
1 
7 /*****************************************************************************
8  * $Source: /home/cvs/namd/cvsroot/namd2/src/WorkDistrib.C,v $
9  * $Author: jim $
10  * $Date: 2017/03/30 20:06:17 $
11  * $Revision: 1.1291 $
12  *****************************************************************************/
13 
20 #include <stdio.h>
21 
22 #include "InfoStream.h"
23 #include "Communicate.h"
24 #include "ProcessorPrivate.h"
25 #include "BOCgroup.h"
26 #include "WorkDistrib.decl.h"
27 #include "WorkDistrib.h"
28 #include "Lattice.h"
29 #include "ComputeMsmMsa.h" // needed for MsmMsaData definition
30 #include "main.decl.h"
31 #include "main.h"
32 #include "Node.h"
33 #include "PatchMgr.h"
34 #include "PatchMap.inl"
35 #include "NamdTypes.h"
36 #include "PDB.h"
37 #include "SimParameters.h"
38 #include "Molecule.h"
39 #include "NamdOneTools.h"
40 #include "Compute.h"
41 #include "ComputeMap.h"
42 #include "RecBisection.h"
43 #include "Random.h"
44 #include "varsizemsg.h"
45 #include "ProxyMgr.h"
46 #include "Priorities.h"
47 #include "SortAtoms.h"
48 #include <algorithm>
49 #include "TopoManager.h"
50 #include "ComputePmeCUDAMgr.h"
51 
52 #include "DeviceCUDA.h"
53 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
54 #ifdef WIN32
55 #define __thread __declspec(thread)
56 #endif
57 extern __thread DeviceCUDA *deviceCUDA;
58 #endif
59 
60 //#define DEBUGM
61 #define MIN_DEBUG_LEVEL 2
62 #include "Debug.h"
63 #ifdef MEM_OPT_VERSION
64 extern int isOutputProcessor(int);
65 #endif
// Charm++ varsize message broadcast during load balancing to distribute
// compute-map changes to all PEs.
// NOTE(review): the extraction lost original lines 70-73; newNodes is
// presumably accompanied by a per-compute partition-count array used by
// recvComputeMapChanges() -- confirm against the repository copy.
66 class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
67 {
68 public:
69 
72  int *newNodes;
74 
75 // VARSIZE_DECL(ComputeMapChangeMsg);
76 };
77 
78 /*
79 VARSIZE_MSG(ComputeMapChangeMsg,
80  VARSIZE_ARRAY(newNodes);
81 )
82 */
83 
84 static int randtopo;
85 
// Callback fired once Charm++ topology information becomes available
// (registered in topo_getargs below).
// NOTE(review): the body (original line 87) was lost in extraction; it
// presumably invokes WorkDistrib::buildNodeAwarePeOrdering() -- confirm.
86 static void build_ordering(void *) {
88 }
89 
// Parse topology-related command-line flags and schedule build_ordering()
// to run once the Charm++ topology layer reports it is available.
90 void topo_getargs(char **argv) {
// +randtopo requests randomized physical-node ordering (see
// buildNodeAwarePeOrdering below).
91  randtopo = CmiGetArgFlag(argv, "+randtopo");
// Skip non-worker (e.g. communication-thread) PEs, which have
// CkMyPe() >= CkNumPes().
92  if ( CkMyPe() >= CkNumPes() ) return;
// Newer Charm++ versions use the CcdCondFn signature for condition
// callbacks; older ones use CcdVoidFn.
93 #if CCD_COND_FN_EXISTS
94  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
95 #else
96  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
97 #endif
98 }
99 
101 
102 //======================================================================
103 // Public functions
104 //----------------------------------------------------------------------
// WorkDistrib group constructor: registers this group in the per-processor
// BOC table and clears the map-arrival flags.
// NOTE(review): the signature line (original line 105) was lost in
// extraction; this is the body of WorkDistrib::WorkDistrib() -- confirm.
106 {
107  CkpvAccess(BOCclass_group).workDistrib = thisgroup;
108  patchMapArrived = false;
109  computeMapArrived = false;
110 
// In non-SMP builds MACHINE_PROGRESS must manually pump the network;
// in SMP builds the comm thread does it, so the macro is empty.
111 #if CMK_SMP
112 #define MACHINE_PROGRESS
113 #else
114 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
115  if ( CkMyNodeSize() > 1 ) NAMD_bug("CkMyNodeSize() > 1 for non-smp build");
116  eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
117 #endif
118 }
119 
120 //----------------------------------------------------------------------
122 { }
123 
// Three-way comparison of a and b under bit-reversed ordering: the sign of
// the result is determined by which operand has the lowest-order differing
// bit set.  Returns 0 when a == b.
static int compare_bit_reversed(int a, int b) {
  int diff = a ^ b;
  // Walk the mask upward to the least-significant bit where a and b differ.
  int mask = 1;
  while ( diff && ! (diff & mask) ) mask <<= 1;
  // Exactly one operand owns that bit (or both terms are equal when a == b),
  // so the subtraction yields the comparison sign directly.
  return (a & mask) - (b & mask);
}
132 
// Strict "less than" under bit-reversed ordering: true exactly when a and b
// differ and b (not a) has their lowest-order differing bit set.
static bool less_than_bit_reversed(int a, int b) {
  int diff = a ^ b;
  if ( ! diff ) return false;  // equal values are never less-than
  // Locate the least-significant differing bit.
  int mask = 1;
  while ( ! (diff & mask) ) mask <<= 1;
  return (b & mask) != 0;
}
141 
// Comparison operator ordering PEs: first by bit-reversed rank within the
// node, then by bit-reversed node rank within the physical node, finally by
// bit-reversed physical-node ID.
// NOTE(review): the struct header (original lines 142-144, declaring the
// rankInPhysOfNode member) and the start of the second compare call
// (original line 149) were lost in extraction -- confirm against repo.
145  inline bool operator() (int a, int b) const {
146  int c = compare_bit_reversed(CmiRankOf(a),CmiRankOf(b));
147  if ( c < 0 ) return true;
148  if ( c > 0 ) return false;
150  rankInPhysOfNode[CmiNodeOf(a)],rankInPhysOfNode[CmiNodeOf(b)]);
151  if ( c < 0 ) return true;
152  if ( c > 0 ) return false;
153  c = compare_bit_reversed(CmiPhysicalNodeID(a),CmiPhysicalNodeID(b));
154  return ( c < 0 );
155  }
156 };
157 
163 
164 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
165 extern void cuda_initialize();
166 #endif
167 
168 void mic_initialize();
169 
// Device-initialization hook run once the PE ordering has been built:
// initializes CUDA/HIP and/or MIC device state for this process.
// NOTE(review): the signature line (original line 170, presumably
// void WorkDistrib::peOrderingReady()) was lost in extraction -- confirm.
171  //CkPrintf("WorkDistrib::peOrderingReady on %d\n", CkMyPe());
172 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
173  cuda_initialize();
174 #endif
175 #ifdef NAMD_MIC
176  mic_initialize();
177 #endif
178 }
179 
// Build the global node-aware PE orderings (diffuse and compact) used to
// place patches and computes with good physical-node locality.  Runs once
// per process under CmiMemLock; afterwards triggers per-device init via
// peOrderingReady().
// NOTE(review): the signature line (original line 180) was lost in
// extraction; this is the body of
// void WorkDistrib::buildNodeAwarePeOrdering(void) -- confirm.
181 
182  CmiMemLock();
183  if ( ! peOrderingInit ) {
184  //CkPrintf("WorkDistrib::buildNodeAwarePeOrdering on pe %d\n", CkMyPe());
185 
186  const int numPhys = CmiNumPhysicalNodes();
187  const int numNode = CmiNumNodes();
188  const int numPe = CmiNumPes();
189  ResizeArray<int> numNodeInPhys(numPhys);
190  ResizeArray<int> rankInPhysOfNode(numNode);
191 
192  peDiffuseOrdering = new int[numPe];
193  peDiffuseOrderingIndex = new int[numPe];
194  peCompactOrdering = new int[numPe];
195  peCompactOrderingIndex = new int[numPe];
196 
// Compact ordering: PEs grouped by physical node, in node order; also
// record each Charm++ node's rank within its physical node.
197  int k = 0;
198  for ( int ph=0; ph<numPhys; ++ph ) {
199  int *pes, npes;
200  CmiGetPesOnPhysicalNode(ph, &pes, &npes);
201  for ( int i=0; i<npes; ++i, ++k ) {
202  peCompactOrdering[k] = pes[i];
203  }
204  numNodeInPhys[ph] = 0;
205  for ( int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
206  rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
207  numNodeInPhys[ph] += 1;
208  }
209  }
210 
// Optional +randtopo: shuffle physical nodes 2..numPhys-1 with a fixed
// seed (deterministic across processes) and rebuild the compact ordering.
211  if ( randtopo && numPhys > 2 ) {
212  if ( ! CkMyNode() ) {
213  iout << iWARN << "RANDOMIZING PHYSICAL NODE ORDERING\n" << endi;
214  }
215  ResizeArray<int> randPhysOrder(numPhys);
216  for ( int j=0; j<numPhys; ++j ) {
217  randPhysOrder[j] = j;
218  }
219  Random(314159265).reorder(randPhysOrder.begin()+2, numPhys-2);
220  for ( int j=0, k=0; j<numPhys; ++j ) {
221  const int ph = randPhysOrder[j];
222  int *pes, npes;
223  CmiGetPesOnPhysicalNode(ph, &pes, &npes);
224  for ( int i=0; i<npes; ++i, ++k ) {
225  peCompactOrdering[k] = pes[i];
226  }
227  }
228  }
229 
230  for ( int i=0; i<numPe; ++i ) {
231  peDiffuseOrdering[i] = i;
232  }
// NOTE(review): original line 233 (the start of a std::sort call over
// peDiffuseOrdering using pe_sortop_bit_reversed) was lost in extraction.
234  pe_sortop_bit_reversed(rankInPhysOfNode.begin()));
235 
// NOTE(review): original lines 237-238 (filling the two *Index inverse
// arrays from the orderings) were lost in extraction -- confirm.
236  for ( int i=0; i<numPe; ++i ) {
239  }
240 
// Disabled debug dump of the computed orderings (original lines 243-246,
// the CkPrintf arguments, were lost in extraction).
241  if ( 0 && CmiMyNode() == 0 ) for ( int i=0; i<numPe; ++i ) {
242  CkPrintf("order %5d %5d %5d %5d %5d\n", i,
247  }
248 
249  peOrderingInit = 1;
250  }
251  CmiMemUnlock();
252  peOrderingReady();
253 
254 }
255 
// Orders PE indices by the x component of their assigned scaled positions.
// NOTE(review): the struct header (original lines 256-258, declaring the
// spos member, presumably a ScaledPosition*) was lost in extraction.
259  inline bool operator() (int a, int b) const {
260  return ( spos[a].x < spos[b].x );
261  }
262 };
263 
// Orders PE indices by the y component of their assigned scaled positions.
// NOTE(review): the struct header (original lines 264-266, declaring the
// spos member) was lost in extraction.
267  inline bool operator() (int a, int b) const {
268  return ( spos[a].y < spos[b].y );
269  }
270 };
271 
// Recursively bisect a 2-D (x_len by y_len) block of the PME grid, at each
// level sorting the candidate PEs along the longer axis and splitting both
// the grid block and the PE list in half, until each 1x1 cell is assigned
// one PE in result[x*ydim + y].
// NOTE(review): the first signature line (original line 272, presumably
// "static void recursive_bisect_coord(") was lost in extraction; the
// recursive calls below confirm the function name and parameter order.
273  int x_begin, int x_end, int y_begin, int y_end,
274  int *pe_begin, ScaledPosition *coord,
275  int *result, int ydim
276  ) {
277  int x_len = x_end - x_begin;
278  int y_len = y_end - y_begin;
279  if ( x_len == 1 && y_len == 1 ) {
280  // done, now put this pe in the right place
281  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
282  coord[*pe_begin].x, coord[*pe_begin].y);
283  result[x_begin*ydim + y_begin] = *pe_begin;
284  return;
285  }
286  int *pe_end = pe_begin + x_len * y_len;
// Split along the longer dimension so blocks stay close to square.
287  if ( x_len >= y_len ) {
288  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
289  int x_split = x_begin + x_len / 2;
290  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
291  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
292  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
293  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
294  } else {
295  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
296  int y_split = y_begin + y_len / 2;
297  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
298  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
299  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
300  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
301  }
302 }
303 
304 void WorkDistrib::sortPmePes(int *pmepes, int xdim, int ydim) {
305  int numpes = CkNumPes();
306  ResizeArray<int> count(numpes);
307  ResizeArray<ScaledPosition> sumPos(numpes);
308  ResizeArray<ScaledPosition> avgPos(numpes);
309  for ( int i=0; i<numpes; ++i ) {
310  count[i] = 0;
311  sumPos[i] = 0;
312  avgPos[i] = 0;
313  }
314  PatchMap *patchMap = PatchMap::Object();
315  for ( int i=0, npatches=patchMap->numPatches(); i<npatches; ++i ) {
316  int pe = patchMap->node(i);
317  count[pe] += 1;
318  sumPos[pe] += patchMap->center(i);
319  }
320  const int npmepes = xdim*ydim;
321  ResizeArray<int> sortpes(npmepes);
322  for ( int i=0; i<npmepes; ++i ) {
323  int pe = sortpes[i] = pmepes[i];
324  int cnt = count[pe];
325  ScaledPosition sum = sumPos[pe];
326  if ( cnt == 0 ) {
327  // average over node
328  int node = CkNodeOf(pe);
329  int nsize = CkNodeSize(node);
330  int pe2 = CkNodeFirst(node);
331  for ( int j=0; j<nsize; ++j, ++pe2 ) {
332  cnt += count[pe2];
333  sum += sumPos[pe2];
334  }
335  }
336  if ( cnt == 0 ) {
337  // average over physical node
338  int node = CmiPhysicalNodeID(pe);
339  int nsize, *nlist;
340  CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
341  for ( int j=0; j<nsize; ++j ) {
342  int pe2 = nlist[j];
343  cnt += count[pe2];
344  sum += sumPos[pe2];
345  }
346  }
347  if ( cnt ) {
348  avgPos[pe] = sum / cnt;
349  }
350  }
351  recursive_bisect_coord(0, xdim, 0, ydim, sortpes.begin(), avgPos.begin(), pmepes, ydim);
352 }
353 
354 
355 //----------------------------------------------------------------------
//----------------------------------------------------------------------
// Kick off distribution of the updated compute map after load balancing.
// Records where to send the completion message (ep/chareID are replayed in
// doneSaveComputeMap) and broadcasts a ComputeMapChangeMsg to the whole
// WorkDistrib group, which responds in recvComputeMapChanges.
void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
357 {
358  saveComputeMapReturnEP = ep;
359  saveComputeMapReturnChareID = chareID;
360 
361  ComputeMapChangeMsg *mapMsg = new (0, 0, 0) ComputeMapChangeMsg;
362  CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
363 
364 /*
365  // store the latest compute map
366  SimParameters *simParams = Node::Object()->simParameters;
367  if (simParams->storeComputeMap) {
368  computeMap->saveComputeMap(simParams->computeMapFilename);
369  CkPrintf("ComputeMap has been stored in %s.\n", simParams->computeMapFilename);
370  }
371 */
372 }
373 
// Distribute the new compute map: PE 0 serializes newNode and
// newNumPartitions for every compute and streams them to all other nodes;
// rank-0 PEs of the other nodes receive and apply them.  All PEs then join
// a reduction that ends in doneSaveComputeMap.
// NOTE(review): the signature line (original line 374, presumably
// void WorkDistrib::recvComputeMapChanges(ComputeMapChangeMsg *msg)) was
// lost in extraction -- confirm.  Note also that the local MOStream/MIStream
// variables deliberately reuse the name "msg" after the incoming message is
// deleted.
375 
376  delete msg;
377 
378  ComputeMap *computeMap = ComputeMap::Object();
379 
380  int i;
381  int nc = computeMap->numComputes();
382 
383  if ( ! CkMyPe() ) { // send
384  // CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
385  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
// The compute count is written before each array and once at the end so
// receivers can sanity-check stream framing (checks 1-3 below).
386  msg->put(nc);
387  for (i=0; i<nc; i++) {
388  int data = computeMap->newNode(i);
389  msg->put(data);
390  }
391  msg->put(nc);
392  for (i=0; i<nc; i++) {
393  char data = computeMap->newNumPartitions(i);
394  msg->put(data);
395  }
396  msg->put(nc);
397  msg->end();
398  delete msg;
399  // CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
400  } else if ( ! CkMyRank() ) { // receive
401  // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
402  MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
403  msg->get(i);
404  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 1 failed\n");
405  for (i=0; i<nc; i++) {
406  int data;
407  msg->get(data);
408  computeMap->setNewNode(i,data);
409  }
410  msg->get(i);
411  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 2 failed\n");
412  for (i=0; i<nc; i++) {
413  char data;
414  msg->get(data);
415  computeMap->setNewNumPartitions(i,data);
416  }
417  msg->get(i);
418  if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 3 failed\n");
419  delete msg;
420  // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
421  }
422 
// Barrier across the group before reporting completion to the caller.
423  CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
424  contribute(0, NULL, CkReduction::random, cb);
425 }
426 
// Reduction target reached after all PEs have applied the new compute map;
// notifies the chare recorded by saveComputeMapChanges that the update is
// complete (sends an empty message to the saved entry point).
427 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
428  delete msg;
429 
430  CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
431 }
432 
433 #ifdef MEM_OPT_VERSION
434 //All basic info already exists for each atom inside the FullAtomList because
435 //it is loaded when reading the binary per-atom file. This function will fill
436 //the info regarding transform, nonbondedGroupSize etc. Refer to
437 //WorkDistrib::createAtomLists
// MEM_OPT_VERSION path: atoms already carry their basic per-atom data from
// the binary per-atom file; this fills in derived fields (periodic-image
// transform, partition, langevinParam) and the per-group fixed flags for
// one patch.  See WorkDistrib::createAtomLists for the full-featured path.
// NOTE(review): original line 458 (the declaration of "params", presumably
// SimParameters* from Node::Object()->simParameters) was lost in
// extraction -- confirm.
438 void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
439  PatchMap *patchMap = PatchMap::Object();
440 
// Geometric center of the patch, used to pick each group's nearest
// periodic image.
441  ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
442  0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
443  0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));
444 
445  int n = alist.size();
446  FullAtom *a = alist.begin();
447 /*
448  //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei
449 //Modifications for alchemical fep
450  Bool alchFepOn = params->alchFepOn;
451  Bool alchThermIntOn = params->alchThermIntOn;
452 //fepe
453  Bool lesOn = params->lesOn;
454  Bool pairInteractionOn = params->pairInteractionOn;
455 
456  Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
457 */
459  const Lattice lattice = params->lattice;
460  Transform mother_transform;
461  for(int j=0; j < n; j++)
462  {
463  int aid = a[j].id;
464  a[j].nonbondedGroupSize = 0; // must be set based on coordinates
465 
466  // a[j].fixedPosition = a[j].position; ParallelIOMgr stores ref coord here.
467 
// Migration-group parents choose the periodic image nearest the patch
// center (averaged over the group when it spans several hydrogen
// groups); children reuse the parent's transform.
468  if ( a[j].migrationGroupSize ) {
469  if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
470  Position pos = a[j].position;
471  int mgs = a[j].migrationGroupSize;
472  int c = 1;
473 
474  for ( int k=a[j].hydrogenGroupSize; k<mgs;
475  k+=a[j+k].hydrogenGroupSize ) {
476  pos += a[j+k].position;
477  ++c;
478  }
479 
480  pos *= 1./c;
481  mother_transform = a[j].transform; // should be 0,0,0
482  pos = lattice.nearest(pos,center,&mother_transform);
483  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
484  a[j].transform = mother_transform;
485  } else {
486  a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
487  mother_transform = a[j].transform;
488  }
489  } else {
490  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
491  a[j].transform = mother_transform;
492  }
493 
494 /*
495  //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei
496 //Modifications for alchemical fep
497  if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
498  a[j].partition = molecule->get_fep_type(aid);
499  }
500  else {
501  a[j].partition = 0;
502  }
503 //fepe
504 */
505  a[j].partition = 0;
506 
507  //set langevinParams based on atom status
508  if(params->langevinOn) {
509  BigReal bval = params->langevinDamping;
// Hydrogens (when langevinHydrogen is off) and lone pairs get zero
// damping; Drude particles keep the standard damping value.
510  if(!params->langevinHydrogen &&
511  ((a[j].status & HydrogenAtom)!=0)) {
512  bval = 0;
513  }else if ((a[j].status & LonepairAtom)!=0) {
514  bval = 0;
515  }else if ((a[j].status & DrudeAtom)!=0) {
516  bval = params->langevinDamping;
517  }
518  a[j].langevinParam = bval;
519  }
520 
521  }
522 
// A hydrogen group is "groupFixed" only when every atom in it is fixed.
523  int size, allfixed;
524  for(int j=0; j < n; j+=size) {
525  size = a[j].hydrogenGroupSize;
526  if ( ! size ) {
527  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
528  }
529  allfixed = 1;
530  for (int k = 0; k < size; ++k ) {
531  allfixed = ( allfixed && (a[j+k].atomFixed) );
532  }
533  for (int k = 0; k < size; ++k ) {
534  a[j+k].groupFixed = allfixed ? 1 : 0;
535  }
536  }
537 
538  if ( params->outputPatchDetails ) {
539  int patchId = pid;
540  int numAtomsInPatch = n;
541  int numFixedAtomsInPatch = 0;
542  int numAtomsInFixedGroupsInPatch = 0;
543  for(int j=0; j < n; j++) {
544  numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
545  numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
546  }
547  iout << "PATCH_DETAILS:"
548  << " on proc " << CkMyPe()
549  << " patch " << patchId
550  << " atoms " << numAtomsInPatch
551  << " fixed_atoms " << numFixedAtomsInPatch
552  << " fixed_groups " << numAtomsInFixedGroupsInPatch
553  << "\n" << endi;
554  }
555 
556 }
557 
// Assign Maxwell-Boltzmann velocities at temperature Temp to every atom in
// inAtoms (MEM_OPT_VERSION input path).  Each Cartesian component is drawn
// as a sum of 12 uniforms minus 6 (approximately standard normal) scaled by
// sqrt(kB*T/m); massless particles (lone pairs etc.) get zero velocity.
// NOTE(review): original line 564 (the declaration of "simParams",
// presumably SimParameters* from Node::Object()->simParameters) was lost in
// extraction -- confirm.
558 void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
559 {
560  int i, j; // Loop counter
561  BigReal kbT; // Boltzman constant * Temp
562  BigReal randnum; // Random number from -6.0 to 6.0
563  BigReal kbToverM; // sqrt(Kb*Temp/Mass)
565  Bool lesOn = simParams->lesOn;
566  Random vel_random(simParams->randomSeed);
567  int lesReduceTemp = lesOn && simParams->lesReduceTemp;
568  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
569 
570  kbT = Temp*BOLTZMANN;
571  int count=0;
572  int totalAtoms = inAtoms.size();
573  for(i=0;i<totalAtoms;i++)
574  {
575  Real atomMs=inAtoms[i].mass;
576 
577  if (atomMs <= 0.) {
578  kbToverM = 0.;
579  } else {
580  /*
581  * lesOn is not supported in MEM_OPT_VERSION, so the original assignment
582  * is simplified. --Chao Mei
583  */
584  //kbToverM = sqrt(kbT *
585  //( lesOn && structure->get_fep_type(aid) ? tempFactor : 1.0 ) /
586  // atomMs );
587  kbToverM = sqrt(kbT * 1.0 / atomMs);
588  }
// Sum of 12 U(0,1) deviates minus 6 approximates N(0,1) (Irwin-Hall).
589  for (randnum=0.0, j=0; j<12; j++)
590  {
591  randnum += vel_random.uniform();
592  }
593 
594  randnum -= 6.0;
595 
596  inAtoms[i].velocity.x = randnum*kbToverM;
597 
598  for (randnum=0.0, j=0; j<12; j++)
599  {
600  randnum += vel_random.uniform();
601  }
602 
603  randnum -= 6.0;
604 
605  inAtoms[i].velocity.y = randnum*kbToverM;
606 
607  for (randnum=0.0, j=0; j<12; j++)
608  {
609  randnum += vel_random.uniform();
610  }
611 
612  randnum -= 6.0;
613 
614  inAtoms[i].velocity.z = randnum*kbToverM;
615  }
616 }
617 #endif
618 
619 //----------------------------------------------------------------------
620 // This should only be called on node 0.
621 //----------------------------------------------------------------------
// Build one FullAtomList per patch from the PDB/restart data: reads (or
// randomly generates) positions and velocities, assigns whole hydrogen /
// migration groups to patches, then fills per-atom derived fields and the
// per-group fixed flags.  Runs on node 0 only.
// NOTE(review): the signature line (original line 622, presumably
// FullAtomList *WorkDistrib::createAtomLists(const char *basename)) and the
// allocation of the per-patch array "atoms" (original line 685) were lost
// in extraction -- confirm against the repository copy.
623 {
624  int i;
625  StringList *current; // Pointer used to retrieve configuration items
626  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
627  Node *node = nd.ckLocalBranch();
628  PatchMap *patchMap = PatchMap::Object();
629  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
630  PatchMgr *patchMgr = pm.ckLocalBranch();
631  SimParameters *params = node->simParameters;
632  Molecule *molecule = node->molecule;
633  PDB *pdb = node->pdb;
634 
635  int numPatches = patchMap->numPatches();
636  int numAtoms = pdb->num_atoms();
637 
638  Vector *positions = new Position[numAtoms];
639  Vector *velocities = new Velocity[numAtoms];
640 
// basename != NULL means restart: load <basename>.coor/.vel (binary or
// PDB format per binaryOutput).  Otherwise take positions from the input
// PDB and velocities from config files or a random distribution.
641  if ( basename ) {
642  if ( params->binaryOutput ) {
643  read_binary_file((std::string(basename)+".coor").c_str(), positions, numAtoms);
644  read_binary_file((std::string(basename)+".vel").c_str(), velocities, numAtoms);
645  } else {
646  PDB coorpdb((std::string(basename)+".coor").c_str());
647  if ( coorpdb.num_atoms() != numAtoms ) {
648  NAMD_die("Incorrect atom count in coordinate pdb file");
649  }
650  coorpdb.get_all_positions(positions);
651  velocities_from_PDB((std::string(basename)+".vel").c_str(), velocities, numAtoms);
652  }
653  } else {
654  pdb->get_all_positions(positions);
655 
656  if ( params->initialTemp < 0.0 ) {
657  Bool binvels=FALSE;
658 
659  // Reading the veolcities from a PDB
660  current = node->configList->find("velocities");
661 
662  if (current == NULL) {
663  current = node->configList->find("binvelocities");
664  binvels = TRUE;
665  }
666 
667  if (!binvels) {
668  velocities_from_PDB(current->data, velocities, numAtoms);
669  }
670  else {
671  velocities_from_binfile(current->data, velocities, numAtoms);
672  }
673  }
674  else {
675  // Random velocities for a given temperature
676  random_velocities(params->initialTemp, molecule, velocities, numAtoms);
677  }
678  }
679 
680  // If COMMotion == no, remove center of mass motion
681  if (!(params->comMove)) {
682  remove_com_motion(velocities, molecule, numAtoms);
683  }
684 
686 
687  const Lattice lattice = params->lattice;
688 
// Static assignment: sort migration-group parents spatially and split
// them into contiguous per-patch runs via sortAtomsForPatches.
// NOTE(review): original line 696 (copying the parent's migration-group
// size into the sort record) was lost in extraction -- confirm.
689  if ( params->staticAtomAssignment ) {
690  FullAtomList sortAtoms;
691  for ( i=0; i < numAtoms; i++ ) {
692  HydrogenGroupID &h = molecule->hydrogenGroup[i];
693  if ( ! h.isMP ) continue;
694  FullAtom a;
695  a.id = i;
697  a.position = positions[h.atomID];
698  sortAtoms.add(a);
699  }
700  int *order = new int[sortAtoms.size()];
701  for ( i=0; i < sortAtoms.size(); i++ ) {
702  order[i] = i;
703  }
704  int *breaks = new int[numPatches];
705  sortAtomsForPatches(order,breaks,sortAtoms.begin(),
706  sortAtoms.size(),numAtoms,
707  patchMap->gridsize_c(),
708  patchMap->gridsize_b(),
709  patchMap->gridsize_a());
710 
711  i = 0;
712  for ( int pid = 0; pid < numPatches; ++pid ) {
713  int iend = breaks[pid];
714  for ( ; i<iend; ++i ) {
715  FullAtom &sa = sortAtoms[order[i]];
716  int mgs = sa.migrationGroupSize;
717 /*
718 CkPrintf("patch %d (%d %d %d) has group %d atom %d size %d at %.2f %.2f %.2f\n",
719  pid, patchMap->index_a(pid), patchMap->index_b(pid),
720  patchMap->index_c(pid), order[i], sa.id, mgs,
721  sa.position.x, sa.position.y, sa.position.z);
722 */
// NOTE(review): original line 734 (one more per-atom field assignment,
// presumably migrationGroupSize) was lost in extraction -- confirm.
723  for ( int k=0; k<mgs; ++k ) {
724  HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
725  int aid = h.atomID;
726  FullAtom a;
727  a.id = aid;
728  a.position = positions[aid];
729  a.velocity = velocities[aid];
730  a.vdwType = molecule->atomvdwtype(aid);
731  a.status = molecule->getAtoms()[aid].status;
732  a.langevinParam = molecule->langevin_param(aid);
733  a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
735  if(params->rigidBonds != RIGID_NONE) {
736  a.rigidBondLength = molecule->rigid_bond_length(aid);
737  }else{
738  a.rigidBondLength = 0.0;
739  }
740  atoms[pid].add(a);
741  }
742  }
743 CkPrintf("patch %d (%d %d %d) has %d atoms\n",
744  pid, patchMap->index_a(pid), patchMap->index_b(pid),
745  patchMap->index_c(pid), atoms[pid].size());
746  }
747  delete [] order;
748  delete [] breaks;
749  } else
750  {
751  // split atoms into patches based on migration group and position
752  int aid, pid=0;
753  for(i=0; i < numAtoms; i++)
754  {
755  // Assign atoms to patches without splitting hydrogen groups.
756  // We know that the hydrogenGroup array is sorted with group parents
757  // listed first. Thus, only change the pid if an atom is a group parent.
758  HydrogenGroupID &h = molecule->hydrogenGroup[i];
759  aid = h.atomID;
760  FullAtom a;
761  a.id = aid;
762  a.position = positions[aid];
763  a.velocity = velocities[aid];
764  a.vdwType = molecule->atomvdwtype(aid);
765  a.status = molecule->getAtoms()[aid].status;
766  a.langevinParam = molecule->langevin_param(aid);
767  a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
769  if(params->rigidBonds != RIGID_NONE) {
770  a.rigidBondLength = molecule->rigid_bond_length(aid);
771  }else{
772  a.rigidBondLength = 0.0;
773  }
774  if (h.isMP) {
775  pid = patchMap->assignToPatch(positions[aid],lattice);
776  } // else: don't change pid
777  atoms[pid].add(a);
778  }
779  }
780 
781  delete [] positions;
782  delete [] velocities;
783 
// Second pass: per patch, fix up periodic-image transforms, masses,
// charges, partitions, and per-group fixed flags.
784  for(i=0; i < numPatches; i++)
785  {
786  ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
787  0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
788  0.5*(patchMap->min_c(i)+patchMap->max_c(i)));
789 
790  int n = atoms[i].size();
791  FullAtom *a = atoms[i].begin();
792  int j;
793 //Modifications for alchemical fep
794  Bool alchOn = params->alchOn;
795 //fepe
796  Bool lesOn = params->lesOn;
797 
798  Bool pairInteractionOn = params->pairInteractionOn;
799 
800  Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
801 
802  Transform mother_transform;
803  for(j=0; j < n; j++)
804  {
805  int aid = a[j].id;
806 
807  a[j].nonbondedGroupSize = 0; // must be set based on coordinates
808 
809  a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
810  a[j].fixedPosition = a[j].position;
811 
// Group parents pick the periodic image nearest the patch center;
// children reuse the parent's transform (same scheme as
// fillAtomListForOnePatch above).
812  if ( a[j].migrationGroupSize ) {
813  if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
814  Position pos = a[j].position;
815  int mgs = a[j].migrationGroupSize;
816  int c = 1;
817  for ( int k=a[j].hydrogenGroupSize; k<mgs;
818  k+=a[j+k].hydrogenGroupSize ) {
819  pos += a[j+k].position;
820  ++c;
821  }
822  pos *= 1./c;
823  mother_transform = a[j].transform; // should be 0,0,0
824  pos = lattice.nearest(pos,center,&mother_transform);
825  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
826  a[j].transform = mother_transform;
827  } else {
828  a[j].position = lattice.nearest(
829  a[j].position, center, &(a[j].transform));
830  mother_transform = a[j].transform;
831  }
832  } else {
833  a[j].position = lattice.apply_transform(a[j].position,mother_transform);
834  a[j].transform = mother_transform;
835  }
836 
837  a[j].mass = molecule->atommass(aid);
838  // Using double precision division for reciprocal mass.
839  a[j].recipMass = ( a[j].mass > 0 ? (1. / a[j].mass) : 0 );
840  a[j].charge = molecule->atomcharge(aid);
841 
842 //Modifications for alchemical fep
843  if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
844  a[j].partition = molecule->get_fep_type(aid);
845  }
846  else {
847  a[j].partition = 0;
848  }
849 //fepe
850 
851  }
852 
// A hydrogen group is "groupFixed" only when every atom in it is fixed.
853  int size, allfixed, k;
854  for(j=0; j < n; j+=size) {
855  size = a[j].hydrogenGroupSize;
856  if ( ! size ) {
857  NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
858  }
859  allfixed = 1;
860  for ( k = 0; k < size; ++k ) {
861  allfixed = ( allfixed && (a[j+k].atomFixed) );
862  }
863  for ( k = 0; k < size; ++k ) {
864  a[j+k].groupFixed = allfixed ? 1 : 0;
865  }
866  }
867 
868  if ( params->outputPatchDetails ) {
869  int patchId = i;
870  int numAtomsInPatch = n;
871  int numFixedAtomsInPatch = 0;
872  int numAtomsInFixedGroupsInPatch = 0;
873  for(j=0; j < n; j++) {
874  numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
875  numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
876  }
877  iout << "PATCH_DETAILS:"
878  << " patch " << patchId
879  << " atoms " << numAtomsInPatch
880  << " fixed_atoms " << numFixedAtomsInPatch
881  << " fixed_groups " << numAtomsInFixedGroupsInPatch
882  << "\n" << endi;
883  }
884  }
885 
886  return atoms;
887 
888 }
889 
890 //----------------------------------------------------------------------
891 // This should only be called on node 0.
892 //----------------------------------------------------------------------
// Create all home patches on this node from per-patch atom lists built by
// createAtomLists, reporting the largest patch; takes ownership of and
// frees the atoms array.  Runs on node 0 only.
// NOTE(review): the signature (original line 893) and the call producing
// "atoms" (original line 902, presumably FullAtomList *atoms =
// createAtomLists(...)) were lost in extraction -- confirm.
894 {
895  int i;
896  PatchMap *patchMap = PatchMap::Object();
897  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
898  PatchMgr *patchMgr = pm.ckLocalBranch();
899 
900  int numPatches = patchMap->numPatches();
901 
903 
904 #ifdef MEM_OPT_VERSION
905 /* CProxy_Node nd(CkpvAccess(BOCclass_group).node);
906  Node *node = nd.ckLocalBranch();
907  node->molecule->delEachAtomSigs();
908  node->molecule->delMassChargeSpace();
909 */
910 #endif
911 
// Report the most heavily loaded patch for diagnostics.
912  int maxAtoms = -1;
913  int maxPatch = -1;
914  for(i=0; i < numPatches; i++) {
915  int numAtoms = atoms[i].size();
916  if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
917  }
918  iout << iINFO << "LARGEST PATCH (" << maxPatch <<
919  ") HAS " << maxAtoms << " ATOMS\n" << endi;
920 
921  for(i=0; i < numPatches; i++)
922  {
923  if ( ! ( i % 100 ) )
924  {
925  DebugM(3,"Created " << i << " patches so far.\n");
926  }
927 
928  patchMgr->createHomePatch(i,atoms[i]);
929  }
930 
931  delete [] atoms;
932 }
933 
// Migrate every home patch whose PatchMap assignment differs from the
// current node, then flush the batched moves.
// NOTE(review): the signature line (original line 934, presumably
// void WorkDistrib::distributeHomePatches()) was lost in extraction.
935  // ref BOC
936  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
937  Node *node = nd.ckLocalBranch();
938  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
939  PatchMgr *patchMgr = pm.ckLocalBranch();
940  // ref singleton
941  PatchMap *patchMap = PatchMap::Object();
942 
943  // Move patches to the proper node
944  for(int i=0;i < patchMap->numPatches(); i++)
945  {
946  if (patchMap->node(i) != node->myid() )
947  {
948  DebugM(3,"patchMgr->movePatch("
949  << i << "," << patchMap->node(i) << ")\n");
950  patchMgr->movePatch(i,patchMap->node(i));
951  }
952  }
953  patchMgr->sendMovePatches();
954 }
955 
956 void WorkDistrib::reinitAtoms(const char *basename) {
957 
958  PatchMap *patchMap = PatchMap::Object();
959  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
960  PatchMgr *patchMgr = pm.ckLocalBranch();
961 
962  int numPatches = patchMap->numPatches();
963 
964  FullAtomList *atoms = createAtomLists(basename);
965 
966  for(int i=0; i < numPatches; i++) {
967  patchMgr->sendAtoms(i,atoms[i]);
968  }
969 
970  delete [] atoms;
971 
972 }
973 
974 
975 //----------------------------------------------------------------------
976 
// Charm++ varsize message carrying a packed PatchMap between nodes.
// NOTE(review): the member declaration (original line 979, presumably
// char *patchMapData; -- it is filled via PatchMap::pack in sendPatchMap)
// was lost in extraction -- confirm.
977 class PatchMapMsg : public CMessage_PatchMapMsg {
978  public:
980 };
981 
// Pack the PatchMap on node 0 and start its tree-based distribution by
// sending it to PE 0's savePatchMap; also auto-enables proxy spanning
// trees when the patch count is small relative to the PE count.
// NOTE(review): the signature (original line 982) and the bodies of the
// two spanning-tree conditionals (original lines 999 and 1004, presumably
// setSendSpanningTree/setRecvSpanningTree calls) were lost in extraction
// -- confirm against the repository copy.
983 {
984  if ( CkNumPes() == 1 ) {
985  patchMapArrived = true;
986  return;
987  }
988 
989  //Automatically enable spanning tree
990  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
991  Node *node = nd.ckLocalBranch();
992  SimParameters *params = node->simParameters;
993  if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
994 #ifdef NODEAWARE_PROXY_SPANNINGTREE
995  || CkNumPes() > CkNumNodes()
996  ) && ( CkNumNodes() > 1
997 #endif
998  ) && params->isSendSpanningTreeUnset() )
1000 
1001 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1002  if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1003  && params->isRecvSpanningTreeUnset() )
1005 #endif
1006 
1007  int size = PatchMap::Object()->packSize();
1008 
1009  PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;
1010 
1011  PatchMap::Object()->pack(mapMsg->patchMapData, size);
1012 
1013  CProxy_WorkDistrib workProxy(thisgroup);
1014  workProxy[0].savePatchMap(mapMsg);
1015 }
1016 
1017 // saveMaps() is called when the map message is received
// Receive the packed PatchMap and forward it along a binary tree of nodes
// (plus a rank-to-rank chain within each node) before unpacking locally --
// a broadcast would duplicate the message buffer per recipient on some
// machine layers.
// NOTE(review): the signature (original line 1018, presumably
// void WorkDistrib::savePatchMap(PatchMapMsg *msg)), the unpack call
// (original line 1029), and the spanning-tree setter bodies (original
// lines 1041 and 1046) were lost in extraction -- confirm.
1019 {
1020  // Use a resend to forward messages before processing. Otherwise the
1021  // map distribution is slow on many CPUs. We need to use a tree
1022  // rather than a broadcast because some implementations of broadcast
1023  // generate a copy of the message on the sender for each recipient.
1024  // This is because MPI doesn't allow re-use of an outstanding buffer.
1025 
1026  if ( CkMyRank() ) patchMapArrived = true;
1027 
1028  if ( patchMapArrived && CkMyPe() ) {
1030 
1031  //Automatically enable spanning tree
1032  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1033  Node *node = nd.ckLocalBranch();
1034  SimParameters *params = node->simParameters;
1035  if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
1036 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1037  || CkNumPes() > CkNumNodes()
1038  ) && ( CkNumNodes() > 1
1039 #endif
1040  ) && params->isSendSpanningTreeUnset() )
1042 
1043 #ifdef NODEAWARE_PROXY_SPANNINGTREE
1044  if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
1045  && params->isRecvSpanningTreeUnset() )
1047 #endif
1048  }
1049 
// Already processed on this PE: pass the message down the in-node rank
// chain, or free it at the last rank.
1050  if ( patchMapArrived ) {
1051  if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
1052  ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
1053  } else {
1054  delete msg;
1055  }
1056  return;
1057  }
1058 
1059  patchMapArrived = true;
1060 
// Locate this node's subtree in an implicit binary tree over node ranks,
// then forward to the near and far children (first PE of each subtree).
1061  int self = CkMyNode();
1062  int range_begin = 0;
1063  int range_end = CkNumNodes();
1064  while ( self != range_begin ) {
1065  ++range_begin;
1066  int split = range_begin + ( range_end - range_begin ) / 2;
1067  if ( self < split ) { range_end = split; }
1068  else { range_begin = split; }
1069  }
1070  int send_near = self + 1;
1071  int send_far = send_near + ( range_end - send_near ) / 2;
1072 
1073  int pids[3];
1074  int npid = 0;
1075  if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
1076  if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
1077  pids[npid++] = CkMyPe(); // always send the message to ourselves
1078  CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
1079 }
1080 
1081 
// Distribute the ComputeMap: PE 0 packs and streams it to all other nodes;
// each node's rank-0 PE receives and unpacks it, then marks arrival.
// NOTE(review): the signature (original line 1082, presumably
// void WorkDistrib::sendComputeMap(void)) and original lines 1088/1104
// were lost in extraction -- confirm against the repository copy.
1083 {
// Only one PE per node participates in the exchange.
1084  if ( CkMyRank() ) return;
1085 
1086  if ( CkNumNodes() == 1 ) {
1087  computeMapArrived = true;
1089  return;
1090  }
1091 
1092  if ( ! CkMyPe() ) { // send
1093  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
1094  ComputeMap::Object()->pack(msg);
1095  msg->end();
1096  delete msg;
1097  } else if ( ! CkMyRank() ) { // receive
1098  MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
1099  ComputeMap::Object()->unpack(msg);
1100  delete msg;
1101  }
1102 
1103  computeMapArrived = true;
1105 }
1106 
1107 
1108 //----------------------------------------------------------------------
// NOTE(review): the signature line for this definition (doxygen 1109,
// presumably void WorkDistrib::patchMapInit(void)) was dropped by the
// doc extraction — confirm against upstream WorkDistrib.C.
// Determines the simulation-box extent and chooses the patch grid:
// computes min/max atom coordinates (or uses the full periodic cell),
// applies the twoAway[XYZ] splitting heuristics, then calls
// patchMap->makePatches() to build the grid.
1110 {
1111  PatchMap *patchMap = PatchMap::Object();
1112  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1113  Node *node = nd.ckLocalBranch();
1114  SimParameters *params = node->simParameters;
1115  Lattice lattice = params->lattice;
1116 
1117  BigReal patchSize = params->patchDimension;
1118 
1119 #ifndef MEM_OPT_VERSION
1120  const int totalAtoms = node->pdb->num_atoms();
1121 #else
1122  const int totalAtoms = node->molecule->numAtoms;
1123 #endif
1124 
1125  ScaledPosition xmin, xmax;
1126 
1127  double maxNumPatches = 1.e9; // need to adjust fractional values
1128  if ( params->minAtomsPerPatch > 0 )
1129  maxNumPatches = totalAtoms / params->minAtomsPerPatch;
1130 
1131  DebugM(3,"Mapping patches\n");
 // Fully periodic cell: extent comes from the lattice, not the atoms.
1132  if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
1133  xmin = 0.; xmax = 0.;
1134  }
1135  else if ( params->FMAOn || params->MSMOn || params->FMMOn ) {
1136  // Need to use full box for FMA to match NAMD 1.X results.
1137 #if 0
1138  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
1139  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
1140  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
1141 #endif
1142  node->pdb->find_extremes(lattice);
1143  node->pdb->get_extremes(xmin, xmax);
1144 #if 0
1145  printf("+++ center=%.4f %.4f %.4f\n",
1146  lattice.origin().x, lattice.origin().y, lattice.origin().z);
1147  printf("+++ xmin=%.4f xmax=%.4f\n", xmin.x, xmax.x);
1148  printf("+++ ymin=%.4f ymax=%.4f\n", xmin.y, xmax.y);
1149  printf("+++ zmin=%.4f zmax=%.4f\n", xmin.z, xmax.z);
1150 #endif
1151  // Otherwise, this allows a small number of stray atoms.
1152  }
1153  else {
1154 #if 0
1155  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
1156  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
1157  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
1158 #endif
1159  node->pdb->find_extremes(lattice, 1.0);
1160  node->pdb->get_extremes(xmin, xmax);
1161  iout << iINFO << "ORIGINAL ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
 // Shrink the box to exclude up to ~10000 outlying atoms, but never
 // below 90% of the atoms.
1162  double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
1163  if ( frac < 0.9 ) { frac = 0.9; }
1164  node->pdb->find_extremes(lattice, frac);
1165  node->pdb->get_extremes(xmin, xmax);
1166  iout << iINFO << "ADJUSTED ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
1167  }
1168 
1169 #if 0
1170  BigReal origin_shift;
1171  origin_shift = lattice.a_r() * lattice.origin();
1172  xmin.x -= origin_shift;
1173  xmax.x -= origin_shift;
1174  origin_shift = lattice.b_r() * lattice.origin();
1175  xmin.y -= origin_shift;
1176  xmax.y -= origin_shift;
1177  origin_shift = lattice.c_r() * lattice.origin();
1178  xmin.z -= origin_shift;
1179  xmax.z -= origin_shift;
1180 #endif
1181 
1182  // SimParameters default is -1 for unset
1183  int twoAwayX = params->twoAwayX;
1184  int twoAwayY = params->twoAwayY;
1185  int twoAwayZ = params->twoAwayZ;
1186 
1187  // SASA implementation is not compatible with twoAway patches
1188  if (params->LCPOOn && patchSize < 32.4) {
1189  if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
1190  iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
1191  }
1192  twoAwayX = twoAwayY = twoAwayZ = 0;
1193  }
1194 
1195  // if you think you know what you're doing go right ahead
 // Explicit user twoAway settings override the minAtomsPerPatch cap.
1196  if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
1197  if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
1198  if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
1199  if ( params->maxPatches > 0 ) {
1200  maxNumPatches = params->maxPatches;
1201  iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
1202  maxNumPatches << "\n" << endi;
1203  }
1204 
1205  int numpes = CkNumPes();
1206  SimParameters *simparam = Node::Object()->simParameters;
1207  if(simparam->simulateInitialMapping) {
1208  numpes = simparam->simulatedPEs;
1209  delete [] patchMap->nPatchesOnNode;
1210  patchMap->nPatchesOnNode = new int[numpes];
1211  memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));
1212  }
1213 
1214 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
1215  // for CUDA be sure there are more patches than pes
1216 
 // GPU/MIC path: progressively enable twoAway splitting along each
 // axis (if unset) until there is at least one patch per PE.
1217  int numPatches = patchMap->sizeGrid(
1218  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1219  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1220  if ( numPatches < numpes && twoAwayX < 0 ) {
1221  twoAwayX = 1;
1222  numPatches = patchMap->sizeGrid(
1223  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1224  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1225  }
1226  if ( numPatches < numpes && twoAwayY < 0 ) {
1227  twoAwayY = 1;
1228  numPatches = patchMap->sizeGrid(
1229  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1230  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1231  }
1232  if ( numPatches < numpes && twoAwayZ < 0 ) {
1233  twoAwayZ = 1;
1234  numPatches = patchMap->sizeGrid(
1235  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1236  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1237  }
1238  if ( numPatches < numpes ) {
1239  #if defined(NAMD_MIC)
1240  NAMD_die("MIC-enabled NAMD requires at least one patch per thread.");
1241  #endif
1242  }
 // Near-multiple of PE count: try shrinking to an exact multiple.
1243  if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
1244  int exactFit = numPatches - numPatches % numpes;
1245  int newNumPatches = patchMap->sizeGrid(
1246  xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
1247  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1248  if ( newNumPatches == exactFit ) {
1249  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1250  maxNumPatches = exactFit;
1251  }
1252  }
1253 
1254  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1255  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1256  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1257 
1258 #else
1259 
 // CPU path: patches are optional on PE 0 / PE 1 / output PEs, so
 // compute the number of PEs actually available to host patches.
1260  int availPes = numpes;
1261  if ( params->noPatchesOnZero && numpes > 1 ) {
1262  availPes -= 1;
1263  if(params->noPatchesOnOne && numpes > 2)
1264  availPes -= 1;
1265  }
1266 #ifdef MEM_OPT_VERSION
1267  if(params->noPatchesOnOutputPEs && numpes - params->numoutputprocs >2)
1268  {
1269  availPes -= params->numoutputprocs;
 // Avoid double-counting PEs already excluded above.
1270  if ( params->noPatchesOnZero && numpes > 1 && isOutputProcessor(0)){
1271  availPes++;
1272  }
1273  if ( params->noPatchesOnOne && numpes > 2 && isOutputProcessor(1)){
1274  availPes++;
1275  }
1276  }
1277 #endif
1278 
 // Start with twoAway splitting on wherever unset, then turn axes off
 // one by one while the grid is still too fine for the available PEs.
1279  int numPatches = patchMap->sizeGrid(
1280  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1281  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1282  if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
1283  ) && twoAwayZ < 0 ) {
1284  twoAwayZ = 0;
1285  numPatches = patchMap->sizeGrid(
1286  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1287  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1288  }
1289  if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
1290  ) && twoAwayY < 0 ) {
1291  twoAwayY = 0;
1292  numPatches = patchMap->sizeGrid(
1293  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1294  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1295  }
1296  if ( ( numPatches > availPes || numPatches > maxNumPatches
1297  ) && twoAwayX < 0 ) {
1298  twoAwayX = 0;
1299  numPatches = patchMap->sizeGrid(
1300  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1301  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1302  }
1303  if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
1304  int newNumPatches = patchMap->sizeGrid(
1305  xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
1306  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1307  if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
1308  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1309  maxNumPatches = availPes;
1310  }
1311  }
1312 
1313  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1314  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1315  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1316 
1317 #endif
1318 
1319 }
1320 
1321 
1322 //----------------------------------------------------------------------
// NOTE(review): the signature line for this definition (doxygen 1323,
// presumably void WorkDistrib::assignNodeToPatch()) was dropped by the
// doc extraction — confirm against upstream WorkDistrib.C.
// Assigns every patch to a node (topology-aware bisection on Blue Gene
// when possible, otherwise the space-filling curve), then cross-checks
// that the per-node atom totals sum to the molecule's atom count,
// aborting via NAMD_die on any mismatch.
1324 {
1325  PatchMap *patchMap = PatchMap::Object();
1326  int nNodes = Node::Object()->numNodes();
1327  SimParameters *simparam = Node::Object()->simParameters;
1328  if(simparam->simulateInitialMapping) {
1329  nNodes = simparam->simulatedPEs;
1330  }
1331 
1332 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP
1333  TopoManager tmgr;
1334  int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
1335  if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
1336  CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
1337  }
1338  else
1339 #endif
1340  assignPatchesSpaceFillingCurve();
1341 
 // Tally atoms per node to verify the assignment is complete.
1342  int *nAtoms = new int[nNodes];
1343  int numAtoms=0;
1344  int i;
1345  for(i=0; i < nNodes; i++)
1346  nAtoms[i] = 0;
1347 
1348  for(i=0; i < patchMap->numPatches(); i++)
1349  {
1350  // iout << iINFO << "Patch " << i << " has "
1351  // << patchMap->patch(i)->getNumAtoms() << " atoms and "
1352  // << patchMap->patch(i)->getNumAtoms() *
1353  // patchMap->patch(i)->getNumAtoms()
1354  // << " pairs.\n" << endi;
1355 #ifdef MEM_OPT_VERSION
1356  numAtoms += patchMap->numAtoms(i);
1357  nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);
1358 #else
1359  if (patchMap->patch(i)) {
1360  numAtoms += patchMap->patch(i)->getNumAtoms();
1361  nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();
1362  }
1363 #endif
1364  }
1365 
1366  if ( numAtoms != Node::Object()->molecule->numAtoms ) {
1367  for(i=0; i < nNodes; i++)
1368  iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
1369  iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
1370  NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
1371  }
1372 
1373  delete [] nAtoms;
1374 
1375  // PatchMap::Object()->printPatchMap();
1376 }
1377 
1378 //----------------------------------------------------------------------
1379 // void WorkDistrib::assignPatchesSlices()
1380 // {
1381 // int pid;
1382 // int assignedNode = 0;
1383 // PatchMap *patchMap = PatchMap::Object();
1384 // Node *node = CLocalBranch(Node, CkpvAccess(BOCclass_group).node);
1385 
1386 // int *numAtoms = new int[node->numNodes()];
1387 // for (int i=0; i<node->numNodes(); i++) {
1388 // numAtoms[i] = 0;
1389 // }
1390 
1391 // // Assign patch to node with least atoms assigned.
1392 // for(pid=0; pid < patchMap->numPatches(); pid++) {
1393 // assignedNode = 0;
1394 // for (i=1; i < node->numNodes(); i++) {
1395 // if (numAtoms[i] < numAtoms[assignedNode]) assignedNode = i;
1396 // }
1397 // patchMap->assignNode(pid, assignedNode);
1398 // numAtoms[assignedNode] += patchMap->patch(pid)->getNumAtoms();
1399 
1400 // /*
1401 // iout << iINFO << "Patch (" << pid << ") has "
1402 // << patchMap->patch(pid)->getNumAtoms()
1403 // << " atoms: Assigned to Node(" << assignedNode << ")\n"
1404 // << endi;
1405 // */
1406 // }
1407 
1408 // delete[] numAtoms;
1409 // }
1410 
1411 //----------------------------------------------------------------------
1412 void WorkDistrib::assignPatchesToLowestLoadNode()
1413 {
1414  int pid;
1415  int assignedNode = 0;
1416  PatchMap *patchMap = PatchMap::Object();
1417  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1418  Node *node = nd.ckLocalBranch();
1419  SimParameters *simParams = node->simParameters;
1420  int ncpus = node->numNodes();
1421  if(simParams->simulateInitialMapping) {
1422  ncpus = simParams->simulatedPEs;
1423  }
1424 
1425  int *load = new int[ncpus];
1426  int *assignedNodes = new int[patchMap->numPatches()];
1427  for (int i=0; i<ncpus; i++) {
1428  load[i] = 0;
1429  }
1430  CkPrintf("assignPatchesToLowestLoadNode\n");
1431  int defaultNode = 0;
1432  if ( simParams->noPatchesOnZero && ncpus > 1 ){
1433  defaultNode = 1;
1434  if( simParams->noPatchesOnOne && ncpus > 2)
1435  defaultNode = 2;
1436  }
1437  // Assign patch to node with least atoms assigned.
1438  for(pid=0; pid < patchMap->numPatches(); pid++) {
1439  assignedNode = defaultNode;
1440  for (int i=assignedNode + 1; i < ncpus; i++) {
1441  if (load[i] < load[assignedNode]) assignedNode = i;
1442  }
1443  assignedNodes[pid] = assignedNode;
1444 #ifdef MEM_OPT_VERSION
1445  load[assignedNode] += patchMap->numAtoms(pid) + 1;
1446 #else
1447  load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
1448 #endif
1449  }
1450 
1451  delete[] load;
1452  sortNodesAndAssign(assignedNodes);
1453  delete[] assignedNodes;
1454 }
1455 
1456 //----------------------------------------------------------------------
1457 void WorkDistrib::assignPatchesBitReversal()
1458 {
1459  int pid;
1460  PatchMap *patchMap = PatchMap::Object();
1461  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1462  Node *node = nd.ckLocalBranch();
1463  SimParameters *simparam = node->simParameters;
1464 
1465  int ncpus = node->numNodes();
1466  if(simparam->simulateInitialMapping) {
1467  ncpus = simparam->simulatedPEs;
1468  }
1469  int npatches = patchMap->numPatches();
1470  if ( ncpus <= npatches )
1471  NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");
1472 
1473  SortableResizeArray<int> seq(ncpus);
1474  // avoid using node 0 (reverse of 0 is 0 so start at 1)
1475  for ( int i = 1; i < ncpus; ++i ) {
1476  seq[i-1] = peDiffuseOrdering[i];
1477  }
1478 
1479  // extract and sort patch locations
1480  sortNodesAndAssign(seq.begin());
1481  if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
1482 }
1483 
1484 //----------------------------------------------------------------------
// Sort record for grouping PEs by the spatial centroid of their assigned
// patches: nodes compare by average patch index along a, then b, then c,
// so nodes whose patches are close in space sort next to each other.
struct nodesort {
  int node;      // PE id this record describes
  int a_total;   // sum of assigned patches' grid indices along a
  int b_total;   // sum along b
  int c_total;   // sum along c
  int npatches;  // number of patches assigned; restored here — this member
                 // is initialized by the constructor and read by both
                 // comparison operators but its declaration was missing
                 // from this copy of the file
  nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }
  // Equal when the average (a,b,c) patch indices match exactly.
  // Note: comparisons divide by npatches, so records with zero patches
  // must be filtered out before sorting (as sortNodesAndAssign does).
  int operator==(const nodesort &o) const {
    float a1 = ((float)a_total)/((float)npatches);
    float a2 = ((float)o.a_total)/((float)o.npatches);
    float b1 = ((float)b_total)/((float)npatches);
    float b2 = ((float)o.b_total)/((float)o.npatches);
    float c1 = ((float)c_total)/((float)npatches);
    float c2 = ((float)o.c_total)/((float)o.npatches);
    return ((a1 == a2) && (b1 == b2) && (c1 == c2));
  }
  // Lexicographic order on the average (a,b,c) patch indices.
  int operator<(const nodesort &o) const {
    float a1 = ((float)a_total)/((float)npatches);
    float a2 = ((float)o.a_total)/((float)o.npatches);
    float b1 = ((float)b_total)/((float)npatches);
    float b2 = ((float)o.b_total)/((float)o.npatches);
    float c1 = ((float)c_total)/((float)npatches);
    float c2 = ((float)o.c_total)/((float)o.npatches);
    return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
    ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
  }
};
1512 
// Remaps a raw patch->PE assignment onto the compact PE ordering:
// used PEs are sorted by the spatial centroid of their patches (see
// nodesort) and then paired, in order, with PEs from peCompactOrdering,
// so spatially adjacent patch groups land on nearby PEs.  Finally writes
// the result into the PatchMap (node and/or base node).
1513 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
1514  // if baseNodes is zero (default) then set both nodes and basenodes
1515  // if baseNodes is nonzero then this is a second call to set basenodes only
1516  int i, pid;
1517  PatchMap *patchMap = PatchMap::Object();
1518  int npatches = patchMap->numPatches();
1519  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1520  Node *node = nd.ckLocalBranch();
1521  int nnodes = node->numNodes();
1522  SimParameters *simparam = node->simParameters;
1523  if(simparam->simulateInitialMapping) {
1524  nnodes = simparam->simulatedPEs;
1525  }
1526 
 // Accumulate per-PE patch counts and index sums for centroid sorting.
1527  ResizeArray<nodesort> allnodes(nnodes);
1528  for ( i=0; i < nnodes; ++i ) {
1529  allnodes[i].node = i;
1530  }
1531  for ( pid=0; pid<npatches; ++pid ) {
1532  // iout << pid << " " << assignedNode[pid] << "\n" << endi;
1533  allnodes[assignedNode[pid]].npatches++;
1534  allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
1535  allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
1536  allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
1537  }
 // Sort only the PEs that actually received patches.
1538  SortableResizeArray<nodesort> usednodes(nnodes);
1539  usednodes.resize(0);
1540  for ( i=0; i < nnodes; ++i ) {
1541  if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
1542  }
1543  usednodes.sort();
 // Walk the compact ordering and redirect each used PE's record to the
 // next compactly-ordered PE.  NOTE(review): order-sensitive — the i2++
 // consumes sorted records in step with used entries of the ordering.
1544  int i2 = 0;
1545  for ( i=0; i < nnodes; ++i ) {
1546  int pe = peCompactOrdering[i];
1547  if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
1548  }
1549 
 // Write the remapped assignment back into the PatchMap.
1550  for ( pid=0; pid<npatches; ++pid ) {
1551  // iout << pid << " " << allnodes[assignedNode[pid]].node << "\n" << endi;
1552  if ( ! baseNodes ) {
1553  patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);
1554  }
1555  patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);
1556  }
1557 }
1558 
1559 //----------------------------------------------------------------------
1560 void WorkDistrib::assignPatchesRoundRobin()
1561 {
1562  int pid;
1563  PatchMap *patchMap = PatchMap::Object();
1564  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1565  Node *node = nd.ckLocalBranch();
1566  SimParameters *simparam = node->simParameters;
1567  int ncpus = node->numNodes();
1568  if(simparam->simulateInitialMapping) {
1569  ncpus = simparam->simulatedPEs;
1570  }
1571  int *assignedNode = new int[patchMap->numPatches()];
1572 
1573  for(pid=0; pid < patchMap->numPatches(); pid++) {
1574  assignedNode[pid] = pid % ncpus;
1575  }
1576 
1577  sortNodesAndAssign(assignedNode);
1578  delete [] assignedNode;
1579 }
1580 
1581 //----------------------------------------------------------------------
1582 void WorkDistrib::assignPatchesRecursiveBisection()
1583 {
1584  PatchMap *patchMap = PatchMap::Object();
1585  int *assignedNode = new int[patchMap->numPatches()];
1586  SimParameters *simParams = Node::Object()->simParameters;
1587  int numNodes = Node::Object()->numNodes();
1588  if(simParams->simulateInitialMapping) {
1589  numNodes = simParams->simulatedPEs;
1590  }
1591 
1592  int usedNodes = numNodes;
1593  int unusedNodes = 0;
1594  CkPrintf("assignPatchesRecursiveBisection\n");
1595  if ( simParams->noPatchesOnZero && numNodes > 1 ){
1596  usedNodes -= 1;
1597  if(simParams->noPatchesOnOne && numNodes > 2)
1598  usedNodes -= 1;
1599  }
1600  unusedNodes = numNodes - usedNodes;
1601  RecBisection recBisec(usedNodes,PatchMap::Object());
1602  if ( recBisec.partition(assignedNode) ) {
1603  if ( unusedNodes !=0 ) {
1604  for ( int i=0; i<patchMap->numPatches(); ++i ) {
1605  assignedNode[i] += unusedNodes;
1606  }
1607  }
1608  sortNodesAndAssign(assignedNode);
1609  delete [] assignedNode;
1610  } else {
1611  //free the array here since a same array will be allocated
1612  //in assignPatchesToLowestLoadNode function, thus reducting
1613  //temporary memory usage
1614  delete [] assignedNode;
1615 
1616  iout << iWARN
1617  << "WorkDistrib: Recursive bisection fails, "
1618  << "invoking space-filling curve algorithm\n";
1619  assignPatchesSpaceFillingCurve();
1620  }
1621 }
1622 
1623 // class to re-order dimensions in decreasing size
// NOTE(review): doc-extraction artifact — several lines of this struct are
// missing from this copy: the struct header (doxygen 1624, presumably
// "struct TopoManagerWrapper {"), the member declarations at 1626-1628
// (presumably the a/b/c/d/e _dim, _rot and _mod ints used throughout),
// the constructor opener at 1632, and the nested sort-functor header at
// 1844-1847 (presumably "struct pe_sortop_topo { TopoManagerWrapper &tmgr;"
// plus its constructor).  Confirm against upstream WorkDistrib.C.
1625  TopoManager tmgr;
1629  int fixpe(int pe) { // compensate for lame fallback topology information
1630  return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
1631  }
 // --- constructor body (opening line dropped in this copy) ---
 // Queries the machine torus dimensions, records which coordinates are
 // in use, rotates each torus dimension to pack the used coordinates
 // compactly, then ranks the dimensions by extent (longest first).
1633 #if CMK_BLUEGENEQ
1634  int na=tmgr.getDimNA();
1635  int nb=tmgr.getDimNB();
1636  int nc=tmgr.getDimNC();
1637  int nd=tmgr.getDimND();
1638  int ne=tmgr.getDimNE();
1639 #else
1640  int na=tmgr.getDimNX();
1641  int nb=tmgr.getDimNY();
1642  int nc=tmgr.getDimNZ();
1643  int nd=1;
1644  int ne=1;
1645 #endif
1646  ResizeArray<int> a_flags(na);
1647  ResizeArray<int> b_flags(nb);
1648  ResizeArray<int> c_flags(nc);
1649  ResizeArray<int> d_flags(nd);
1650  ResizeArray<int> e_flags(ne);
1651  for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
1652  for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
1653  for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
1654  for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
1655  for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
1656  int npes = CkNumPes();
1657  for ( int pe=0; pe<npes; ++pe ) {
1658  int a,b,c,d,e,t;
1659 #if CMK_BLUEGENEQ
1660  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1661 #else
1662  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1663  d=0; e=0;
1664 #endif
1665  if ( a < 0 || a >= na ) NAMD_bug("inconsistent torus topology!");
1666  if ( b < 0 || b >= nb ) NAMD_bug("inconsistent torus topology!");
1667  if ( c < 0 || c >= nc ) NAMD_bug("inconsistent torus topology!");
1668  if ( d < 0 || d >= nd ) NAMD_bug("inconsistent torus topology!");
1669  if ( e < 0 || e >= ne ) NAMD_bug("inconsistent torus topology!");
1670  a_flags[a] = 1;
1671  b_flags[b] = 1;
1672  c_flags[c] = 1;
1673  d_flags[d] = 1;
1674  e_flags[e] = 1;
1675  }
1676  iout << iINFO << "TORUS A SIZE " << na << " USING";
1677  for ( int i=0; i<na; ++i ) { if ( a_flags[i] ) iout << " " << i; }
1678  iout << "\n" << endi;
1679  iout << iINFO << "TORUS B SIZE " << nb << " USING";
1680  for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
1681  iout << "\n" << endi;
1682  iout << iINFO << "TORUS C SIZE " << nc << " USING";
1683  for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
1684  iout << "\n" << endi;
1685 #if CMK_BLUEGENEQ
1686  iout << iINFO << "TORUS D SIZE " << nd << " USING";
1687  for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
1688  iout << "\n" << endi;
1689  iout << iINFO << "TORUS E SIZE " << ne << " USING";
1690  for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
1691  iout << "\n" << endi;
1692 #endif
1693  // find most compact representation of our subset
 // For each wrap-around (torus) dimension, find the largest run of
 // unused coordinates and rotate it to the end, so the used ones pack
 // contiguously starting near zero.
1694  a_rot = b_rot = c_rot = d_rot = e_rot = 0;
1695  a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
1696 #if CMK_BLUEGENEQ
1697  if ( tmgr.absA(na) == 0 ) // torus
1698 #else
1699  if ( tmgr.absX(na) == 0 ) // torus
1700 #endif
1701  for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
1702  if ( a_flags[i%na] ) gapstart = i+1;
1703  else if ( i - gapstart >= gaplen ) {
1704  a_rot = 2*na-i-1; gaplen = i - gapstart;
1705  }
1706  }
1707 #if CMK_BLUEGENEQ
1708  if ( tmgr.absB(nb) == 0 ) // torus
1709 #else
1710  if ( tmgr.absY(nb) == 0 ) // torus
1711 #endif
1712  for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
1713  if ( b_flags[i%nb] ) gapstart = i+1;
1714  else if ( i - gapstart >= gaplen ) {
1715  b_rot = 2*nb-i-1; gaplen = i - gapstart;
1716  }
1717  }
1718 #if CMK_BLUEGENEQ
1719  if ( tmgr.absC(nc) == 0 ) // torus
1720 #else
1721  if ( tmgr.absZ(nc) == 0 ) // torus
1722 #endif
1723  for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
1724  if ( c_flags[i%nc] ) gapstart = i+1;
1725  else if ( i - gapstart >= gaplen ) {
1726  c_rot = 2*nc-i-1; gaplen = i - gapstart;
1727  }
1728  }
1729 #if CMK_BLUEGENEQ
1730  if ( tmgr.absD(nd) == 0 ) // torus
1731  for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
1732  if ( d_flags[i%nd] ) gapstart = i+1;
1733  else if ( i - gapstart >= gaplen ) {
1734  d_rot = 2*nd-i-1; gaplen = i - gapstart;
1735  }
1736  }
1737  if ( tmgr.absE(ne) == 0 ) // torus
1738  for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
1739  if ( e_flags[i%ne] ) gapstart = i+1;
1740  else if ( i - gapstart >= gaplen ) {
1741  e_rot = 2*ne-i-1; gaplen = i - gapstart;
1742  }
1743  }
1744 #endif
1745  // order dimensions by length
 // Measure the extent actually spanned by our PEs in each rotated
 // dimension, then assign *_dim ranks (0 = longest) via lensort.
1746  int a_min=na, a_max=-1;
1747  int b_min=nb, b_max=-1;
1748  int c_min=nc, c_max=-1;
1749  int d_min=nd, d_max=-1;
1750  int e_min=ne, e_max=-1;
1751  for ( int pe=0; pe<npes; ++pe ) {
1752  int a,b,c,d,e,t;
1753 #if CMK_BLUEGENEQ
1754  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1755 #else
1756  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1757  d=0; e=0;
1758 #endif
1759  a = (a+a_rot)%a_mod;
1760  b = (b+b_rot)%b_mod;
1761  c = (c+c_rot)%c_mod;
1762  d = (d+d_rot)%d_mod;
1763  e = (e+e_rot)%e_mod;
1764  if ( a < a_min ) a_min = a;
1765  if ( b < b_min ) b_min = b;
1766  if ( c < c_min ) c_min = c;
1767  if ( d < d_min ) d_min = d;
1768  if ( e < e_min ) e_min = e;
1769  if ( a > a_max ) a_max = a;
1770  if ( b > b_max ) b_max = b;
1771  if ( c > c_max ) c_max = c;
1772  if ( d > d_max ) d_max = d;
1773  if ( e > e_max ) e_max = e;
1774  }
1775  int a_len = a_max - a_min + 1;
1776  int b_len = b_max - b_min + 1;
1777  int c_len = c_max - c_min + 1;
1778  int d_len = d_max - d_min + 1;
1779  int e_len = e_max - e_min + 1;
 // Pack (length, dimension-tag) pairs so one std::sort ranks dimensions.
1780  int lensort[5];
1781  lensort[0] = (a_len << 3) + 0;
1782  lensort[1] = (b_len << 3) + 1;
1783  lensort[2] = (c_len << 3) + 2;
1784  lensort[3] = (d_len << 3) + 3;
1785  lensort[4] = (e_len << 3) + 4;
1786  // CkPrintf("TopoManagerWrapper lensort before %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1787  std::sort(lensort, lensort+5);
1788  // CkPrintf("TopoManagerWrapper lensort after %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1789  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) a_dim = 4-i; }
1790  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) b_dim = 4-i; }
1791  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
1792  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) d_dim = 4-i; }
1793  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) e_dim = 4-i; }
1794 #if 0
1795  if ( a_len >= b_len && a_len >= c_len ) {
1796  a_dim = 0;
1797  if ( b_len >= c_len ) {
1798  b_dim = 1; c_dim = 2;
1799  } else {
1800  b_dim = 2; c_dim = 1;
1801  }
1802  } else if ( b_len >= a_len && b_len >= c_len ) {
1803  b_dim = 0;
1804  if ( a_len >= c_len ) {
1805  a_dim = 1; c_dim = 2;
1806  } else {
1807  a_dim = 2; c_dim = 1;
1808  }
1809  } else { // c is longest
1810  c_dim = 0;
1811  if ( a_len >= b_len ) {
1812  a_dim = 1; b_dim = 2;
1813  } else {
1814  a_dim = 2; b_dim = 1;
1815  }
1816  }
1817 #endif
1818  iout << iINFO << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
1819 #if CMK_BLUEGENEQ
1820  << " BY " << d_len << " BY " << e_len
1821 #endif
1822  << "\n" << endi;
1823  // CkPrintf("TopoManagerWrapper dims %d %d %d %d %d\n", a_dim, b_dim, c_dim, d_dim, e_dim);
1824  }
 // Writes the PE's rotated torus coordinates into crds[0..2], placing
 // each machine dimension at its length-rank slot; only the three
 // longest dimensions are reported.
1825  void coords(int pe, int *crds) {
1826  int a,b,c,d,e,t;
1827 #if CMK_BLUEGENEQ
1828  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1829 #else
1830  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1831  d=0; e=0;
1832 #endif
1833  if ( a_dim < 3 ) crds[a_dim] = (a+a_rot)%a_mod;
1834  if ( b_dim < 3 ) crds[b_dim] = (b+b_rot)%b_mod;
1835  if ( c_dim < 3 ) crds[c_dim] = (c+c_rot)%c_mod;
1836  if ( d_dim < 3 ) crds[d_dim] = (d+d_rot)%d_mod;
1837  if ( e_dim < 3 ) crds[e_dim] = (e+e_rot)%e_mod;
1838  }
 // Convenience accessor for a single rotated coordinate.
1839  int coord(int pe, int dim) {
1840  int crds[3];
1841  coords(pe,crds);
1842  return crds[dim];
1843  }
 // --- nested comparison functor (header lines dropped in this copy) ---
 // Orders PEs by their rotated torus coordinates in the axis order given
 // by sortdims, breaking ties with the compact PE ordering index.
1846  const int *sortdims;
1848  bool operator() (int pe1, int pe2) const {
1849  int crds1[3], crds2[3];
1850  tmgr.coords(pe1,crds1);
1851  tmgr.coords(pe2,crds2);
1852  for ( int i=0; i<3; ++i ) {
1853  int d = sortdims[i];
1854  if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
1855  }
1856  const int *index = WorkDistrib::peCompactOrderingIndex;
1857  return ( index[pe1] < index[pe2] );
1858  }
1859  };
 // Sorts the PE range by topology (splitdim first, then the longer of
 // the two remaining axes) and returns a split point near the middle
 // that does not cut through a physical node.
1860  int* sortAndSplit(int *node_begin, int *node_end, int splitdim) {
1861  if ( node_begin == node_end ) return node_begin;
1862  int tmins[3], tmaxs[3], tlens[3], sortdims[3];
1863  coords(*node_begin, tmins);
1864  coords(*node_begin, tmaxs);
1865  for ( int *peitr = node_begin; peitr != node_end; ++peitr ) {
1866  int tvals[3];
1867  coords(*peitr, tvals);
1868  for ( int i=0; i<3; ++i ) {
1869  if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
1870  if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
1871  }
1872  }
1873  for ( int i=0; i<3; ++i ) {
1874  tlens[i] = tmaxs[i] - tmins[i];
1875  }
1876  sortdims[0] = splitdim;
1877  for ( int i=0, j=0; i<3; ++i ) {
1878  if ( i != splitdim ) sortdims[++j] = i;
1879  }
1880  if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
1881  int tmp = sortdims[1];
1882  sortdims[1] = sortdims[2];
1883  sortdims[2] = tmp;
1884  }
1885  std::sort(node_begin,node_end,pe_sortop_topo(*this,sortdims));
1886  int *nodes = node_begin;
1887  int nnodes = node_end - node_begin;
1888  int i_split = 0;
1889 #if 0
1890  int c_split = coord(nodes[0],splitdim);
1891  for ( int i=0; i<nnodes; ++i ) {
1892  if ( coord(nodes[i],splitdim) != c_split ) {
1893  int mid = (nnodes+1)/2;
1894  if ( abs(i-mid) < abs(i_split-mid) ) {
1895  i_split = i;
1896  c_split = coord(i,splitdim);
1897  }
1898  else break;
1899  }
1900  }
1901 #endif
 // Advance the split to the physical-node boundary nearest the middle.
1902  for ( int i=0; i<nnodes; ++i ) {
1903  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
1904  int mid = (nnodes+1)/2;
1905  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
1906  else break;
1907  }
1908  }
1909  return ( node_begin + i_split );
1910  }
1911 };
1912 
// NOTE(review): the struct header for this comparator (doxygen 1913-1915,
// presumably "struct patch_sortop_curve_a { PatchMap *pmap;" plus its
// constructor) was dropped by the doc extraction — confirm upstream.
// Boustrophedon (serpentine) patch ordering with axis a as the primary
// key: direction along b flips on odd a slices, and along c on odd b,
// producing a continuous space-filling sweep through the patch grid.
1916  inline bool operator() (int p1, int p2) const {
1917  int a1 = pmap->index_a(p1);
1918  int a2 = pmap->index_a(p2);
1919  if ( a1 < a2 ) return true;
1920  if ( a1 > a2 ) return false;
1921  int dir = ( (a1 & 1) ? -1 : 1 );
1922  int b1 = pmap->index_b(p1);
1923  int b2 = pmap->index_b(p2);
1924  if ( b1 * dir < b2 * dir ) return true;
1925  if ( b1 * dir > b2 * dir ) return false;
1926  dir *= ( (b1 & 1) ? -1 : 1 );
1927  int c1 = pmap->index_c(p1);
1928  int c2 = pmap->index_c(p2);
1929  if ( c1 * dir < c2 * dir ) return true;
1930  return false;
1931  }
1932 };
1933 
// NOTE(review): the struct header for this comparator (doxygen 1933-1936,
// presumably "struct patch_sortop_curve_b { PatchMap *pmap;" plus its
// constructor) was dropped by the doc extraction — confirm upstream.
// Same serpentine sweep as the curve_a comparator above, but with axis b
// as the primary key, then a, then c.
1937  inline bool operator() (int p1, int p2) const {
1938  int a1 = pmap->index_b(p1);
1939  int a2 = pmap->index_b(p2);
1940  if ( a1 < a2 ) return true;
1941  if ( a1 > a2 ) return false;
1942  int dir = ( (a1 & 1) ? -1 : 1 );
1943  int b1 = pmap->index_a(p1);
1944  int b2 = pmap->index_a(p2);
1945  if ( b1 * dir < b2 * dir ) return true;
1946  if ( b1 * dir > b2 * dir ) return false;
1947  dir *= ( (b1 & 1) ? -1 : 1 );
1948  int c1 = pmap->index_c(p1);
1949  int c2 = pmap->index_c(p2);
1950  if ( c1 * dir < c2 * dir ) return true;
1951  return false;
1952  }
1953 };
1954 
// NOTE(review): the struct header for this comparator (doxygen 1955-1957,
// presumably "struct patch_sortop_curve_c { PatchMap *pmap;" plus its
// constructor) was dropped by the doc extraction — confirm upstream.
// Same serpentine sweep as the curve_a comparator above, but with axis c
// as the primary key, then a, then b.
1958  inline bool operator() (int p1, int p2) const {
1959  int a1 = pmap->index_c(p1);
1960  int a2 = pmap->index_c(p2);
1961  if ( a1 < a2 ) return true;
1962  if ( a1 > a2 ) return false;
1963  int dir = ( (a1 & 1) ? -1 : 1 );
1964  int b1 = pmap->index_a(p1);
1965  int b2 = pmap->index_a(p2);
1966  if ( b1 * dir < b2 * dir ) return true;
1967  if ( b1 * dir > b2 * dir ) return false;
1968  dir *= ( (b1 & 1) ? -1 : 1 );
1969  int c1 = pmap->index_b(p1);
1970  int c2 = pmap->index_b(p2);
1971  if ( c1 * dir < c2 * dir ) return true;
1972  return false;
1973  }
1974 };
1975 
// NOTE(review): the function's signature line was stripped by the doxygen
// extraction (it was a hyperlink); from the recursive calls at the bottom
// and the NAMD_bug message this is recursive_bisect_with_curve() -- TODO
// restore the signature from the original WorkDistrib.C.
//
// Recursively assigns the patches in [patch_begin,patch_end) to the PEs in
// [node_begin,node_end): clamps per-patch loads, splits the PE range (by
// torus axis, then physical node, then process, then midpoint), sorts the
// patches along a space-filling curve, walks them in order to find the
// matching load split, and recurses on the two halves.  Results go into
// assignedNode[pid].
1977  int *patch_begin, int *patch_end,
1978  int *node_begin, int *node_end,
1979  double *patchLoads,
1980  double *sortedLoads,
1981  int *assignedNode,
1982  TopoManagerWrapper &tmgr
1983  ) {
1984 
1985  SimParameters *simParams = Node::Object()->simParameters;
1986  PatchMap *patchMap = PatchMap::Object();
1987  int *patches = patch_begin;
1988  int npatches = patch_end - patch_begin;
1989  int *nodes = node_begin;
1990  int nnodes = node_end - node_begin;
1991 
1992  // assign patch loads
1993  const int emptyPatchLoad = simParams->emptyPatchLoad;
1994  double totalRawLoad = 0;
1995  for ( int i=0; i<npatches; ++i ) {
1996  int pid=patches[i];
1997 #ifdef MEM_OPT_VERSION
1998  double load = patchMap->numAtoms(pid) + emptyPatchLoad;
1999 #else
2000  double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
2001 #endif
2002  patchLoads[pid] = load;
2003  sortedLoads[i] = load;
2004  totalRawLoad += load;
2005  }
2006  std::sort(sortedLoads,sortedLoads+npatches);
2007 
2008  // limit maxPatchLoad to adjusted average load per node
      // Walk loads in ascending order; stop once a single load would exceed
      // the average of the remaining total spread over all nodes.
2009  double sumLoad = 0;
2010  double maxPatchLoad = 1;
2011  for ( int i=0; i<npatches; ++i ) {
2012  double load = sortedLoads[i];
2013  double total = sumLoad + (npatches-i) * load;
2014  if ( nnodes * load > total ) break;
2015  sumLoad += load;
2016  maxPatchLoad = load;
2017  }
2018  double totalLoad = 0;
2019  for ( int i=0; i<npatches; ++i ) {
2020  int pid=patches[i];
2021  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2022  totalLoad += patchLoads[pid];
2023  }
2024  if ( nnodes * maxPatchLoad > totalLoad )
2025  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2026 
      // Bounding box of this patch set in patch-grid coordinates.
2027  int a_len, b_len, c_len;
2028  int a_min, b_min, c_min;
2029  { // find dimensions
2030  a_min = patchMap->index_a(patches[0]);
2031  b_min = patchMap->index_b(patches[0]);
2032  c_min = patchMap->index_c(patches[0]);
2033  int a_max = a_min;
2034  int b_max = b_min;
2035  int c_max = c_min;
2036  for ( int i=1; i<npatches; ++i ) {
2037  int a = patchMap->index_a(patches[i]);
2038  int b = patchMap->index_b(patches[i]);
2039  int c = patchMap->index_c(patches[i]);
2040  if ( a < a_min ) a_min = a;
2041  if ( b < b_min ) b_min = b;
2042  if ( c < c_min ) c_min = c;
2043  if ( a > a_max ) a_max = a;
2044  if ( b > b_max ) b_max = b;
2045  if ( c > c_max ) c_max = c;
2046  }
2047  a_len = a_max - a_min;
2048  b_len = b_max - b_min;
2049  c_len = c_max - c_min;
2050  }
2051 
2052  int *node_split = node_begin;
2053 
      // Try to split the PE list along the torus axis matching the longest
      // patch-box dimension (skipped entirely when topology is disabled).
2054  if ( simParams->disableTopology ) ; else
2055  if ( a_len >= b_len && a_len >= c_len ) {
2056  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2057  } else if ( b_len >= a_len && b_len >= c_len ) {
2058  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2059  } else if ( c_len >= a_len && c_len >= b_len ) {
2060  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2061  }
2062 
2063  if ( node_split == node_begin ) { // unable to split torus
2064  // make sure physical nodes are together
2065  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2066  // find physical node boundary to split on
      // Pick the physical-node boundary closest to the midpoint.
2067  int i_split = 0;
2068  for ( int i=0; i<nnodes; ++i ) {
2069  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2070  int mid = (nnodes+1)/2;
2071  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2072  else break;
2073  }
2074  }
2075  node_split = node_begin + i_split;
2076  }
2077 
2078  bool final_patch_sort = false;
2079 
2080  if ( node_split == node_begin ) { // all on same physical node
2081  if ( ( simParams->verboseTopology ) &&
2082  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2083  int crds[3];
2084  tmgr.coords(*node_begin, crds);
2085  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2086  CmiPhysicalNodeID(*node_begin), *node_begin,
2087  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2088  a_min, b_min, c_min, npatches,
2089  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2090  }
2091 
2092  // final sort along a to minimize pme message count
2093  final_patch_sort = true;
2094 
2095  // find node (process) boundary to split on
2096  int i_split = 0;
2097  for ( int i=0; i<nnodes; ++i ) {
2098  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2099  int mid = (nnodes+1)/2;
2100  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2101  else break;
2102  }
2103  }
2104  node_split = node_begin + i_split;
2105  }
2106 
2107  if ( node_split == node_begin ) { // all on same node (process)
2108  if ( ( simParams->verboseTopology ) &&
2109  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2110  int crds[3];
2111  tmgr.coords(*node_begin, crds);
2112  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2113  CmiNodeOf(*node_begin), *node_begin, npatches,
2114  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2115  }
2116 
2117  // no natural divisions so just split at midpoint
2118  node_split = node_begin + nnodes/2;
2119  }
2120 
2121  if ( nnodes == 1 ) { // down to a single pe
2122  // assign all patches
2123  int *node = node_begin;
2124  sumLoad = 0;
2125  for ( int i=0; i < npatches; ++i ) {
2126  int pid = patches[i];
2127  assignedNode[pid] = *node;
2128  sumLoad += patchLoads[pid];
2129  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2130  i, *node,
2131  patchMap->index_a(pid),
2132  patchMap->index_b(pid),
2133  patchMap->index_c(pid),
2134  patchLoads[pid], sumLoad);
2135  }
2136 
2137  return;
2138  }
2139 
2140  if ( final_patch_sort ) {
2141  // final sort along a to minimize pme message count
2142  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2143  } else if ( a_len >= b_len && a_len >= c_len ) {
2144  if ( 0 ) CkPrintf("sort a\n");
2145  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2146  } else if ( b_len >= a_len && b_len >= c_len ) {
2147  if ( 0 ) CkPrintf("sort b\n");
2148  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2149  } else if ( c_len >= a_len && c_len >= b_len ) {
2150  if ( 0 ) CkPrintf("sort c\n");
2151  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2152  }
2153 
      // Accumulate patch loads in curve order until the left node group's
      // proportional share of totalLoad is reached; that point becomes the
      // patch split matching node_split.
2154  int *patch_split;
2155  { // walk through patches in sorted order
2156  int *node = node_begin;
2157  sumLoad = 0;
2158  for ( patch_split = patch_begin;
2159  patch_split != patch_end && node != node_split;
2160  ++patch_split ) {
2161  sumLoad += patchLoads[*patch_split];
2162  double targetLoad = totalLoad *
2163  ((double)(node-node_begin+1) / (double)nnodes);
2164  if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
2165  patch_split - patch_begin, *node,
2166  patchMap->index_a(*patch_split),
2167  patchMap->index_b(*patch_split),
2168  patchMap->index_c(*patch_split),
2169  sumLoad, targetLoad);
2170  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2171  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2172  }
2173  double targetLoad = totalLoad *
2174  ((double)(node_split-node_begin) / (double)nnodes);
2175  if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
2176  node_split-node_begin, nnodes,
2177  patch_split-patch_begin, npatches,
2178  sumLoad, targetLoad);
2179  }
2180 
2181  // recurse
      // NOTE(review): the callee name on the next two calls (doxygen lines
      // 2182/2185) was stripped by extraction; these are the two recursive
      // calls of this same function -- restore from the original source.
2183  patch_begin, patch_split, node_begin, node_split,
2184  patchLoads, sortedLoads, assignedNode, tmgr);
2186  patch_split, patch_end, node_split, node_end,
2187  patchLoads, sortedLoads, assignedNode, tmgr);
2188 }
2189 
2190 //----------------------------------------------------------------------
// Assigns every patch to a PE (and a base node) by recursively bisecting
// the patch set along a space-filling curve, balancing per-patch loads.
2191 void WorkDistrib::assignPatchesSpaceFillingCurve()
2192 {
2193  TopoManagerWrapper tmgr;
2194  PatchMap *patchMap = PatchMap::Object();
2195  const int numPatches = patchMap->numPatches();
2196  int *assignedNode = new int[numPatches];
2197  ResizeArray<double> patchLoads(numPatches);
2198  SortableResizeArray<double> sortedLoads(numPatches);
2199  int numNodes = Node::Object()->numNodes();
2200  SimParameters *simParams = Node::Object()->simParameters;
2201  if(simParams->simulateInitialMapping) {
      // NOTE(review): NAMD_die presumably terminates the program, making
      // the assignment below unreachable dead code -- confirm intent.
2202  NAMD_die("simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
2203  numNodes = simParams->simulatedPEs;
2204  }
2205 
      // Identity ordering of patch IDs; the bisection reorders it in place.
2206  ResizeArray<int> patchOrdering(numPatches);
2207  for ( int i=0; i<numPatches; ++i ) {
2208  patchOrdering[i] = i;
2209  }
2210 
      // Build the list of PEs eligible to host patches, skipping PE 0
      // (and PE 1, and output PEs) when the corresponding options are set.
2211  ResizeArray<int> nodeOrdering(numNodes);
2212  nodeOrdering.resize(0);
2213  for ( int i=0; i<numNodes; ++i ) {
2214  int pe = peDiffuseOrdering[(i+1)%numNodes]; // avoid 0 if possible
2215  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
2216  if ( pe == 0 ) continue;
2217  if(simParams->noPatchesOnOne && numNodes > 2) {
2218  if ( pe == 1 ) continue;
2219  }
2220  }
2221 #ifdef MEM_OPT_VERSION
2222  if(simParams->noPatchesOnOutputPEs && numNodes-simParams->numoutputprocs >2) {
2223  if ( isOutputProcessor(pe) ) continue;
2224  }
2225 #endif
2226  nodeOrdering.add(pe);
2227  if ( 0 ) CkPrintf("using pe %5d\n", pe);
2228  }
2229 
2230  int *node_begin = nodeOrdering.begin();
2231  int *node_end = nodeOrdering.end();
2232  if ( nodeOrdering.size() > numPatches ) {
2233  node_end = node_begin + numPatches;
2234  }
2235  std::sort(node_begin, node_end, pe_sortop_compact());
2236 
      // When there are more than twice as many PEs as patches, use a
      // disjoint slice of PEs for base nodes.
2237  int *basenode_begin = node_begin;
2238  int *basenode_end = node_end;
2239  if ( nodeOrdering.size() > 2*numPatches ) {
2240  basenode_begin = node_end;
2241  basenode_end = basenode_begin + numPatches;
2242  std::sort(basenode_begin, basenode_end, pe_sortop_compact());
2243  }
2244 
2245  if ( simParams->disableTopology ) {
2246  iout << iWARN << "IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << endi;
2247  }
2248 
      // NOTE(review): the callee name (doxygen line 2249) was stripped by
      // extraction; this is the recursive_bisect_with_curve() call --
      // restore from the original source.
2250  patchOrdering.begin(), patchOrdering.end(),
2251  node_begin, node_end,
2252  patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
2253 
2254  std::sort(node_begin, node_end, pe_sortop_compact());
2255 
2256  int samenodecount = 0;
2257 
      // Commit assignments: each patch gets its PE and the base node at
      // the matching index in the base-node slice.
2258  for ( int pid=0; pid<numPatches; ++pid ) {
2259  int node = assignedNode[pid];
2260  patchMap->assignNode(pid, node);
2261  int nodeidx = std::lower_bound(node_begin, node_end, node,
2262  pe_sortop_compact()) - node_begin;
2263  int basenode = basenode_begin[nodeidx];
2264  patchMap->assignBaseNode(pid, basenode);
2265  if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
2266  }
2267 
2268  iout << iINFO << "Placed " << (samenodecount*100./numPatches) << "% of base nodes on same physical node as patch\n" << endi;
2269 
2270  delete [] assignedNode;
2271 }
2272 
2273 //----------------------------------------------------------------------
// NOTE(review): the signature line (doxygen line 2274) was stripped by
// extraction; this is WorkDistrib's top-level compute-mapping routine
// (mapComputes) -- TODO confirm against the original WorkDistrib.C.
// Allocates compute IDs and then creates one compute mapping per enabled
// feature, dispatching to mapComputeHomePatches / mapComputePatch /
// mapComputeNode / mapComputeHomeTuples as appropriate.
2275 {
2276  PatchMap *patchMap = PatchMap::Object();
2277  ComputeMap *computeMap = ComputeMap::Object();
2278  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2279  Node *node = nd.ckLocalBranch();
2280 
2281  DebugM(3,"Mapping computes\n");
2282 
2283  computeMap->allocateCids();
2284 
2285  // Handle full electrostatics
2286  if ( node->simParameters->fullDirectOn )
2287  mapComputeHomePatches(computeFullDirectType);
2288  if ( node->simParameters->FMAOn )
2289 #ifdef DPMTA
2290  mapComputeHomePatches(computeDPMTAType);
2291 #else
2292  NAMD_die("This binary does not include DPMTA (FMA).");
2293 #endif
2294  if ( node->simParameters->PMEOn ) {
2295 #ifdef DPME
2296  if ( node->simParameters->useDPME )
2297  mapComputeHomePatches(computeDPMEType);
2298  else {
2299  mapComputeHomePatches(computePmeType);
      // NOTE(review): a condition line (doxygen line 2300) appears to be
      // missing before the Ewald mapping -- restore from the original.
2301  mapComputeHomePatches(computeEwaldType);
2302  }
2303 #else
2304 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2305  if (node->simParameters->usePMECUDA) {
2306  mapComputePatch(computePmeCUDAType);
2307  } else
2308 #endif
2309  {
2310  mapComputePatch(computePmeType);
2311  }
      // NOTE(review): a condition line (doxygen line 2312) appears to be
      // missing before the Ewald mapping -- restore from the original.
2313  mapComputeHomePatches(computeEwaldType);
2314 #endif
2315  }
2316 
2317  if ( node->simParameters->globalForcesOn ) {
2318  DebugM(2,"adding ComputeGlobal\n");
2319  mapComputeHomePatches(computeGlobalType);
2320  }
2321 
2322  if ( node->simParameters->extForcesOn )
2323  mapComputeHomePatches(computeExtType);
2324 
2325  if ( node->simParameters->qmForcesOn )
2326  mapComputeHomePatches(computeQMType);
2327 
2328  if ( node->simParameters->GBISserOn )
2329  mapComputeHomePatches(computeGBISserType);
2330 
2331  if ( node->simParameters->MsmSerialOn )
2332  mapComputeHomePatches(computeMsmSerialType);
2333 #ifdef CHARM_HAS_MSA
2334  else if ( node->simParameters->MSMOn )
2335  mapComputeHomePatches(computeMsmMsaType);
2336 #else
2337  else if ( node->simParameters->MSMOn )
2338  mapComputeHomePatches(computeMsmType);
2339 #endif
2340 
2341  if ( node->simParameters->FMMOn )
2342  mapComputeHomePatches(computeFmmType);
2343 
2344 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2345 #ifdef BONDED_CUDA
2346  if (node->simParameters->bondedCUDA) {
2347  mapComputeNode(computeBondedCUDAType);
2348  }
2349 #endif
2350 #endif
2351 
2352 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2353  if (node->simParameters->useCUDA2) {
2354  mapComputeNode(computeNonbondedCUDA2Type);
2355  } else {
2356  mapComputeNode(computeNonbondedCUDAType);
2357  }
2358  mapComputeHomeTuples(computeExclsType);
2359  mapComputePatch(computeSelfExclsType);
2360 #endif
2361 
2362 #ifdef NAMD_MIC
2363  mapComputeNode(computeNonbondedMICType);
2364 #endif
2365 
2366  mapComputeNonbonded();
2367 
2368  if ( node->simParameters->LCPOOn ) {
2369  mapComputeLCPO();
2370  }
2371 
2372  // If we're doing true pair interactions, no need for bonded terms.
2373  // But if we're doing within-group interactions, we do need them.
2374  if ( !node->simParameters->pairInteractionOn ||
      // NOTE(review): the second operand of this condition (doxygen line
      // 2375) was stripped by extraction -- restore from the original.
2376  mapComputeHomeTuples(computeBondsType);
2377  mapComputeHomeTuples(computeAnglesType);
2378  mapComputeHomeTuples(computeDihedralsType);
2379  mapComputeHomeTuples(computeImpropersType);
2380  mapComputeHomeTuples(computeCrosstermsType);
2381  mapComputePatch(computeSelfBondsType);
2382  mapComputePatch(computeSelfAnglesType);
2383  mapComputePatch(computeSelfDihedralsType);
2384  mapComputePatch(computeSelfImpropersType);
2385  mapComputePatch(computeSelfCrosstermsType);
2386  }
2387 
2388  if ( node->simParameters->goGroPair ) {
2389  // JLai
2390  mapComputeHomeTuples(computeGromacsPairType);
2391  mapComputePatch(computeSelfGromacsPairType);
2392  // End of JLai
2393  }
2394 
2395  if ( node->simParameters->drudeOn ) {
2396  mapComputeHomeTuples(computeTholeType);
2397  mapComputePatch(computeSelfTholeType);
2398  mapComputeHomeTuples(computeAnisoType);
2399  mapComputePatch(computeSelfAnisoType);
2400  }
2401 
2402  if ( node->simParameters->eFieldOn )
2403  mapComputePatch(computeEFieldType);
2404  /* BEGIN gf */
2405  if ( node->simParameters->mgridforceOn )
2406  mapComputePatch(computeGridForceType);
2407  /* END gf */
2408  if ( node->simParameters->stirOn )
2409  mapComputePatch(computeStirType);
2410  if ( node->simParameters->sphericalBCOn )
2411  mapComputePatch(computeSphericalBCType);
2412  if ( node->simParameters->cylindricalBCOn )
2413  mapComputePatch(computeCylindricalBCType);
2414  if ( node->simParameters->tclBCOn ) {
2415  mapComputeHomePatches(computeTclBCType);
2416  }
2417  if ( node->simParameters->constraintsOn )
2418  mapComputePatch(computeRestraintsType);
2419  if ( node->simParameters->consForceOn )
2420  mapComputePatch(computeConsForceType);
2421  if ( node->simParameters->consTorqueOn )
2422  mapComputePatch(computeConsTorqueType);
2423 
2424  // store the latest compute map
2425  SimParameters *simParams = Node::Object()->simParameters;
2426  if (simParams->storeComputeMap) {
2427  computeMap->saveComputeMap(simParams->computeMapFilename);
2428  }
2429  // override mapping decision
2430  if (simParams->loadComputeMap) {
2431  computeMap->loadComputeMap(simParams->computeMapFilename);
2432  CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
2433  }
2434 }
2435 
2436 //----------------------------------------------------------------------
2437 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
2438 {
2439  PatchMap *patchMap = PatchMap::Object();
2440  ComputeMap *computeMap = ComputeMap::Object();
2441  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2442  Node *node = nd.ckLocalBranch();
2443 
2444  int numNodes = node->numNodes();
2445  SimParameters *simparam = node->simParameters;
2446  if(simparam->simulateInitialMapping) {
2447  numNodes = simparam->simulatedPEs;
2448  }
2449 
2450  char *isBaseNode = new char[numNodes];
2451  memset(isBaseNode,0,numNodes*sizeof(char));
2452 
2453  int numPatches = patchMap->numPatches();
2454  for(int j=0; j<numPatches; j++) {
2455  isBaseNode[patchMap->basenode(j)] = 1;
2456  }
2457 
2458  for(int i=0; i<numNodes; i++) {
2459  if ( isBaseNode[i] ) {
2460  computeMap->storeCompute(i,0,type);
2461  }
2462  }
2463 
2464  delete [] isBaseNode;
2465 }
2466 
2467 //----------------------------------------------------------------------
2468 void WorkDistrib::mapComputeHomePatches(ComputeType type)
2469 {
2470  PatchMap *patchMap = PatchMap::Object();
2471  ComputeMap *computeMap = ComputeMap::Object();
2472  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2473  Node *node = nd.ckLocalBranch();
2474 
2475  int numNodes = node->numNodes();
2476  SimParameters *simparam = node->simParameters;
2477  if(simparam->simulateInitialMapping) {
2478  numNodes = simparam->simulatedPEs;
2479  }
2480 
2481  for(int i=0; i<numNodes; i++) {
2482  if ( patchMap->numPatchesOnNode(i) ) {
2483  computeMap->storeCompute(i,0,type);
2484  }
2485  }
2486 }
2487 
2488 //----------------------------------------------------------------------
2489 void WorkDistrib::mapComputePatch(ComputeType type)
2490 {
2491  PatchMap *patchMap = PatchMap::Object();
2492  ComputeMap *computeMap = ComputeMap::Object();
2493 
2494  PatchID i;
2495  ComputeID cid;
2496 
2497  for(i=0; i<patchMap->numPatches(); i++)
2498  {
2499  cid=computeMap->storeCompute(patchMap->node(i),1,type);
2500  computeMap->newPid(cid,i);
2501  patchMap->newCid(i,cid);
2502  }
2503 
2504 }
2505 
2506 //----------------------------------------------------------------------
2507 void WorkDistrib::mapComputeNode(ComputeType type)
2508 {
2509  PatchMap *patchMap = PatchMap::Object();
2510  ComputeMap *computeMap = ComputeMap::Object();
2511 
2512  PatchID i;
2513  ComputeID cid;
2514 
2515  int ncpus = CkNumPes();
2516  SimParameters *simparam = Node::Object()->simParameters;
2517  if(simparam->simulateInitialMapping) {
2518  ncpus = simparam->simulatedPEs;
2519  }
2520 
2521  for(int i=0; i<ncpus; i++) {
2522  computeMap->storeCompute(i,0,type);
2523  }
2524 
2525 }
2526 
2527 //----------------------------------------------------------------------
// Creates the nonbonded pairwise computes: one self compute per patch on
// the patch's own PE, and one pair compute per unique one-or-two-away
// neighbor pair on the downstream patch's base node.
2528 void WorkDistrib::mapComputeNonbonded(void)
2529 {
2530  // For each patch, create 1 electrostatic object for self-interaction.
2531  // Then create 1 for each 1-away and 2-away neighbor which has a larger
2532  // pid.
2533 
2534  PatchMap *patchMap = PatchMap::Object();
2535  ComputeMap *computeMap = ComputeMap::Object();
2536  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2537  Node *node = nd.ckLocalBranch();
2538  SimParameters *simParams = Node::Object()->simParameters;
2539  int ncpus = CkNumPes();
2540  int nodesize = CkMyNodeSize();
2541  if(simParams->simulateInitialMapping) {
2542  ncpus = simParams->simulatedPEs;
2543  nodesize = simParams->simulatedNodeSize;
2544  }
2545 
      // NOTE(review): the declaration on doxygen line 2546 (used below as
      // `oneAway`, presumably `PatchID oneAway[PatchMap::MaxOneOrTwoAway];`)
      // was stripped by extraction -- restore from the original source.
2547  PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
2548  int oneAwayTrans[PatchMap::MaxOneOrTwoAway];
2549 
2550  PatchID i;
2551  ComputeID cid;
2552  int numNeighbors;
2553  int j;
      // Scale down partition counts when there are fewer PEs than patches.
2554  double partScaling = 1.0;
2555  if ( ncpus < patchMap->numPatches() ) {
2556  partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
2557  }
2558 
2559  for(i=0; i<patchMap->numPatches(); i++) // do the self
2560  {
2561 
2562  int numPartitions = 1;
2563 #if 0
2564  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2565 #ifdef MEM_OPT_VERSION
2566  int64 numFixed = patchMap->numFixedAtoms(i);
2567  int64 numAtoms = patchMap->numAtoms(i);
2568 #else
2569  int64 numFixed = patchMap->patch(i)->getNumFixedAtoms(); // avoid overflow
2570  int64 numAtoms = patchMap->patch(i)->getNumAtoms();
2571 #endif
2572 
2573  int divide = node->simParameters->numAtomsSelf;
2574  if (divide > 0) {
2575  numPartitions = (int) ( partScaling * ( 0.5 +
2576  (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
2577  }
2578  if (numPartitions < 1) numPartitions = 1;
2579  if ( numPartitions > node->simParameters->maxSelfPart )
2580  numPartitions = node->simParameters->maxSelfPart;
2581  // self-interaction
2582  DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
2583 // iout <<"Self numPartitions = " <<numPartitions <<" numAtoms " <<numAtoms <<std::endl;
2584  }
2585 #endif
2586 
2587  // DMK - NOTE - For MIC builds (i.e. NAMD_MIC is defined), it is assumed that self computes are
2588  // mapped to the PE their associated patch is on. If the code below should change, making that
2589  // untrue, MIC builds should be special cased so that assumption still holds (or the host vs
2590  // device load balancing scheme should be modified). (See the comment in the function
2591  // mic_assignComputes() in ComputeNonbondedMIC.C for more details.)
2592  for(int partition=0; partition < numPartitions; partition++)
2593  {
      // NOTE(review): the compute-type argument (doxygen line 2595,
      // presumably computeNonbondedSelfType) was stripped by extraction.
2594  cid=computeMap->storeCompute(patchMap->node(i),1,
2596  partition,numPartitions);
2597  computeMap->newPid(cid,i);
2598  patchMap->newCid(i,cid);
2599  }
2600  }
2601 
2602  for(int p1=0; p1 <patchMap->numPatches(); p1++) // do the pairs
2603  {
2604  // this only returns half of neighbors, which is what we want
2605  numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
2606  for(j=0;j<numNeighbors;j++)
2607  {
2608  int p2 = oneAway[j];
2609  int dsp = oneAwayDownstream[j];
2610 
2611  int numPartitions = 1;
2612 #if 0
2613  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2614 #ifdef MEM_OPT_VERSION
2615  int64 numAtoms1 = patchMap->numAtoms(p1);
2616  int64 numAtoms2 = patchMap->numAtoms(p2);
2617  int64 numFixed1 = patchMap->numFixedAtoms(p1);
2618  int64 numFixed2 = patchMap->numFixedAtoms(p2);
2619 #else
2620  int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
2621  int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
2622  int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
2623  int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
2624 #endif
2625 
2626 
2627  const int t2 = oneAwayTrans[j];
2628  const int adim = patchMap->gridsize_a();
2629  const int bdim = patchMap->gridsize_b();
2630  const int cdim = patchMap->gridsize_c();
2631  const int nax = patchMap->numaway_a(); // 1 or 2
2632  const int nay = patchMap->numaway_b(); // 1 or 2
2633  const int naz = patchMap->numaway_c(); // 1 or 2
2634  const int ia1 = patchMap->index_a(p1);
2635  const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
2636  const int ib1 = patchMap->index_b(p1);
2637  const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
2638  const int ic1 = patchMap->index_c(p1);
2639  const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);
2640 
2641  if ( abs(ia2-ia1) > nax ||
2642  abs(ib2-ib1) > nay ||
2643  abs(ic2-ic1) > naz )
2644  NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");
2645 
2646  int distance = 3;
2647  if ( ia1 == ia2 ) --distance;
2648  else if ( ia1 == ia2 + nax - 1 ) --distance;
2649  else if ( ia1 + nax - 1 == ia2 ) --distance;
2650  if ( ib1 == ib2 ) --distance;
2651  else if ( ib1 == ib2 + nay - 1 ) --distance;
2652  else if ( ib1 + nay - 1 == ib2 ) --distance;
2653  if ( ic1 == ic2 ) --distance;
2654  else if ( ic1 == ic2 + naz - 1 ) --distance;
2655  else if ( ic1 + naz - 1 == ic2 ) --distance;
2656  int divide = 0;
2657  if ( distance == 0 ) {
2658  divide = node->simParameters->numAtomsSelf2;
2659  } else if (distance == 1) {
2660  divide = node->simParameters->numAtomsPair;
2661  } else {
2662  divide = node->simParameters->numAtomsPair2;
2663  }
2664  if (divide > 0) {
2665  numPartitions = (int) ( partScaling * ( 0.5 +
2666  (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
2667  }
2668  if ( numPartitions < 1 ) numPartitions = 1;
2669  if ( numPartitions > node->simParameters->maxPairPart )
2670  numPartitions = node->simParameters->maxPairPart;
2671 // if ( numPartitions > 1 ) iout << "Mapping " << numPartitions << " ComputeNonbondedPair objects for patches " << p1 << "(" << numAtoms1 << ") and " << p2 << "(" << numAtoms2 << ")\n" << endi;
2672  }
2673 #endif
      // Pair computes live on the downstream patch's base node.
2674  for(int partition=0; partition < numPartitions; partition++)
2675  {
2676  cid=computeMap->storeCompute( patchMap->basenode(dsp),
2677  2,computeNonbondedPairType,partition,numPartitions);
2678  computeMap->newPid(cid,p1);
2679  computeMap->newPid(cid,p2,oneAwayTrans[j]);
2680  patchMap->newCid(p1,cid);
2681  patchMap->newCid(p2,cid);
2682  }
2683  }
2684  }
2685 }
2686 
2687 //----------------------------------------------------------------------
// Creates one LCPO compute per patch octet (the patch plus its octet
// neighbors), placed on the PE owning the anchor patch.
2688 void WorkDistrib::mapComputeLCPO(void) {
2689  //iterate over all needed objects
2690 
2691  PatchMap *patchMap = PatchMap::Object();
2692  ComputeMap *computeMap = ComputeMap::Object();
2693  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2694  Node *node = nd.ckLocalBranch();
2695  SimParameters *simParams = Node::Object()->simParameters;
2696  int ncpus = CkNumPes();
2697  int nodesize = CkMyNodeSize();
2698  const int maxPatches = 8;
2699 
2700  int numPatchesInOctet;
2701  PatchID patchesInOctet[maxPatches];
2702  int oneAwayTrans[maxPatches];
2703 
2704  //partitioned after 1st timestep
2705  int numPartitions = 1;
2706 
2707  PatchID i;
2708  ComputeID cid;
2709 
2710  // one octet per patch
2711  for(i=0; i<patchMap->numPatches(); i++) {
2712  numPatchesInOctet =
2713  patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);
2714 
2715  for(int partition=0; partition < numPartitions; partition++) {
2716  cid=computeMap->storeCompute(patchMap->node(i),
2717  numPatchesInOctet,
      // NOTE(review): the compute-type argument (doxygen line 2718,
      // presumably computeLCPOType) was stripped by extraction --
      // restore from the original source.
2719  partition,
2720  numPartitions);
      // Register every octet member with the new compute, and vice versa.
2721  for (int p = 0; p < numPatchesInOctet; p++) {
2722  computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
2723  }
2724  for (int p = 0; p < numPatchesInOctet; p++) {
2725  patchMap->newCid(patchesInOctet[p],cid);
2726  }
2727  } // for partitions
2728  } // for patches
2729 } // mapComputeLCPO
2730 
2731 //----------------------------------------------------------------------
// NOTE(review): the signature line (doxygen line 2732) was stripped by
// extraction; by the NAMD_bug text this is
// WorkDistrib::messageEnqueueWork(Compute *compute) -- TODO confirm.
// Stamps the compute's local work message with a priority derived from
// its sequence number and dispatches it to the type-specific enqueue
// entry method on the local PE.
2733  LocalWorkMsg *msg = compute->localWorkMsg;
2734  int seq = compute->sequence();
2735  int gbisPhase = compute->getGBISPhase();
2736 
2737  if ( seq < 0 ) {
2738  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2739  } else {
2740  SET_PRIORITY(msg,seq,compute->priority());
2741  }
2742 
2743  msg->compute = compute; // pointer is valid since send is to local Pe
2744  int type = compute->type();
2745  int cid = compute->cid;
2746 
2747  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2748  switch ( type ) {
2749  case computeExclsType:
2750  case computeSelfExclsType:
2751  wdProxy[CkMyPe()].enqueueExcls(msg);
2752  break;
2753  case computeBondsType:
2754  case computeSelfBondsType:
2755  wdProxy[CkMyPe()].enqueueBonds(msg);
2756  break;
2757  case computeAnglesType:
2758  case computeSelfAnglesType:
2759  wdProxy[CkMyPe()].enqueueAngles(msg);
2760  break;
2761  case computeDihedralsType:
      // NOTE(review): companion case labels (e.g. the computeSelf* label
      // on doxygen line 2762, and similar stripped labels below) were
      // hyperlinks removed by extraction -- restore from the original.
2763  wdProxy[CkMyPe()].enqueueDihedrals(msg);
2764  break;
2765  case computeImpropersType:
2767  wdProxy[CkMyPe()].enqueueImpropers(msg);
2768  break;
2769  case computeTholeType:
2770  case computeSelfTholeType:
2771  wdProxy[CkMyPe()].enqueueThole(msg);
2772  break;
2773  case computeAnisoType:
2774  case computeSelfAnisoType:
2775  wdProxy[CkMyPe()].enqueueAniso(msg);
2776  break;
2777  case computeCrosstermsType:
2779  wdProxy[CkMyPe()].enqueueCrossterms(msg);
2780  break;
2781  // JLai
2784  wdProxy[CkMyPe()].enqueueGromacsPair(msg);
2785  break;
2786  // End of JLai
2787  case computeLCPOType:
2788  wdProxy[CkMyPe()].enqueueLCPO(msg);
2789  break;
      // Nonbonded self computes alternate between two queues by sequence
      // parity, with a per-GBIS-phase entry method for each.
2791  switch ( seq % 2 ) {
2792  case 0:
2793  //wdProxy[CkMyPe()].enqueueSelfA(msg);
2794  switch ( gbisPhase ) {
2795  case 1:
2796  wdProxy[CkMyPe()].enqueueSelfA1(msg);
2797  break;
2798  case 2:
2799  wdProxy[CkMyPe()].enqueueSelfA2(msg);
2800  break;
2801  case 3:
2802  wdProxy[CkMyPe()].enqueueSelfA3(msg);
2803  break;
2804  }
2805  break;
2806  case 1:
2807  //wdProxy[CkMyPe()].enqueueSelfB(msg);
2808  switch ( gbisPhase ) {
2809  case 1:
2810  wdProxy[CkMyPe()].enqueueSelfB1(msg);
2811  break;
2812  case 2:
2813  wdProxy[CkMyPe()].enqueueSelfB2(msg);
2814  break;
2815  case 3:
2816  wdProxy[CkMyPe()].enqueueSelfB3(msg);
2817  break;
2818  }
2819  break;
2820  default:
2821  NAMD_bug("WorkDistrib::messageEnqueueSelf case statement error!");
2822  }
2823  break;
      // Nonbonded pair computes: same parity scheme plus a WorkC queue.
2825  switch ( seq % 2 ) {
2826  case 0:
2827  //wdProxy[CkMyPe()].enqueueWorkA(msg);
2828  switch ( gbisPhase ) {
2829  case 1:
2830  wdProxy[CkMyPe()].enqueueWorkA1(msg);
2831  break;
2832  case 2:
2833  wdProxy[CkMyPe()].enqueueWorkA2(msg);
2834  break;
2835  case 3:
2836  wdProxy[CkMyPe()].enqueueWorkA3(msg);
2837  break;
2838  }
2839  break;
2840  case 1:
2841  //wdProxy[CkMyPe()].enqueueWorkB(msg);
2842  switch ( gbisPhase ) {
2843  case 1:
2844  wdProxy[CkMyPe()].enqueueWorkB1(msg);
2845  break;
2846  case 2:
2847  wdProxy[CkMyPe()].enqueueWorkB2(msg);
2848  break;
2849  case 3:
2850  wdProxy[CkMyPe()].enqueueWorkB3(msg);
2851  break;
2852  }
2853  break;
2854  case 2:
2855  wdProxy[CkMyPe()].enqueueWorkC(msg);
2856  break;
2857  default:
2858  NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
2859  }
2860  break;
2862 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2864 // CkPrintf("WorkDistrib[%d]::CUDA seq=%d phase=%d\n", CkMyPe(), seq, gbisPhase);
2865  //wdProxy[CkMyPe()].enqueueCUDA(msg);
2866  switch ( gbisPhase ) {
2867  case 1:
2868  wdProxy[CkMyPe()].enqueueCUDA(msg);
2869  break;
2870  case 2:
2871  wdProxy[CkMyPe()].enqueueCUDAP2(msg);
2872  break;
2873  case 3:
2874  wdProxy[CkMyPe()].enqueueCUDAP3(msg);
2875  break;
2876  }
2877 #else
      // NOTE(review): the non-CUDA branch body (doxygen line 2878,
      // presumably a NAMD_bug call) was stripped by extraction.
2879 #endif
2880  break;
2882 #ifdef NAMD_MIC
2883  wdProxy[CkMyPe()].enqueueMIC(msg);
2884 #endif
2885  break;
2886  case computePmeType:
2887  // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
2888  wdProxy[CkMyPe()].enqueuePme(msg);
2889  break;
2890 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2891  case computePmeCUDAType:
2892  wdProxy[CkMyPe()].enqueuePme(msg);
2893  break;
2894 #endif
2895  default:
2896  wdProxy[CkMyPe()].enqueueWork(msg);
2897  }
2898 }
2899 
2900 //----------------------------------------------------------------------
// NOTE(review): the signature line (doxygen line 2901) was stripped by
// extraction; pattern matches WorkDistrib::messageFinishCUDA(Compute *)
// -- TODO confirm against the original WorkDistrib.C.
// Re-stamps the compute's work message with its priority and dispatches
// to the per-GBIS-phase finishCUDA entry method on the local PE.
2902  LocalWorkMsg *msg = compute->localWorkMsg;
2903  int seq = compute->sequence();
2904  int gbisPhase = compute->getGBISPhase();
2905 
2906  if ( seq < 0 ) {
2907  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2908  } else {
2909  SET_PRIORITY(msg,seq,compute->priority());
2910  }
2911 
2912  msg->compute = compute; // pointer is valid since send is to local Pe
2913  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2914 
2915 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2916  //wdProxy[CkMyPe()].finishCUDA(msg);
2917  switch ( gbisPhase ) {
2918  case 1:
2919  wdProxy[CkMyPe()].finishCUDA(msg);
2920  break;
2921  case 2:
2922  wdProxy[CkMyPe()].finishCUDAP2(msg);
2923  break;
2924  case 3:
2925  wdProxy[CkMyPe()].finishCUDAP3(msg);
2926  break;
2927  }
2928 #else
      // NOTE(review): the non-CUDA branch body (doxygen line 2929,
      // presumably a NAMD_bug call) was stripped by extraction.
2930 #endif
2931 }
2932 
2933 //----------------------------------------------------------------------
// NOTE(review): the signature line (doxygen line 2934) was stripped by
// extraction; by the NAMD_bug text this is
// WorkDistrib::messageFinishMIC(Compute *) -- TODO confirm.
// Re-stamps the compute's work message with its priority and dispatches
// to finishMIC on the local PE (MIC builds only).
2935  LocalWorkMsg *msg = compute->localWorkMsg;
2936  int seq = compute->sequence();
2937  int gbisPhase = compute->getGBISPhase();
2938 
2939  if ( seq < 0 ) {
2940  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
2941  } else {
2942  SET_PRIORITY(msg,seq,compute->priority());
2943  }
2944 
2945  msg->compute = compute; // pointer is valid since send is to local Pe
2946  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2947 
2948 #ifdef NAMD_MIC
2949  wdProxy[CkMyPe()].finishMIC(msg);
2950 #else
      // NOTE(review): the non-MIC branch body (doxygen line 2951,
      // presumably a NAMD_bug call) was stripped by extraction.
2952 #endif
2953 }
2954 
// NOTE(review): the fragments below are the bodies of WorkDistrib's
// enqueue*() handler entry methods (enqueueExcls, enqueueBonds, ...,
// enqueueSelfA1..B3, enqueueWorkA1..C, enqueueCUDA*, enqueueMIC).  The
// doxygen extraction stripped each handler's signature line and, in most
// cases, its "msg->compute->doWork();" line (both were hyperlinks) --
// the two fragments that retain a doWork() call below show the common
// shape.  Each handler runs the compute's work and then verifies that
// the compute still owns the message just processed, aborting via
// NAMD_bug if the LocalWorkMsg recycling invariant is violated.
// Restore the stripped lines from the original WorkDistrib.C.
2957  if ( msg->compute->localWorkMsg != msg )
2958  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2959 }
2960 
2963  if ( msg->compute->localWorkMsg != msg )
2964  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2965 }
2966 
2969  if ( msg->compute->localWorkMsg != msg )
2970  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2971 }
2972 
2975  if ( msg->compute->localWorkMsg != msg )
2976  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2977 }
2978 
2981  if ( msg->compute->localWorkMsg != msg )
2982  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2983 }
2984 
2987  if ( msg->compute->localWorkMsg != msg )
2988  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2989 }
2990 
2993  if ( msg->compute->localWorkMsg != msg )
2994  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2995 }
2996 
2999  if ( msg->compute->localWorkMsg != msg )
3000  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3001 }
3002 
3005  if ( msg->compute->localWorkMsg != msg )
3006  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3007 }
3008 
3009 // JLai
3011  msg->compute->doWork();
3013  if ( msg->compute->localWorkMsg != msg )
3014  NAMD_bug("\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
3015 }
3016 // End of JLai
3017 
3020  if ( msg->compute->localWorkMsg != msg )
3021  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3022 }
3023 
3025  msg->compute->doWork();
3026  if ( msg->compute->localWorkMsg != msg )
3027  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3028 }
3031  if ( msg->compute->localWorkMsg != msg )
3032  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3033 }
3036  if ( msg->compute->localWorkMsg != msg )
3037  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3038 }
3041  if ( msg->compute->localWorkMsg != msg )
3042  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3043 }
3044 
3047  if ( msg->compute->localWorkMsg != msg )
3048  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3049 }
3052  if ( msg->compute->localWorkMsg != msg )
3053  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3054 }
3057  if ( msg->compute->localWorkMsg != msg )
3058  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3059 }
3060 
3063  if ( msg->compute->localWorkMsg != msg )
3064  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3065 }
3068  if ( msg->compute->localWorkMsg != msg )
3069  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3070 }
3073  if ( msg->compute->localWorkMsg != msg )
3074  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3075 }
3076 
3079  if ( msg->compute->localWorkMsg != msg )
3080  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3081 }
3084  if ( msg->compute->localWorkMsg != msg )
3085  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3086 }
3089  if ( msg->compute->localWorkMsg != msg )
3090  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3091 }
3092 
3093 
3094 
3097  if ( msg->compute->localWorkMsg != msg )
3098  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3099 }
3100 
3103  // ComputeNonbondedCUDA *c = msg->compute;
3104  // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
3105  // NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3106 }
3109 }
3112 }
3113 
3115  msg->compute->finishPatch(msg->data);
3116 }
3117 
3120  // ComputeNonbondedCUDA *c = msg->compute;
3121  // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
3122  // NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3123 }
3126 }
3129 }
3130 
3133 }
3136 }
3137 
3138 
3139 //**********************************************************************
3140 //
3141 // FUNCTION velocities_from_PDB
3142 //
3143 // INPUTS:
3144 // v - Array of vectors to populate
3145 // filename - name of the PDB filename to read in
3146 //
3147 // This function reads in a set of initial velocities from a
3148 // PDB file. It places the velocities into the array of Vectors
3149 // passed to it.
3150 //
3151 //***********************************************************************/
3152 
3153 void WorkDistrib::velocities_from_PDB(const char *filename,
3154  Vector *v, int totalAtoms)
3155 {
3156  PDB *v_pdb; // PDB info from velocity PDB
3157  int i;
3158 
3159  // Read the PDB
3160  v_pdb = new PDB(filename);
3161  if ( v_pdb == NULL )
3162  {
3163  NAMD_die("memory allocation failed in Node::velocities_from_PDB");
3164  }
3165 
3166  // Make sure the number of velocities read in matches
3167  // the number of atoms we have
3168  if (v_pdb->num_atoms() != totalAtoms)
3169  {
3170  char err_msg[129];
3171 
3172  sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
3173  v_pdb->num_atoms());
3174 
3175  NAMD_die(err_msg);
3176  }
3177 
3178  // Get the entire list of atom info and loop through
3179  // them assigning the velocity vector for each one
3180  v_pdb->get_all_positions(v);
3181 
3182  for (i=0; i<totalAtoms; i++)
3183  {
3184  v[i].x *= PDBVELINVFACTOR;
3185  v[i].y *= PDBVELINVFACTOR;
3186  v[i].z *= PDBVELINVFACTOR;
3187  }
3188 
3189  delete v_pdb;
3190 }
3191 // END OF FUNCTION velocities_from_PDB
3192 
3193 //**********************************************************************
3194 //
3195 // FUNCTION velocities_from_binfile
3196 //
3197 // INPUTS:
 3198 // fname - File name to read velocities from
 3199 // n - Number of atoms in system
 3200 // vels - Array of velocity vectors to populate
 3201 //
 3202 // This function reads in the velocities in binary format. This is
 3203 // done to preserve accuracy between restarts of namd.
3204 //
3205 //**********************************************************************
3206 
void WorkDistrib::velocities_from_binfile(const char *fname, Vector *vels, int n)
{
  // Read n velocity vectors in NAMD's binary restart format from fname
  // into vels.  Binary format preserves full floating-point precision
  // between restarts, unlike the text PDB path.
  read_binary_file(fname,vels,n);
}
3211 // END OF FUNCTION velocities_from_binfile
3212 
3213 //**********************************************************************
3214 //
3215 // FUNCTION random_velocities
3216 //
3217 // INPUTS:
3218 // v - array of vectors to populate
 3219 // Temp - Temperature to achieve
3220 //
3221 // This function assigns a random velocity distribution to a
3222 // simulation to achieve a desired initial temperature. The method
3223 // used here was stolen from the program X-PLOR.
3224 //
3225 //**********************************************************************
3226 
3227 void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
3228  Vector *v, int totalAtoms)
3229 {
3230  int i, j; // Loop counter
3231  BigReal kbT; // Boltzman constant * Temp
3232  BigReal randnum; // Random number from -6.0 to 6.0
3233  BigReal kbToverM; // sqrt(Kb*Temp/Mass)
3234  SimParameters *simParams = Node::Object()->simParameters;
3235  Bool lesOn = simParams->lesOn;
3236  Random vel_random(simParams->randomSeed);
3237 
3238  int lesReduceTemp = lesOn && simParams->lesReduceTemp;
3239  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
3240 
3241  kbT = Temp*BOLTZMANN;
3242 
3243  // Loop through all the atoms and assign velocities in
3244  // the x, y and z directions for each one
3245  for (i=0; i<totalAtoms; i++)
3246  {
3247  if (structure->atommass(i) <= 0.) {
3248  kbToverM = 0.;
3249  } else {
3250  kbToverM = sqrt(kbT *
3251  ( lesOn && structure->get_fep_type(i) ? tempFactor : 1.0 ) /
3252  structure->atommass(i) );
3253  }
3254 
3255  // The following comment was stolen from X-PLOR where
3256  // the following section of code was adapted from.
3257 
3258  // This section generates a Gaussian random
3259  // deviate of 0.0 mean and standard deviation RFD for
3260  // each of the three spatial dimensions.
3261  // The algorithm is a "sum of uniform deviates algorithm"
3262  // which may be found in Abramowitz and Stegun,
3263  // "Handbook of Mathematical Functions", pg 952.
3264  for (randnum=0.0, j=0; j<12; j++)
3265  {
3266  randnum += vel_random.uniform();
3267  }
3268 
3269  randnum -= 6.0;
3270 
3271  v[i].x = randnum*kbToverM;
3272 
3273  for (randnum=0.0, j=0; j<12; j++)
3274  {
3275  randnum += vel_random.uniform();
3276  }
3277 
3278  randnum -= 6.0;
3279 
3280  v[i].y = randnum*kbToverM;
3281 
3282  for (randnum=0.0, j=0; j<12; j++)
3283  {
3284  randnum += vel_random.uniform();
3285  }
3286 
3287  randnum -= 6.0;
3288 
3289  v[i].z = randnum*kbToverM;
3290  }
3291 
3292  if ( simParams->drudeOn ) for (i=0; i<totalAtoms; i++) {
3293  if ( structure->is_drude(i) ) {
3294  v[i] = v[structure->get_mother_atom(i)]; // zero is good enough
3295  }
3296  }
3297 }
3298 /* END OF FUNCTION random_velocities */
3299 
3300 //**********************************************************************
3301 //
3302 // FUNCTION remove_com_motion
3303 //
3304 // INPUTS:
3305 // vel - Array of initial velocity vectors
3306 //
3307 // This function removes the center of mass motion from a molecule.
3308 //
3309 //**********************************************************************
3310 
3311 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
3312 {
3313  Vector mv(0,0,0); // Sum of (mv)_i
3314  BigReal totalMass=0; // Total mass of system
3315  int i; // Loop counter
3316 
3317  // Loop through and compute the net momentum
3318  for (i=0; i<n; i++)
3319  {
3320  BigReal mass = structure->atommass(i);
3321  mv += mass * vel[i];
3322  totalMass += mass;
3323  }
3324 
3325  mv /= totalMass;
3326 
3327  iout << iINFO << "REMOVING COM VELOCITY "
3328  << ( PDBVELFACTOR * mv ) << "\n" << endi;
3329 
3330  for (i=0; i<n; i++) { vel[i] -= mv; }
3331 
3332 }
3333 /* END OF FUNCTION remove_com_motion */
3334 
3335 #if USE_TOPOMAP
3336 
3337 //Specifically designed for BGL and other 3d Tori architectures
3338 //Partition Torus and Patch grid together using recursive bisection.
3339 int WorkDistrib::assignPatchesTopoGridRecBisection() {
3340 
3341  PatchMap *patchMap = PatchMap::Object();
3342  int *assignedNode = new int[patchMap->numPatches()];
3343  int numNodes = Node::Object()->numNodes();
3344  SimParameters *simParams = Node::Object()->simParameters;
3345  if(simParams->simulateInitialMapping) {
3346  numNodes = simParams->simulatedPEs;
3347  }
3348 
3349  int usedNodes = numNodes;
3350  CkPrintf("assignPatchesTopoGridRecBisection\n");
3351  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
3352  usedNodes -= 1;
3353  if ( simParams->noPatchesOnOne && numNodes > 2 )
3354  usedNodes -= 1;
3355  }
3356  RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());
3357 
3358  int xsize = 0, ysize = 0, zsize = 0;
3359 
3360  // Right now assumes a T*** (e.g. TXYZ) mapping
3361  TopoManager tmgr;
3362  xsize = tmgr.getDimNX();
3363  ysize = tmgr.getDimNY();
3364  zsize = tmgr.getDimNZ();
3365 
3366  //Fix to not assign patches to processor 0
3367  int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
3368 
3369  delete [] assignedNode;
3370 
3371  return rc;
3372 }
3373 #endif
3374 
3375 
3376 #if defined(NAMD_MIC)
3377  extern void mic_hostDeviceLDB();
3378  extern void mic_contributeHostDeviceLDB(int idLen, int * id);
3379  extern void mic_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2);
3380 #endif
3381 
3383  #if defined(NAMD_MIC)
3384  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3385  wdProxy.initHostDeviceLDB();
3386  #endif
3387 }
3388 
3390  #if defined(NAMD_MIC)
3391  mic_hostDeviceLDB();
3392  #endif
3393 }
3394 
void WorkDistrib::send_contributeHostDeviceLDB(int peSetLen, int * peSet) {
  #if defined(NAMD_MIC)
  // Forward this PE set to the WorkDistrib object on PE 0, which
  // aggregates host/device load-balancing contributions.  No-op in
  // non-MIC builds.
  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
  #endif
}
3401 
void WorkDistrib::contributeHostDeviceLDB(int peSetLen, int * peSet) {
  #if defined(NAMD_MIC)
  // Hand the contributed PE set to the MIC runtime's load balancer.
  // No-op in non-MIC builds.
  mic_contributeHostDeviceLDB(peSetLen, peSet);
  #endif
}
3407 
void WorkDistrib::send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
  #if defined(NAMD_MIC)
  // Broadcast the device load-balancer tuning parameters to the
  // WorkDistrib group on every PE.  No-op in non-MIC builds.
  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
  #endif
}
3414 
void WorkDistrib::setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
  #if defined(NAMD_MIC)
  // Apply the broadcast load-balancer tuning parameters locally via the
  // MIC runtime.  No-op in non-MIC builds.
  mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
  #endif
}
3420 
3421 
3422 #include "WorkDistrib.def.h"
3423 
static Node * Object()
Definition: Node.h:86
#define LDBAL_HYBRID
Definition: SimParameters.h:63
int npatches
Definition: WorkDistrib.C:1490
static int offset_b(int i)
Definition: Lattice.h:248
unsigned char partition
Definition: NamdTypes.h:56
void setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3415
BlockLoad::TempStorage load
void enqueueMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3131
Real langevinParam
Definition: NamdTypes.h:110
Real langevin_param(int atomnum) const
Definition: Molecule.h:1300
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
static void sortPmePes(int *pmepes, int xdim, int ydim)
Definition: WorkDistrib.C:304
Bool simulateInitialMapping
static void messageFinishMIC(Compute *)
Definition: WorkDistrib.C:2934
int isSendSpanningTreeUnset()
patch_sortop_curve_b(PatchMap *m)
Definition: WorkDistrib.C:1936
void enqueueAngles(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2973
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:2901
void end(void)
Definition: MStream.C:176
Definition: PDB.h:36
int sequence(void)
Definition: Compute.h:64
PatchID assignToPatch(Position p, const Lattice &l)
Definition: PatchMap.inl:14
void setNewNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:144
void setRecvSpanning()
Definition: ProxyMgr.C:371
static int offset_c(int i)
Definition: Lattice.h:249
Vector a_r() const
Definition: Lattice.h:268
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:1976
int numComputes(void)
Definition: ComputeMap.h:101
void saveComputeMap(const char *fname)
Definition: ComputeMap.C:262
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
unsigned int hydrogenGroupSize
Definition: NamdTypes.h:58
#define BOLTZMANN
Definition: common.h:47
Definition: Node.h:78
static int * peCompactOrdering
Definition: WorkDistrib.h:117
int ComputeID
Definition: NamdTypes.h:183
unsigned int atomFixed
Definition: NamdTypes.h:96
void finishCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3127
void initPtrs()
Definition: ComputeMap.C:82
void enqueueCrossterms(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3003
Position fixedPosition
Definition: NamdTypes.h:102
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:42
int isRecvSpanningTreeUnset()
void enqueuePme(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3018
int gridsize_c(void) const
Definition: PatchMap.h:66
static PatchMap * Object()
Definition: PatchMap.h:27
int operator==(const nodesort &o) const
Definition: WorkDistrib.C:1492
void enqueueWorkA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3071
ComputeType
Definition: ComputeMap.h:20
void enqueueWork(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2955
BigReal max_c(int pid) const
Definition: PatchMap.h:96
int getNumFixedAtoms()
Definition: Patch.h:112
void enqueueGromacsPair(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3010
void enqueueSelfA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3029
void finishCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3124
Definition: Vector.h:64
#define DrudeAtom
Definition: structures.h:23
BigReal min_a(int pid) const
Definition: PatchMap.h:91
static void send_contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3395
#define HydrogenAtom
Definition: structures.h:16
SimParameters * simParameters
Definition: Node.h:178
Vector c_r() const
Definition: Lattice.h:270
void loadComputeMap(const char *fname)
Definition: ComputeMap.C:278
int index_a(int pid) const
Definition: PatchMap.h:86
static __thread atom * atoms
void createHomePatch(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:74
Atom * getAtoms() const
Definition: Molecule.h:491
void setSendSpanning()
Definition: ProxyMgr.C:362
void sendAtoms(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:157
TopoManager tmgr
Definition: WorkDistrib.C:1625
float Real
Definition: common.h:109
void enqueueExcls(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2961
#define DebugM(x, y)
Definition: Debug.h:59
void enqueueBonds(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2967
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
#define ALLBUTME
Definition: Communicate.h:14
BigReal z
Definition: Vector.h:66
void enqueueAniso(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2997
int packSize(void)
Definition: PatchMap.C:314
void enqueueSelfB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3045
#define FALSE
Definition: common.h:118
Real rigid_bond_length(int atomnum) const
Definition: Molecule.h:1457
Position position
Definition: NamdTypes.h:53
void enqueueWorkB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3077
static void messageEnqueueWork(Compute *)
Definition: WorkDistrib.C:2732
static void peOrderingReady()
Definition: WorkDistrib.C:170
std::ostream & iWARN(std::ostream &s)
Definition: InfoStream.C:82
bool operator()(int pe1, int pe2) const
Definition: WorkDistrib.C:1848
MIStream * get(char &data)
Definition: MStream.h:29
#define iout
Definition: InfoStream.h:51
int operator<(const nodesort &o) const
Definition: WorkDistrib.C:1501
int sizeGrid(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:62
Velocity velocity
Definition: NamdTypes.h:101
ComputeID storeCompute(int node, int maxPids, ComputeType type, int partition=-1, int numPartitions=0)
Definition: ComputeMap.C:153
int num_atoms(void)
Definition: PDB.C:323
int numaway_b(void) const
Definition: PatchMap.h:69
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
void enqueueSelfA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3039
char computeMapFilename[NAMD_FILENAME_BUFFER_SIZE]
Vector origin() const
Definition: Lattice.h:262
Position nearest(Position data, ScaledPosition ref) const
Definition: Lattice.h:90
unsigned int groupFixed
Definition: NamdTypes.h:97
Bool pairInteractionOn
int basenode(int pid) const
Definition: PatchMap.h:117
bool operator()(int a, int b) const
Definition: WorkDistrib.C:267
void movePatch(PatchID, NodeID)
Definition: PatchMgr.C:84
Vector b_r() const
Definition: Lattice.h:269
int getGBISPhase(void)
Definition: Compute.h:66
LocalWorkMsg *const localWorkMsg
Definition: Compute.h:46
BigReal min_b(int pid) const
Definition: PatchMap.h:93
void patchMapInit(void)
Definition: WorkDistrib.C:1109
void recvComputeMapChanges(ComputeMapChangeMsg *)
Definition: WorkDistrib.C:374
int allocateCids()
Definition: ComputeMap.C:143
int atomsInGroup
Definition: Hydrogen.h:19
#define MACHINE_PROGRESS
Compute * compute
Definition: WorkDistrib.h:33
char * patchMapData
Definition: WorkDistrib.C:979
char newNumPartitions(ComputeID cid)
Definition: ComputeMap.h:141
void unpack(char *buf)
Definition: PatchMap.C:365
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:1937
Charge charge
Definition: NamdTypes.h:54
virtual void doWork()
Definition: Compute.C:108
void reorder(Elem *a, int n)
Definition: Random.h:220
HydrogenGroup hydrogenGroup
Definition: Molecule.h:640
void enqueueCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3101
void sendComputeMap(void)
Definition: WorkDistrib.C:1082
void enqueueWorkB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3082
void read_binary_file(const char *fname, Vector *data, int n)
Definition: NamdOneTools.C:52
int numNodes()
Definition: Node.h:189
void enqueueCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3107
void assignBaseNode(PatchID, NodeID)
Definition: PatchMap.C:472
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:272
void newCid(int pid, int cid)
Definition: PatchMap.C:512
void enqueueSelfB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3055
int coord(int pe, int dim)
Definition: WorkDistrib.C:1839
#define order
Definition: PmeRealSpace.C:235
Definition: Random.h:37
void enqueueWorkC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3095
pe_sortop_bit_reversed(int *r)
Definition: WorkDistrib.C:144
int gridsize_a(void) const
Definition: PatchMap.h:64
int compare_bit_reversed(int a, int b)
Definition: ComputePme.C:324
Bool is_drude(int)
void reinitAtoms(const char *basename=0)
Definition: WorkDistrib.C:956
void enqueueThole(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2991
void enqueueWorkA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3066
void createHomePatches(void)
Definition: WorkDistrib.C:893
Compute * compute
Definition: WorkDistrib.h:27
void NAMD_bug(const char *err_msg)
Definition: common.C:129
static int offset_a(int i)
Definition: Lattice.h:247
void enqueueImpropers(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2985
static int eventMachineProgress
Definition: WorkDistrib.C:100
AtomID atomID
Definition: Hydrogen.h:16
BigReal langevinDamping
void enqueueLCPO(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3024
void sendMovePatches()
Definition: PatchMgr.C:110
void pack(MOStream *msg)
Definition: ComputeMap.C:63
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
Definition: PatchMap.C:579
Bool staticAtomAssignment
pe_sortop_coord_y(ScaledPosition *s)
Definition: WorkDistrib.C:266
int Bool
Definition: common.h:133
Bool replicaUniformPatchGrids
#define COMPUTEMAPTAG
Definition: common.h:159
int index_b(int pid) const
Definition: PatchMap.h:87
bool operator()(int a, int b) const
Definition: WorkDistrib.C:145
int priority(void)
Definition: Compute.h:65
void finishCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3118
~WorkDistrib(void)
Definition: WorkDistrib.C:121
BigReal x
Definition: Vector.h:66
int numAtoms
Definition: Molecule.h:557
int PatchID
Definition: NamdTypes.h:182
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
void setNewNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:120
virtual void finishPatch(int)
Definition: Compute.C:112
void NAMD_die(const char *err_msg)
Definition: common.C:85
PDB * pdb
Definition: Node.h:180
void enqueueCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3110
static int * peDiffuseOrderingIndex
Definition: WorkDistrib.h:116
Bool outputPatchDetails
ConfigList * configList
Definition: Node.h:179
void enqueueWorkA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3061
#define BUFSIZE
Definition: Communicate.h:15
Bool pressureProfileEwaldOn
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:73
static int * peDiffuseOrdering
Definition: WorkDistrib.h:115
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:171
unsigned char get_fep_type(int anum) const
Definition: Molecule.h:1349
BigReal max_b(int pid) const
Definition: PatchMap.h:94
int index_c(int pid) const
Definition: PatchMap.h:88
static int peOrderingInit
Definition: WorkDistrib.h:114
void sendPatchMap(void)
Definition: WorkDistrib.C:982
void find_extremes(const Lattice &, BigReal frac=1.0)
Definition: PDB.C:438
void saveComputeMapChanges(int, CkGroupID)
Definition: WorkDistrib.C:356
int add(const Elem &elem)
Definition: ResizeArray.h:97
int migrationGroupSize
Definition: NamdTypes.h:117
int32 status
Definition: NamdTypes.h:115
void finishCUDAPatch(FinishWorkMsg *msg)
Definition: WorkDistrib.C:3114
BigReal max_a(int pid) const
Definition: PatchMap.h:92
void savePatchMap(PatchMapMsg *msg)
Definition: WorkDistrib.C:1018
static int * peCompactOrderingIndex
Definition: WorkDistrib.h:118
ScaledPosition center(int pid) const
Definition: PatchMap.h:99
BlockRadixSort::TempStorage sort
void topo_getargs(char **)
Definition: WorkDistrib.C:90
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:180
patch_sortop_curve_a(PatchMap *m)
Definition: WorkDistrib.C:1915
int myid()
Definition: Node.h:188
unsigned int randomSeed
BigReal initialTemp
Real rigidBondLength
Definition: NamdTypes.h:118
long long int64
Definition: common.h:34
int pressureProfileAtomTypes
#define simParams
Definition: Output.C:127
short vdwType
Definition: NamdTypes.h:55
int atomsInMigrationGroup
Definition: Hydrogen.h:29
static int randtopo
Definition: WorkDistrib.C:84
void newPid(ComputeID cid, int pid, int trans=13)
Definition: ComputeMap.C:198
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3408
Index atomvdwtype(int anum) const
Definition: Molecule.h:1062
ScaledPosition * spos
Definition: WorkDistrib.C:257
int numPatches(void) const
Definition: PatchMap.h:59
int node(int pid) const
Definition: PatchMap.h:114
Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:132
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:1958
void mapComputes(void)
Definition: WorkDistrib.C:2274
char * data
Definition: ConfigList.h:48
void enqueueSelfA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3034
static ComputeMap * Object()
Definition: ComputeMap.h:89
static void build_ordering(void *)
Definition: WorkDistrib.C:86
BigReal y
Definition: Vector.h:66
int getNumAtoms()
Definition: Patch.h:105
Real atomcharge(int anum) const
Definition: Molecule.h:1052
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
Mass mass
Definition: NamdTypes.h:108
void distributeHomePatches(void)
Definition: WorkDistrib.C:934
StringList * find(const char *name) const
Definition: ConfigList.C:341
void assignNode(PatchID, NodeID)
Definition: PatchMap.C:465
bool operator()(int a, int b) const
Definition: WorkDistrib.C:259
bool less_than_bit_reversed(int a, int b)
Definition: ComputePme.C:333
patch_sortop_curve_c(PatchMap *m)
Definition: WorkDistrib.C:1957
void mic_initialize()
#define PDBVELFACTOR
Definition: common.h:50
Bool pairInteractionSelf
int type()
Definition: Compute.h:48
void enqueueSelfB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3050
BigReal patchDimension
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
gridSize y
void initHostDeviceLDB()
Definition: WorkDistrib.C:3389
MOStream * put(char data)
Definition: MStream.h:112
Real atommass(int anum) const
Definition: Molecule.h:1042
static void send_initHostDeviceLDB()
Definition: WorkDistrib.C:3382
int numaway_c(void) const
Definition: PatchMap.h:70
FullAtomList * createAtomLists(const char *basename=0)
Definition: WorkDistrib.C:622
#define LonepairAtom
Definition: structures.h:22
int size(void) const
Definition: ResizeArray.h:127
unsigned int nonbondedGroupSize
Definition: NamdTypes.h:57
BigReal min_c(int pid) const
Definition: PatchMap.h:95
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
pe_sortop_coord_x(ScaledPosition *s)
Definition: WorkDistrib.C:258
void enqueueDihedrals(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2979
Bool is_atom_fixed(int atomnum) const
Definition: Molecule.h:1411
void cuda_initialize()
Definition: DeviceCUDA.C:24
int b_p() const
Definition: Lattice.h:274
void finishMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3134
gridSize x
void contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3402
void pack(char *buf, int size)
Definition: PatchMap.C:328
#define RIGID_NONE
Definition: SimParameters.h:77
int isOutputProcessor(int pe)
void doneSaveComputeMap(CkReductionMsg *)
Definition: WorkDistrib.C:427
void unpack(MIStream *msg)
Definition: ComputeMap.C:70
double recipMass
Definition: NamdTypes.h:103
int numaway_a(void) const
Definition: PatchMap.h:68
void get_all_positions(Vector *)
Definition: PDB.C:366
int a_p() const
Definition: Lattice.h:273
pe_sortop_topo(TopoManagerWrapper &t, int *d)
Definition: WorkDistrib.C:1847
Molecule * molecule
Definition: Node.h:176
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:1916
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1825
void enqueueWorkB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3087
int gridsize_b(void) const
Definition: PatchMap.h:65
#define TRUE
Definition: common.h:119
int get_mother_atom(int)
const ComputeID cid
Definition: Compute.h:43
Bool noPatchesOnOutputPEs
int fixpe(int pe)
Definition: WorkDistrib.C:1629
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1860
ScaledPosition * spos
Definition: WorkDistrib.C:265
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:104
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
Definition: SortAtoms.C:129
double BigReal
Definition: common.h:114
Transform transform
Definition: NamdTypes.h:116
int c_p() const
Definition: Lattice.h:275
#define PDBVELINVFACTOR
Definition: common.h:51
void assignNodeToPatch(void)
Definition: WorkDistrib.C:1323
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
Definition: PatchMap.C:634
NodeID newNode(ComputeID cid)
Definition: ComputeMap.h:116
iterator begin(void)
Definition: ResizeArray.h:36