namd/doxygen/WorkDistrib_8C_source.html

 /*****************************************************************************
  * $Source: /home/cvs/namd/cvsroot/namd2/src/WorkDistrib.C,v $
  * $Author: jim $
  * $Date: 2017/03/30 20:06:17 $
  * $Revision: 1.1291 $
  *****************************************************************************/

 #include <stdio.h>

 #include "ComputeOneFourNbTholes.h"
 #include "InfoStream.h"
 #include "Communicate.h"
 #include "ProcessorPrivate.h"
 #include "BOCgroup.h"
 #include "WorkDistrib.decl.h"
 #include "WorkDistrib.h"
 #include "Lattice.h"
 #include "ComputeMsmMsa.h"  // needed for MsmMsaData definition
 #include "main.decl.h"
 #include "main.h"
 #include "Node.h"
 #include "PatchMgr.h"
 #include "PatchMap.inl"
 #include "NamdTypes.h"
 #include "PDB.h"
 #include "SimParameters.h"
 #include "Parameters.h"
 #include "Molecule.h"
 #include "NamdOneTools.h"
 #include "Compute.h"
 #include "ComputeMap.h"
 #include "RecBisection.h"
 #include "Random.h"
 #include "varsizemsg.h"
 #include "ProxyMgr.h"
 #include "Priorities.h"
 #include "SortAtoms.h"
 #include <algorithm>
 #include "TopoManager.h"
 #include "ComputePmeCUDAMgr.h"
 #include "ConfigList.h"

 #include "DeviceCUDA.h"
 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
 #ifdef WIN32
 #define __thread __declspec(thread)
 #endif
 extern __thread DeviceCUDA *deviceCUDA;
 #endif

 //#define DEBUGM
 #define MIN_DEBUG_LEVEL 2
 #include "Debug.h"
 #ifdef MEM_OPT_VERSION
 extern int isOutputProcessor(int);
 #endif
 // Charm++ message type passed from saveComputeMapChanges() to
 // recvComputeMapChanges().  In the code visible in this file the message is
 // allocated without payload and deleted on receipt; none of the fields
 // below is read or written here.
 class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
 {
 public:

   int numNewNodes;          // NOTE(review): presumably the length of newNodes -- confirm
   int numNewNumPartitions;  // NOTE(review): presumably the length of newNumPartitions -- confirm
   int *newNodes;
   char *newNumPartitions;

 //  VARSIZE_DECL(ComputeMapChangeMsg);
 };

 /*
 VARSIZE_MSG(ComputeMapChangeMsg,
   VARSIZE_ARRAY(newNodes);
 )
 */

 // Set in topo_getargs() from the "+randtopo" command-line flag; when nonzero
 // (and there are more than two physical nodes) buildNodeAwarePeOrdering()
 // randomizes the physical node order used for the compact PE ordering.
 static int randtopo;

 // Callback adapter: ignores its void* argument and forwards to
 // WorkDistrib::buildNodeAwarePeOrdering().  Registered in topo_getargs().
 static void build_ordering(void *) {
   WorkDistrib::buildNodeAwarePeOrdering();
 }

 void topo_getargs(char **argv) {
   randtopo = CmiGetArgFlag(argv, "+randtopo");
   if ( CkMyPe() >= CkNumPes() ) return;
 #if CCD_COND_FN_EXISTS
   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)build_ordering, (void*)0);
 #else
   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
 #endif
 }

 // Trace user-event id used by the non-SMP MACHINE_PROGRESS macro; assigned
 // in the WorkDistrib constructor via traceRegisterUserEvent().
 static int eventMachineProgress;

 //======================================================================
 // Public functions
 //----------------------------------------------------------------------
 // Constructor: records this group's id in the per-PE BOCclass_group table
 // and clears the patch-map / compute-map arrival flags.
 //
 // The preprocessor section inside the body also defines MACHINE_PROGRESS
 // for the remainder of this translation unit: empty on CMK_SMP builds,
 // otherwise a block that traces a user event and calls
 // CmiMachineProgressImpl().  On non-SMP builds the constructor additionally
 // checks that the node size is 1 and registers the trace event.
 WorkDistrib::WorkDistrib()
 {
   CkpvAccess(BOCclass_group).workDistrib = thisgroup;
   patchMapArrived = false;
   computeMapArrived = false;

 #if CMK_SMP
 #define MACHINE_PROGRESS
 #else
 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress);  CmiMachineProgressImpl(); }
   if ( CkMyNodeSize() > 1 ) NAMD_bug("CkMyNodeSize() > 1 for non-smp build");
   eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
 #endif
 }

 //----------------------------------------------------------------------
 // Destructor: intentionally empty; nothing is released here.
 WorkDistrib::~WorkDistrib(void)
 { }

 // Three-way comparison of a and b keyed on the lowest bit in which they
 // differ: the result is negative when a has that bit clear (and b has it
 // set), positive in the opposite case, and 0 when a == b.
 static int compare_bit_reversed(int a, int b) {
   const int diff = a ^ b;
   int mask = 1;
   if ( diff != 0 ) {
     // walk upward until mask sits on the lowest differing bit
     for ( ; (diff & mask) == 0; mask <<= 1 ) { }
   }
   const int a_bit = a & mask;
   const int b_bit = b & mask;
   return a_bit - b_bit;
 }

 // Strict "less than" keyed on the lowest bit in which a and b differ:
 // true exactly when a != b and b has that bit set (so a has it clear).
 static bool less_than_bit_reversed(int a, int b) {
   const int diff = a ^ b;
   if ( diff == 0 ) {
     return false;
   }
   int mask = 1;
   while ( (diff & mask) == 0 ) {
     mask <<= 1;
   }
   return (b & mask) != 0;
 }

 // Ordering functor for PE ids.  PEs are compared with
 // compare_bit_reversed() on three successive keys: rank within the node,
 // the node's rank within its physical node (looked up in
 // rankInPhysOfNode), and finally the physical node id.  The first key
 // that differs decides the order.
 struct pe_sortop_bit_reversed {
   int *rankInPhysOfNode;
   pe_sortop_bit_reversed(int *r) : rankInPhysOfNode(r) {}
   inline bool operator() (int a, int b) const {
     const int byRank = compare_bit_reversed(CmiRankOf(a),CmiRankOf(b));
     if ( byRank != 0 ) return ( byRank < 0 );

     const int byNodeRank = compare_bit_reversed(
         rankInPhysOfNode[CmiNodeOf(a)],rankInPhysOfNode[CmiNodeOf(b)]);
     if ( byNodeRank != 0 ) return ( byNodeRank < 0 );

     const int byPhys = compare_bit_reversed(CmiPhysicalNodeID(a),CmiPhysicalNodeID(b));
     return ( byPhys < 0 );
   }
 };

 // Static PE-ordering tables, built once by buildNodeAwarePeOrdering().
 // peOrderingInit is set to 1 after the arrays have been allocated and
 // filled.  Each *Ordering array lists PE ids in that ordering; each *Index
 // array is its inverse (index[ordering[i]] == i).
 int WorkDistrib::peOrderingInit;
 int* WorkDistrib::peDiffuseOrdering;
 int* WorkDistrib::peDiffuseOrderingIndex;
 int* WorkDistrib::peCompactOrdering;
 int* WorkDistrib::peCompactOrderingIndex;

 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
 // Defined outside this file; called from WorkDistrib::peOrderingReady().
 extern void cuda_initialize();
 #endif

 // Defined outside this file; called from WorkDistrib::peOrderingReady()
 // only when NAMD_MIC is defined.
 void mic_initialize();

 // Hook invoked at the end of buildNodeAwarePeOrdering().  Depending on the
 // build configuration it calls cuda_initialize() (NAMD_CUDA / NAMD_HIP)
 // and/or mic_initialize() (NAMD_MIC); otherwise it does nothing.
 void WorkDistrib::peOrderingReady() {
   //CkPrintf("WorkDistrib::peOrderingReady on %d\n", CkMyPe());
 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
   cuda_initialize();
 #endif
 #ifdef NAMD_MIC
   mic_initialize();
 #endif
 }

 // Builds the static "compact" and "diffuse" PE orderings and their inverse
 // index tables, then calls peOrderingReady().
 //
 // The tables are built at most once: the work is done between
 // CmiMemLock()/CmiMemUnlock() and only while peOrderingInit is still zero.
 // peOrderingReady() is called on every invocation, after the unlock.
 //
 //  - peCompactOrdering lists PEs grouped by physical node, in the order
 //    returned by CmiGetPesOnPhysicalNode (optionally with the physical
 //    nodes permuted when randtopo is set).
 //  - peDiffuseOrdering is the identity list of PEs sorted with
 //    pe_sortop_bit_reversed.
 void WorkDistrib::buildNodeAwarePeOrdering() {

  CmiMemLock();
  if ( ! peOrderingInit ) {
   //CkPrintf("WorkDistrib::buildNodeAwarePeOrdering on pe %d\n", CkMyPe());

   const int numPhys = CmiNumPhysicalNodes();
   const int numNode = CmiNumNodes();
   const int numPe = CmiNumPes();
   ResizeArray<int> numNodeInPhys(numPhys);
   ResizeArray<int> rankInPhysOfNode(numNode);

   // These arrays are never freed in the code visible here.
   peDiffuseOrdering = new int[numPe];
   peDiffuseOrderingIndex = new int[numPe];
   peCompactOrdering = new int[numPe];
   peCompactOrderingIndex = new int[numPe];

   // Compact ordering: concatenate the PE lists of physical nodes 0..numPhys-1.
   int k = 0;
   for ( int ph=0; ph<numPhys; ++ph ) {
     int *pes, npes;
     CmiGetPesOnPhysicalNode(ph, &pes, &npes);
     for ( int i=0; i<npes; ++i, ++k ) {
       peCompactOrdering[k] = pes[i];
     }
     // Step through the PE list one node at a time (i advances by the node
     // size) to number the nodes within this physical node.
     numNodeInPhys[ph] = 0;
     for ( int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
       rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
       numNodeInPhys[ph] += 1;
     }
   }

   if ( randtopo && numPhys > 2 ) {
     if ( ! CkMyNode() ) {
       iout << iWARN << "RANDOMIZING PHYSICAL NODE ORDERING\n" << endi;
     }
     ResizeArray<int> randPhysOrder(numPhys);
     for ( int j=0; j<numPhys; ++j ) {
       randPhysOrder[j] = j;
     }
     // Only entries from index 2 onward are passed to reorder(), so the
     // first two physical nodes keep their positions.  The fixed seed is
     // the same on every caller.
     Random(314159265).reorder(randPhysOrder.begin()+2, numPhys-2);
     // Rebuild the compact ordering following the permuted node order
     // (this k shadows the outer k).
     for ( int j=0, k=0; j<numPhys; ++j ) {
       const int ph = randPhysOrder[j];
       int *pes, npes;
       CmiGetPesOnPhysicalNode(ph, &pes, &npes);
       for ( int i=0; i<npes; ++i, ++k ) {
         peCompactOrdering[k] = pes[i];
       }
     }
   }

   // Diffuse ordering: identity list sorted by the bit-reversed comparator.
   for ( int i=0; i<numPe; ++i ) {
     peDiffuseOrdering[i] = i;
   }
   std::sort(peDiffuseOrdering, peDiffuseOrdering+numPe,
     pe_sortop_bit_reversed(rankInPhysOfNode.begin()));

   // Inverse permutations: index[ordering[i]] == i.
   for ( int i=0; i<numPe; ++i ) {
     peDiffuseOrderingIndex[peDiffuseOrdering[i]] = i;
     peCompactOrderingIndex[peCompactOrdering[i]] = i;
   }

   // Debug dump, disabled by the constant 0 in the condition.
   if ( 0 && CmiMyNode() == 0 ) for ( int i=0; i<numPe; ++i ) {
     CkPrintf("order %5d %5d %5d %5d %5d\n", i,
       peDiffuseOrdering[i],
       peDiffuseOrderingIndex[i],
       peCompactOrdering[i],
       peCompactOrderingIndex[i]);
   }

   peOrderingInit = 1;
  }
  CmiMemUnlock();
  peOrderingReady();

 }

 struct pe_sortop_coord_x {
   ScaledPosition *spos;
   pe_sortop_coord_x(ScaledPosition *s) : spos(s) {}
   inline bool operator() (int a, int b) const {
     return ( spos[a].x < spos[b].x );
   }
 };

 struct pe_sortop_coord_y {
   ScaledPosition *spos;
   pe_sortop_coord_y(ScaledPosition *s) : spos(s) {}
   inline bool operator() (int a, int b) const {
     return ( spos[a].y < spos[b].y );
   }
 };

 // Assigns the PEs in [pe_begin, pe_begin + x_len*y_len) to the grid cells
 // [x_begin,x_end) x [y_begin,y_end) by recursive coordinate bisection.
 //
 //   x_begin,x_end / y_begin,y_end : half-open cell ranges of the sub-grid
 //   pe_begin : PE ids for this sub-grid; reordered in place by the sorts
 //   coord    : per-PE position, indexed by PE id
 //   result   : output grid, written at result[x*ydim + y]
 //   ydim     : row stride of result
 //
 // At each level the longer dimension is split in half after sorting the
 // PEs along that coordinate, so PEs with smaller coordinate values go to
 // the lower half of the cells.
 static void recursive_bisect_coord(
     int x_begin, int x_end, int y_begin, int y_end,
     int *pe_begin, ScaledPosition *coord,
     int *result, int ydim
   ) {
   int x_len = x_end - x_begin;
   int y_len = y_end - y_begin;
   // An empty sub-grid has nothing to place.  Without this guard a zero
   // extent never reaches the 1x1 base case and the recursion is unbounded.
   if ( x_len < 1 || y_len < 1 ) {
     return;
   }
   if ( x_len == 1 && y_len == 1 ) {
     // done, now put this pe in the right place
     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
       coord[*pe_begin].x, coord[*pe_begin].y);
     result[x_begin*ydim + y_begin] = *pe_begin;
     return;
   }
   int *pe_end = pe_begin + x_len * y_len;
   if ( x_len >= y_len ) {
     // split along x: the lower half receives (x_split - x_begin) columns
     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
     int x_split = x_begin + x_len / 2;
     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
   } else {
     // split along y: the lower half receives (y_split - y_begin) rows
     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
     int y_split = y_begin + y_len / 2;
     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
   }
 }

 void WorkDistrib::sortPmePes(int *pmepes, int xdim, int ydim) {
   int numpes = CkNumPes();
   ResizeArray<int> count(numpes);
   ResizeArray<ScaledPosition> sumPos(numpes);
   ResizeArray<ScaledPosition> avgPos(numpes);
   for ( int i=0; i<numpes; ++i ) {
     count[i] = 0;
     sumPos[i] = 0;
     avgPos[i] = 0;
   }
   PatchMap *patchMap = PatchMap::Object();
   for ( int i=0, npatches=patchMap->numPatches(); i<npatches; ++i ) {
     int pe = patchMap->node(i);
     count[pe] += 1;
     sumPos[pe] += patchMap->center(i);
   }
   const int npmepes = xdim*ydim;
   ResizeArray<int> sortpes(npmepes);
   for ( int i=0; i<npmepes; ++i ) {
     int pe = sortpes[i] = pmepes[i];
     int cnt = count[pe];
     ScaledPosition sum = sumPos[pe];
     if ( cnt == 0 ) {
       // average over node
       int node = CkNodeOf(pe);
       int nsize = CkNodeSize(node);
       int pe2 = CkNodeFirst(node);
       for ( int j=0; j<nsize; ++j, ++pe2 )  {
         cnt += count[pe2];
         sum += sumPos[pe2];
       }
     }
     if ( cnt == 0 ) {
       // average over physical node
       int node = CmiPhysicalNodeID(pe);
       int nsize, *nlist;
       CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
       for ( int j=0; j<nsize; ++j )  {
         int pe2 = nlist[j];
         cnt += count[pe2];
         sum += sumPos[pe2];
       }
     }
     if ( cnt ) {
       avgPos[pe] = sum / cnt;
     }
   }
   recursive_bisect_coord(0, xdim, 0, ydim, sortpes.begin(), avgPos.begin(), pmepes, ydim);
 }


 //----------------------------------------------------------------------
 // Starts distribution of the compute-map changes.
 //
 //   ep      : entry point to invoke once all branches are done
 //   chareID : group that owns that entry point
 //
 // Both values are stored for doneSaveComputeMap(); an empty
 // ComputeMapChangeMsg is then sent to recvComputeMapChanges() through a
 // proxy for this group.
 void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
 {
   saveComputeMapReturnEP = ep;
   saveComputeMapReturnChareID = chareID;

   ComputeMapChangeMsg *mapMsg = new (0, 0, 0) ComputeMapChangeMsg;
   CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);

 /*
     // store the latest compute map
   SimParameters *simParams = Node::Object()->simParameters;
   if (simParams->storeComputeMap) {
     computeMap->saveComputeMap(simParams->computeMapFilename);
     CkPrintf("ComputeMap has been stored in %s.\n", simParams->computeMapFilename);
   }
 */
 }

 // Distributes the new node and partition-count assignment of every
 // compute from PE 0 to the other processes.
 //
 // PE 0 writes, for all computes, newNode() and then newNumPartitions() to
 // an output stream addressed to ALLBUTME; the compute count is written
 // before, between and after the two arrays as a consistency marker.
 // Every other PE with rank 0 reads the same stream, verifies the three
 // markers and stores the values with setNewNode()/setNewNumPartitions().
 // PEs with nonzero rank (other than PE 0) transfer nothing.  All PEs then
 // contribute to a reduction whose callback is doneSaveComputeMap().
 void WorkDistrib::recvComputeMapChanges(ComputeMapChangeMsg *msg) {

   // the incoming message only triggers the exchange; it is not read
   delete msg;

   ComputeMap *computeMap = ComputeMap::Object();
   int numComputes = computeMap->numComputes();

   if ( CkMyPe() == 0 ) { // send
     MOStream *out = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
     out->put(numComputes);
     for ( int cid = 0; cid < numComputes; cid++ ) {
       int dest = computeMap->newNode(cid);
       out->put(dest);
     }
     out->put(numComputes);
     for ( int cid = 0; cid < numComputes; cid++ ) {
       char parts = computeMap->newNumPartitions(cid);
       out->put(parts);
     }
     out->put(numComputes);
     out->end();
     delete out;
   } else if ( CkMyRank() == 0 ) { // receive
     MIStream *in = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
     int marker;
     in->get(marker);
     if ( marker != numComputes ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 1 failed\n");
     for ( int cid = 0; cid < numComputes; cid++ ) {
       int dest;
       in->get(dest);
       computeMap->setNewNode(cid,dest);
     }
     in->get(marker);
     if ( marker != numComputes ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 2 failed\n");
     for ( int cid = 0; cid < numComputes; cid++ ) {
       char parts;
       in->get(parts);
       computeMap->setNewNumPartitions(cid,parts);
     }
     in->get(marker);
     if ( marker != numComputes ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 3 failed\n");
     delete in;
   }

   CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
   contribute(0, NULL, CkReduction::random, cb);
 }

 // Reduction callback set up by recvComputeMapChanges().  Discards the
 // reduction message and sends a freshly allocated empty message to the
 // entry point and group that saveComputeMapChanges() stored, addressed to
 // PE 0.
 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
   delete msg;

   CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
 }

 #ifdef MEM_OPT_VERSION
 //All basic info already exists for each atom inside the FullAtomList because
 //it is loaded when reading the binary per-atom file. This function will fill
 //the info regarding transform, nonbondedGroupSize etc. Refer to
 //WorkDistrib::createAtomLists
 // Completes the per-atom fields of one patch's atom list in place.
 //
 //   pid   : patch id; its min/max bounds from PatchMap give the patch center
 //   alist : atoms of that patch, modified in place
 //
 // Three passes are made over the atoms:
 //   1. per atom: reset nonbondedGroupSize, wrap the position toward the
 //      patch center and record the transform, set partition to 0 and, when
 //      Langevin dynamics is on, set langevinParam;
 //   2. per hydrogen group: set groupFixed and flag SETTLE waters (isWater);
 //   3. optionally print a PATCH_DETAILS summary line.
 void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
   PatchMap *patchMap = PatchMap::Object();

   ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
                           0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
                           0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));

     int n = alist.size();
     FullAtom *a = alist.begin();
 /*
     //Those options are not supported in MEM_OPT_VERSION -Chao Mei
 //Modifications for alchemical fep
     Bool alchFepOn = params->alchFepOn;
     Bool alchThermIntOn = params->alchThermIntOn;
 //fepe
     Bool lesOn = params->lesOn;
     Bool pairInteractionOn = params->pairInteractionOn;

     Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
 */
     SimParameters *params = Node::Object()->simParameters;
     const Lattice lattice = params->lattice;
     // Transform of the most recent migration-group parent; reused for the
     // atoms that follow it until the next parent is reached.
     Transform mother_transform;
     for(int j=0; j < n; j++)
     {
       int aid = a[j].id;
       a[j].nonbondedGroupSize = 0;  // must be set based on coordinates

       // a[j].fixedPosition = a[j].position;  ParallelIOMgr stores ref coord here.

       if ( a[j].migrationGroupSize ) {
        // migration-group parent
        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
             // The migration group spans several hydrogen groups: average the
             // positions of the hydrogen-group parents and wrap that average
             // toward the patch center to obtain the group's transform.
             Position pos = a[j].position;
             int mgs = a[j].migrationGroupSize;
             int c = 1;

             for ( int k=a[j].hydrogenGroupSize; k<mgs;
                                 k+=a[j+k].hydrogenGroupSize ) {
               pos += a[j+k].position;
               ++c;
             }

             pos *= 1./c;
             mother_transform = a[j].transform;  // should be 0,0,0
             pos = lattice.nearest(pos,center,&mother_transform);
             a[j].position = lattice.apply_transform(a[j].position,mother_transform);
             a[j].transform = mother_transform;
        } else {
         // single hydrogen group: wrap the parent atom itself
         a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
         mother_transform = a[j].transform;
        }
       } else {
         // not a migration-group parent: follow the parent's transform
         a[j].position = lattice.apply_transform(a[j].position,mother_transform);
         a[j].transform = mother_transform;
       }

 /*
     //Those options are not supported in MEM_OPT_VERSION -Chao Mei
 //Modifications for alchemical fep
       if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
         a[j].partition = molecule->get_fep_type(aid);
       }
       else {
         a[j].partition = 0;
       }
 //fepe
 */
       a[j].partition = 0;

       //set langevinParams based on atom status
       if(params->langevinOn) {
         // Damping defaults to params->langevinDamping; it is zeroed for
         // hydrogens when langevinHydrogen is off and for lone pairs.
         BigReal bval = params->langevinDamping;
         if(!params->langevinHydrogen &&
            ((a[j].status & HydrogenAtom)!=0)) {
           bval = 0;
         }else if ((a[j].status & LonepairAtom)!=0) {
           bval = 0;
         }else if ((a[j].status & DrudeAtom)!=0) {
           bval = params->langevinDamping;
         }
         a[j].langevinParam = bval;
       }

     }

     // DH - promote water checking to take place before simulation
     // Flag the Settle water molecules.
     int size, allfixed;
     const WaterModel watmodel = params->watmodel;
     const int wathgsize = getWaterModelGroupSize(watmodel);
     const int fixedAtomsOn = params->fixedAtomsOn;
     const int useSettle = params->useSettle;
     // Walk the list one hydrogen group at a time (j advances by group size).
     for(int j=0; j < n; j+=size) {
       size = a[j].hydrogenGroupSize;
       if ( ! size ) {
         NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
       }
       // groupFixed is set only when every atom of the group is fixed
       allfixed = 1;
       for (int k = 0; k < size; ++k ) {
         allfixed = ( allfixed && (a[j+k].atomFixed) );
       }
       for (int k = 0; k < size; ++k ) {
         a[j+k].groupFixed = allfixed ? 1 : 0;
       }
       // DH - Set isWater flag for SETTLE water molecules,
       // based on same condition as used in HomePatch::buildRattleList():
       // The rigidBondLength of the FIRST water atom is set greater than 0.
       if (a[j].rigidBondLength > 0) {
         if (size != wathgsize) {
           char errmsg[256];
           sprintf(errmsg,
               "Water molecule starting with atom %d contains %d atoms "
               "but the specified water model requires %d atoms.\n",
               a[j].id+1, size, wathgsize
               );
           NAMD_die(errmsg);
         }
         // count atoms that are fixed while fixed atoms are enabled
         int anyfixed = 0;
         for (int k = 0;  k < size;  k++) {
           anyfixed += ( fixedAtomsOn && a[j+k].atomFixed );
         }
         if (useSettle && !anyfixed) {
           for (int k = 0;  k < size;  k++) {
             a[j+k].isWater = 1;
           }
         }
       }
     }

     if ( params->outputPatchDetails ) {
       int patchId = pid;
       int numAtomsInPatch = n;
       int numFixedAtomsInPatch = 0;
       int numAtomsInFixedGroupsInPatch = 0;
       for(int j=0; j < n; j++) {
         numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
         numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
       }
       iout << "PATCH_DETAILS:"
            << " on proc " << CkMyPe()
            << " patch " << patchId
            << " atoms " << numAtomsInPatch
            << " fixed_atoms " << numFixedAtomsInPatch
            << " fixed_groups " << numAtomsInFixedGroupsInPatch
            << "\n" << endi;
     }

 }

 void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
 {
   int i, j;             //  Loop counter
   BigReal kbT;          //  Boltzman constant * Temp
   BigReal randnum;      //  Random number from -6.0 to 6.0
   BigReal kbToverM;     //  sqrt(Kb*Temp/Mass)
   SimParameters *simParams = Node::Object()->simParameters;
   Bool lesOn = simParams->lesOn;
   Random vel_random(simParams->randomSeed);
   int lesReduceTemp = lesOn && simParams->lesReduceTemp;
   BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;

   kbT = Temp*BOLTZMANN;
   int count=0;
   int totalAtoms = inAtoms.size();
   for(i=0;i<totalAtoms;i++)
   {
     Real atomMs=inAtoms[i].mass;

     if (atomMs <= 0.) {
       kbToverM = 0.;
     } else {
       /*
        * lesOn is not supported in MEM_OPT_VERSION, so the original assignment
        * is simplified. --Chao Mei
        */
       //kbToverM = sqrt(kbT *
         //( lesOn && structure->get_fep_type(aid) ? tempFactor : 1.0 ) /
         //                  atomMs );
       kbToverM = sqrt(kbT * 1.0 / atomMs);
     }
     for (randnum=0.0, j=0; j<12; j++)
     {
       randnum += vel_random.uniform();
     }

     randnum -= 6.0;

     inAtoms[i].velocity.x = randnum*kbToverM;

     for (randnum=0.0, j=0; j<12; j++)
     {
       randnum += vel_random.uniform();
     }

     randnum -= 6.0;

     inAtoms[i].velocity.y = randnum*kbToverM;

     for (randnum=0.0, j=0; j<12; j++)
     {
       randnum += vel_random.uniform();
     }

     randnum -= 6.0;

     inAtoms[i].velocity.z = randnum*kbToverM;
   }
 }
 #endif

 //----------------------------------------------------------------------
 // This should only be called on node 0.
 //----------------------------------------------------------------------
 // Builds one FullAtomList per patch from the input coordinates and
 // velocities and fills in the per-atom fields.
 //
 //   basename : when non-NULL, positions and velocities are read from
 //              "<basename>.coor" and "<basename>.vel" (binary files if
 //              params->binaryOutput is set, PDB files otherwise).  When
 //              NULL, positions come from node->pdb and velocities come
 //              from the "velocities" / "binvelocities" config entry if
 //              params->initialTemp < 0, or from random_velocities() at
 //              params->initialTemp otherwise.
 //
 // Returns an array of numPatches FullAtomList objects allocated with
 // new[]; createHomePatches() releases it with delete [].
 //
 // Terminates through NAMD_die() when the coordinate PDB has the wrong atom
 // count, when velocities are required from a file but neither config entry
 // exists, or when a rigid water group does not match the water model size.
 FullAtomList *WorkDistrib::createAtomLists(const char *basename)
 {
   int i;
   StringList *current;  //  Pointer used to retrieve configuration items
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   PatchMap *patchMap = PatchMap::Object();
   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
   PatchMgr *patchMgr = pm.ckLocalBranch();
   SimParameters *params = node->simParameters;
   Molecule *molecule = node->molecule;
   PDB *pdb = node->pdb;

   int numPatches = patchMap->numPatches();
   int numAtoms = pdb->num_atoms();

   // temporary per-atom arrays, released once atoms are copied into patches
   Vector *positions = new Position[numAtoms];
   Vector *velocities = new Velocity[numAtoms];

  if ( basename ) {
   if ( params->binaryOutput ) {
     read_binary_file((std::string(basename)+".coor").c_str(), positions, numAtoms);
     read_binary_file((std::string(basename)+".vel").c_str(), velocities, numAtoms);
   } else {
     PDB coorpdb((std::string(basename)+".coor").c_str());
     if ( coorpdb.num_atoms() != numAtoms ) {
       NAMD_die("Incorrect atom count in coordinate pdb file");
     }
     coorpdb.get_all_positions(positions);
     velocities_from_PDB((std::string(basename)+".vel").c_str(), velocities, numAtoms);
   }
  } else {
   pdb->get_all_positions(positions);

   if ( params->initialTemp < 0.0 ) {
     Bool binvels=FALSE;

     //  Reading the velocities from a PDB
     current = node->configList->find("velocities");

     if (current == NULL) {
       current = node->configList->find("binvelocities");
       binvels = TRUE;
     }

     // Neither entry present: report it instead of dereferencing NULL below.
     if (current == NULL) {
       NAMD_die("No initial temperature and no velocities or binvelocities file specified");
     }

     if (!binvels) {
       velocities_from_PDB(current->data, velocities, numAtoms);
     }
     else {
       velocities_from_binfile(current->data, velocities, numAtoms);
     }
   }
   else {
     // Random velocities for a given temperature
     random_velocities(params->initialTemp, molecule, velocities, numAtoms);
   }
  }

   //  If COMMotion == no, remove center of mass motion
   if (!(params->comMove)) {
     remove_com_motion(velocities, molecule, numAtoms);
   }

   FullAtomList *atoms = new FullAtomList[numPatches];

   const Lattice lattice = params->lattice;

     if ( params->staticAtomAssignment ) {
       // Collect one entry per migration-group parent, let
       // sortAtomsForPatches() order them and compute per-patch break
       // points, then expand each group into its patch.
       FullAtomList sortAtoms;
       for ( i=0; i < numAtoms; i++ ) {
         HydrogenGroupID &h = molecule->hydrogenGroup[i];
         if ( ! h.isMP ) continue;
         FullAtom a;
         a.id = i;  // index into hydrogenGroup, not an atom id
         a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
         a.position = positions[h.atomID];
         sortAtoms.add(a);
       }
       int *order = new int[sortAtoms.size()];
       for ( i=0; i < sortAtoms.size(); i++ ) {
         order[i] = i;
       }
       int *breaks = new int[numPatches];
       sortAtomsForPatches(order,breaks,sortAtoms.begin(),
                         sortAtoms.size(),numAtoms,
                         patchMap->gridsize_c(),
                         patchMap->gridsize_b(),
                         patchMap->gridsize_a());

       i = 0;
       for ( int pid = 0; pid < numPatches; ++pid ) {
         // groups order[i .. breaks[pid]) belong to patch pid
         int iend = breaks[pid];
         for ( ; i<iend; ++i ) {
           FullAtom &sa = sortAtoms[order[i]];
           int mgs = sa.migrationGroupSize;
 /*
 CkPrintf("patch %d (%d %d %d) has group %d atom %d size %d at %.2f %.2f %.2f\n",
           pid, patchMap->index_a(pid), patchMap->index_b(pid),
           patchMap->index_c(pid), order[i], sa.id, mgs,
           sa.position.x, sa.position.y, sa.position.z);
 */
           for ( int k=0; k<mgs; ++k ) {
             HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
             int aid = h.atomID;
             FullAtom a;
             a.id = aid;
             a.position = positions[aid];
             a.velocity = velocities[aid];
             a.vdwType = molecule->atomvdwtype(aid);
             a.status = molecule->getAtoms()[aid].status;
             a.langevinParam = molecule->langevin_param(aid);
             a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
             a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
             if(params->rigidBonds != RIGID_NONE) {
               a.rigidBondLength = molecule->rigid_bond_length(aid);
             }else{
               a.rigidBondLength = 0.0;
             }
             atoms[pid].add(a);
           }
         }
 CkPrintf("patch %d (%d %d %d) has %d atoms\n",
           pid, patchMap->index_a(pid), patchMap->index_b(pid),
           patchMap->index_c(pid), atoms[pid].size());
       }
       delete [] order;
       delete [] breaks;
     } else
     {
     // split atoms into patches based on migration group and position
     int aid, pid=0;
     for(i=0; i < numAtoms; i++)
       {
       // Assign atoms to patches without splitting hydrogen groups.
       // We know that the hydrogenGroup array is sorted with group parents
       // listed first.  Thus, only change the pid if an atom is a group parent.
       HydrogenGroupID &h = molecule->hydrogenGroup[i];
       aid = h.atomID;
       FullAtom a;
       a.id = aid;
       a.position = positions[aid];
       a.velocity = velocities[aid];
       a.vdwType = molecule->atomvdwtype(aid);
       a.status = molecule->getAtoms()[aid].status;
       a.langevinParam = molecule->langevin_param(aid);
       a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
       a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
       if(params->rigidBonds != RIGID_NONE) {
         a.rigidBondLength = molecule->rigid_bond_length(aid);
       }else{
         a.rigidBondLength = 0.0;
       }
       if (h.isMP) {
         pid = patchMap->assignToPatch(positions[aid],lattice);
       } // else: don't change pid
       atoms[pid].add(a);
       }
     }

   delete [] positions;
   delete [] velocities;

   for(i=0; i < numPatches; i++)
   {
     ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
                           0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
                           0.5*(patchMap->min_c(i)+patchMap->max_c(i)));

     int n = atoms[i].size();
     FullAtom *a = atoms[i].begin();
     int j;
 //Modifications for alchemical fep
     Bool alchOn = params->alchOn;
 //fepe
     Bool lesOn = params->lesOn;

     Bool pairInteractionOn = params->pairInteractionOn;

     Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);

     // Transform of the most recent migration-group parent; reused for the
     // atoms that follow it until the next parent is reached.
     Transform mother_transform;
     for(j=0; j < n; j++)
     {
       int aid = a[j].id;

       a[j].nonbondedGroupSize = 0;  // must be set based on coordinates

       a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
       a[j].fixedPosition = a[j].position;

       if ( a[j].migrationGroupSize ) {
        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
             // The migration group spans several hydrogen groups: average the
             // positions of the hydrogen-group parents and wrap that average
             // toward the patch center to obtain the group's transform.
             Position pos = a[j].position;
             int mgs = a[j].migrationGroupSize;
             int c = 1;
             for ( int k=a[j].hydrogenGroupSize; k<mgs;
                                 k+=a[j+k].hydrogenGroupSize ) {
               pos += a[j+k].position;
               ++c;
             }
             pos *= 1./c;
             mother_transform = a[j].transform;  // should be 0,0,0
             pos = lattice.nearest(pos,center,&mother_transform);
             a[j].position = lattice.apply_transform(a[j].position,mother_transform);
             a[j].transform = mother_transform;
        } else {
         a[j].position = lattice.nearest(
                 a[j].position, center, &(a[j].transform));
         mother_transform = a[j].transform;
        }
       } else {
         // not a migration-group parent: follow the parent's transform
         a[j].position = lattice.apply_transform(a[j].position,mother_transform);
         a[j].transform = mother_transform;
       }

       a[j].mass = molecule->atommass(aid);
       // Using double precision division for reciprocal mass.
       a[j].recipMass = ( a[j].mass > 0 ? (1. / a[j].mass) : 0 );
       a[j].charge = molecule->atomcharge(aid);
       if ( params->LJPMEOn ) {
         const int index = a[j].vdwType;
         const float scaling = params->nonbondedScaling;
         float sigma, epsilon, sigma14, epsilon14;
         molecule->params->get_vdw_params(&sigma, &epsilon, &sigma14, &epsilon14, index);
         a[j].dispcoef = 2*sigma*sigma*sigma*sqrt(scaling * epsilon);
       }
       else {
         a[j].dispcoef = 0;
       }

 //Modifications for alchemical fep
       if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
         a[j].partition = molecule->get_fep_type(aid);
       }
       else {
         a[j].partition = 0;
       }
 //fepe

     }

     // DH - promote water checking to take place before simulation
     // Flag the Settle water molecules.
     int size, allfixed;
     const WaterModel watmodel = params->watmodel;
     const int wathgsize = getWaterModelGroupSize(watmodel);
     const int fixedAtomsOn = params->fixedAtomsOn;
     const int useSettle = params->useSettle;
     // Walk the list one hydrogen group at a time (j advances by group size).
     for(int j=0; j < n; j+=size) {
       size = a[j].hydrogenGroupSize;
       if ( ! size ) {
         NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
       }
       // groupFixed is set only when every atom of the group is fixed
       allfixed = 1;
       for (int k = 0; k < size; ++k ) {
         allfixed = ( allfixed && (a[j+k].atomFixed) );
       }
       for (int k = 0; k < size; ++k ) {
         a[j+k].groupFixed = allfixed ? 1 : 0;
       }
       // DH - Set isWater flag for SETTLE water molecules,
       // based on same condition as used in HomePatch::buildRattleList():
       // The rigidBondLength of the FIRST water atom is set greater than 0.
       if (a[j].rigidBondLength > 0) {
         if (size != wathgsize) {
           char errmsg[256];
           snprintf(errmsg, sizeof(errmsg),
               "Water molecule starting with atom %d contains %d atoms "
               "but the specified water model requires %d atoms.\n",
               a[j].id+1, size, wathgsize
               );
           NAMD_die(errmsg);
         }
         // count atoms that are fixed while fixed atoms are enabled
         int anyfixed = 0;
         for (int k = 0;  k < size;  k++) {
           anyfixed += ( fixedAtomsOn && a[j+k].atomFixed );
         }
         if (useSettle && !anyfixed) {
           for (int k = 0;  k < size;  k++) {
             a[j+k].isWater = 1;
           }
         }
       }
     }

     if ( params->outputPatchDetails ) {
       int patchId = i;
       int numAtomsInPatch = n;
       int numFixedAtomsInPatch = 0;
       int numAtomsInFixedGroupsInPatch = 0;
       for(j=0; j < n; j++) {
         numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
         numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
       }
       iout << "PATCH_DETAILS:"
            << " patch " << patchId
            << " atoms " << numAtomsInPatch
            << " fixed_atoms " << numFixedAtomsInPatch
            << " fixed_groups " << numAtomsInFixedGroupsInPatch
            << "\n" << endi;
     }
   }

   return atoms;

 }

 //----------------------------------------------------------------------
 // This should only be called on node 0.
 //----------------------------------------------------------------------
 void WorkDistrib::createHomePatches(void)
 {
   int i;
   PatchMap *patchMap = PatchMap::Object();
   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
   PatchMgr *patchMgr = pm.ckLocalBranch();

   int numPatches = patchMap->numPatches();

   FullAtomList *atoms = createAtomLists();

 #ifdef MEM_OPT_VERSION
 /*  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   node->molecule->delEachAtomSigs();
   node->molecule->delMassChargeSpace();
 */
 #endif

   int maxAtoms = -1;
   int maxPatch = -1;
   for(i=0; i < numPatches; i++) {
     int numAtoms = atoms[i].size();
     if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
   }
   iout << iINFO << "LARGEST PATCH (" << maxPatch <<
         ") HAS " << maxAtoms << " ATOMS\n" << endi;

 #ifdef SHOW_HISTOGRAM_HGROUP_SIZES
   // histogram hydrogen group sizes
   int hgroupsize[9] = { 0 };
   int numhgroups = 0;
   int numwaters = 0;
   int maxhgroupsize = 0;
   for (i = 0;  i < numPatches;  i++) {
     const FullAtomList& a = atoms[i];
     int numAtoms = a.size();
     int hgs = 1;  // init to something sane
     for (int j = 0;  j < numAtoms;  j += hgs) {
       hgs = a[j].hydrogenGroupSize;
       int histndx = ( hgs > 8 ? 8 : hgs );
       hgroupsize[ histndx ]++;
       numhgroups++;
       if (a[j].isWater) numwaters++;
       if (maxhgroupsize < hgs) maxhgroupsize = hgs;
     }
   }
   int hgslast = ( maxhgroupsize > 8 ? 8 : maxhgroupsize );
   printf("Number of hydrogen groups:           %7d\n", numhgroups);
   printf("Number of settle water molecules:    %7d\n", numwaters);
   printf("Number of remaining hydrogen groups: %7d\n", numhgroups - numwaters);
   printf("Largest hydrogen group size:         %7d\n", maxhgroupsize);
   printf("Histogram of hydrogen group sizes:\n");
   int hgstotal = 0;
   for (i = 0;  i <= hgslast;  i++) {
     printf("     size %d     count %d\n", i, hgroupsize[i]);
     hgstotal += hgroupsize[i];
   }
   printf("Checksum over hydrogen group sizes:  %7d\n", hgstotal);
 #endif

   for(i=0; i < numPatches; i++)
   {
     if ( ! ( i % 100 ) )
     {
       DebugM(3,"Created " << i << " patches so far.\n");
     }

     patchMgr->createHomePatch(i,atoms[i]);
   }

   delete [] atoms;
 }

 void WorkDistrib::distributeHomePatches() {
   // ref BOC
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
   PatchMgr *patchMgr = pm.ckLocalBranch();
   // ref singleton
   PatchMap *patchMap = PatchMap::Object();

   // Move patches to the proper node
   for(int i=0;i < patchMap->numPatches(); i++)
   {
     if (patchMap->node(i) != node->myid() )
     {
       DebugM(3,"patchMgr->movePatch("
         << i << "," << patchMap->node(i) << ")\n");
       patchMgr->movePatch(i,patchMap->node(i));
     }
   }
   patchMgr->sendMovePatches();
 }

 void WorkDistrib::reinitAtoms(const char *basename) {

   PatchMap *patchMap = PatchMap::Object();
   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
   PatchMgr *patchMgr = pm.ckLocalBranch();

   int numPatches = patchMap->numPatches();

   FullAtomList *atoms = createAtomLists(basename);

   for(int i=0; i < numPatches; i++) {
     patchMgr->sendAtoms(i,atoms[i]);
   }

   delete [] atoms;

 }


 //----------------------------------------------------------------------

 // Message that carries a serialized PatchMap between PEs.
 // patchMapData is the buffer filled by PatchMap::pack() in sendPatchMap()
 // and read by PatchMap::unpack() in savePatchMap().
 // NOTE(review): presumably declared as a variable-size Charm++ message so
 // that "new (size, 0) PatchMapMsg" allocates patchMapData -- confirm in the
 // interface (.ci) file.
 class PatchMapMsg : public CMessage_PatchMapMsg {
   public:
     char *patchMapData;  // packed patch map bytes
 };

 void WorkDistrib::sendPatchMap(void)
 {
   if ( CkNumPes() == 1 ) {
     patchMapArrived = true;
     return;
   }

   //Automatically enable spanning tree
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   SimParameters *params = node->simParameters;
   if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
 #ifdef NODEAWARE_PROXY_SPANNINGTREE
       || CkNumPes() > CkNumNodes()
       ) && ( CkNumNodes() > 1
 #endif
     ) && params->isSendSpanningTreeUnset() )
     ProxyMgr::Object()->setSendSpanning();

 #ifdef NODEAWARE_PROXY_SPANNINGTREE
   if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
         && params->isRecvSpanningTreeUnset() )
     ProxyMgr::Object()->setRecvSpanning();
 #endif

   int size = PatchMap::Object()->packSize();

   PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;

   PatchMap::Object()->pack(mapMsg->patchMapData, size);

   CProxy_WorkDistrib workProxy(thisgroup);
   workProxy[0].savePatchMap(mapMsg);
 }

 // savePatchMap() is invoked through the WorkDistrib proxy when the patch
 // map message is received.  Each delivery does one of two things:
 //   - first delivery on a node's rank-0 PE: forward the message to up to
 //     two other nodes and back to this PE, then return;
 //   - any later delivery (and every delivery on a non-zero rank): unpack
 //     the map (except on PE 0) and pass the message to the next rank of
 //     this node, or delete it on the last rank.
 // NOTE(review): relies on patchMapArrived being false before the first
 // delivery -- confirm where the member is initialized.
 void WorkDistrib::savePatchMap(PatchMapMsg *msg)
 {
   // Use a resend to forward messages before processing.  Otherwise the
   // map distribution is slow on many CPUs.  We need to use a tree
   // rather than a broadcast because some implementations of broadcast
   // generate a copy of the message on the sender for each recipient.
   // This is because MPI doesn't allow re-use of an outstanding buffer.

   // Non-zero ranks never run the inter-node forwarding below; mark the
   // map as arrived so they go straight to unpacking.
   if ( CkMyRank() ) patchMapArrived = true;

   // Unpack on every PE except PE 0 (the PE that sendPatchMap() targets first).
   if ( patchMapArrived && CkMyPe() ) {
     PatchMap::Object()->unpack(msg->patchMapData);

     //Automatically enable spanning tree
     // (same conditions as in sendPatchMap())
     CProxy_Node nd(CkpvAccess(BOCclass_group).node);
     Node *node = nd.ckLocalBranch();
     SimParameters *params = node->simParameters;
     if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
 #ifdef NODEAWARE_PROXY_SPANNINGTREE
         || CkNumPes() > CkNumNodes()
         ) && ( CkNumNodes() > 1
 #endif
       ) && params->isSendSpanningTreeUnset() )
       ProxyMgr::Object()->setSendSpanning();

 #ifdef NODEAWARE_PROXY_SPANNINGTREE
     if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
           && params->isRecvSpanningTreeUnset() )
       ProxyMgr::Object()->setRecvSpanning();
 #endif
   }

   // Intra-node chain: hand the same message to the next rank on this node;
   // the last rank is the final consumer and frees it.
   if ( patchMapArrived ) {
     if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
       ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
     } else {
       delete msg;
     }
     return;
   }

   // First delivery on this node's rank-0 PE: forward before processing.
   patchMapArrived = true;

   // Find the node range [range_begin, range_end) whose first node is this
   // one.  Starting from all nodes, each step drops the first node of the
   // range and keeps whichever half of the remainder contains this node.
   int self = CkMyNode();
   int range_begin = 0;
   int range_end = CkNumNodes();
   while ( self != range_begin ) {
     ++range_begin;
     int split = range_begin + ( range_end - range_begin ) / 2;
     if ( self < split ) { range_end = split; }
     else { range_begin = split; }
   }
   // The remaining nodes of the range are split the same way: the lower
   // part starts at send_near, the upper part at send_far.
   int send_near = self + 1;
   int send_far = send_near + ( range_end - send_near ) / 2;

   // At most three destinations: the first PE of each non-empty part, and
   // this PE (whose second delivery does the unpack/intra-node step above).
   int pids[3];
   int npid = 0;
   if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
   if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
   pids[npid++] = CkMyPe();  // always send the message to ourselves
   CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
 }


 void WorkDistrib::sendComputeMap(void)
 {
   if ( CkMyRank() ) return;

   if ( CkNumNodes() == 1 ) {
     computeMapArrived = true;
     ComputeMap::Object()->initPtrs();
     return;
   }

   if ( ! CkMyPe() ) { // send
     MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
     ComputeMap::Object()->pack(msg);
     msg->end();
     delete msg;
   } else if ( ! CkMyRank() ) { // receive
     MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
     ComputeMap::Object()->unpack(msg);
     delete msg;
   }

   computeMapArrived = true;
   ComputeMap::Object()->initPtrs();
 }


 //----------------------------------------------------------------------
 // Choose the patch grid for the simulation and build it in the PatchMap.
 // Steps, in order:
 //   1. pick the extent (xmin, xmax) handed to the grid sizing calls,
 //   2. settle the twoAway[XYZ] settings and the limit on the patch count
 //      by probing PatchMap::sizeGrid(),
 //   3. call PatchMap::makePatches() with the result.
 // Step 2 exists in two compile-time variants: CUDA/HIP/MIC builds switch
 // twoAway ON (X, then Y, then Z) while there are fewer patches than PEs;
 // all other builds treat unset twoAway values as ON and switch them OFF
 // (Z, then Y, then X) when the patch count exceeds a fraction of the PEs
 // available for patches.
 void WorkDistrib::patchMapInit(void)
 {
   PatchMap *patchMap = PatchMap::Object();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   SimParameters *params = node->simParameters;
   Lattice lattice = params->lattice;

   BigReal patchSize = params->patchDimension;

 #ifndef MEM_OPT_VERSION
   const int totalAtoms = node->pdb->num_atoms();
 #else
   const int totalAtoms = node->molecule->numAtoms;
 #endif

   ScaledPosition xmin, xmax;

   // Upper limit on the patch count; effectively unlimited unless
   // minAtomsPerPatch is set.
   double maxNumPatches = 1.e9;  // need to adjust fractional values
   if ( params->minAtomsPerPatch > 0 )
     maxNumPatches = totalAtoms / params->minAtomsPerPatch;

   DebugM(3,"Mapping patches\n");
   // Case 1: lattice reports a_p, b_p and c_p all true -- extents are zeroed.
   // NOTE(review): presumably these flags mean "periodic along that basis
   // vector", so the lattice itself defines the box -- confirm in Lattice.h.
   if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
     xmin = 0.;  xmax = 0.;
   }
   // Case 2: FMA / MSM / FMM -- use the extremes of all atoms.
   else if ( params->FMAOn || params->MSMOn || params->FMMOn ) {
   // Need to use full box for FMA to match NAMD 1.X results.
 #if 0
     node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
     node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
     node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
 #endif
     node->pdb->find_extremes(lattice);
     node->pdb->get_extremes(xmin, xmax);
 #if 0
     printf("+++ center=%.4f %.4f %.4f\n",
         lattice.origin().x, lattice.origin().y, lattice.origin().z);
     printf("+++ xmin=%.4f  xmax=%.4f\n", xmin.x, xmax.x);
     printf("+++ ymin=%.4f  ymax=%.4f\n", xmin.y, xmax.y);
     printf("+++ zmin=%.4f  zmax=%.4f\n", xmin.z, xmax.z);
 #endif
   // Otherwise, this allows a small number of stray atoms.
   }
   // Case 3: everything else -- report the full extremes, then recompute
   // them for a fraction of the atoms (all but 10000, but never below 0.9).
   else {
 #if 0
     node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
     node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
     node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
 #endif
     node->pdb->find_extremes(lattice, 1.0);
     node->pdb->get_extremes(xmin, xmax);
     iout << iINFO << "ORIGINAL ATOMS MINMAX IS " << xmin << "  " << xmax << "\n" << endi;
     double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
     if ( frac < 0.9 ) { frac = 0.9; }
     node->pdb->find_extremes(lattice, frac);
     node->pdb->get_extremes(xmin, xmax);
     iout << iINFO << "ADJUSTED ATOMS MINMAX IS " << xmin << "  " << xmax << "\n" << endi;
   }

 #if 0
   BigReal origin_shift;
   origin_shift = lattice.a_r() * lattice.origin();
   xmin.x -= origin_shift;
   xmax.x -= origin_shift;
   origin_shift = lattice.b_r() * lattice.origin();
   xmin.y -= origin_shift;
   xmax.y -= origin_shift;
   origin_shift = lattice.c_r() * lattice.origin();
   xmin.z -= origin_shift;
   xmax.z -= origin_shift;
 #endif

   // SimParameters default is -1 for unset
   // (so below: <0 means "free to choose", 0 means off, >0 means on)
   int twoAwayX = params->twoAwayX;
   int twoAwayY = params->twoAwayY;
   int twoAwayZ = params->twoAwayZ;

   // SASA implementation is not compatible with twoAway patches
   if (params->LCPOOn && patchSize < 32.4) {
     if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
       iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
     }
     twoAwayX = twoAwayY = twoAwayZ = 0;
   }

   // if you think you know what you're doing go right ahead
   // (an explicit twoAway request lifts the minAtomsPerPatch-based limit;
   // an explicit maxPatches then takes precedence over both)
   if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
   if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
   if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
   if ( params->maxPatches > 0 ) {
       maxNumPatches = params->maxPatches;
       iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
                                 maxNumPatches << "\n" << endi;
   }

   // PE count used for sizing; when simulating an initial mapping the
   // simulated PE count is used and the per-PE patch counters are
   // reallocated to that size.
   int numpes = CkNumPes();
   SimParameters *simparam = Node::Object()->simParameters;
   if(simparam->simulateInitialMapping) {
     numpes = simparam->simulatedPEs;
     delete [] patchMap->nPatchesOnNode;
     patchMap->nPatchesOnNode = new int[numpes];
     memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));
   }

 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
   // for CUDA be sure there are more patches than pes

   // Start with only the explicitly requested twoAway settings, then turn
   // on each still-unset direction (X, Y, Z) while patches < PEs.
   int numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
   if ( numPatches < numpes && twoAwayX < 0 ) {
     twoAwayX = 1;
     numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
   }
   if ( numPatches < numpes && twoAwayY < 0 ) {
     twoAwayY = 1;
     numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
   }
   if ( numPatches < numpes && twoAwayZ < 0 ) {
     twoAwayZ = 1;
     numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
   }
   // Still fewer patches than PEs: fatal for MIC builds and for
   // CUDASOAintegrateMode, tolerated otherwise.
   if ( numPatches < numpes ) {
     #if defined(NAMD_MIC)
     NAMD_die("MIC-enabled NAMD requires at least one patch per thread.");
     #else
     if (simparam->CUDASOAintegrateMode) {
       NAMD_die("GPU-resident NAMD requires at least one patch per thread.");
     }
     #endif
   }
   // If the patch count is not a multiple of the PE count and is at most
   // 1.4x the PE count, try rounding it down to a multiple; keep that limit
   // only if sizeGrid() can hit it exactly.
   if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
     int exactFit = numPatches - numPatches % numpes;
     int newNumPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
     if ( newNumPatches == exactFit ) {
       iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
       maxNumPatches = exactFit;
     }
   }

   patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
         params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);

 #else

   // Number of PEs that may hold patches: PE 0 / PE 1 (and, in the
   // memory-optimized build, output PEs) can be excluded by configuration.
   int availPes = numpes;
   if ( params->noPatchesOnZero && numpes > 1 ) {
       availPes -= 1;
       if(params->noPatchesOnOne && numpes > 2)
         availPes -= 1;
   }
 #ifdef MEM_OPT_VERSION
   if(params->noPatchesOnOutputPEs && numpes - params->numoutputprocs >2)
     {
       availPes -= params->numoutputprocs;
       // PE 0 / PE 1 were already subtracted above; undo the double count
       // when they are also output processors
       if ( params->noPatchesOnZero && numpes > 1 && isOutputProcessor(0)){
         availPes++;
       }
       if ( params->noPatchesOnOne && numpes > 2 && isOutputProcessor(1)){
         availPes++;
       }
     }
 #endif

   // Note the plain truth tests below: an unset value (-1) is nonzero and
   // therefore counts as twoAway ON.  Each still-unset direction is then
   // switched off (Z at 0.3x, Y at 0.6x, X at 1x availPes, or whenever the
   // count exceeds maxNumPatches).  These probes use 1.e9 as the limit.
   int numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
   if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
        ) && twoAwayZ < 0 ) {
     twoAwayZ = 0;
     numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
   }
   if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
        ) && twoAwayY < 0 ) {
     twoAwayY = 0;
     numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
   }
   if ( ( numPatches > availPes || numPatches > maxNumPatches
        ) && twoAwayX < 0 ) {
     twoAwayX = 0;
     numPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
   }
   // Slightly more patches than available PEs (up to 1.4x): try limiting the
   // grid to availPes, and keep that limit if the reduced grid fits and is
   // not more than 1.4x smaller than the unrestricted one.
   if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
     int newNumPatches = patchMap->sizeGrid(
         xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
     if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
       iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
       maxNumPatches = availPes;
     }
   }

   patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
         params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);

 #endif

 }


 //----------------------------------------------------------------------
 void WorkDistrib::assignNodeToPatch()
 {
   PatchMap *patchMap = PatchMap::Object();
   int nNodes = Node::Object()->numNodes();
   SimParameters *simparam = Node::Object()->simParameters;
   if(simparam->simulateInitialMapping) {
           nNodes = simparam->simulatedPEs;
   }

 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP
   TopoManager tmgr;
   int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
   if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
     CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
   }
   else
 #endif
   assignPatchesSpaceFillingCurve();

   int *nAtoms = new int[nNodes];
   int numAtoms=0;
   int i;
   for(i=0; i < nNodes; i++)
     nAtoms[i] = 0;

   for(i=0; i < patchMap->numPatches(); i++)
   {
     //    iout << iINFO << "Patch " << i << " has "
     //   << patchMap->patch(i)->getNumAtoms() << " atoms and "
     //   << patchMap->patch(i)->getNumAtoms() *
     //            patchMap->patch(i)->getNumAtoms()
     //   << " pairs.\n" << endi;
 #ifdef MEM_OPT_VERSION
       numAtoms += patchMap->numAtoms(i);
       nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);
 #else
     if (patchMap->patch(i)) {
       numAtoms += patchMap->patch(i)->getNumAtoms();
       nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();
     }
 #endif
   }

   if ( numAtoms != Node::Object()->molecule->numAtoms ) {
     for(i=0; i < nNodes; i++)
       iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
     iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
     NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
   }

   delete [] nAtoms;

   //  PatchMap::Object()->printPatchMap();
 }

 //----------------------------------------------------------------------
 // void WorkDistrib::assignPatchesSlices()
 // {
 //   int pid;
 //   int assignedNode = 0;
 //   PatchMap *patchMap = PatchMap::Object();
 //   Node *node = CLocalBranch(Node, CkpvAccess(BOCclass_group).node);

 //   int *numAtoms = new int[node->numNodes()];
 //   for (int i=0; i<node->numNodes(); i++) {
 //     numAtoms[i] = 0;
 //   }

 //   // Assign patch to node with least atoms assigned.
 //   for(pid=0; pid < patchMap->numPatches(); pid++) {
 //     assignedNode = 0;
 //     for (i=1; i < node->numNodes(); i++) {
 //       if (numAtoms[i] < numAtoms[assignedNode]) assignedNode = i;
 //     }
 //     patchMap->assignNode(pid, assignedNode);
 //     numAtoms[assignedNode] += patchMap->patch(pid)->getNumAtoms();

 //     /*
 //     iout << iINFO << "Patch (" << pid << ") has "
 //       << patchMap->patch(pid)->getNumAtoms()
 //       << " atoms:  Assigned to Node(" << assignedNode << ")\n"
 //       << endi;
 //     */
 //   }

 //   delete[] numAtoms;
 // }

 //----------------------------------------------------------------------
 void WorkDistrib::assignPatchesToLowestLoadNode()
 {
   int pid;
   int assignedNode = 0;
   PatchMap *patchMap = PatchMap::Object();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   SimParameters *simParams = node->simParameters;
   int ncpus = node->numNodes();
   if(simParams->simulateInitialMapping) {
           ncpus = simParams->simulatedPEs;
   }

   int *load = new int[ncpus];
   int *assignedNodes = new int[patchMap->numPatches()];
   for (int i=0; i<ncpus; i++) {
     load[i] = 0;
   }
   CkPrintf("assignPatchesToLowestLoadNode\n");
   int defaultNode = 0;
   if ( simParams->noPatchesOnZero && ncpus > 1 ){
     defaultNode = 1;
     if( simParams->noPatchesOnOne && ncpus > 2)
       defaultNode = 2;
   }
   // Assign patch to node with least atoms assigned.
   for(pid=0; pid < patchMap->numPatches(); pid++) {
     assignedNode = defaultNode;
     for (int i=assignedNode + 1; i < ncpus; i++) {
       if (load[i] < load[assignedNode]) assignedNode = i;
     }
     assignedNodes[pid] = assignedNode;
 #ifdef MEM_OPT_VERSION
     load[assignedNode] += patchMap->numAtoms(pid) + 1;
 #else
     load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
 #endif
   }

   delete[] load;
   sortNodesAndAssign(assignedNodes);
   delete[] assignedNodes;
 }

 //----------------------------------------------------------------------
 void WorkDistrib::assignPatchesBitReversal()
 {
   int pid;
   PatchMap *patchMap = PatchMap::Object();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   SimParameters *simparam = node->simParameters;

   int ncpus = node->numNodes();
   if(simparam->simulateInitialMapping) {
           ncpus = simparam->simulatedPEs;
   }
   int npatches = patchMap->numPatches();
   if ( ncpus <= npatches )
     NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");

   SortableResizeArray<int> seq(ncpus);
   // avoid using node 0 (reverse of 0 is 0 so start at 1)
   for ( int i = 1; i < ncpus; ++i ) {
     seq[i-1] = peDiffuseOrdering[i];
   }

   // extract and sort patch locations
   sortNodesAndAssign(seq.begin());
   if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
 }

 //----------------------------------------------------------------------
 // Per-node accumulator used to order nodes by the average (a, b, c) grid
 // index of the patches assigned to them.  Comparisons use single-precision
 // averages, compared lexicographically in a, then b, then c.
 struct nodesort {
   int node;      // node id (-1 until set)
   int a_total;   // sum of patch a indices
   int b_total;   // sum of patch b indices
   int c_total;   // sum of patch c indices
   int npatches;  // number of patches summed above
   nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }

   // Write the three float averages (total / npatches) into avg[0..2].
   void averages(float avg[3]) const {
     avg[0] = ((float)a_total)/((float)npatches);
     avg[1] = ((float)b_total)/((float)npatches);
     avg[2] = ((float)c_total)/((float)npatches);
   }

   // Nonzero when all three averages compare equal.
   int operator==(const nodesort &o) const {
     float mine[3], theirs[3];
     averages(mine);
     o.averages(theirs);
     for ( int k = 0; k < 3; ++k ) {
       if ( ! ( mine[k] == theirs[k] ) ) return 0;
     }
     return 1;
   }

   // Nonzero when this node's averages come first lexicographically.
   int operator<(const nodesort &o) const {
     float mine[3], theirs[3];
     averages(mine);
     o.averages(theirs);
     for ( int k = 0; k < 3; ++k ) {
       if ( mine[k] < theirs[k] ) return 1;
       // only an exact tie lets the next coordinate decide
       if ( ! ( mine[k] == theirs[k] ) ) return 0;
     }
     return 0;
   }
 };

 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
   // if baseNodes is zero (default) then set both nodes and basenodes
   // if baseNodes is nonzero then this is a second call to set basenodes only
   int i, pid;
   PatchMap *patchMap = PatchMap::Object();
   int npatches = patchMap->numPatches();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   int nnodes = node->numNodes();
   SimParameters *simparam = node->simParameters;
   if(simparam->simulateInitialMapping) {
           nnodes = simparam->simulatedPEs;
   }

   ResizeArray<nodesort> allnodes(nnodes);
   for ( i=0; i < nnodes; ++i ) {
     allnodes[i].node = i;
   }
   for ( pid=0; pid<npatches; ++pid ) {
     // iout << pid << " " << assignedNode[pid] << "\n" << endi;
     allnodes[assignedNode[pid]].npatches++;
     allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
     allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
     allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
   }
   SortableResizeArray<nodesort> usednodes(nnodes);
   usednodes.resize(0);
   for ( i=0; i < nnodes; ++i ) {
     if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
   }
   usednodes.sort();
   int i2 = 0;
   for ( i=0; i < nnodes; ++i ) {
     int pe = peCompactOrdering[i];
     if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
   }

   for ( pid=0; pid<npatches; ++pid ) {
     // iout << pid << " " <<  allnodes[assignedNode[pid]].node << "\n" << endi;
     if ( ! baseNodes ) {
       patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);
     }
     patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);
   }
 }

 //----------------------------------------------------------------------
 void WorkDistrib::assignPatchesRoundRobin()
 {
   int pid;
   PatchMap *patchMap = PatchMap::Object();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   SimParameters *simparam = node->simParameters;
   int ncpus = node->numNodes();
   if(simparam->simulateInitialMapping) {
           ncpus = simparam->simulatedPEs;
   }
   int *assignedNode = new int[patchMap->numPatches()];

   for(pid=0; pid < patchMap->numPatches(); pid++) {
     assignedNode[pid] = pid % ncpus;
   }

   sortNodesAndAssign(assignedNode);
   delete [] assignedNode;
 }

 //----------------------------------------------------------------------
 void WorkDistrib::assignPatchesRecursiveBisection()
 {
   PatchMap *patchMap = PatchMap::Object();
   int *assignedNode = new int[patchMap->numPatches()];
   SimParameters *simParams = Node::Object()->simParameters;
   int numNodes = Node::Object()->numNodes();
   if(simParams->simulateInitialMapping) {
           numNodes = simParams->simulatedPEs;
   }

   int usedNodes = numNodes;
   int unusedNodes = 0;
   CkPrintf("assignPatchesRecursiveBisection\n");
   if ( simParams->noPatchesOnZero && numNodes > 1 ){
     usedNodes -= 1;
     if(simParams->noPatchesOnOne && numNodes > 2)
       usedNodes -= 1;
   }
   unusedNodes = numNodes - usedNodes;
   RecBisection recBisec(usedNodes,PatchMap::Object());
   if ( recBisec.partition(assignedNode) ) {
     if ( unusedNodes !=0 ) {
       for ( int i=0; i<patchMap->numPatches(); ++i ) {
         assignedNode[i] += unusedNodes;
       }
     }
     sortNodesAndAssign(assignedNode);
     delete [] assignedNode;
   } else {
     //free the array here since a same array will be allocated
     //in assignPatchesToLowestLoadNode function, thus reducting
     //temporary memory usage
     delete [] assignedNode;

     iout << iWARN
          << "WorkDistrib: Recursive bisection fails, "
          << "invoking space-filling curve algorithm\n";
     assignPatchesSpaceFillingCurve();
   }
 }

 // class to re-order dimensions in decreasing size
 struct TopoManagerWrapper {
   TopoManager tmgr;
   int a_dim, b_dim, c_dim, d_dim, e_dim;
   int a_rot, b_rot, c_rot, d_rot, e_rot;
   int a_mod, b_mod, c_mod, d_mod, e_mod;
   int fixpe(int pe) {  // compensate for lame fallback topology information
     return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
   }
   TopoManagerWrapper() {
 #if CMK_BLUEGENEQ
     int na=tmgr.getDimNA();
     int nb=tmgr.getDimNB();
     int nc=tmgr.getDimNC();
     int nd=tmgr.getDimND();
     int ne=tmgr.getDimNE();
 #else
     int na=tmgr.getDimNX();
     int nb=tmgr.getDimNY();
     int nc=tmgr.getDimNZ();
     int nd=1;
     int ne=1;
 #endif
     ResizeArray<int> a_flags(na);
     ResizeArray<int> b_flags(nb);
     ResizeArray<int> c_flags(nc);
     ResizeArray<int> d_flags(nd);
     ResizeArray<int> e_flags(ne);
     for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
     for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
     for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
     for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
     for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
     int npes = CkNumPes();
     for ( int pe=0; pe<npes; ++pe ) {
       int a,b,c,d,e,t;
 #if CMK_BLUEGENEQ
       tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
 #else
       tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
       d=0; e=0;
 #endif
       if ( a < 0 || a >= na ) NAMD_bug("inconsistent torus topology!");
       if ( b < 0 || b >= nb ) NAMD_bug("inconsistent torus topology!");
       if ( c < 0 || c >= nc ) NAMD_bug("inconsistent torus topology!");
       if ( d < 0 || d >= nd ) NAMD_bug("inconsistent torus topology!");
       if ( e < 0 || e >= ne ) NAMD_bug("inconsistent torus topology!");
       a_flags[a] = 1;
       b_flags[b] = 1;
       c_flags[c] = 1;
       d_flags[d] = 1;
       e_flags[e] = 1;
     }
     iout << iINFO << "TORUS A SIZE " << na << " USING";
     for ( int i=0; i<na; ++i ) { if ( a_flags[i] ) iout << " " << i; }
     iout << "\n" << endi;
     iout << iINFO << "TORUS B SIZE " << nb << " USING";
     for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
     iout << "\n" << endi;
     iout << iINFO << "TORUS C SIZE " << nc << " USING";
     for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
     iout << "\n" << endi;
 #if CMK_BLUEGENEQ
     iout << iINFO << "TORUS D SIZE " << nd << " USING";
     for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
     iout << "\n" << endi;
     iout << iINFO << "TORUS E SIZE " << ne << " USING";
     for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
     iout << "\n" << endi;
 #endif
     // find most compact representation of our subset
     a_rot = b_rot = c_rot = d_rot = e_rot = 0;
     a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
 #if CMK_BLUEGENEQ
     if ( tmgr.absA(na) == 0 ) // torus
 #else
     if ( tmgr.absX(na) == 0 ) // torus
 #endif
       for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
         if ( a_flags[i%na] ) gapstart = i+1;
         else if ( i - gapstart >= gaplen ) {
           a_rot = 2*na-i-1; gaplen = i - gapstart;
         }
       }
 #if CMK_BLUEGENEQ
     if ( tmgr.absB(nb) == 0 ) // torus
 #else
     if ( tmgr.absY(nb) == 0 ) // torus
 #endif
       for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
         if ( b_flags[i%nb] ) gapstart = i+1;
         else if ( i - gapstart >= gaplen ) {
           b_rot = 2*nb-i-1; gaplen = i - gapstart;
         }
       }
 #if CMK_BLUEGENEQ
     if ( tmgr.absC(nc) == 0 ) // torus
 #else
     if ( tmgr.absZ(nc) == 0 ) // torus
 #endif
       for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
         if ( c_flags[i%nc] ) gapstart = i+1;
         else if ( i - gapstart >= gaplen ) {
           c_rot = 2*nc-i-1; gaplen = i - gapstart;
         }
       }
 #if CMK_BLUEGENEQ
     if ( tmgr.absD(nd) == 0 ) // torus
       for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
         if ( d_flags[i%nd] ) gapstart = i+1;
         else if ( i - gapstart >= gaplen ) {
           d_rot = 2*nd-i-1; gaplen = i - gapstart;
         }
       }
     if ( tmgr.absE(ne) == 0 ) // torus
       for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
         if ( e_flags[i%ne] ) gapstart = i+1;
         else if ( i - gapstart >= gaplen ) {
           e_rot = 2*ne-i-1; gaplen = i - gapstart;
         }
       }
 #endif
     // order dimensions by length
     int a_min=na, a_max=-1;
     int b_min=nb, b_max=-1;
     int c_min=nc, c_max=-1;
     int d_min=nd, d_max=-1;
     int e_min=ne, e_max=-1;
     for ( int pe=0; pe<npes; ++pe ) {
       int a,b,c,d,e,t;
 #if CMK_BLUEGENEQ
       tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
 #else
       tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
       d=0; e=0;
 #endif
       a = (a+a_rot)%a_mod;
       b = (b+b_rot)%b_mod;
       c = (c+c_rot)%c_mod;
       d = (d+d_rot)%d_mod;
       e = (e+e_rot)%e_mod;
       if ( a < a_min ) a_min = a;
       if ( b < b_min ) b_min = b;
       if ( c < c_min ) c_min = c;
       if ( d < d_min ) d_min = d;
       if ( e < e_min ) e_min = e;
       if ( a > a_max ) a_max = a;
       if ( b > b_max ) b_max = b;
       if ( c > c_max ) c_max = c;
       if ( d > d_max ) d_max = d;
       if ( e > e_max ) e_max = e;
     }
     int a_len = a_max - a_min + 1;
     int b_len = b_max - b_min + 1;
     int c_len = c_max - c_min + 1;
     int d_len = d_max - d_min + 1;
     int e_len = e_max - e_min + 1;
     int lensort[5];
     lensort[0] = (a_len << 3) + 0;
     lensort[1] = (b_len << 3) + 1;
     lensort[2] = (c_len << 3) + 2;
     lensort[3] = (d_len << 3) + 3;
     lensort[4] = (e_len << 3) + 4;
     // CkPrintf("TopoManagerWrapper lensort before %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
     std::sort(lensort, lensort+5);
     // CkPrintf("TopoManagerWrapper lensort after %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) a_dim = 4-i; }
     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) b_dim = 4-i; }
     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) d_dim = 4-i; }
     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) e_dim = 4-i; }
 #if 0
     if ( a_len >= b_len && a_len >= c_len ) {
       a_dim = 0;
       if ( b_len >= c_len ) {
         b_dim = 1; c_dim = 2;
       } else {
         b_dim = 2; c_dim = 1;
       }
     } else if ( b_len >= a_len && b_len >= c_len ) {
       b_dim = 0;
       if ( a_len >= c_len ) {
         a_dim = 1; c_dim = 2;
       } else {
         a_dim = 2; c_dim = 1;
       }
     } else { // c is longest
       c_dim = 0;
       if ( a_len >= b_len ) {
         a_dim = 1; b_dim = 2;
       } else {
         a_dim = 2; b_dim = 1;
       }
     }
 #endif
     iout << iINFO << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
 #if CMK_BLUEGENEQ
     << " BY " << d_len << " BY " << e_len
 #endif
     << "\n" << endi;
     // CkPrintf("TopoManagerWrapper dims %d %d %d %d %d\n", a_dim, b_dim, c_dim, d_dim, e_dim);
   }
   // Look up the torus position of a PE and store the retained dimensions
   // in crds.  Each raw coordinate is rotated and wrapped as
   // (x + x_rot) % x_mod.  A dimension is written to crds[x_dim] only when
   // its slot index x_dim is below 3; dimensions with a larger slot index
   // are dropped.
   //   pe    PE to look up (translated through fixpe() first)
   //   crds  output array indexed by slot; needs room for 3 ints
   void coords(int pe, int *crds) {
     int ta, tb, tc, td, te, tt;
 #if CMK_BLUEGENEQ
     tmgr.rankToCoordinates(fixpe(pe), ta, tb, tc, td, te, tt);
 #else
     // only three torus dimensions are reported here; treat d and e as flat
     tmgr.rankToCoordinates(fixpe(pe), ta, tb, tc, tt);
     td = 0;
     te = 0;
 #endif
     if ( a_dim < 3 ) { crds[a_dim] = (ta + a_rot) % a_mod; }
     if ( b_dim < 3 ) { crds[b_dim] = (tb + b_rot) % b_mod; }
     if ( c_dim < 3 ) { crds[c_dim] = (tc + c_rot) % c_mod; }
     if ( d_dim < 3 ) { crds[d_dim] = (td + d_rot) % d_mod; }
     if ( e_dim < 3 ) { crds[e_dim] = (te + e_rot) % e_mod; }
   }
   // Return a single retained coordinate of a PE.
   //   pe   PE to look up
   //   dim  slot index into the 3-entry result of coords(); not range checked
   int coord(int pe, int dim) {
     int slots[3];
     coords(pe, slots);
     const int value = slots[dim];
     return value;
   }
   // Ordering functor for PEs: compares the retained torus coordinates in
   // the priority order given by sortdims[0..2], and falls back to
   // WorkDistrib::peCompactOrderingIndex when all three coordinates match.
   struct pe_sortop_topo {
     TopoManagerWrapper &tmgr;
     const int *sortdims;
     pe_sortop_topo(TopoManagerWrapper &wrapper, int *dims)
       : tmgr(wrapper), sortdims(dims) {}
     bool operator() (int pe1, int pe2) const {
       int lhs[3], rhs[3];
       tmgr.coords(pe1, lhs);
       tmgr.coords(pe2, rhs);
       for ( int k = 0; k < 3; ++k ) {
         const int axis = sortdims[k];
         if ( lhs[axis] < rhs[axis] ) return true;
         if ( lhs[axis] > rhs[axis] ) return false;
       }
       // same position on the mesh: use the compact ordering as tie-breaker
       const int *rank = WorkDistrib::peCompactOrderingIndex;
       return rank[pe1] < rank[pe2];
     }
   };
   // Sort the PEs in [node_begin,node_end) by torus position and choose a
   // point at which to split them.
   // The primary sort key is splitdim; the two remaining dimensions follow
   // in order of decreasing extent within this set of PEs.
   // Returns a pointer to the first PE of the second part.  The split is
   // placed on the physical-node boundary nearest to (nnodes+1)/2;
   // node_begin is returned when the range is empty or no boundary exists.
   int* sortAndSplit(int *node_begin, int *node_end, int splitdim) {
     if ( node_begin == node_end ) return node_begin;

     // bounding box of this set of PEs in the three retained dimensions
     int lo[3], hi[3], extent[3], keyOrder[3];
     coords(*node_begin, lo);
     coords(*node_begin, hi);
     for ( int *peitr = node_begin; peitr != node_end; ++peitr ) {
       int here[3];
       coords(*peitr, here);
       for ( int k = 0; k < 3; ++k ) {
         lo[k] = std::min(lo[k], here[k]);
         hi[k] = std::max(hi[k], here[k]);
       }
     }
     for ( int k = 0; k < 3; ++k ) {
       extent[k] = hi[k] - lo[k];
     }

     // requested dimension first, then the other two, longer one first
     keyOrder[0] = splitdim;
     int slot = 1;
     for ( int axis = 0; axis < 3; ++axis ) {
       if ( axis == splitdim ) continue;
       keyOrder[slot++] = axis;
     }
     if ( extent[keyOrder[1]] < extent[keyOrder[2]] ) {
       const int shorter = keyOrder[1];
       keyOrder[1] = keyOrder[2];
       keyOrder[2] = shorter;
     }
     std::sort(node_begin, node_end, pe_sortop_topo(*this, keyOrder));

     int *nodes = node_begin;
     int nnodes = node_end - node_begin;
     int i_split = 0;
 #if 0
     int c_split = coord(nodes[0],splitdim);
     for ( int i=0; i<nnodes; ++i ) {
       if ( coord(nodes[i],splitdim) != c_split ) {
         int mid = (nnodes+1)/2;
         if ( abs(i-mid) < abs(i_split-mid) ) {
           i_split = i;
           c_split = coord(i,splitdim);
         }
         else break;
       }
     }
 #endif
     // advance the split to successive physical-node boundaries for as
     // long as each one is closer to the midpoint than the last
     const int mid = (nnodes+1)/2;
     for ( int i = 0; i < nnodes; ++i ) {
       if ( CmiPeOnSamePhysicalNode(nodes[i_split], nodes[i]) ) continue;
       if ( abs(i-mid) >= abs(i_split-mid) ) break;
       i_split = i;
     }
     return node_begin + i_split;
   }
 };

 // Ordering functor that lays patches out along a serpentine curve with
 // a as the slowest index, then b, then c.  The b direction is reversed
 // for odd a, and the c direction is reversed again for odd b, so that
 // consecutive patches in the ordering stay adjacent in the grid.
 struct patch_sortop_curve_a {
   PatchMap *pmap;
   patch_sortop_curve_a(PatchMap *m) : pmap(m) {}
   inline bool operator() (int p1, int p2) const {
     const int outer1 = pmap->index_a(p1);
     const int outer2 = pmap->index_a(p2);
     if ( outer1 != outer2 ) return ( outer1 < outer2 );

     int flip = ( outer1 & 1 ) ? -1 : 1;
     const int mid1 = pmap->index_b(p1);
     const int mid2 = pmap->index_b(p2);
     if ( mid1 * flip < mid2 * flip ) return true;
     if ( mid1 * flip > mid2 * flip ) return false;

     if ( mid1 & 1 ) flip = -flip;
     const int inner1 = pmap->index_c(p1);
     const int inner2 = pmap->index_c(p2);
     return ( inner1 * flip < inner2 * flip );
   }
 };

 // Ordering functor that lays patches out along a serpentine curve with
 // b as the slowest index, then a, then c.  The a direction is reversed
 // for odd b, and the c direction is reversed again for odd a.
 struct patch_sortop_curve_b {
   PatchMap *pmap;
   patch_sortop_curve_b(PatchMap *m) : pmap(m) {}
   inline bool operator() (int p1, int p2) const {
     const int outer1 = pmap->index_b(p1);
     const int outer2 = pmap->index_b(p2);
     if ( outer1 != outer2 ) return ( outer1 < outer2 );

     int flip = ( outer1 & 1 ) ? -1 : 1;
     const int mid1 = pmap->index_a(p1);
     const int mid2 = pmap->index_a(p2);
     if ( mid1 * flip < mid2 * flip ) return true;
     if ( mid1 * flip > mid2 * flip ) return false;

     if ( mid1 & 1 ) flip = -flip;
     const int inner1 = pmap->index_c(p1);
     const int inner2 = pmap->index_c(p2);
     return ( inner1 * flip < inner2 * flip );
   }
 };

 // Ordering functor that lays patches out along a serpentine curve with
 // c as the slowest index, then a, then b.  The a direction is reversed
 // for odd c, and the b direction is reversed again for odd a.
 struct patch_sortop_curve_c {
   PatchMap *pmap;
   patch_sortop_curve_c(PatchMap *m) : pmap(m) {}
   inline bool operator() (int p1, int p2) const {
     const int outer1 = pmap->index_c(p1);
     const int outer2 = pmap->index_c(p2);
     if ( outer1 != outer2 ) return ( outer1 < outer2 );

     int flip = ( outer1 & 1 ) ? -1 : 1;
     const int mid1 = pmap->index_a(p1);
     const int mid2 = pmap->index_a(p2);
     if ( mid1 * flip < mid2 * flip ) return true;
     if ( mid1 * flip > mid2 * flip ) return false;

     if ( mid1 & 1 ) flip = -flip;
     const int inner1 = pmap->index_b(p1);
     const int inner2 = pmap->index_b(p2);
     return ( inner1 * flip < inner2 * flip );
   }
 };

 // Recursively distribute a set of patches over a set of PEs.
 //
 // Each call
 //   1. computes a load for every patch in [patch_begin,patch_end) and caps
 //      it so that no single patch exceeds the average load per PE,
 //   2. chooses a split point in [node_begin,node_end), trying in turn the
 //      torus topology, physical-node boundaries, process boundaries and
 //      finally the plain midpoint,
 //   3. if only one PE is left, assigns all patches to it and returns,
 //   4. otherwise sorts the patches along a serpentine curve, walks them to
 //      find the patch split matching the PE split, and recurses on both
 //      halves.
 //
 // Both the patch range and the PE range are reordered in place.
 //
 //   patch_begin, patch_end  patch ids to distribute
 //   node_begin, node_end    PE ids to distribute them over
 //   patchLoads              indexed by patch id; rewritten for the patches
 //                           of this call
 //   sortedLoads             scratch; entries [0,npatches) are overwritten
 //   assignedNode            output, indexed by patch id: the chosen PE
 //   tmgr                    topology helper used to sort and split the PEs
 static void recursive_bisect_with_curve(
   int *patch_begin, int *patch_end,
   int *node_begin, int *node_end,
   double *patchLoads,
   double *sortedLoads,
   int *assignedNode,
   TopoManagerWrapper &tmgr
   ) {

   SimParameters *simParams = Node::Object()->simParameters;
   PatchMap *patchMap = PatchMap::Object();
   int *patches = patch_begin;
   int npatches = patch_end - patch_begin;
   int *nodes = node_begin;
   int nnodes = node_end - node_begin;

   // assign patch loads: atom count plus a fixed per-patch overhead
   const int emptyPatchLoad = simParams->emptyPatchLoad;
   double totalRawLoad = 0;
   for ( int i=0; i<npatches; ++i ) {
     int pid=patches[i];
 #ifdef MEM_OPT_VERSION
     double load = patchMap->numAtoms(pid) + emptyPatchLoad;
 #else
     double load = patchMap->patch(pid)->getNumAtoms() + emptyPatchLoad;
 #endif
     patchLoads[pid] = load;
     sortedLoads[i] = load;
     totalRawLoad += load;
   }
   std::sort(sortedLoads,sortedLoads+npatches);

   // limit maxPatchLoad to adjusted average load per node
   // Loads are visited in ascending order.  "total" is what the overall load
   // would be if every patch at least this heavy were capped at "load"; the
   // scan stops at the first candidate cap that exceeds total/nnodes.
   double sumLoad = 0;
   double maxPatchLoad = 1;
   for ( int i=0; i<npatches; ++i ) {
     double load = sortedLoads[i];
     double total = sumLoad + (npatches-i) * load;
     if ( nnodes * load > total ) break;
     sumLoad += load;
     maxPatchLoad = load;
   }
   // apply the cap and accumulate the capped total
   double totalLoad = 0;
   for ( int i=0; i<npatches; ++i ) {
     int pid=patches[i];
     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
     totalLoad += patchLoads[pid];
   }
   // sanity check: the cap must not exceed the average capped load per PE
   if ( nnodes * maxPatchLoad > totalLoad )
     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");

   // extent (max - min, so one less than the count) and origin of this set
   // of patches in each patch-grid dimension
   int a_len, b_len, c_len;
   int a_min, b_min, c_min;
   { // find dimensions
     a_min = patchMap->index_a(patches[0]);
     b_min = patchMap->index_b(patches[0]);
     c_min = patchMap->index_c(patches[0]);
     int a_max = a_min;
     int b_max = b_min;
     int c_max = c_min;
     for ( int i=1; i<npatches; ++i ) {
       int a = patchMap->index_a(patches[i]);
       int b = patchMap->index_b(patches[i]);
       int c = patchMap->index_c(patches[i]);
       if ( a < a_min ) a_min = a;
       if ( b < b_min ) b_min = b;
       if ( c < c_min ) c_min = c;
       if ( a > a_max ) a_max = a;
       if ( b > b_max ) b_max = b;
       if ( c > c_max ) c_max = c;
     }
     a_len = a_max - a_min;
     b_len = b_max - b_min;
     c_len = c_max - c_min;
   }

   // node_split == node_begin means "no split found yet"; each stage below
   // only runs if the previous ones left it unchanged
   int *node_split = node_begin;

   // first choice: split the PEs along the torus dimension that matches the
   // longest patch-grid dimension (skipped when disableTopology is set)
   if ( simParams->disableTopology ) ; else
   if ( a_len >= b_len && a_len >= c_len ) {
     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
   } else if ( b_len >= a_len && b_len >= c_len ) {
     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
   } else if ( c_len >= a_len && c_len >= b_len ) {
     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
   }

   if ( node_split == node_begin ) {  // unable to split torus
     // make sure physical nodes are together
     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
     // find physical node boundary to split on
     // (the boundary nearest to the midpoint wins)
     int i_split = 0;
     for ( int i=0; i<nnodes; ++i ) {
       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
         int mid = (nnodes+1)/2;
         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
         else break;
       }
     }
     node_split = node_begin + i_split;
   }

   bool final_patch_sort = false;

   if ( node_split == node_begin ) {  // all on same physical node
     // report only when this set covers every PE of the physical node
     if ( ( simParams->verboseTopology ) &&
         nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
       int crds[3];
       tmgr.coords(*node_begin, crds);
       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
                CmiPhysicalNodeID(*node_begin), *node_begin,
                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
                a_min, b_min, c_min, npatches,
                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
     }

     // final sort along a to minimize pme message count
     final_patch_sort = true;

     // find node (process) boundary to split on
     int i_split = 0;
     for ( int i=0; i<nnodes; ++i ) {
       if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
         int mid = (nnodes+1)/2;
         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
         else break;
       }
     }
     node_split = node_begin + i_split;
   }

   if ( node_split == node_begin ) {  // all on same node (process)
     // report only when this set covers every PE of the process
     if ( ( simParams->verboseTopology ) &&
         nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
       // crds is filled in here but is not part of the message below
       int crds[3];
       tmgr.coords(*node_begin, crds);
       CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
                CmiNodeOf(*node_begin), *node_begin, npatches,
                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
     }

     // no natural divisions so just split at midpoint
     node_split = node_begin + nnodes/2;
   }

   if ( nnodes == 1 ) {  // down to a single pe
     // assign all patches
     int *node = node_begin;
     sumLoad = 0;
     for ( int i=0; i < npatches; ++i ) {
       int pid = patches[i];
       assignedNode[pid] = *node;
       sumLoad += patchLoads[pid];
       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
                 i, *node,
                 patchMap->index_a(pid),
                 patchMap->index_b(pid),
                 patchMap->index_c(pid),
                 patchLoads[pid], sumLoad);
     }

     return;
   }

   // order the patches along a serpentine curve whose slowest index is the
   // longest patch-grid dimension (always a once within one physical node)
   if ( final_patch_sort ) {
     // final sort along a to minimize pme message count
     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
   } else if ( a_len >= b_len && a_len >= c_len ) {
     if ( 0 ) CkPrintf("sort a\n");
     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
   } else if ( b_len >= a_len && b_len >= c_len ) {
     if ( 0 ) CkPrintf("sort b\n");
     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
   } else if ( c_len >= a_len && c_len >= b_len ) {
     if ( 0 ) CkPrintf("sort c\n");
     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
   }

   // on exit from this scope patch_split is the first patch of the second
   // half
   int *patch_split;
   { // walk through patches in sorted order
     int *node = node_begin;
     sumLoad = 0;
     for ( patch_split = patch_begin;
           patch_split != patch_end && node != node_split;
           ++patch_split ) {
       sumLoad += patchLoads[*patch_split];
       // cumulative load that the PEs up to and including "node" should carry
       double targetLoad = totalLoad *
         ((double)(node-node_begin+1) / (double)nnodes);
       if ( 0 ) CkPrintf("test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
                 patch_split - patch_begin, *node,
                 patchMap->index_a(*patch_split),
                 patchMap->index_b(*patch_split),
                 patchMap->index_c(*patch_split),
                 sumLoad, targetLoad);
       // look ahead by half of the next patch so the boundary lands on
       // whichever side of that patch is closer to the target
       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
     }
     // only used by the disabled diagnostic below
     double targetLoad = totalLoad *
       ((double)(node_split-node_begin) / (double)nnodes);
     if ( 0 ) CkPrintf("split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
               node_split-node_begin, nnodes,
               patch_split-patch_begin, npatches,
               sumLoad, targetLoad);
   }

   // recurse
   recursive_bisect_with_curve(
     patch_begin, patch_split, node_begin, node_split,
     patchLoads, sortedLoads, assignedNode, tmgr);
   recursive_bisect_with_curve(
     patch_split, patch_end, node_split, node_end,
     patchLoads, sortedLoads, assignedNode, tmgr);
 }

 //----------------------------------------------------------------------
 void WorkDistrib::assignPatchesSpaceFillingCurve()
 {
   TopoManagerWrapper tmgr;
   PatchMap *patchMap = PatchMap::Object();
   const int numPatches = patchMap->numPatches();
   int *assignedNode = new int[numPatches];
   ResizeArray<double> patchLoads(numPatches);
   SortableResizeArray<double> sortedLoads(numPatches);
   int numNodes = Node::Object()->numNodes();
   SimParameters *simParams = Node::Object()->simParameters;
   if(simParams->simulateInitialMapping) {
           NAMD_die("simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
           numNodes = simParams->simulatedPEs;
   }

   ResizeArray<int> patchOrdering(numPatches);
   for ( int i=0; i<numPatches; ++i ) {
     patchOrdering[i] = i;
   }

   ResizeArray<int> nodeOrdering(numNodes);
   nodeOrdering.resize(0);
   for ( int i=0; i<numNodes; ++i ) {
     int pe = peDiffuseOrdering[(i+1)%numNodes];  // avoid 0 if possible
     if ( simParams->noPatchesOnZero && numNodes > 1 ) {
       if ( pe == 0 ) continue;
       if(simParams->noPatchesOnOne && numNodes > 2) {
         if ( pe == 1 ) continue;
       }
     }
 #ifdef MEM_OPT_VERSION
     if(simParams->noPatchesOnOutputPEs && numNodes-simParams->numoutputprocs >2) {
       if ( isOutputProcessor(pe) ) continue;
     }
 #endif
     nodeOrdering.add(pe);
     if ( 0 ) CkPrintf("using pe %5d\n", pe);
   }

   int *node_begin = nodeOrdering.begin();
   int *node_end = nodeOrdering.end();
   if ( nodeOrdering.size() > numPatches ) {
     node_end = node_begin + numPatches;
   }
   std::sort(node_begin, node_end, pe_sortop_compact());

   int *basenode_begin = node_begin;
   int *basenode_end = node_end;
   if ( nodeOrdering.size() > 2*numPatches ) {
     basenode_begin = node_end;
     basenode_end = basenode_begin + numPatches;
     std::sort(basenode_begin, basenode_end, pe_sortop_compact());
   }

   if ( simParams->disableTopology ) {
     iout << iWARN << "IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << endi;
   }

   recursive_bisect_with_curve(
     patchOrdering.begin(), patchOrdering.end(),
     node_begin, node_end,
     patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);

   std::sort(node_begin, node_end, pe_sortop_compact());

   int samenodecount = 0;

   for ( int pid=0; pid<numPatches; ++pid ) {
     int node = assignedNode[pid];
     patchMap->assignNode(pid, node);
     int nodeidx = std::lower_bound(node_begin, node_end, node,
                                    pe_sortop_compact()) - node_begin;
     int basenode = basenode_begin[nodeidx];
     patchMap->assignBaseNode(pid, basenode);
     if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
   }

   iout << iINFO << "Placed " << (samenodecount*100./numPatches) << "% of base nodes on same physical node as patch\n" << endi;

   delete [] assignedNode;
 }

 //----------------------------------------------------------------------
 void WorkDistrib::mapComputes(void)
 {
   PatchMap *patchMap = PatchMap::Object();
   ComputeMap *computeMap = ComputeMap::Object();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();

   DebugM(3,"Mapping computes\n");

   computeMap->allocateCids();

   // Handle full electrostatics
   if ( node->simParameters->fullDirectOn )
     mapComputeHomePatches(computeFullDirectType);
   if ( node->simParameters->FMAOn )
 #ifdef DPMTA
     mapComputeHomePatches(computeDPMTAType);
 #else
     NAMD_die("This binary does not include DPMTA (FMA).");
 #endif
   if ( node->simParameters->PMEOn ) {
 #ifdef DPME
     if ( node->simParameters->useDPME )
       mapComputeHomePatches(computeDPMEType);
     else {
       mapComputeHomePatches(computePmeType);
       if ( node->simParameters->pressureProfileEwaldOn )
         mapComputeHomePatches(computeEwaldType);
     }
 #else
 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
     if (node->simParameters->usePMECUDA) {
       mapComputePatch(computePmeCUDAType);
     } else
 #endif
     {
       mapComputePatch(computePmeType);
     }
     if ( node->simParameters->pressureProfileEwaldOn )
       mapComputeHomePatches(computeEwaldType);
 #endif
   }

   if ( node->simParameters->globalForcesOn ) {
     DebugM(2,"adding ComputeGlobal\n");
     mapComputeHomePatches(computeGlobalType);
   }

   if ( node->simParameters->extForcesOn )
     mapComputeHomePatches(computeExtType);

   if ( node->simParameters->qmForcesOn )
     mapComputeHomePatches(computeQMType);

   if ( node->simParameters->GBISserOn )
     mapComputeHomePatches(computeGBISserType);

   if ( node->simParameters->MsmSerialOn )
     mapComputeHomePatches(computeMsmSerialType);

   if ( node->simParameters->LJPMESerialOn )
     mapComputeHomePatches(computeLjPmeSerialType);
 #ifdef CHARM_HAS_MSA
   else if ( node->simParameters->MSMOn )
     mapComputeHomePatches(computeMsmMsaType);
 #else
   else if ( node->simParameters->MSMOn )
     mapComputeHomePatches(computeMsmType);
 #endif

   if ( node->simParameters->FMMOn )
     mapComputeHomePatches(computeFmmType);

 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
 #ifdef BONDED_CUDA
   if (node->simParameters->bondedCUDA) {
     mapComputeNode(computeBondedCUDAType);
   }
 #endif
 #endif

 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
   mapComputeNode(computeNonbondedCUDA2Type);
   mapComputeHomeTuples(computeExclsType);
   mapComputePatch(computeSelfExclsType);
 #endif

 #ifdef NAMD_MIC
   mapComputeNode(computeNonbondedMICType);
 #endif

   mapComputeNonbonded();

   if ( node->simParameters->LCPOOn ) {
     mapComputeLCPO();
   }

   // If we're doing true pair interactions, no need for bonded terms.
   // But if we're doing within-group interactions, we do need them.
   if ( !node->simParameters->pairInteractionOn ||
       node->simParameters->pairInteractionSelf) {
     mapComputeHomeTuples(computeBondsType);
     mapComputeHomeTuples(computeAnglesType);
     mapComputeHomeTuples(computeDihedralsType);
     mapComputeHomeTuples(computeImpropersType);
     mapComputeHomeTuples(computeCrosstermsType);
     mapComputePatch(computeSelfBondsType);
     mapComputePatch(computeSelfAnglesType);
     mapComputePatch(computeSelfDihedralsType);
     mapComputePatch(computeSelfImpropersType);
     mapComputePatch(computeSelfCrosstermsType);
   }

   if ( node->simParameters->goGroPair ) {
       // JLai
       mapComputeHomeTuples(computeGromacsPairType);
       mapComputePatch(computeSelfGromacsPairType);
     // End of JLai
   }

   if ( node->simParameters->drudeOn ) {
     mapComputeHomeTuples(computeTholeType);
     mapComputePatch(computeSelfTholeType);
     mapComputeHomeTuples(computeAnisoType);
     mapComputePatch(computeSelfAnisoType);
     mapComputeHomeTuples(computeOneFourNbTholeType);
     mapComputePatch(computeSelfOneFourNbTholeType);
   }

   if ( node->simParameters->eFieldOn )
     mapComputePatch(computeEFieldType);
   /* BEGIN gf */
   if ( node->simParameters->mgridforceOn )
     mapComputePatch(computeGridForceType);
   /* END gf */
   if ( node->simParameters->stirOn )
     mapComputePatch(computeStirType);
   if ( node->simParameters->sphericalBCOn )
     mapComputePatch(computeSphericalBCType);
   if ( node->simParameters->cylindricalBCOn )
     mapComputePatch(computeCylindricalBCType);
   if ( node->simParameters->tclBCOn ) {
     mapComputeHomePatches(computeTclBCType);
   }
   if ( node->simParameters->constraintsOn )
     mapComputePatch(computeRestraintsType);
   if ( node->simParameters->consForceOn )
     mapComputePatch(computeConsForceType);
   if ( node->simParameters->consTorqueOn )
     mapComputePatch(computeConsTorqueType);

     // store the latest compute map
   SimParameters *simParams = Node::Object()->simParameters;
   if (simParams->storeComputeMap) {
     computeMap->saveComputeMap(simParams->computeMapFilename);
   }
     // override mapping decision
   if (simParams->loadComputeMap) {
     computeMap->loadComputeMap(simParams->computeMapFilename);
     CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
   }
 }

 //----------------------------------------------------------------------
 // Create one compute of the given type, with no patches attached, on
 // every PE that is the base node of at least one patch.  PEs are visited
 // in increasing order.
 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
 {
   PatchMap *patches = PatchMap::Object();
   ComputeMap *computes = ComputeMap::Object();
   CProxy_Node nodeProxy(CkpvAccess(BOCclass_group).node);
   Node *localNode = nodeProxy.ckLocalBranch();

   // number of PEs to consider; the simulated count wins when a mapping
   // is only being simulated
   int peCount = localNode->numNodes();
   SimParameters *params = localNode->simParameters;
   if ( params->simulateInitialMapping ) peCount = params->simulatedPEs;

   // one flag per PE, zero-initialised
   char *usedAsBase = new char[peCount]();

   const int patchCount = patches->numPatches();
   for ( int pid = 0; pid < patchCount; ++pid ) {
     usedAsBase[patches->basenode(pid)] = 1;
   }

   for ( int pe = 0; pe < peCount; ++pe ) {
     if ( ! usedAsBase[pe] ) continue;
     computes->storeCompute(pe, 0, type);
   }

   delete [] usedAsBase;
 }

 //----------------------------------------------------------------------
 // Create one compute of the given type, with no patches attached, on
 // every PE that currently has at least one patch assigned to it.
 void WorkDistrib::mapComputeHomePatches(ComputeType type)
 {
   PatchMap *patches = PatchMap::Object();
   ComputeMap *computes = ComputeMap::Object();
   CProxy_Node nodeProxy(CkpvAccess(BOCclass_group).node);
   Node *localNode = nodeProxy.ckLocalBranch();

   // number of PEs to consider; the simulated count wins when a mapping
   // is only being simulated
   int peCount = localNode->numNodes();
   SimParameters *params = localNode->simParameters;
   if ( params->simulateInitialMapping ) peCount = params->simulatedPEs;

   for ( int pe = 0; pe < peCount; ++pe ) {
     if ( ! patches->numPatchesOnNode(pe) ) continue;
     computes->storeCompute(pe, 0, type);
   }
 }

 //----------------------------------------------------------------------
 // Create one single-patch compute of the given type for every patch,
 // placed on the PE the patch is assigned to, and register the compute
 // and the patch with each other.
 void WorkDistrib::mapComputePatch(ComputeType type)
 {
   PatchMap *patches = PatchMap::Object();
   ComputeMap *computes = ComputeMap::Object();

   PatchID pid = 0;
   while ( pid < patches->numPatches() ) {
     const ComputeID newCid = computes->storeCompute(patches->node(pid), 1, type);
     computes->newPid(newCid, pid);
     patches->newCid(pid, newCid);
     pid++;
   }
 }

 //----------------------------------------------------------------------
 // Create one compute of the given type, with no patches attached, on
 // every PE.  The PE count is CkNumPes(), or simulatedPEs when an initial
 // mapping is only being simulated.
 void WorkDistrib::mapComputeNode(ComputeType type)
 {
   ComputeMap *computeMap = ComputeMap::Object();

   int ncpus = CkNumPes();
   SimParameters *simparam = Node::Object()->simParameters;
   if(simparam->simulateInitialMapping) {
           ncpus = simparam->simulatedPEs;
   }

   for(int pe=0; pe<ncpus; pe++) {
     computeMap->storeCompute(pe,0,type);
   }
 }

 //----------------------------------------------------------------------
 // Create the nonbonded self and pair computes.
 //
 // Self computes are stored on the PE of their patch.  Pair computes are
 // stored on the base node of the "downstream" patch reported by
 // PatchMap::oneOrTwoAwayNeighbors().  The code that would split a compute
 // into several partitions is disabled (#if 0), so every compute is
 // currently created with numPartitions == 1.
 void WorkDistrib::mapComputeNonbonded(void)
 {
   // For each patch, create 1 electrostatic object for self-interaction.
   // Then create 1 for each 1-away and 2-away neighbor which has a larger
   // pid.

   PatchMap *patchMap = PatchMap::Object();
   ComputeMap *computeMap = ComputeMap::Object();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   SimParameters *simParams = Node::Object()->simParameters;
   int ncpus = CkNumPes();
   int nodesize = CkMyNodeSize();
   if(simParams->simulateInitialMapping) {
           ncpus = simParams->simulatedPEs;
           nodesize = simParams->simulatedNodeSize;
   }

   // output buffers for PatchMap::oneOrTwoAwayNeighbors()
   PatchID oneAway[PatchMap::MaxOneOrTwoAway];
   PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
   int oneAwayTrans[PatchMap::MaxOneOrTwoAway];

   PatchID i;
   ComputeID cid;
   int numNeighbors;
   int j;
   // ratio of PEs to patches when there are fewer PEs than patches;
   // only read by the disabled partitioning code below
   double partScaling = 1.0;
   if ( ncpus < patchMap->numPatches() ) {
     partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
   }

   for(i=0; i<patchMap->numPatches(); i++) // do the self
   {

    int numPartitions = 1;
 #if 0
    if ( simParams->ldBalancer == LDBAL_HYBRID ) {
 #ifdef  MEM_OPT_VERSION
     int64 numFixed = patchMap->numFixedAtoms(i);
     int64 numAtoms = patchMap->numAtoms(i);
 #else
     int64 numFixed = patchMap->patch(i)->getNumFixedAtoms();  // avoid overflow
     int64 numAtoms = patchMap->patch(i)->getNumAtoms();
 #endif

     int divide = node->simParameters->numAtomsSelf;
     if (divide > 0) {
       numPartitions = (int) ( partScaling * ( 0.5 +
         (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
     }
     if (numPartitions < 1) numPartitions = 1;
     if ( numPartitions > node->simParameters->maxSelfPart )
                         numPartitions = node->simParameters->maxSelfPart;
     // self-interaction
     DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
 //    iout <<"Self numPartitions = " <<numPartitions <<" numAtoms " <<numAtoms <<std::endl;
    }
 #endif

     // DMK - NOTE - For MIC builds (i.e. NAMD_MIC is defined), it is assumed that self computes are
     //   mapped to the PE their associated patch is on. If the code below should change, making that
     //   untrue, MIC builds should be special cased so that assumption still holds (or the host vs
     //   device load balancing scheme should be modified).  (See the comment in the function
     //   mic_assignComputes() in ComputeNonbondedMIC.C for more details.)
     for(int partition=0; partition < numPartitions; partition++)
     {
       // one-patch compute on the patch's own PE, linked both ways
       cid=computeMap->storeCompute(patchMap->node(i),1,
                                    computeNonbondedSelfType,
                                    partition,numPartitions);
       computeMap->newPid(cid,i);
       patchMap->newCid(i,cid);
     }
   }

   for(int p1=0; p1 <patchMap->numPatches(); p1++) // do the pairs
   {
     // this only returns half of neighbors, which is what we want
     numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
     for(j=0;j<numNeighbors;j++)
     {
         int p2 = oneAway[j];
         int dsp = oneAwayDownstream[j];

       int numPartitions = 1;
 #if 0
       if ( simParams->ldBalancer == LDBAL_HYBRID ) {
 #ifdef  MEM_OPT_VERSION
         int64 numAtoms1 = patchMap->numAtoms(p1);
         int64 numAtoms2 = patchMap->numAtoms(p2);
         int64 numFixed1 = patchMap->numFixedAtoms(p1);
         int64 numFixed2 = patchMap->numFixedAtoms(p2);
 #else
         int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
         int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
         int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
         int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
 #endif


         const int t2 = oneAwayTrans[j];
         const int adim = patchMap->gridsize_a();
         const int bdim = patchMap->gridsize_b();
         const int cdim = patchMap->gridsize_c();
         const int nax = patchMap->numaway_a();  // 1 or 2
         const int nay = patchMap->numaway_b();  // 1 or 2
         const int naz = patchMap->numaway_c();  // 1 or 2
         const int ia1 = patchMap->index_a(p1);
         const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
         const int ib1 = patchMap->index_b(p1);
         const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
         const int ic1 = patchMap->index_c(p1);
         const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);

         if ( abs(ia2-ia1) > nax ||
              abs(ib2-ib1) > nay ||
              abs(ic2-ic1) > naz )
           NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");

         int distance = 3;
         if ( ia1 == ia2 ) --distance;
         else if ( ia1 == ia2 + nax - 1 ) --distance;
         else if ( ia1 + nax - 1 == ia2 ) --distance;
         if ( ib1 == ib2 ) --distance;
         else if ( ib1 == ib2 + nay - 1 ) --distance;
         else if ( ib1 + nay - 1 == ib2 ) --distance;
         if ( ic1 == ic2 ) --distance;
         else if ( ic1 == ic2 + naz - 1 ) --distance;
         else if ( ic1 + naz - 1 == ic2 ) --distance;
         int divide = 0;
         if ( distance == 0 ) {
           divide = node->simParameters->numAtomsSelf2;
         } else if (distance == 1) {
           divide = node->simParameters->numAtomsPair;
         } else {
           divide = node->simParameters->numAtomsPair2;
         }
         if (divide > 0) {
           numPartitions = (int) ( partScaling * ( 0.5 +
             (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
         }
         if ( numPartitions < 1 ) numPartitions = 1;
         if ( numPartitions > node->simParameters->maxPairPart )
                         numPartitions = node->simParameters->maxPairPart;
 //      if ( numPartitions > 1 ) iout << "Mapping " << numPartitions << " ComputeNonbondedPair objects for patches " << p1 << "(" << numAtoms1 << ") and " << p2 << "(" << numAtoms2 << ")\n" << endi;
       }
 #endif
                 for(int partition=0; partition < numPartitions; partition++)
                 {
                   // two-patch compute on the base node of the downstream
                   // patch; p2 is registered with its lattice translation
                   cid=computeMap->storeCompute( patchMap->basenode(dsp),
                         2,computeNonbondedPairType,partition,numPartitions);
                   computeMap->newPid(cid,p1);
                   computeMap->newPid(cid,p2,oneAwayTrans[j]);
                   patchMap->newCid(p1,cid);
                   patchMap->newCid(p2,cid);
                 }
     }
   }
 }

 //----------------------------------------------------------------------
 void WorkDistrib::mapComputeLCPO(void) {
   //iterate over all needed objects

   PatchMap *patchMap = PatchMap::Object();
   ComputeMap *computeMap = ComputeMap::Object();
   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   Node *node = nd.ckLocalBranch();
   SimParameters *simParams = Node::Object()->simParameters;
   int ncpus = CkNumPes();
   int nodesize = CkMyNodeSize();
   const int maxPatches = 8;

   int numPatchesInOctet;
   PatchID patchesInOctet[maxPatches];
   int oneAwayTrans[maxPatches];

   //partitioned after 1st timestep
   int numPartitions = 1;

   PatchID i;
   ComputeID cid;

   // one octet per patch
   for(i=0; i<patchMap->numPatches(); i++) {
     numPatchesInOctet =
         patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);

                 for(int partition=0; partition < numPartitions; partition++) {
       cid=computeMap->storeCompute(patchMap->node(i),
           numPatchesInOctet,
                                   computeLCPOType,
                                   partition,
           numPartitions);
       for (int p = 0; p < numPatchesInOctet; p++) {
         computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
       }
       for (int p = 0; p < numPatchesInOctet; p++) {
         patchMap->newCid(patchesInOctet[p],cid);
       }
     } // for partitions
   } // for patches
 } // mapComputeLCPO

 //----------------------------------------------------------------------
 // Sends a compute's LocalWorkMsg to the WorkDistrib method that matches the
 // compute's type, always addressed to the local PE.
 //
 //   compute - compute whose localWorkMsg is used for the send
 //
 // The message priority is set from the compute's sequence number and
 // priority; a negative sequence number is reported through NAMD_bug().
 // Nonbonded self/pair computes use the "A" methods on even sequence numbers
 // and the "B" methods on odd ones, each further split by GBIS phase (1-3).
 // Compute types without a dedicated case are sent to enqueueWork().
 //
 // NOTE(review): in the GBIS-phase switches a phase outside 1..3 sends
 // nothing at all; confirm getGBISPhase() can only return 1, 2 or 3.
 void WorkDistrib::messageEnqueueWork(Compute *compute) {
   LocalWorkMsg *msg = compute->localWorkMsg;
   int seq = compute->sequence();
   int gbisPhase = compute->getGBISPhase();

   if ( seq < 0 ) {
     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
   } else {
     SET_PRIORITY(msg,seq,compute->priority());
   }

   msg->compute = compute; // pointer is valid since send is to local Pe
   int type = compute->type();
   const int myPe = CkMyPe();

   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
   switch ( type ) {
   case computeExclsType:
   case computeSelfExclsType:
     wdProxy[myPe].enqueueExcls(msg);
     break;
   case computeBondsType:
   case computeSelfBondsType:
     wdProxy[myPe].enqueueBonds(msg);
     break;
   case computeAnglesType:
   case computeSelfAnglesType:
     wdProxy[myPe].enqueueAngles(msg);
     break;
   case computeDihedralsType:
   case computeSelfDihedralsType:
     wdProxy[myPe].enqueueDihedrals(msg);
     break;
   case computeImpropersType:
   case computeSelfImpropersType:
     wdProxy[myPe].enqueueImpropers(msg);
     break;
   case computeTholeType:
   case computeSelfTholeType:
     wdProxy[myPe].enqueueThole(msg);
     break;
   case computeAnisoType:
   case computeSelfAnisoType:
     wdProxy[myPe].enqueueAniso(msg);
     break;
   case computeCrosstermsType:
   case computeSelfCrosstermsType:
     wdProxy[myPe].enqueueCrossterms(msg);
     break;
   case computeOneFourNbTholeType:
   case computeSelfOneFourNbTholeType:
     wdProxy[myPe].enqueueOneFourNbThole(msg);
     break;
   // JLai
   case computeGromacsPairType:
   case computeSelfGromacsPairType:
     wdProxy[myPe].enqueueGromacsPair(msg);
     break;
   // End of JLai
   case computeLCPOType:
     wdProxy[myPe].enqueueLCPO(msg);
     break;
   case computeNonbondedSelfType:
     switch ( seq % 2 ) {
     case 0:  // even sequence number: "A" methods
       switch ( gbisPhase ) {
          case 1:
            wdProxy[myPe].enqueueSelfA1(msg);
            break;
          case 2:
            wdProxy[myPe].enqueueSelfA2(msg);
            break;
          case 3:
            wdProxy[myPe].enqueueSelfA3(msg);
            break;
       }
       break;
     case 1:  // odd sequence number: "B" methods
       switch ( gbisPhase ) {
          case 1:
            wdProxy[myPe].enqueueSelfB1(msg);
            break;
          case 2:
            wdProxy[myPe].enqueueSelfB2(msg);
            break;
          case 3:
            wdProxy[myPe].enqueueSelfB3(msg);
            break;
       }
       break;
     default:
       NAMD_bug("WorkDistrib::messageEnqueueSelf case statement error!");
     }
     break;
   case computeNonbondedPairType:
     // seq % 2 can only be -1, 0 or 1, so no further cases are needed
     // besides the default that reports the error.
     switch ( seq % 2 ) {
     case 0:  // even sequence number: "A" methods
       switch ( gbisPhase ) {
          case 1:
            wdProxy[myPe].enqueueWorkA1(msg);
            break;
          case 2:
            wdProxy[myPe].enqueueWorkA2(msg);
            break;
          case 3:
            wdProxy[myPe].enqueueWorkA3(msg);
            break;
       }
       break;
     case 1:  // odd sequence number: "B" methods
       switch ( gbisPhase ) {
          case 1:
            wdProxy[myPe].enqueueWorkB1(msg);
            break;
          case 2:
            wdProxy[myPe].enqueueWorkB2(msg);
            break;
          case 3:
            wdProxy[myPe].enqueueWorkB3(msg);
            break;
       }
       break;
     default:
       NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
     }
     break;
 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
   // Only present in CUDA/HIP builds; otherwise this type takes the default.
   case computeNonbondedCUDA2Type:
     switch ( gbisPhase ) {
        case 1:
          wdProxy[myPe].enqueueCUDA(msg);
          break;
        case 2:
          wdProxy[myPe].enqueueCUDAP2(msg);
          break;
        case 3:
          wdProxy[myPe].enqueueCUDAP3(msg);
          break;
     }
     break;
 #endif
   case computeNonbondedMICType:
     // Without NAMD_MIC nothing is sent for this compute type.
 #ifdef NAMD_MIC
     wdProxy[myPe].enqueueMIC(msg);
 #endif
     break;
   case computePmeType:
     wdProxy[myPe].enqueuePme(msg);
     break;
 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
   case computePmeCUDAType:
     wdProxy[myPe].enqueuePme(msg);
     break;
 #endif
   default:
     wdProxy[myPe].enqueueWork(msg);
   }
 }

 //----------------------------------------------------------------------
 // Sends a compute's LocalWorkMsg to the finishCUDA method for the compute's
 // GBIS phase on the local PE (CUDA/HIP builds); in other builds the
 // compute's doWork() is run directly instead.
 //
 //   compute - compute whose localWorkMsg is used for the send
 //
 // A negative sequence number is reported through NAMD_bug().
 //
 // NOTE(review): a GBIS phase outside 1..3 sends nothing; confirm
 // getGBISPhase() can only return 1, 2 or 3.
 void WorkDistrib::messageFinishCUDA(Compute *compute) {
   LocalWorkMsg *msg = compute->localWorkMsg;
   int seq = compute->sequence();
   int gbisPhase = compute->getGBISPhase();

   if ( seq < 0 ) {
     // Name this function in the report (the text used to name
     // messageEnqueueWork, which made the failure hard to locate).
     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishCUDA");
   } else {
     SET_PRIORITY(msg,seq,compute->priority());
   }

   msg->compute = compute; // pointer is valid since send is to local Pe
   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);

 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
     switch ( gbisPhase ) {
        case 1:
          wdProxy[CkMyPe()].finishCUDA(msg);
          break;
        case 2:
          wdProxy[CkMyPe()].finishCUDAP2(msg);
          break;
        case 3:
          wdProxy[CkMyPe()].finishCUDAP3(msg);
          break;
     }
 #else
     msg->compute->doWork();  MACHINE_PROGRESS
 #endif
 }

 //----------------------------------------------------------------------
 // Sends a compute's LocalWorkMsg to finishMIC() on the local PE when built
 // with NAMD_MIC; otherwise runs the compute's doWork() directly.
 //
 //   compute - compute whose localWorkMsg is used for the send
 //
 // A negative sequence number is reported through NAMD_bug().
 void WorkDistrib::messageFinishMIC(Compute *compute) {
   LocalWorkMsg *msg = compute->localWorkMsg;
   int seq = compute->sequence();
   int gbisPhase = compute->getGBISPhase();  // fetched but not consulted below

   if ( seq >= 0 ) {
     SET_PRIORITY(msg,seq,compute->priority());
   } else {
     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
   }

   // The raw pointer is usable by the receiver because the send stays local.
   msg->compute = compute;
   CProxy_WorkDistrib localProxy(CkpvAccess(BOCclass_group).workDistrib);

 #if defined(NAMD_MIC)
   localProxy[CkMyPe()].finishMIC(msg);
 #else
   msg->compute->doWork();
   MACHINE_PROGRESS
 #endif
 }

 void WorkDistrib::enqueueWork(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueExcls(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueBonds(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueAngles(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueDihedrals(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueImpropers(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueThole(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueAniso(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueCrossterms(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueOneFourNbThole(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 // JLai
 void WorkDistrib::enqueueGromacsPair(LocalWorkMsg *msg) {
   msg->compute->doWork();
   MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
 }
 // End of JLai

 void WorkDistrib::enqueuePme(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueLCPO(LocalWorkMsg *msg) {
   msg->compute->doWork();
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueSelfA1(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueSelfA2(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueSelfA3(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueSelfB1(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueSelfB2(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueSelfB3(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueWorkA1(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueWorkA2(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueWorkA3(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 void WorkDistrib::enqueueWorkB1(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueWorkB2(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }
 void WorkDistrib::enqueueWorkB3(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }


 void WorkDistrib::enqueueWorkC(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
   if ( msg->compute->localWorkMsg != msg )
     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
 }

 // Target of messageEnqueueWork() for computeNonbondedCUDA2Type in GBIS
 // phase 1: runs the message's compute, then expands MACHINE_PROGRESS.
 // No LocalWorkMsg recycling check is made here.
 void WorkDistrib::enqueueCUDA(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }
 // Target of messageEnqueueWork() for computeNonbondedCUDA2Type in GBIS
 // phase 2: runs the message's compute, then expands MACHINE_PROGRESS.
 void WorkDistrib::enqueueCUDAP2(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }
 // Target of messageEnqueueWork() for computeNonbondedCUDA2Type in GBIS
 // phase 3: runs the message's compute, then expands MACHINE_PROGRESS.
 void WorkDistrib::enqueueCUDAP3(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }

 // Forwards the message's data field to finishPatch() on the message's
 // compute.
 void WorkDistrib::finishCUDAPatch(FinishWorkMsg *msg) {
   msg->compute->finishPatch(msg->data);
 }

 // Target of messageFinishCUDA() for GBIS phase 1: runs the message's
 // compute, then expands MACHINE_PROGRESS.
 void WorkDistrib::finishCUDA(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }
 // Target of messageFinishCUDA() for GBIS phase 2: runs the message's
 // compute, then expands MACHINE_PROGRESS.
 void WorkDistrib::finishCUDAP2(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }
 // Target of messageFinishCUDA() for GBIS phase 3: runs the message's
 // compute, then expands MACHINE_PROGRESS.
 void WorkDistrib::finishCUDAP3(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }

 // Target of messageEnqueueWork() for computeNonbondedMICType (NAMD_MIC
 // builds): runs the message's compute, then expands MACHINE_PROGRESS.
 void WorkDistrib::enqueueMIC(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }
 // Target of messageFinishMIC() in NAMD_MIC builds: runs the message's
 // compute, then expands MACHINE_PROGRESS.
 void WorkDistrib::finishMIC(LocalWorkMsg *msg) {
   msg->compute->doWork();  MACHINE_PROGRESS
 }


 //**********************************************************************
 //
 //                      FUNCTION velocities_from_PDB
 //
 //   INPUTS:
 //      v - Array of vectors to populate
 //      filename - name of the PDB file to read in
 //      totalAtoms - number of atoms expected; must match the PDB's atom count
 //
 //      This function reads in a set of initial velocities from a
 //      PDB file.  It places the velocities into the array of Vectors
 //      passed to it.
 //
 //***********************************************************************/

 void WorkDistrib::velocities_from_PDB(const char *filename,
                                       Vector *v, int totalAtoms)
 {
   PDB *v_pdb;           //  PDB info from velocity PDB
   int i;

   //  Read the PDB
   v_pdb = new PDB(filename);
   if ( v_pdb == NULL )
   {
     NAMD_die("memory allocation failed in Node::velocities_from_PDB");
   }

   //  Make sure the number of velocities read in matches
   //  the number of atoms we have
   if (v_pdb->num_atoms() != totalAtoms)
   {
     char err_msg[129];

     sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
             v_pdb->num_atoms());

     NAMD_die(err_msg);
   }

   //  Get the entire list of atom info and loop through
   //  them assigning the velocity vector for each one
   v_pdb->get_all_positions(v);

   for (i=0; i<totalAtoms; i++)
   {
     v[i].x *= PDBVELINVFACTOR;
     v[i].y *= PDBVELINVFACTOR;
     v[i].z *= PDBVELINVFACTOR;
   }

   delete v_pdb;
 }
 //              END OF FUNCTION velocities_from_PDB

 //**********************************************************************
 //
 //                      FUNCTION velocities_from_binfile
 //
 //    INPUTS:
 //      fname - File name to read velocities from
 //      n - Number of atoms in system
 //      vels - Array of velocity vectors to populate
 //
 //      This function reads in the velocities in binary format.  This is
 //     done to preserve accuracy between restarts of namd.
 //
 //**********************************************************************

 void WorkDistrib::velocities_from_binfile(const char *fname, Vector *vels, int n)
 {
   // Thin wrapper: hands the file name, velocity array and atom count
   // straight to read_binary_file().
   read_binary_file(fname,vels,n);
 }
 //               END OF FUNCTION velocities_from_binfile

 //**********************************************************************
 //
 //                      FUNCTION random_velocities
 //
 //   INPUTS:
 //      v - array of vectors to populate
 //      Temp - Temperature to achieve
 //
 //      This function assigns a random velocity distribution to a
 //   simulation to achieve a desired initial temperature.  The method
 //   used here was stolen from the program X-PLOR.
 //
 //**********************************************************************

 void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
                                     Vector *v, int totalAtoms)
 {
   SimParameters *simParams = Node::Object()->simParameters;
   Bool lesOn = simParams->lesOn;
   Random vel_random(simParams->randomSeed);

   int lesReduceTemp = lesOn && simParams->lesReduceTemp;
   BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;

   //  Boltzmann constant * Temp
   const BigReal kbT = Temp*BOLTZMANN;

   //  Every atom gets one random deviate per spatial direction, scaled by
   //  sqrt(Kb*Temp/Mass); atoms with non-positive mass get zero velocity.
   for (int atom = 0; atom < totalAtoms; atom++)
   {
     const BigReal kbToverM = (structure->atommass(atom) <= 0.) ? 0. :
         sqrt(kbT *
           ( lesOn && structure->get_fep_type(atom) ? tempFactor : 1.0 ) /
                           structure->atommass(atom) );

     //  Adapted from X-PLOR: each deviate is the sum of twelve uniform
     //  deviates minus 6.0 (the "sum of uniform deviates algorithm",
     //  Abramowitz and Stegun, "Handbook of Mathematical Functions",
     //  pg 952), i.e. a value between -6.0 and 6.0.  The draws are
     //  consumed in x, y, z order.
     BigReal deviate[3];
     for (int axis = 0; axis < 3; axis++)
     {
       BigReal sum = 0.0;
       for (int draw = 0; draw < 12; draw++)
       {
         sum += vel_random.uniform();
       }
       sum -= 6.0;
       deviate[axis] = sum;
     }

     v[atom].x = deviate[0]*kbToverM;
     v[atom].y = deviate[1]*kbToverM;
     v[atom].z = deviate[2]*kbToverM;
   }

   //  With Drude particles enabled, each Drude atom takes over the velocity
   //  of its mother atom.
   if ( simParams->drudeOn )
   {
     for (int atom = 0; atom < totalAtoms; atom++)
     {
       if ( ! structure->is_drude(atom) ) continue;
       v[atom] = v[structure->get_mother_atom(atom)];  // zero is good enough
     }
   }
 }
 /*                      END OF FUNCTION random_velocities               */

 //**********************************************************************
 //
 //                      FUNCTION remove_com_motion
 //
 //   INPUTS:
 //      vel - Array of initial velocity vectors
 //
 //      This function removes the center of mass motion from a molecule.
 //
 //**********************************************************************

 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
 {
   Vector comVelocity(0,0,0);   //  accumulates sum of m_i * v_i
   BigReal massSum=0;           //  accumulates sum of m_i

   //  Accumulate net momentum and total mass over all n atoms
   int atom = 0;
   while (atom < n)
   {
     BigReal mass = structure->atommass(atom);
     comVelocity += mass * vel[atom];
     massSum += mass;
     atom++;
   }

   //  Momentum divided by total mass is the center-of-mass velocity
   comVelocity /= massSum;

   iout << iINFO << "REMOVING COM VELOCITY "
         << ( PDBVELFACTOR * comVelocity ) << "\n" << endi;

   //  Subtract it from every atom's velocity
   for (atom = 0; atom < n; atom++)
   {
     vel[atom] -= comVelocity;
   }

 }
 /*                      END OF FUNCTION remove_com_motion               */

 #if USE_TOPOMAP

 //Specifically designed for BGL and other 3d Tori architectures
 //Partition Torus and Patch grid together using recursive bisection.
 int WorkDistrib::assignPatchesTopoGridRecBisection() {

   PatchMap *patchMap = PatchMap::Object();

   // Scratch array, one slot per patch, handed to the bisection and released
   // before returning; only the bisection's return code leaves this function.
   int *assignedNode = new int[patchMap->numPatches()];

   int numNodes = Node::Object()->numNodes();
   SimParameters *simParams = Node::Object()->simParameters;
   if ( simParams->simulateInitialMapping ) {
     numNodes = simParams->simulatedPEs;
   }

   // NOTE(review): usedNodes is computed here but not consumed anywhere
   // further down in this function.
   int usedNodes = numNodes;
   CkPrintf("assignPatchesTopoGridRecBisection\n");
   if ( simParams->noPatchesOnZero && numNodes > 1 ) {
     --usedNodes;
     if ( simParams->noPatchesOnOne && numNodes > 2 ) {
       --usedNodes;
     }
   }

   RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());

   // Right now assumes a T*** (e.g. TXYZ) mapping
   TopoManager tmgr;
   int xsize = tmgr.getDimNX();
   int ysize = tmgr.getDimNY();
   int zsize = tmgr.getDimNZ();

   //Fix to not assign patches to processor 0
   int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);

   delete [] assignedNode;
   return rc;
 }
 #endif


 #ifdef NAMD_MIC
   // Host/device load-balancing routines defined outside this file; the
   // WorkDistrib wrappers below forward their arguments to them.
   extern void mic_hostDeviceLDB();
   extern void mic_contributeHostDeviceLDB(int idLen, int * id);
   extern void mic_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2);
 #endif

 // In NAMD_MIC builds, invokes initHostDeviceLDB() through the un-indexed
 // WorkDistrib group proxy; does nothing in other builds.
 void WorkDistrib::send_initHostDeviceLDB() {
 #ifdef NAMD_MIC
   CProxy_WorkDistrib groupProxy(CkpvAccess(BOCclass_group).workDistrib);
   groupProxy.initHostDeviceLDB();
 #endif
 }

 // In NAMD_MIC builds, calls mic_hostDeviceLDB(); does nothing otherwise.
 void WorkDistrib::initHostDeviceLDB() {
 #ifdef NAMD_MIC
   mic_hostDeviceLDB();
 #endif
 }

 // In NAMD_MIC builds, invokes contributeHostDeviceLDB(peSetLen, peSet) on
 // element 0 of the WorkDistrib group proxy; does nothing in other builds.
 void WorkDistrib::send_contributeHostDeviceLDB(int peSetLen, int * peSet) {
 #ifdef NAMD_MIC
   CProxy_WorkDistrib groupProxy(CkpvAccess(BOCclass_group).workDistrib);
   groupProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
 #endif
 }

 // In NAMD_MIC builds, forwards both arguments to
 // mic_contributeHostDeviceLDB(); does nothing otherwise.
 void WorkDistrib::contributeHostDeviceLDB(int peSetLen, int * peSet) {
 #ifdef NAMD_MIC
   mic_contributeHostDeviceLDB(peSetLen, peSet);
 #endif
 }

 // In NAMD_MIC builds, invokes setDeviceLDBParams() with the same five
 // arguments through the un-indexed WorkDistrib group proxy; does nothing in
 // other builds.
 void WorkDistrib::send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
 #ifdef NAMD_MIC
   CProxy_WorkDistrib groupProxy(CkpvAccess(BOCclass_group).workDistrib);
   groupProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
 #endif
 }

 // In NAMD_MIC builds, forwards all five arguments to
 // mic_setDeviceLDBParams(); does nothing otherwise.
 void WorkDistrib::setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
 #ifdef NAMD_MIC
   mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
 #endif
 }


 #include "WorkDistrib.def.h"

Node::Object
static Node * Object()
Definition: Node.h:86

TopoManagerWrapper::pe_sortop_topo
Definition: WorkDistrib.C:1977

Molecule::atomcharge
Real atomcharge(int anum) const
Definition: Molecule.h:1124

LDBAL_HYBRID
#define LDBAL_HYBRID
Definition: SimParameters.h:66

nodesort::c_total
int c_total
Definition: WorkDistrib.C:1622

nodesort::npatches
int npatches
Definition: WorkDistrib.C:1623

computeMsmMsaType
Definition: ComputeMap.h:71

patch_sortop_curve_a::pmap
PatchMap * pmap
Definition: WorkDistrib.C:2047

patch_sortop_curve_a::operator()
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:2049

WorkDistrib::setDeviceLDBParams
void setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3552

PatchMgr.h

WorkDistrib::enqueueMIC
void enqueueMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3268

FullAtom::langevinParam
Real langevinParam
Definition: NamdTypes.h:220

computeSelfBondsType
Definition: ComputeMap.h:39

WorkDistrib::enqueueOneFourNbThole
void enqueueOneFourNbThole(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3146

iINFO
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81

WorkDistrib::sortPmePes
static void sortPmePes(int *pmepes, int xdim, int ydim)
Definition: WorkDistrib.C:307

Molecule::get_mother_atom
int get_mother_atom(int) const

PatchMap::center
ScaledPosition center(int pid) const
Definition: PatchMap.h:99

SimParameters::simulateInitialMapping
Bool simulateInitialMapping
Definition: SimParameters.h:227

WorkDistrib::messageFinishMIC
static void messageFinishMIC(Compute *)
Definition: WorkDistrib.C:3071

pe_sortop_bit_reversed::operator()
bool operator()(int a, int b) const
Definition: WorkDistrib.C:148

Patch::getNumAtoms
int getNumAtoms() const
Definition: Patch.h:105

SimParameters::isSendSpanningTreeUnset
int isSendSpanningTreeUnset()
Definition: SimParameters.h:1223

patch_sortop_curve_b::patch_sortop_curve_b
patch_sortop_curve_b(PatchMap *m)
Definition: WorkDistrib.C:2069

WorkDistrib::enqueueAngles
void enqueueAngles(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3110

WorkDistrib::messageFinishCUDA
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:3038

SimParameters::stirOn
Bool stirOn
Definition: SimParameters.h:988

MOStream::end
void end(void)
Definition: MStream.C:176

ConfigList.h

pe_sortop_coord_y
Definition: WorkDistrib.C:267

Patch::getNumFixedAtoms
int getNumFixedAtoms() const
Definition: Patch.h:112

PDB
Definition: PDB.h:36

Compute::sequence
int sequence(void)
Definition: Compute.h:64

ResizeArray::size
int size(void) const
Definition: ResizeArray.h:131

SimParameters::numoutputprocs
int numoutputprocs
Definition: SimParameters.h:1172

computeDihedralsType
Definition: ComputeMap.h:28

PatchMap::assignToPatch
PatchID assignToPatch(Position p, const Lattice &l)
Definition: PatchMap.inl:14

ComputeMap::setNewNumPartitions
void setNewNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:146

varsizemsg.h

ProxyMgr::setRecvSpanning
void setRecvSpanning()
Definition: ProxyMgr.C:370

less_than_bit_reversed
static bool less_than_bit_reversed(int a, int b)
Definition: WorkDistrib.C:136

SimParameters::consTorqueOn
Bool consTorqueOn
Definition: SimParameters.h:421

computeSelfExclsType
Definition: ComputeMap.h:38

recursive_bisect_with_curve
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:2109

ComputeMap::numComputes
int numComputes(void)
Definition: ComputeMap.h:103

Communicate.h

nodesort::node
int node
Definition: WorkDistrib.C:1619

computeTholeType
Definition: ComputeMap.h:30

SimParameters::langevinHydrogen
Bool langevinHydrogen
Definition: SimParameters.h:663

Compute
Definition: Compute.h:28

computeSelfOneFourNbTholeType
Definition: ComputeMap.h:46

ComputeMap::saveComputeMap
void saveComputeMap(const char *fname)
Definition: ComputeMap.C:260

ProxyMgr::Object
static ProxyMgr * Object()
Definition: ProxyMgr.h:394

TopoManagerWrapper::d_rot
int d_rot
Definition: WorkDistrib.C:1760

Lattice::c_p
NAMD_HOST_DEVICE int c_p() const
Definition: Lattice.h:291

BOLTZMANN
#define BOLTZMANN
Definition: common.h:54

SimParameters::twoAwayX
int twoAwayX
Definition: SimParameters.h:241

computeNonbondedPairType
Definition: ComputeMap.h:23

Node
Definition: Node.h:78

WorkDistrib::peCompactOrdering
static int * peCompactOrdering
Definition: WorkDistrib.h:118

computeLjPmeSerialType
Definition: ComputeMap.h:70

TopoManagerWrapper::b_mod
int b_mod
Definition: WorkDistrib.C:1761

Debug.h

ComputeID
int32 ComputeID
Definition: NamdTypes.h:288

PatchMap::max_a
BigReal max_a(int pid) const
Definition: PatchMap.h:92

WorkDistrib::finishCUDAP3
void finishCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3264

ComputeMap::initPtrs
void initPtrs()
Definition: ComputeMap.C:80

WorkDistrib::enqueueCrossterms
void enqueueCrossterms(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3140

FullAtom::fixedPosition
Position fixedPosition
Definition: NamdTypes.h:212

patch_sortop_curve_b::operator()
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:2070

SimParameters
Definition: SimParameters.h:139

partition
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45

SimParameters::isRecvSpanningTreeUnset
int isRecvSpanningTreeUnset()
Definition: SimParameters.h:1225

WorkDistrib::enqueuePme
void enqueuePme(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3161

PatchMap::Object
static PatchMap * Object()
Definition: PatchMap.h:27

computeFullDirectType
Definition: ComputeMap.h:62

SimParameters::MsmSerialOn
Bool MsmSerialOn
Definition: SimParameters.h:870

computeLCPOType
Definition: ComputeMap.h:67

WorkDistrib::enqueueWorkA3
void enqueueWorkA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3214

ComputeType
ComputeType
Definition: ComputeMap.h:20

WorkDistrib::enqueueWork
void enqueueWork(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3092

ComputeMapChangeMsg
Definition: WorkDistrib.C:69

SimParameters::qmForcesOn
Bool qmForcesOn
Definition: SimParameters.h:549

WorkDistrib::enqueueGromacsPair
void enqueueGromacsPair(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3153

computeCrosstermsType
Definition: ComputeMap.h:32

WorkDistrib::enqueueSelfA1
void enqueueSelfA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3172

WorkDistrib::finishCUDAP2
void finishCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3261

Vector
Definition: Vector.h:72

DrudeAtom
#define DrudeAtom
Definition: structures.h:23

WorkDistrib::send_contributeHostDeviceLDB
static void send_contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3532

HydrogenAtom
#define HydrogenAtom
Definition: structures.h:16

Node::simParameters
SimParameters * simParameters
Definition: Node.h:181

SimParameters::nonbondedScaling
BigReal nonbondedScaling
Definition: SimParameters.h:273

ComputeMap::loadComputeMap
void loadComputeMap(const char *fname)
Definition: ComputeMap.C:276

SimParameters::CUDASOAintegrateMode
Bool CUDASOAintegrateMode
Definition: SimParameters.h:163

Node.h

SimParameters::useSettle
Bool useSettle
Definition: SimParameters.h:1058

computeNonbondedSelfType
Definition: ComputeMap.h:22

CompAtomExt::dispcoef
DispCoef dispcoef
Definition: NamdTypes.h:151

PatchMgr::createHomePatch
void createHomePatch(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:74

ProxyMgr::setSendSpanning
void setSendSpanning()
Definition: ProxyMgr.C:361

ComputeMapChangeMsg::numNewNumPartitions
int numNewNumPartitions
Definition: WorkDistrib.C:74

PatchMgr::sendAtoms
void sendAtoms(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:157

TopoManagerWrapper::tmgr
TopoManager tmgr
Definition: WorkDistrib.C:1758

Real
float Real
Definition: common.h:118

WorkDistrib::enqueueExcls
void enqueueExcls(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3098

DebugM
#define DebugM(x, y)
Definition: Debug.h:75

patch_sortop_curve_b
Definition: WorkDistrib.C:2067

InfoStream.h

computeAnisoType
Definition: ComputeMap.h:31

WorkDistrib::enqueueBonds
void enqueueBonds(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3104

endi
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54

computeNonbondedCUDA2Type
Definition: ComputeMap.h:56

ALLBUTME
#define ALLBUTME
Definition: Communicate.h:14

Vector::z
BigReal z
Definition: Vector.h:74

WorkDistrib::enqueueAniso
void enqueueAniso(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3134

SimParameters::alchOn
Bool alchOn
Definition: SimParameters.h:471

PatchMap::packSize
int packSize(void)
Definition: PatchMap.C:314

WorkDistrib::enqueueSelfB1
void enqueueSelfB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3188

PDB.h

FALSE
#define FALSE
Definition: common.h:127

SimParameters::numAtomsSelf
int numAtomsSelf
Definition: SimParameters.h:1069

CompAtom::position
Position position
Definition: NamdTypes.h:78

SimParameters::constraintsOn
Bool constraintsOn
Definition: SimParameters.h:337

computeQMType
Definition: ComputeMap.h:65

WorkDistrib::enqueueWorkB1
void enqueueWorkB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3220

WorkDistrib::messageEnqueueWork
static void messageEnqueueWork(Compute *)
Definition: WorkDistrib.C:2866

WorkDistrib::peOrderingReady
static void peOrderingReady()
Definition: WorkDistrib.C:173

iWARN
std::ostream & iWARN(std::ostream &s)
Definition: InfoStream.C:82

computeBondsType
Definition: ComputeMap.h:26

nodesort::operator==
int operator==(const nodesort &o) const
Definition: WorkDistrib.C:1625

MIStream::get
MIStream * get(char &data)
Definition: MStream.h:29

TopoManagerWrapper::e_dim
int e_dim
Definition: WorkDistrib.C:1759

StringList
Definition: ConfigList.h:46

computeStirType
Definition: ComputeMap.h:77

PatchMap::index_a
int index_a(int pid) const
Definition: PatchMap.h:86

iout
#define iout
Definition: InfoStream.h:51

HydrogenGroupID
Definition: Hydrogen.h:14

TopoManagerWrapper::d_dim
int d_dim
Definition: WorkDistrib.C:1759

PatchMap::sizeGrid
int sizeGrid(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:62

SimParameters::sphericalBCOn
Bool sphericalBCOn
Definition: SimParameters.h:957

FullAtom::velocity
Velocity velocity
Definition: NamdTypes.h:211

ComputeMap::storeCompute
ComputeID storeCompute(int node, int maxPids, ComputeType type, int partition=-1, int numPartitions=0)
Definition: ComputeMap.C:151

PDB::num_atoms
int num_atoms(void)
Definition: PDB.C:323

PatchMap::patch
Patch * patch(PatchID pid)
Definition: PatchMap.h:244

SimParameters::twoAwayY
int twoAwayY
Definition: SimParameters.h:242

nodesort::a_total
int a_total
Definition: WorkDistrib.C:1620

SimParameters::mgridforceOn
Bool mgridforceOn
Definition: SimParameters.h:352

NamdOneTools.h

WorkDistrib::enqueueSelfA3
void enqueueSelfA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3182

CompAtomExt::groupFixed
uint32 groupFixed
Definition: NamdTypes.h:163

computeSelfCrosstermsType
Definition: ComputeMap.h:45

ResizeArray::add
int add(const Elem &elem)
Definition: ResizeArray.h:101

computeSelfAnisoType
Definition: ComputeMap.h:44

ComputeMsmMsa.h

SimParameters::fullDirectOn
Bool fullDirectOn
Definition: SimParameters.h:831

TopoManagerWrapper::pe_sortop_topo::operator()
bool operator()(int pe1, int pe2) const
Definition: WorkDistrib.C:1981

SimParameters::pairInteractionOn
Bool pairInteractionOn
Definition: SimParameters.h:630

Molecule
Molecule stores the structural information for the system.
Definition: Molecule.h:174

computeConsForceType
Definition: ComputeMap.h:82

MIStream
Definition: MStream.h:16

Lattice::b_p
NAMD_HOST_DEVICE int b_p() const
Definition: Lattice.h:290

TopoManagerWrapper::e_rot
int e_rot
Definition: WorkDistrib.C:1760

PatchMgr::movePatch
void movePatch(PatchID, NodeID)
Definition: PatchMgr.C:84

Compute::getGBISPhase
int getGBISPhase(void)
Definition: Compute.h:66

Compute::localWorkMsg
LocalWorkMsg *const localWorkMsg
Definition: Compute.h:46

WorkDistrib::patchMapInit
void patchMapInit(void)
Definition: WorkDistrib.C:1238

computeSelfImpropersType
Definition: ComputeMap.h:42

WorkDistrib::recvComputeMapChanges
void recvComputeMapChanges(ComputeMapChangeMsg *)
Definition: WorkDistrib.C:377

SortAtoms.h

computeSelfAnglesType
Definition: ComputeMap.h:40

ComputeMap::allocateCids
int allocateCids()
Definition: ComputeMap.C:141

ComputeMap.h

SimParameters::globalForcesOn
Bool globalForcesOn
Definition: SimParameters.h:636

computeGromacsPairType
Definition: ComputeMap.h:35

SimParameters::cylindricalBCOn
Bool cylindricalBCOn
Definition: SimParameters.h:969

computeExtType
Definition: ComputeMap.h:64

HydrogenGroupID::atomsInGroup
int atomsInGroup
Definition: Hydrogen.h:19

TopoManagerWrapper::d_mod
int d_mod
Definition: WorkDistrib.C:1761

SimParameters::watmodel
WaterModel watmodel
Definition: SimParameters.h:183

MACHINE_PROGRESS
#define MACHINE_PROGRESS

SimParameters::lattice
Lattice lattice
Definition: SimParameters.h:195

main.h

PatchMap::gridsize_c
int gridsize_c(void) const
Definition: PatchMap.h:66

WorkDistrib.h

CompAtomExt::id
uint32 id
Definition: NamdTypes.h:160

FinishWorkMsg::compute
Compute * compute
Definition: WorkDistrib.h:33

PatchMapMsg::patchMapData
char * patchMapData
Definition: WorkDistrib.C:1108

ComputeMap::newNumPartitions
char newNumPartitions(ComputeID cid)
Definition: ComputeMap.h:143

ComputeMap
Definition: ComputeMap.h:87

PatchMap::unpack
void unpack(char *buf)
Definition: PatchMap.C:365

CompAtom::charge
Charge charge
Definition: NamdTypes.h:79

SimParameters::eFieldOn
Bool eFieldOn
Definition: SimParameters.h:982

ComputeOneFourNbTholes.h

Compute::doWork
virtual void doWork()
Definition: Compute.C:120

Random::reorder
void reorder(Elem *a, int n)
Definition: Random.h:234

TopoManagerWrapper
Definition: WorkDistrib.C:1757

Molecule::hydrogenGroup
HydrogenGroup hydrogenGroup
Definition: Molecule.h:676

SimParameters::GBISserOn
Bool GBISserOn
Definition: SimParameters.h:607

WorkDistrib::enqueueCUDA
void enqueueCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3244

WorkDistrib::sendComputeMap
void sendComputeMap(void)
Definition: WorkDistrib.C:1211

FinishWorkMsg
Definition: WorkDistrib.h:30

Molecule.h

WorkDistrib::enqueueWorkB2
void enqueueWorkB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3225

SimParameters::noPatchesOnOne
Bool noPatchesOnOne
Definition: SimParameters.h:251

read_binary_file
void read_binary_file(const char *fname, Vector *data, int n)
Definition: NamdOneTools.C:52

Node::numNodes
int numNodes()
Definition: Node.h:192

WorkDistrib::enqueueCUDAP2
void enqueueCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3247

PatchMap::assignBaseNode
void assignBaseNode(PatchID, NodeID)
Definition: PatchMap.C:472

computeCylindricalBCType
Definition: ComputeMap.h:79

recursive_bisect_coord
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:275

PatchMap::newCid
void newCid(int pid, int cid)
Definition: PatchMap.C:512

SimParameters::FMAOn
Bool FMAOn
Definition: SimParameters.h:825

getWaterModelGroupSize
constexpr int getWaterModelGroupSize(const WaterModel &watmodel)
Definition: common.h:228

WorkDistrib::enqueueSelfB3
void enqueueSelfB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3198

TopoManagerWrapper::coord
int coord(int pe, int dim)
Definition: WorkDistrib.C:1972

PatchMap::gridsize_a
int gridsize_a(void) const
Definition: PatchMap.h:64

order
#define order
Definition: PmeRealSpace.C:235

SimParameters::PMEOn
Bool PMEOn
Definition: SimParameters.h:892

TopoManagerWrapper::pe_sortop_topo::tmgr
TopoManagerWrapper & tmgr
Definition: WorkDistrib.C:1978

Random
Definition: Random.h:37

patch_sortop_curve_a
Definition: WorkDistrib.C:2046

PatchMap::numPatches
int numPatches(void) const
Definition: PatchMap.h:59

SimParameters::LJPMESerialOn
Bool LJPMESerialOn
Definition: SimParameters.h:888

ComputePmeCUDAMgr.h

Lattice::offset_b
static NAMD_HOST_DEVICE int offset_b(int i)
Definition: Lattice.h:264

WorkDistrib::enqueueWorkC
void enqueueWorkC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3238

pe_sortop_bit_reversed::pe_sortop_bit_reversed
pe_sortop_bit_reversed(int *r)
Definition: WorkDistrib.C:147

SimParameters::MSMOn
Bool MSMOn
Definition: SimParameters.h:834

computeSphericalBCType
Definition: ComputeMap.h:78

patch_sortop_curve_c
Definition: WorkDistrib.C:2088

WorkDistrib::reinitAtoms
void reinitAtoms(const char *basename=0)
Definition: WorkDistrib.C:1085

nodesort::operator<
int operator<(const nodesort &o) const
Definition: WorkDistrib.C:1634

PatchMap.inl

atom_constants::status
int32 status
Definition: structures.h:42

WorkDistrib::enqueueThole
void enqueueThole(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3128

WorkDistrib::enqueueWorkA2
void enqueueWorkA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3209

Lattice::apply_transform
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:137

WorkDistrib::createHomePatches
void createHomePatches(void)
Definition: WorkDistrib.C:989

LocalWorkMsg::compute
Compute * compute
Definition: WorkDistrib.h:27

SimParameters::extForcesOn
Bool extForcesOn
Definition: SimParameters.h:542

NAMD_bug
void NAMD_bug(const char *err_msg)
Definition: common.C:195

computeEFieldType
Definition: ComputeMap.h:73

SimParameters::maxPatches
int maxPatches
Definition: SimParameters.h:244

Lattice::offset_c
static NAMD_HOST_DEVICE int offset_c(int i)
Definition: Lattice.h:265

SimParameters::twoAwayZ
int twoAwayZ
Definition: SimParameters.h:243

WorkDistrib::enqueueImpropers
void enqueueImpropers(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3122

computeMsmType
Definition: ComputeMap.h:72

PatchMap::min_c
BigReal min_c(int pid) const
Definition: PatchMap.h:95

ComputeMapChangeMsg::newNumPartitions
char * newNumPartitions
Definition: WorkDistrib.C:76

eventMachineProgress
static int eventMachineProgress
Definition: WorkDistrib.C:103

FullAtom::migrationGroupSize
int32 migrationGroupSize
Definition: NamdTypes.h:230

MOStream
Definition: MStream.h:101

computeOneFourNbTholeType
Definition: ComputeMap.h:33

HydrogenGroupID::atomID
AtomID atomID
Definition: Hydrogen.h:16

Molecule::langevin_param
Real langevin_param(int atomnum) const
Definition: Molecule.h:1388

Molecule::atomvdwtype
Index atomvdwtype(int anum) const
Definition: Molecule.h:1134

SimParameters::langevinDamping
BigReal langevinDamping
Definition: SimParameters.h:662

PatchMap::numaway_c
int numaway_c(void) const
Definition: PatchMap.h:70

WorkDistrib::enqueueLCPO
void enqueueLCPO(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3167

computePmeCUDAType
Definition: ComputeMap.h:55

CompAtom::vdwType
int16 vdwType
Definition: NamdTypes.h:80

SimParameters::goGroPair
Bool goGroPair
Definition: SimParameters.h:381

PatchMgr::sendMovePatches
void sendMovePatches()
Definition: PatchMgr.C:110

ComputeMap::pack
void pack(MOStream *msg)
Definition: ComputeMap.C:61

PatchMap::oneOrTwoAwayNeighbors
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
Definition: PatchMap.C:579

PatchMap::index_b
int index_b(int pid) const
Definition: PatchMap.h:87

pe_sortop_bit_reversed::rankInPhysOfNode
int * rankInPhysOfNode
Definition: WorkDistrib.C:146

SimParameters::staticAtomAssignment
Bool staticAtomAssignment
Definition: SimParameters.h:1084

pe_sortop_coord_y::pe_sortop_coord_y
pe_sortop_coord_y(ScaledPosition *s)
Definition: WorkDistrib.C:269

ResizeArray< int >

computeConsTorqueType
Definition: ComputeMap.h:83

Bool
int Bool
Definition: common.h:142

SimParameters::replicaUniformPatchGrids
Bool replicaUniformPatchGrids
Definition: SimParameters.h:1085

pe_sortop_coord_x::operator()
bool operator()(int a, int b) const
Definition: WorkDistrib.C:262

COMPUTEMAPTAG
#define COMPUTEMAPTAG
Definition: common.h:184

SimParameters::langevinOn
Bool langevinOn
Definition: SimParameters.h:660

nodesort
Definition: WorkDistrib.C:1618

TopoManagerWrapper::a_rot
int a_rot
Definition: WorkDistrib.C:1760

Compute::priority
int priority(void)
Definition: Compute.h:65

WorkDistrib::finishCUDA
void finishCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3258

HydrogenGroupID::isMP
int isMP
Definition: Hydrogen.h:27

computePmeType
Definition: ComputeMap.h:53

SimParameters::simulatedPEs
int simulatedPEs
Definition: SimParameters.h:228

CompAtom::partition
uint8 partition
Definition: NamdTypes.h:81

WorkDistrib::~WorkDistrib
~WorkDistrib(void)
Definition: WorkDistrib.C:124

Vector::x
BigReal x
Definition: Vector.h:74

CompAtom::hydrogenGroupSize
uint8 hydrogenGroupSize
Definition: NamdTypes.h:89

FinishWorkMsg::data
int data
Definition: WorkDistrib.h:34

TopoManagerWrapper::b_rot
int b_rot
Definition: WorkDistrib.C:1760

PDB::get_extremes
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:104

PatchMap::numaway_a
int numaway_a(void) const
Definition: PatchMap.h:68

Lattice::a_p
NAMD_HOST_DEVICE int a_p() const
Definition: Lattice.h:289

Lattice::a_r
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284

Lattice::b_r
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285

Molecule::numAtoms
int numAtoms
Definition: Molecule.h:586

computeTclBCType
Definition: ComputeMap.h:80

SimParameters::maxPairPart
int maxPairPart
Definition: SimParameters.h:1067

ComputeMap::setNewNode
void setNewNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:122

Random.h

Compute::finishPatch
virtual void finishPatch(int)
Definition: Compute.C:124

Lattice::nearest
NAMD_HOST_DEVICE Position nearest(Position data, ScaledPosition ref) const
Definition: Lattice.h:95

NAMD_die
void NAMD_die(const char *err_msg)
Definition: common.C:147

Node::pdb
PDB * pdb
Definition: Node.h:183

FullAtom
Definition: NamdTypes.h:210

WorkDistrib::enqueueCUDAP3
void enqueueCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3250

SimParameters::binaryOutput
Bool binaryOutput
Definition: SimParameters.h:304

WorkDistrib::peDiffuseOrderingIndex
static int * peDiffuseOrderingIndex
Definition: WorkDistrib.h:117

PatchMap::min_a
BigReal min_a(int pid) const
Definition: PatchMap.h:91

SimParameters::consForceOn
Bool consForceOn
Definition: SimParameters.h:1001

SimParameters::outputPatchDetails
Bool outputPatchDetails
Definition: SimParameters.h:1083

computeFmmType
Definition: ComputeMap.h:68

Lattice::c_r
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286

Molecule::atommass
Real atommass(int anum) const
Definition: Molecule.h:1114

computeMsmSerialType
Definition: ComputeMap.h:69

PatchMgr
Definition: PatchMgr.h:145

TopoManagerWrapper::c_rot
int c_rot
Definition: WorkDistrib.C:1760

compare_bit_reversed
static int compare_bit_reversed(int a, int b)
Definition: WorkDistrib.C:127

Node::configList
ConfigList * configList
Definition: Node.h:182

SimParameters::rigidBonds
int rigidBonds
Definition: SimParameters.h:1051

WorkDistrib::enqueueWorkA1
void enqueueWorkA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3204

BUFSIZE
#define BUFSIZE
Definition: Communicate.h:15

SimParameters::pressureProfileEwaldOn
Bool pressureProfileEwaldOn
Definition: SimParameters.h:815

SimParameters::numAtomsPair2
int numAtomsPair2
Definition: SimParameters.h:1075

split
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:74

WorkDistrib::peDiffuseOrdering
static int * peDiffuseOrdering
Definition: WorkDistrib.h:116

SimParameters::bondedCUDA
uint32 bondedCUDA
Definition: SimParameters.h:920

patch_sortop_curve_b::pmap
PatchMap * pmap
Definition: WorkDistrib.C:2068

PatchMap::makePatches
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:171

PatchMap::basenode
int basenode(int pid) const
Definition: PatchMap.h:117

PatchMap::index_c
int index_c(int pid) const
Definition: PatchMap.h:88

Molecule::get_fep_type
unsigned char get_fep_type(int anum) const
Definition: Molecule.h:1437

nodesort::nodesort
nodesort()
Definition: WorkDistrib.C:1624

patch_sortop_curve_c::pmap
PatchMap * pmap
Definition: WorkDistrib.C:2089

RecBisection.h

WorkDistrib::peOrderingInit
static int peOrderingInit
Definition: WorkDistrib.h:115

HydrogenGroupID::isGP
int isGP
Definition: Hydrogen.h:18

WorkDistrib::sendPatchMap
void sendPatchMap(void)
Definition: WorkDistrib.C:1111

PDB::find_extremes
void find_extremes(const Lattice &, BigReal frac=1.0)
Definition: PDB.C:437

WorkDistrib::saveComputeMapChanges
void saveComputeMapChanges(int, CkGroupID)
Definition: WorkDistrib.C:359

computeGlobalType
Definition: ComputeMap.h:63

FullAtom::status
int32 status
Atom status bit fields defined in structures.h.
Definition: NamdTypes.h:227

SimParameters::tclBCOn
Bool tclBCOn
Definition: SimParameters.h:648

computeSelfGromacsPairType
Definition: ComputeMap.h:36

WorkDistrib::finishCUDAPatch
void finishCUDAPatch(FinishWorkMsg *msg)
Definition: WorkDistrib.C:3254

LocalWorkMsg
Definition: WorkDistrib.h:24

WorkDistrib::savePatchMap
void savePatchMap(PatchMapMsg *msg)
Definition: WorkDistrib.C:1147

cuda_initialize
void cuda_initialize()
Definition: DeviceCUDA.C:27

ProcessorPrivate.h

topo_getargs
void topo_getargs(char **argv)
Definition: WorkDistrib.C:93

NamdTypes.h

WorkDistrib::peCompactOrderingIndex
static int * peCompactOrderingIndex
Definition: WorkDistrib.h:119

WorkDistrib::buildNodeAwarePeOrdering
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:183

CompAtom::nonbondedGroupSize
uint8 nonbondedGroupSize
Definition: NamdTypes.h:82

patch_sortop_curve_a::patch_sortop_curve_a
patch_sortop_curve_a(PatchMap *m)
Definition: WorkDistrib.C:2048

Molecule::getAtoms
Atom * getAtoms() const
Definition: Molecule.h:520

Node::myid
int myid()
Definition: Node.h:191

TopoManagerWrapper::a_dim
int a_dim
Definition: WorkDistrib.C:1759

computeSelfDihedralsType
Definition: ComputeMap.h:41

SimParameters::initialTemp
BigReal initialTemp
Definition: SimParameters.h:253

FullAtom::rigidBondLength
Real rigidBondLength
Definition: NamdTypes.h:231

pe_sortop_coord_x
Definition: WorkDistrib.C:259

SimParameters::pressureProfileAtomTypes
int pressureProfileAtomTypes
Definition: SimParameters.h:814

simParams
#define simParams
Definition: Output.C:131

SimParameters::usePMECUDA
Bool usePMECUDA
Definition: SimParameters.h:915

HydrogenGroupID::atomsInMigrationGroup
int atomsInMigrationGroup
Definition: Hydrogen.h:29

randtopo
static int randtopo
Definition: WorkDistrib.C:87

ComputeMap::newPid
void newPid(ComputeID cid, int pid, int trans=13)
Definition: ComputeMap.C:196

SimParameters::useDPME
Bool useDPME
Definition: SimParameters.h:913

WorkDistrib::send_setDeviceLDBParams
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3545

pe_sortop_coord_x::spos
ScaledPosition * spos
Definition: WorkDistrib.C:260

Lattice::offset_a
static NAMD_HOST_DEVICE int offset_a(int i)
Definition: Lattice.h:263

ResizeArray::begin
iterator begin(void)
Definition: ResizeArray.h:36

PatchMapMsg
Definition: WorkDistrib.C:1106

PatchMap::max_b
BigReal max_b(int pid) const
Definition: PatchMap.h:94

SortableResizeArray< int >

DeviceCUDA.h

computeImpropersType
Definition: ComputeMap.h:29

SimParameters::FMMOn
Bool FMMOn
Definition: SimParameters.h:872

WorkDistrib::mapComputes
void mapComputes(void)
Definition: WorkDistrib.C:2407

StringList::data
char * data
Definition: ConfigList.h:48

TopoManagerWrapper::c_mod
int c_mod
Definition: WorkDistrib.C:1761

WorkDistrib::enqueueSelfA2
void enqueueSelfA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3177

ComputeMap::Object
static ComputeMap * Object()
Definition: ComputeMap.h:91

CompAtom::isWater
uint8 isWater
Definition: NamdTypes.h:90

build_ordering
static void build_ordering(void *)
Definition: WorkDistrib.C:89

Vector::y
BigReal y
Definition: Vector.h:74

TopoManagerWrapper::e_mod
int e_mod
Definition: WorkDistrib.C:1761

SimParameters::maxSelfPart
int maxSelfPart
Definition: SimParameters.h:1065

Transform
Definition: NamdTypes.h:43

BOCgroup.h

pe_sortop_bit_reversed
Definition: WorkDistrib.C:145

Compute.h

PatchMap::numaway_b
int numaway_b(void) const
Definition: PatchMap.h:69

computeAnglesType
Definition: ComputeMap.h:27

ComputeMapChangeMsg::numNewNodes
int numNewNodes
Definition: WorkDistrib.C:73

FullAtom::mass
Mass mass
Definition: NamdTypes.h:218

WorkDistrib::distributeHomePatches
void distributeHomePatches(void)
Definition: WorkDistrib.C:1063

PatchMap::assignNode
void assignNode(PatchID, NodeID)
Definition: PatchMap.C:465

TopoManagerWrapper::a_mod
int a_mod
Definition: WorkDistrib.C:1761

computeGridForceType
Definition: ComputeMap.h:75

computeRestraintsType
Definition: ComputeMap.h:81

computeSelfTholeType
Definition: ComputeMap.h:43

computeGBISserType
Definition: ComputeMap.h:66

patch_sortop_curve_c::patch_sortop_curve_c
patch_sortop_curve_c(PatchMap *m)
Definition: WorkDistrib.C:2090

mic_initialize
void mic_initialize()

PDBVELFACTOR
#define PDBVELFACTOR
Definition: common.h:57

SimParameters::minAtomsPerPatch
int minAtomsPerPatch
Definition: SimParameters.h:1077

SimParameters::pairInteractionSelf
Bool pairInteractionSelf
Definition: SimParameters.h:633

PatchMap::max_c
BigReal max_c(int pid) const
Definition: PatchMap.h:96

Compute::type
int type()
Definition: Compute.h:48

SimParameters::lesOn
Bool lesOn
Definition: SimParameters.h:523

Molecule::is_drude
Bool is_drude(int) const

WorkDistrib::pe_sortop_compact
Definition: WorkDistrib.h:127

ComputeMapChangeMsg::newNodes
int * newNodes
Definition: WorkDistrib.C:75

WorkDistrib::enqueueSelfB2
void enqueueSelfB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3193

SimParameters::patchDimension
BigReal patchDimension
Definition: SimParameters.h:308

PatchMap::gridsize_b
int gridsize_b(void) const
Definition: PatchMap.h:65

PatchMap::numPatchesOnNode
int numPatchesOnNode(int node)
Definition: PatchMap.h:60

WorkDistrib::initHostDeviceLDB
void initHostDeviceLDB()
Definition: WorkDistrib.C:3526

MOStream::put
MOStream * put(char data)
Definition: MStream.h:112

WorkDistrib::send_initHostDeviceLDB
static void send_initHostDeviceLDB()
Definition: WorkDistrib.C:3519

WorkDistrib::createAtomLists
FullAtomList * createAtomLists(const char *basename=0)
Definition: WorkDistrib.C:654

LonepairAtom
#define LonepairAtom
Definition: structures.h:22

Lattice.h

Lattice
Definition: Lattice.h:17

Priorities.h

SimParameters::noPatchesOnZero
Bool noPatchesOnZero
Definition: SimParameters.h:249

SimParameters::LCPOOn
Bool LCPOOn
Definition: SimParameters.h:617

RecBisection
Definition: RecBisection.h:103

SET_PRIORITY
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18

TopoManagerWrapper::b_dim
int b_dim
Definition: WorkDistrib.C:1759

pe_sortop_coord_x::pe_sortop_coord_x
pe_sortop_coord_x(ScaledPosition *s)
Definition: WorkDistrib.C:261

WorkDistrib::enqueueDihedrals
void enqueueDihedrals(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3116

SimParameters::comMove
Bool comMove
Definition: SimParameters.h:255

PatchMap::node
int node(int pid) const
Definition: PatchMap.h:114

Molecule::is_atom_fixed
Bool is_atom_fixed(int atomnum) const
Definition: Molecule.h:1504

SimParameters::fixedAtomsOn
Bool fixedAtomsOn
Definition: SimParameters.h:655

WorkDistrib::finishMIC
void finishMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3271

ConfigList::find
StringList * find(const char *name) const
Definition: ConfigList.C:341

TopoManagerWrapper::pe_sortop_topo::sortdims
const int * sortdims
Definition: WorkDistrib.C:1979

WorkDistrib::contributeHostDeviceLDB
void contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3539

PatchMap::pack
void pack(char *buf, int size)
Definition: PatchMap.C:328

PatchMap
Definition: PatchMap.h:23

computeNonbondedMICType
Definition: ComputeMap.h:24

RIGID_NONE
#define RIGID_NONE
Definition: SimParameters.h:80

isOutputProcessor
int isOutputProcessor(int pe)
Definition: ParallelIOMgr.C:362

CompAtomExt::atomFixed
uint32 atomFixed
Definition: NamdTypes.h:162

Parameters::get_vdw_params
void get_vdw_params(Real *sigma, Real *epsilon, Real *sigma14, Real *epsilon14, Index index)
Definition: Parameters.h:568

WorkDistrib::doneSaveComputeMap
void doneSaveComputeMap(CkReductionMsg *)
Definition: WorkDistrib.C:430

ComputeMap::unpack
void unpack(MIStream *msg)
Definition: ComputeMap.C:68

Molecule::rigid_bond_length
Real rigid_bond_length(int atomnum) const
Definition: Molecule.h:1550

FullAtom::recipMass
double recipMass
Definition: NamdTypes.h:213

PatchMap::MaxOneOrTwoAway
Definition: PatchMap.h:47

int64
int64_t int64
Definition: common.h:39

TopoManagerWrapper::TopoManagerWrapper
TopoManagerWrapper()
Definition: WorkDistrib.C:1765

deviceCUDA
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23

PDB::get_all_positions
void get_all_positions(Vector *)
Definition: PDB.C:365

PatchMap::min_b
BigReal min_b(int pid) const
Definition: PatchMap.h:93

computeEwaldType
Definition: ComputeMap.h:61

WorkDistrib::WorkDistrib
WorkDistrib()
Definition: WorkDistrib.C:108

PatchID
int32 PatchID
Definition: NamdTypes.h:287

TopoManagerWrapper::pe_sortop_topo::pe_sortop_topo
pe_sortop_topo(TopoManagerWrapper &t, int *d)
Definition: WorkDistrib.C:1980

WaterModel
WaterModel
Definition: common.h:221

Node::molecule
Molecule * molecule
Definition: Node.h:179

TopoManagerWrapper::coords
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1958

computeExclsType
Definition: ComputeMap.h:25

WorkDistrib::enqueueWorkB3
void enqueueWorkB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3230

TRUE
#define TRUE
Definition: common.h:128

Compute::cid
const ComputeID cid
Definition: Compute.h:43

Lattice::origin
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278

SimParameters::LJPMEOn
Bool LJPMEOn
Definition: SimParameters.h:877

pe_sortop_coord_y::operator()
bool operator()(int a, int b) const
Definition: WorkDistrib.C:270

SimParameters::noPatchesOnOutputPEs
Bool noPatchesOnOutputPEs
Definition: SimParameters.h:250

TopoManagerWrapper::fixpe
int fixpe(int pe)
Definition: WorkDistrib.C:1762

ProxyMgr.h

TopoManagerWrapper::sortAndSplit
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1993

pe_sortop_coord_y::spos
ScaledPosition * spos
Definition: WorkDistrib.C:268

sortAtomsForPatches
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
Definition: SortAtoms.C:135

SimParameters::numAtomsPair
int numAtomsPair
Definition: SimParameters.h:1073

DeviceCUDA
Definition: DeviceCUDA.h:54

SimParameters::drudeOn
Bool drudeOn
Definition: SimParameters.h:620

SimParameters::numAtomsSelf2
int numAtomsSelf2
Definition: SimParameters.h:1071

BigReal
double BigReal
Definition: common.h:123

FullAtom::transform
Transform transform
Definition: NamdTypes.h:229

TopoManagerWrapper::c_dim
int c_dim
Definition: WorkDistrib.C:1759

patch_sortop_curve_c::operator()
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:2091

SimParameters.h

isWater
Definition: MShakeKernel.h:68

PDBVELINVFACTOR
#define PDBVELINVFACTOR
Definition: common.h:58

WorkDistrib::assignNodeToPatch
void assignNodeToPatch(void)
Definition: WorkDistrib.C:1456

Parameters.h

PatchMap::getPatchesInOctet
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
Definition: PatchMap.C:634

ComputeMap::newNode
NodeID newNode(ComputeID cid)
Definition: ComputeMap.h:118

nodesort::b_total
int b_total
Definition: WorkDistrib.C:1621