WorkDistrib.C

Go to the documentation of this file.
00001 
00007 /*****************************************************************************
00008  * $Source: /home/cvs/namd/cvsroot/namd2/src/WorkDistrib.C,v $
00009  * $Author: jim $
00010  * $Date: 2017/03/30 20:06:17 $
00011  * $Revision: 1.1291 $
00012  *****************************************************************************/
00013 
00020 #include <stdio.h>
00021  
00022 #include "InfoStream.h"
00023 #include "Communicate.h"
00024 #include "ProcessorPrivate.h"
00025 #include "BOCgroup.h"
00026 #include "WorkDistrib.decl.h"
00027 #include "WorkDistrib.h"
00028 #include "Lattice.h"
00029 #include "ComputeMsmMsa.h"  // needed for MsmMsaData definition
00030 #include "main.decl.h"
00031 #include "main.h"
00032 #include "Node.h"
00033 #include "PatchMgr.h"
00034 #include "PatchMap.inl"
00035 #include "NamdTypes.h"
00036 #include "PDB.h"
00037 #include "SimParameters.h"
00038 #include "Molecule.h"
00039 #include "NamdOneTools.h"
00040 #include "Compute.h"
00041 #include "ComputeMap.h"
00042 #include "RecBisection.h"
00043 #include "Random.h"
00044 #include "varsizemsg.h"
00045 #include "ProxyMgr.h"
00046 #include "Priorities.h"
00047 #include "SortAtoms.h"
00048 #include <algorithm>
00049 #include "TopoManager.h"
00050 #include "ComputePmeCUDAMgr.h"
00051 
00052 #include "DeviceCUDA.h"
00053 #ifdef NAMD_CUDA
00054 #ifdef WIN32
00055 #define __thread __declspec(thread)
00056 #endif
00057 extern __thread DeviceCUDA *deviceCUDA;
00058 #endif
00059 
00060 //#define DEBUGM
00061 #define MIN_DEBUG_LEVEL 2
00062 #include "Debug.h"
00063 #ifdef MEM_OPT_VERSION
00064 extern int isOutputProcessor(int); 
00065 #endif
00066 class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
00067 {
00068 public:
00069 
00070   int numNewNodes;
00071   int numNewNumPartitions;
00072   int *newNodes;
00073   char *newNumPartitions;
00074 
00075 //  VARSIZE_DECL(ComputeMapChangeMsg);
00076 };
00077 
00078 /*
00079 VARSIZE_MSG(ComputeMapChangeMsg,
00080   VARSIZE_ARRAY(newNodes);
00081 )
00082 */
00083 
00084 static int randtopo;
00085 
00086 static void build_ordering(void *) {
00087   WorkDistrib::buildNodeAwarePeOrdering();
00088 }
00089 
00090 void topo_getargs(char **argv) {
00091   randtopo = CmiGetArgFlag(argv, "+randtopo");
00092   if ( CkMyPe() >= CkNumPes() ) return;
00093   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
00094 }
00095 
00096 static int eventMachineProgress;
00097 
00098 //======================================================================
00099 // Public functions
00100 //----------------------------------------------------------------------
00101 WorkDistrib::WorkDistrib()
00102 {
00103   CkpvAccess(BOCclass_group).workDistrib = thisgroup;
00104   patchMapArrived = false;
00105   computeMapArrived = false;
00106 
00107 #if CMK_SMP
00108 #define MACHINE_PROGRESS
00109 #else
00110 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress);  CmiMachineProgressImpl(); }
00111   if ( CkMyNodeSize() > 1 ) NAMD_bug("CkMyNodeSize() > 1 for non-smp build");
00112   eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
00113 #endif
00114 }
00115 
00116 //----------------------------------------------------------------------
00117 WorkDistrib::~WorkDistrib(void)
00118 { }
00119 
00120 static int compare_bit_reversed(int a, int b) {
00121   int d = a ^ b;
00122   int c = 1;
00123   if ( d ) while ( ! (d & c) ) {
00124     c = c << 1;
00125   }
00126   return (a & c) - (b & c);
00127 }
00128 
00129 static bool less_than_bit_reversed(int a, int b) {
00130   int d = a ^ b;
00131   int c = 1;
00132   if ( d ) while ( ! (d & c) ) {
00133     c = c << 1;
00134   }
00135   return d && (b & c);
00136 }
00137 
00138 struct pe_sortop_bit_reversed {
00139   int *rankInPhysOfNode;
00140   pe_sortop_bit_reversed(int *r) : rankInPhysOfNode(r) {}
00141   inline bool operator() (int a, int b) const {
00142     int c = compare_bit_reversed(CmiRankOf(a),CmiRankOf(b));
00143     if ( c < 0 ) return true;
00144     if ( c > 0 ) return false;
00145     c = compare_bit_reversed(
00146         rankInPhysOfNode[CmiNodeOf(a)],rankInPhysOfNode[CmiNodeOf(b)]);
00147     if ( c < 0 ) return true;
00148     if ( c > 0 ) return false;
00149     c = compare_bit_reversed(CmiPhysicalNodeID(a),CmiPhysicalNodeID(b));
00150     return ( c < 0 );
00151   }
00152 };
00153 
00154 int WorkDistrib::peOrderingInit;
00155 int* WorkDistrib::peDiffuseOrdering;
00156 int* WorkDistrib::peDiffuseOrderingIndex;
00157 int* WorkDistrib::peCompactOrdering;
00158 int* WorkDistrib::peCompactOrderingIndex;
00159 
00160 #ifdef NAMD_CUDA
00161 extern void cuda_initialize();
00162 #endif
00163 
00164 void mic_initialize();
00165 
00166 void WorkDistrib::peOrderingReady() {
00167   //CkPrintf("WorkDistrib::peOrderingReady on %d\n", CkMyPe());
00168 #ifdef NAMD_CUDA
00169   cuda_initialize();
00170 #endif
00171 #ifdef NAMD_MIC
00172   mic_initialize();
00173 #endif
00174 }
00175 
00176 void WorkDistrib::buildNodeAwarePeOrdering() {
00177 
00178  CmiMemLock();
00179  if ( ! peOrderingInit ) {
00180   //CkPrintf("WorkDistrib::buildNodeAwarePeOrdering on pe %d\n", CkMyPe());
00181 
00182   const int numPhys = CmiNumPhysicalNodes();
00183   const int numNode = CmiNumNodes();
00184   const int numPe = CmiNumPes();
00185   ResizeArray<int> numNodeInPhys(numPhys);
00186   ResizeArray<int> rankInPhysOfNode(numNode);
00187 
00188   peDiffuseOrdering = new int[numPe];
00189   peDiffuseOrderingIndex = new int[numPe];
00190   peCompactOrdering = new int[numPe];
00191   peCompactOrderingIndex = new int[numPe];
00192 
00193   int k = 0;
00194   for ( int ph=0; ph<numPhys; ++ph ) {
00195     int *pes, npes;
00196     CmiGetPesOnPhysicalNode(ph, &pes, &npes);
00197     for ( int i=0; i<npes; ++i, ++k ) {
00198       peCompactOrdering[k] = pes[i];
00199     }
00200     numNodeInPhys[ph] = 0;
00201     for ( int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
00202       rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
00203       numNodeInPhys[ph] += 1;
00204     }
00205   }
00206 
00207   if ( randtopo && numPhys > 2 ) {
00208     if ( ! CkMyNode() ) {
00209       iout << iWARN << "RANDOMIZING PHYSICAL NODE ORDERING\n" << endi;
00210     }
00211     ResizeArray<int> randPhysOrder(numPhys);
00212     for ( int j=0; j<numPhys; ++j ) {
00213       randPhysOrder[j] = j;
00214     }
00215     Random(314159265).reorder(randPhysOrder.begin()+2, numPhys-2);
00216     for ( int j=0, k=0; j<numPhys; ++j ) {
00217       const int ph = randPhysOrder[j];
00218       int *pes, npes;
00219       CmiGetPesOnPhysicalNode(ph, &pes, &npes);
00220       for ( int i=0; i<npes; ++i, ++k ) {
00221         peCompactOrdering[k] = pes[i];
00222       }
00223     }
00224   }
00225 
00226   for ( int i=0; i<numPe; ++i ) {
00227     peDiffuseOrdering[i] = i;
00228   }
00229   std::sort(peDiffuseOrdering, peDiffuseOrdering+numPe,
00230     pe_sortop_bit_reversed(rankInPhysOfNode.begin()));
00231 
00232   for ( int i=0; i<numPe; ++i ) {
00233     peDiffuseOrderingIndex[peDiffuseOrdering[i]] = i;
00234     peCompactOrderingIndex[peCompactOrdering[i]] = i;
00235   }
00236 
00237   if ( 0 && CmiMyNode() == 0 ) for ( int i=0; i<numPe; ++i ) {
00238     CkPrintf("order %5d %5d %5d %5d %5d\n", i,
00239       peDiffuseOrdering[i],
00240       peDiffuseOrderingIndex[i],
00241       peCompactOrdering[i],
00242       peCompactOrderingIndex[i]);
00243   }
00244 
00245   peOrderingInit = 1;
00246  }
00247  CmiMemUnlock();
00248  peOrderingReady();
00249 
00250 }
00251 
00252 struct pe_sortop_coord_x {
00253   ScaledPosition *spos;
00254   pe_sortop_coord_x(ScaledPosition *s) : spos(s) {}
00255   inline bool operator() (int a, int b) const {
00256     return ( spos[a].x < spos[b].x );
00257   }
00258 };
00259 
00260 struct pe_sortop_coord_y {
00261   ScaledPosition *spos;
00262   pe_sortop_coord_y(ScaledPosition *s) : spos(s) {}
00263   inline bool operator() (int a, int b) const {
00264     return ( spos[a].y < spos[b].y );
00265   }
00266 };
00267 
00268 static void recursive_bisect_coord(
00269     int x_begin, int x_end, int y_begin, int y_end,
00270     int *pe_begin, ScaledPosition *coord,
00271     int *result, int ydim
00272   ) {
00273   int x_len = x_end - x_begin;
00274   int y_len = y_end - y_begin;
00275   if ( x_len == 1 && y_len == 1 ) {
00276     // done, now put this pe in the right place
00277     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
00278       coord[*pe_begin].x, coord[*pe_begin].y);
00279     result[x_begin*ydim + y_begin] = *pe_begin;
00280     return;
00281   }
00282   int *pe_end = pe_begin + x_len * y_len;
00283   if ( x_len >= y_len ) {
00284     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
00285     int x_split = x_begin + x_len / 2;
00286     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
00287     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
00288     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
00289     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
00290   } else {
00291     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
00292     int y_split = y_begin + y_len / 2;
00293     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
00294     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
00295     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
00296     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
00297   }
00298 }
00299 
00300 void WorkDistrib::sortPmePes(int *pmepes, int xdim, int ydim) {
00301   int numpes = CkNumPes();
00302   ResizeArray<int> count(numpes);
00303   ResizeArray<ScaledPosition> sumPos(numpes);
00304   ResizeArray<ScaledPosition> avgPos(numpes);
00305   for ( int i=0; i<numpes; ++i ) {
00306     count[i] = 0;
00307     sumPos[i] = 0;
00308     avgPos[i] = 0;
00309   }
00310   PatchMap *patchMap = PatchMap::Object();
00311   for ( int i=0, npatches=patchMap->numPatches(); i<npatches; ++i ) {
00312     int pe = patchMap->node(i);
00313     count[pe] += 1;
00314     sumPos[pe] += patchMap->center(i);
00315   }
00316   const int npmepes = xdim*ydim;
00317   ResizeArray<int> sortpes(npmepes);
00318   for ( int i=0; i<npmepes; ++i ) {
00319     int pe = sortpes[i] = pmepes[i];
00320     int cnt = count[pe];
00321     ScaledPosition sum = sumPos[pe];
00322     if ( cnt == 0 ) {
00323       // average over node
00324       int node = CkNodeOf(pe);
00325       int nsize = CkNodeSize(node);
00326       int pe2 = CkNodeFirst(node);
00327       for ( int j=0; j<nsize; ++j, ++pe2 )  {
00328         cnt += count[pe2];
00329         sum += sumPos[pe2];
00330       }
00331     }
00332     if ( cnt == 0 ) {
00333       // average over physical node
00334       int node = CmiPhysicalNodeID(pe);
00335       int nsize, *nlist;
00336       CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
00337       for ( int j=0; j<nsize; ++j )  {
00338         int pe2 = nlist[j];
00339         cnt += count[pe2];
00340         sum += sumPos[pe2];
00341       }
00342     }
00343     if ( cnt ) {
00344       avgPos[pe] = sum / cnt;
00345     }
00346   }
00347   recursive_bisect_coord(0, xdim, 0, ydim, sortpes.begin(), avgPos.begin(), pmepes, ydim);
00348 }
00349 
00350 
00351 //----------------------------------------------------------------------
00352 void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
00353 {
00354   saveComputeMapReturnEP = ep;
00355   saveComputeMapReturnChareID = chareID;
00356 
00357   ComputeMapChangeMsg *mapMsg = new (0, 0, 0) ComputeMapChangeMsg;
00358   CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
00359 
00360 /*
00361     // store the latest compute map
00362   SimParameters *simParams = Node::Object()->simParameters;
00363   if (simParams->storeComputeMap) {
00364     computeMap->saveComputeMap(simParams->computeMapFilename);
00365     CkPrintf("ComputeMap has been stored in %s.\n", simParams->computeMapFilename);
00366   }
00367 */
00368 }
00369 
00370 void WorkDistrib::recvComputeMapChanges(ComputeMapChangeMsg *msg) {
00371 
00372   delete msg;
00373 
00374   ComputeMap *computeMap = ComputeMap::Object();
00375 
00376   int i;
00377   int nc = computeMap->numComputes();
00378   
00379   if ( ! CkMyPe() ) { // send
00380     // CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00381     MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
00382     msg->put(nc);
00383     for (i=0; i<nc; i++) {
00384       int data = computeMap->newNode(i);
00385       msg->put(data);
00386     }
00387     msg->put(nc);
00388     for (i=0; i<nc; i++) {
00389       char data = computeMap->newNumPartitions(i);
00390       msg->put(data);
00391     }
00392     msg->put(nc);
00393     msg->end();
00394     delete msg;
00395     // CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00396   } else if ( ! CkMyRank() ) { // receive
00397     // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00398     MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
00399     msg->get(i);
00400     if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 1 failed\n");
00401     for (i=0; i<nc; i++) {
00402       int data;
00403       msg->get(data);
00404       computeMap->setNewNode(i,data);
00405     }
00406     msg->get(i);
00407     if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 2 failed\n");
00408     for (i=0; i<nc; i++) {
00409       char data;
00410       msg->get(data);
00411       computeMap->setNewNumPartitions(i,data);
00412     }
00413     msg->get(i);
00414     if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 3 failed\n");
00415     delete msg;
00416     // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00417   }
00418 
00419   CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
00420   contribute(0, NULL, CkReduction::random, cb);
00421 }
00422 
00423 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
00424   delete msg;
00425 
00426   CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
00427 }
00428 
00429 #ifdef MEM_OPT_VERSION
00430 //All basic info already exists for each atom inside the FullAtomList because
00431 //it is loaded when reading the binary per-atom file. This function will fill
00432 //the info regarding transform, nonbondedGroupSize etc. Refer to
00433 //WorkDistrib::createAtomLists
00434 void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
00435   PatchMap *patchMap = PatchMap::Object();
00436 
00437   ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
00438                           0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
00439                           0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));
00440 
00441     int n = alist.size();
00442     FullAtom *a = alist.begin();
00443 /* 
00444     //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei 
00445 //Modifications for alchemical fep
00446     Bool alchFepOn = params->alchFepOn;
00447     Bool alchThermIntOn = params->alchThermIntOn;
00448 //fepe
00449     Bool lesOn = params->lesOn;
00450     Bool pairInteractionOn = params->pairInteractionOn;
00451 
00452     Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
00453 */
00454     SimParameters *params = Node::Object()->simParameters;
00455     const Lattice lattice = params->lattice;
00456     Transform mother_transform;
00457     for(int j=0; j < n; j++)
00458     {
00459       int aid = a[j].id;
00460       a[j].nonbondedGroupSize = 0;  // must be set based on coordinates
00461       
00462       // a[j].fixedPosition = a[j].position;  ParallelIOMgr stores ref coord here.
00463 
00464       if ( a[j].migrationGroupSize ) {
00465        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
00466             Position pos = a[j].position;
00467             int mgs = a[j].migrationGroupSize;
00468             int c = 1;
00469 
00470             for ( int k=a[j].hydrogenGroupSize; k<mgs;
00471                                 k+=a[j+k].hydrogenGroupSize ) {
00472               pos += a[j+k].position;
00473               ++c;
00474             }
00475 
00476             pos *= 1./c;
00477             mother_transform = a[j].transform;  // should be 0,0,0
00478             pos = lattice.nearest(pos,center,&mother_transform);
00479             a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00480             a[j].transform = mother_transform;        
00481        } else {
00482         a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
00483         mother_transform = a[j].transform;
00484        }
00485       } else {
00486         a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00487         a[j].transform = mother_transform;
00488       }
00489 
00490 /* 
00491     //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei 
00492 //Modifications for alchemical fep
00493       if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
00494         a[j].partition = molecule->get_fep_type(aid);
00495       } 
00496       else {
00497         a[j].partition = 0;
00498       }
00499 //fepe
00500 */
00501       a[j].partition = 0;
00502 
00503       //set langevinParams based on atom status
00504       if(params->langevinOn) {
00505         BigReal bval = params->langevinDamping;
00506         if(!params->langevinHydrogen && 
00507            ((a[j].status & HydrogenAtom)!=0)) {
00508           bval = 0;
00509         }else if ((a[j].status & LonepairAtom)!=0) {
00510           bval = 0;
00511         }else if ((a[j].status & DrudeAtom)!=0) {
00512           bval = params->langevinDamping;
00513         }
00514         a[j].langevinParam = bval;
00515       }
00516 
00517     }
00518 
00519     int size, allfixed;
00520     for(int j=0; j < n; j+=size) {
00521       size = a[j].hydrogenGroupSize;
00522       if ( ! size ) {
00523         NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
00524       }
00525       allfixed = 1;
00526       for (int k = 0; k < size; ++k ) {
00527         allfixed = ( allfixed && (a[j+k].atomFixed) );
00528       }
00529       for (int k = 0; k < size; ++k ) {
00530         a[j+k].groupFixed = allfixed ? 1 : 0;
00531       }
00532     }
00533 
00534     if ( params->outputPatchDetails ) {
00535       int patchId = pid;
00536       int numAtomsInPatch = n;
00537       int numFixedAtomsInPatch = 0;
00538       int numAtomsInFixedGroupsInPatch = 0;
00539       for(int j=0; j < n; j++) {
00540         numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
00541         numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
00542       }
00543       iout << "PATCH_DETAILS:"
00544            << " on proc " << CkMyPe()
00545            << " patch " << patchId
00546            << " atoms " << numAtomsInPatch
00547            << " fixed_atoms " << numFixedAtomsInPatch
00548            << " fixed_groups " << numAtomsInFixedGroupsInPatch
00549            << "\n" << endi;
00550     }
00551 
00552 }
00553 
00554 void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
00555 {
00556   int i, j;             //  Loop counter
00557   BigReal kbT;          //  Boltzman constant * Temp
00558   BigReal randnum;      //  Random number from -6.0 to 6.0
00559   BigReal kbToverM;     //  sqrt(Kb*Temp/Mass)
00560   SimParameters *simParams = Node::Object()->simParameters;
00561   Bool lesOn = simParams->lesOn;
00562   Random vel_random(simParams->randomSeed);
00563   int lesReduceTemp = lesOn && simParams->lesReduceTemp;
00564   BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
00565 
00566   kbT = Temp*BOLTZMANN;
00567   int count=0;
00568   int totalAtoms = inAtoms.size();
00569   for(i=0;i<totalAtoms;i++)
00570   {
00571     Real atomMs=inAtoms[i].mass;
00572 
00573     if (atomMs <= 0.) {
00574       kbToverM = 0.;
00575     } else {
00576       /*
00577        * lesOn is not supported in MEM_OPT_VERSION, so the original assignment
00578        * is simplified. --Chao Mei
00579        */
00580       //kbToverM = sqrt(kbT *
00581         //( lesOn && structure->get_fep_type(aid) ? tempFactor : 1.0 ) /
00582         //                  atomMs );
00583       kbToverM = sqrt(kbT * 1.0 / atomMs);
00584     }
00585     for (randnum=0.0, j=0; j<12; j++)
00586     {
00587       randnum += vel_random.uniform();
00588     }
00589 
00590     randnum -= 6.0;
00591 
00592     inAtoms[i].velocity.x = randnum*kbToverM;
00593 
00594     for (randnum=0.0, j=0; j<12; j++)
00595     {
00596       randnum += vel_random.uniform();
00597     }
00598 
00599     randnum -= 6.0;
00600 
00601     inAtoms[i].velocity.y = randnum*kbToverM;
00602 
00603     for (randnum=0.0, j=0; j<12; j++)
00604     {
00605       randnum += vel_random.uniform();
00606     }
00607 
00608     randnum -= 6.0;
00609     
00610     inAtoms[i].velocity.z = randnum*kbToverM;
00611   }
00612 }
00613 #endif
00614 
00615 //----------------------------------------------------------------------
00616 // This should only be called on node 0.
00617 //----------------------------------------------------------------------
00618 FullAtomList *WorkDistrib::createAtomLists(const char *basename)
00619 {
00620   int i;
00621   StringList *current;  //  Pointer used to retrieve configuration items
00622   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00623   Node *node = nd.ckLocalBranch();
00624   PatchMap *patchMap = PatchMap::Object();
00625   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00626   PatchMgr *patchMgr = pm.ckLocalBranch();
00627   SimParameters *params = node->simParameters;
00628   Molecule *molecule = node->molecule;
00629   PDB *pdb = node->pdb;
00630 
00631   int numPatches = patchMap->numPatches();
00632   int numAtoms = pdb->num_atoms();
00633 
00634   Vector *positions = new Position[numAtoms];
00635   Vector *velocities = new Velocity[numAtoms];
00636 
00637  if ( basename ) {
00638   if ( params->binaryOutput ) {
00639     read_binary_file((std::string(basename)+".coor").c_str(), positions, numAtoms);
00640     read_binary_file((std::string(basename)+".vel").c_str(), velocities, numAtoms);
00641   } else {
00642     PDB coorpdb((std::string(basename)+".coor").c_str());
00643     if ( coorpdb.num_atoms() != numAtoms ) {
00644       NAMD_die("Incorrect atom count in coordinate pdb file");
00645     }
00646     coorpdb.get_all_positions(positions);
00647     velocities_from_PDB((std::string(basename)+".vel").c_str(), velocities, numAtoms);
00648   }
00649  } else {
00650   pdb->get_all_positions(positions);
00651 
00652   if ( params->initialTemp < 0.0 ) {
00653     Bool binvels=FALSE;
00654 
00655     //  Reading the veolcities from a PDB
00656     current = node->configList->find("velocities");
00657 
00658     if (current == NULL) {
00659       current = node->configList->find("binvelocities");
00660       binvels = TRUE;
00661     }
00662 
00663     if (!binvels) {
00664       velocities_from_PDB(current->data, velocities, numAtoms);
00665     }
00666     else {
00667       velocities_from_binfile(current->data, velocities, numAtoms);
00668     }
00669   }
00670   else {
00671     // Random velocities for a given temperature
00672     random_velocities(params->initialTemp, molecule, velocities, numAtoms);
00673   }
00674  }
00675 
00676   //  If COMMotion == no, remove center of mass motion
00677   if (!(params->comMove)) {
00678     remove_com_motion(velocities, molecule, numAtoms);
00679   }
00680 
00681   FullAtomList *atoms = new FullAtomList[numPatches];
00682 
00683   const Lattice lattice = params->lattice;
00684 
00685     if ( params->staticAtomAssignment ) {
00686       FullAtomList sortAtoms;
00687       for ( i=0; i < numAtoms; i++ ) {
00688         HydrogenGroupID &h = molecule->hydrogenGroup[i];
00689         if ( ! h.isMP ) continue;
00690         FullAtom a;
00691         a.id = i;
00692         a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00693         a.position = positions[h.atomID];
00694         sortAtoms.add(a);
00695       } 
00696       int *order = new int[sortAtoms.size()];
00697       for ( i=0; i < sortAtoms.size(); i++ ) {
00698         order[i] = i;
00699       } 
00700       int *breaks = new int[numPatches];
00701       sortAtomsForPatches(order,breaks,sortAtoms.begin(),
00702                         sortAtoms.size(),numAtoms,
00703                         patchMap->gridsize_c(),
00704                         patchMap->gridsize_b(),
00705                         patchMap->gridsize_a());
00706 
00707       i = 0;
00708       for ( int pid = 0; pid < numPatches; ++pid ) {
00709         int iend = breaks[pid];
00710         for ( ; i<iend; ++i ) {
00711           FullAtom &sa = sortAtoms[order[i]];
00712           int mgs = sa.migrationGroupSize;
00713 /*
00714 CkPrintf("patch %d (%d %d %d) has group %d atom %d size %d at %.2f %.2f %.2f\n",
00715           pid, patchMap->index_a(pid), patchMap->index_b(pid), 
00716           patchMap->index_c(pid), order[i], sa.id, mgs,
00717           sa.position.x, sa.position.y, sa.position.z);
00718 */
00719           for ( int k=0; k<mgs; ++k ) {
00720             HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
00721             int aid = h.atomID;
00722             FullAtom a;
00723             a.id = aid;
00724             a.position = positions[aid];
00725             a.velocity = velocities[aid];
00726             a.vdwType = molecule->atomvdwtype(aid);
00727             a.status = molecule->getAtoms()[aid].status;
00728             a.langevinParam = molecule->langevin_param(aid);
00729             a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
00730             a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00731             if(params->rigidBonds != RIGID_NONE) {
00732               a.rigidBondLength = molecule->rigid_bond_length(aid);
00733             }else{
00734               a.rigidBondLength = 0.0;
00735             }
00736             atoms[pid].add(a);
00737           }
00738         }
00739 CkPrintf("patch %d (%d %d %d) has %d atoms\n",
00740           pid, patchMap->index_a(pid), patchMap->index_b(pid), 
00741           patchMap->index_c(pid), atoms[pid].size());
00742       }
00743       delete [] order;
00744       delete [] breaks;
00745     } else
00746     {
00747     // split atoms into patches based on migration group and position
00748     int aid, pid=0;
00749     for(i=0; i < numAtoms; i++)
00750       {
00751       // Assign atoms to patches without splitting hydrogen groups.
00752       // We know that the hydrogenGroup array is sorted with group parents
00753       // listed first.  Thus, only change the pid if an atom is a group parent.
00754       HydrogenGroupID &h = molecule->hydrogenGroup[i];
00755       aid = h.atomID;
00756       FullAtom a;
00757       a.id = aid;
00758       a.position = positions[aid];
00759       a.velocity = velocities[aid];
00760       a.vdwType = molecule->atomvdwtype(aid);
00761       a.status = molecule->getAtoms()[aid].status;
00762       a.langevinParam = molecule->langevin_param(aid);
00763       a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
00764       a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00765       if(params->rigidBonds != RIGID_NONE) {
00766         a.rigidBondLength = molecule->rigid_bond_length(aid);
00767       }else{
00768         a.rigidBondLength = 0.0;
00769       }
00770       if (h.isMP) {
00771         pid = patchMap->assignToPatch(positions[aid],lattice);
00772       } // else: don't change pid
00773       atoms[pid].add(a);
00774       }
00775     }
00776 
00777   delete [] positions;
00778   delete [] velocities;
00779 
00780   for(i=0; i < numPatches; i++)
00781   {
00782     ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
00783                           0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
00784                           0.5*(patchMap->min_c(i)+patchMap->max_c(i)));
00785 
00786     int n = atoms[i].size();
00787     FullAtom *a = atoms[i].begin();
00788     int j;
00789 //Modifications for alchemical fep
00790     Bool alchOn = params->alchOn;
00791 //fepe
00792     Bool lesOn = params->lesOn;
00793   
00794     Bool pairInteractionOn = params->pairInteractionOn;
00795 
00796     Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
00797 
00798     Transform mother_transform;
00799     for(j=0; j < n; j++)
00800     {
00801       int aid = a[j].id;
00802 
00803       a[j].nonbondedGroupSize = 0;  // must be set based on coordinates
00804 
00805       a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
00806       a[j].fixedPosition = a[j].position;
00807 
00808       if ( a[j].migrationGroupSize ) {
00809        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
00810             Position pos = a[j].position;
00811             int mgs = a[j].migrationGroupSize;
00812             int c = 1;
00813             for ( int k=a[j].hydrogenGroupSize; k<mgs;
00814                                 k+=a[j+k].hydrogenGroupSize ) {
00815               pos += a[j+k].position;
00816               ++c;
00817             }
00818             pos *= 1./c;
00819             mother_transform = a[j].transform;  // should be 0,0,0
00820             pos = lattice.nearest(pos,center,&mother_transform);
00821             a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00822             a[j].transform = mother_transform;
00823        } else {
00824         a[j].position = lattice.nearest(
00825                 a[j].position, center, &(a[j].transform));
00826         mother_transform = a[j].transform;
00827        }
00828       } else {
00829         a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00830         a[j].transform = mother_transform;
00831       }
00832 
00833       a[j].mass = molecule->atommass(aid);
00834       // Using double precision division for reciprocal mass.
00835       a[j].recipMass = ( a[j].mass > 0 ? (1. / a[j].mass) : 0 );
00836       a[j].charge = molecule->atomcharge(aid);
00837 
00838 //Modifications for alchemical fep
00839       if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
00840         a[j].partition = molecule->get_fep_type(aid);
00841       } 
00842       else {
00843         a[j].partition = 0;
00844       }
00845 //fepe
00846 
00847     }
00848 
00849     int size, allfixed, k;
00850     for(j=0; j < n; j+=size) {
00851       size = a[j].hydrogenGroupSize;
00852       if ( ! size ) {
00853         NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
00854       }
00855       allfixed = 1;
00856       for ( k = 0; k < size; ++k ) {
00857         allfixed = ( allfixed && (a[j+k].atomFixed) );
00858       }
00859       for ( k = 0; k < size; ++k ) {
00860         a[j+k].groupFixed = allfixed ? 1 : 0;
00861       }
00862     }
00863 
00864     if ( params->outputPatchDetails ) {
00865       int patchId = i;
00866       int numAtomsInPatch = n;
00867       int numFixedAtomsInPatch = 0;
00868       int numAtomsInFixedGroupsInPatch = 0;
00869       for(j=0; j < n; j++) {
00870         numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
00871         numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
00872       }
00873       iout << "PATCH_DETAILS:"
00874            << " patch " << patchId
00875            << " atoms " << numAtomsInPatch
00876            << " fixed_atoms " << numFixedAtomsInPatch
00877            << " fixed_groups " << numAtomsInFixedGroupsInPatch
00878            << "\n" << endi;
00879     }
00880   }
00881 
00882   return atoms;
00883 
00884 }
00885 
00886 //----------------------------------------------------------------------
00887 // This should only be called on node 0.
00888 //----------------------------------------------------------------------
00889 void WorkDistrib::createHomePatches(void)
00890 {
00891   int i;
00892   PatchMap *patchMap = PatchMap::Object();
00893   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00894   PatchMgr *patchMgr = pm.ckLocalBranch();
00895 
00896   int numPatches = patchMap->numPatches();
00897 
00898   FullAtomList *atoms = createAtomLists();
00899     
00900 #ifdef MEM_OPT_VERSION
00901 /*  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00902   Node *node = nd.ckLocalBranch();
00903   node->molecule->delEachAtomSigs();
00904   node->molecule->delMassChargeSpace();
00905 */
00906 #endif
00907 
00908   int maxAtoms = -1;
00909   int maxPatch = -1;
00910   for(i=0; i < numPatches; i++) {
00911     int numAtoms = atoms[i].size();
00912     if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
00913   }
00914   iout << iINFO << "LARGEST PATCH (" << maxPatch <<
00915         ") HAS " << maxAtoms << " ATOMS\n" << endi;
00916 
00917   for(i=0; i < numPatches; i++)
00918   {
00919     if ( ! ( i % 100 ) )
00920     {
00921       DebugM(3,"Created " << i << " patches so far.\n");
00922     }
00923 
00924     patchMgr->createHomePatch(i,atoms[i]);
00925   }
00926 
00927   delete [] atoms;
00928 }
00929 
00930 void WorkDistrib::distributeHomePatches() {
00931   // ref BOC
00932   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00933   Node *node = nd.ckLocalBranch();
00934   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00935   PatchMgr *patchMgr = pm.ckLocalBranch();
00936   // ref singleton
00937   PatchMap *patchMap = PatchMap::Object();
00938 
00939   // Move patches to the proper node
00940   for(int i=0;i < patchMap->numPatches(); i++)
00941   {
00942     if (patchMap->node(i) != node->myid() )
00943     {
00944       DebugM(3,"patchMgr->movePatch("
00945         << i << "," << patchMap->node(i) << ")\n");
00946       patchMgr->movePatch(i,patchMap->node(i));
00947     }
00948   }
00949   patchMgr->sendMovePatches();
00950 }
00951 
00952 void WorkDistrib::reinitAtoms(const char *basename) {
00953 
00954   PatchMap *patchMap = PatchMap::Object();
00955   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00956   PatchMgr *patchMgr = pm.ckLocalBranch();
00957 
00958   int numPatches = patchMap->numPatches();
00959 
00960   FullAtomList *atoms = createAtomLists(basename);
00961 
00962   for(int i=0; i < numPatches; i++) {
00963     patchMgr->sendAtoms(i,atoms[i]);
00964   }
00965 
00966   delete [] atoms;
00967 
00968 }
00969 
00970 
00971 //----------------------------------------------------------------------
00972 
00973 class PatchMapMsg : public CMessage_PatchMapMsg {
00974   public:
00975     char *patchMapData;
00976 };
00977 
00978 void WorkDistrib::sendPatchMap(void)
00979 {
00980   if ( CkNumPes() == 1 ) {
00981     patchMapArrived = true;
00982     return;
00983   }
00984 
00985   //Automatically enable spanning tree
00986   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00987   Node *node = nd.ckLocalBranch();
00988   SimParameters *params = node->simParameters;
00989   if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
00990 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
00991       || CkNumPes() > CkNumNodes()
00992       ) && ( CkNumNodes() > 1
00993 #endif
00994     ) && params->isSendSpanningTreeUnset() )
00995     ProxyMgr::Object()->setSendSpanning();
00996 
00997 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
00998   if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
00999         && params->isRecvSpanningTreeUnset() )
01000     ProxyMgr::Object()->setRecvSpanning();
01001 #endif
01002 
01003   int size = PatchMap::Object()->packSize();
01004 
01005   PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;
01006 
01007   PatchMap::Object()->pack(mapMsg->patchMapData, size);
01008 
01009   CProxy_WorkDistrib workProxy(thisgroup);
01010   workProxy[0].savePatchMap(mapMsg);
01011 }
01012 
01013 // saveMaps() is called when the map message is received
01014 void WorkDistrib::savePatchMap(PatchMapMsg *msg)
01015 {
01016   // Use a resend to forward messages before processing.  Otherwise the
01017   // map distribution is slow on many CPUs.  We need to use a tree
01018   // rather than a broadcast because some implementations of broadcast
01019   // generate a copy of the message on the sender for each recipient.
01020   // This is because MPI doesn't allow re-use of an outstanding buffer.
01021 
01022   if ( CkMyRank() ) patchMapArrived = true;
01023 
01024   if ( patchMapArrived && CkMyPe() ) {
01025     PatchMap::Object()->unpack(msg->patchMapData);
01026 
01027     //Automatically enable spanning tree
01028     CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01029     Node *node = nd.ckLocalBranch();
01030     SimParameters *params = node->simParameters;
01031     if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
01032 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
01033         || CkNumPes() > CkNumNodes()
01034         ) && ( CkNumNodes() > 1
01035 #endif
01036       ) && params->isSendSpanningTreeUnset() )
01037       ProxyMgr::Object()->setSendSpanning();
01038 
01039 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
01040     if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
01041           && params->isRecvSpanningTreeUnset() )
01042       ProxyMgr::Object()->setRecvSpanning();
01043 #endif
01044   }
01045 
01046   if ( patchMapArrived ) {
01047     if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
01048       ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
01049     } else {
01050       delete msg;
01051     }
01052     return;
01053   }
01054 
01055   patchMapArrived = true;
01056 
01057   int self = CkMyNode();
01058   int range_begin = 0;
01059   int range_end = CkNumNodes();
01060   while ( self != range_begin ) {
01061     ++range_begin;
01062     int split = range_begin + ( range_end - range_begin ) / 2;
01063     if ( self < split ) { range_end = split; }
01064     else { range_begin = split; }
01065   }
01066   int send_near = self + 1;
01067   int send_far = send_near + ( range_end - send_near ) / 2;
01068 
01069   int pids[3];
01070   int npid = 0;
01071   if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
01072   if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
01073   pids[npid++] = CkMyPe();  // always send the message to ourselves
01074   CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
01075 }
01076 
01077 
01078 class ComputeMapMsg : public CMessage_ComputeMapMsg {
01079   public:
01080     int nComputes;
01081     ComputeMap::ComputeData *computeMapData;
01082 };
01083 
01084 void WorkDistrib::sendComputeMap(void)
01085 {
01086   if ( CkNumNodes() == 1 ) {
01087     computeMapArrived = true;
01088     ComputeMap::Object()->initPtrs();
01089     return;
01090   }
01091 
01092   int size = ComputeMap::Object()->numComputes();
01093 
01094   ComputeMapMsg *mapMsg = new (size, 0) ComputeMapMsg;
01095 
01096   mapMsg->nComputes = size;
01097   ComputeMap::Object()->pack(mapMsg->computeMapData);
01098 
01099   CProxy_WorkDistrib workProxy(thisgroup);
01100   workProxy[0].saveComputeMap(mapMsg);
01101 }
01102 
01103 // saveMaps() is called when the map message is received
01104 void WorkDistrib::saveComputeMap(ComputeMapMsg *msg)
01105 {
01106   // Use a resend to forward messages before processing.  Otherwise the
01107   // map distribution is slow on many CPUs.  We need to use a tree
01108   // rather than a broadcast because some implementations of broadcast
01109   // generate a copy of the message on the sender for each recipient.
01110   // This is because MPI doesn't allow re-use of an outstanding buffer.
01111 
01112   if ( CkMyRank() ) {
01113     NAMD_bug("WorkDistrib::saveComputeMap called on non-rank-zero pe");
01114   }
01115 
01116   if ( computeMapArrived && CkMyPe() ) {
01117     ComputeMap::Object()->unpack(msg->nComputes, msg->computeMapData);
01118   }
01119 
01120   if ( computeMapArrived ) {
01121     delete msg;
01122     ComputeMap::Object()->initPtrs();
01123     return;
01124   }
01125 
01126   computeMapArrived = true;
01127 
01128   int self = CkMyNode();
01129   int range_begin = 0;
01130   int range_end = CkNumNodes();
01131   while ( self != range_begin ) {
01132     ++range_begin;
01133     int split = range_begin + ( range_end - range_begin ) / 2;
01134     if ( self < split ) { range_end = split; }
01135     else { range_begin = split; }
01136   }
01137   int send_near = self + 1;
01138   int send_far = send_near + ( range_end - send_near ) / 2;
01139 
01140   int pids[3];
01141   int npid = 0;
01142   if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
01143   if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
01144   pids[npid++] = CkMyPe();  // always send the message to ourselves
01145   CProxy_WorkDistrib(thisgroup).saveComputeMap(msg,npid,pids);
01146 }
01147 
01148 
01149 //----------------------------------------------------------------------
01150 void WorkDistrib::patchMapInit(void)
01151 {
01152   PatchMap *patchMap = PatchMap::Object();
01153   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01154   Node *node = nd.ckLocalBranch();
01155   SimParameters *params = node->simParameters;
01156   Lattice lattice = params->lattice;
01157 
01158   BigReal patchSize = params->patchDimension;
01159 
01160 #ifndef MEM_OPT_VERSION
01161   const int totalAtoms = node->pdb->num_atoms();
01162 #else
01163   const int totalAtoms = node->molecule->numAtoms;
01164 #endif
01165 
01166   ScaledPosition xmin, xmax;
01167 
01168   double maxNumPatches = 1.e9;  // need to adjust fractional values
01169   if ( params->minAtomsPerPatch > 0 )
01170     maxNumPatches = totalAtoms / params->minAtomsPerPatch;
01171 
01172   DebugM(3,"Mapping patches\n");
01173   if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
01174     xmin = 0.;  xmax = 0.;
01175   }
01176   else if ( params->FMAOn || params->MSMOn || params->FMMOn ) {
01177   // Need to use full box for FMA to match NAMD 1.X results.
01178 #if 0
01179     node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
01180     node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
01181     node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
01182 #endif
01183     node->pdb->find_extremes(lattice);
01184     node->pdb->get_extremes(xmin, xmax);
01185 #if 0
01186     printf("+++ center=%.4f %.4f %.4f\n",
01187         lattice.origin().x, lattice.origin().y, lattice.origin().z);
01188     printf("+++ xmin=%.4f  xmax=%.4f\n", xmin.x, xmax.x);
01189     printf("+++ ymin=%.4f  ymax=%.4f\n", xmin.y, xmax.y);
01190     printf("+++ zmin=%.4f  zmax=%.4f\n", xmin.z, xmax.z);
01191 #endif
01192   // Otherwise, this allows a small number of stray atoms.
01193   }
01194   else {
01195 #if 0
01196     node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
01197     node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
01198     node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
01199 #endif
01200     node->pdb->find_extremes(lattice, 1.0);
01201     node->pdb->get_extremes(xmin, xmax);
01202     iout << iINFO << "ORIGINAL ATOMS MINMAX IS " << xmin << "  " << xmax << "\n" << endi;
01203     double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
01204     if ( frac < 0.9 ) { frac = 0.9; }
01205     node->pdb->find_extremes(lattice, frac);
01206     node->pdb->get_extremes(xmin, xmax);
01207     iout << iINFO << "ADJUSTED ATOMS MINMAX IS " << xmin << "  " << xmax << "\n" << endi;
01208   }
01209 
01210 #if 0
01211   BigReal origin_shift;
01212   origin_shift = lattice.a_r() * lattice.origin();
01213   xmin.x -= origin_shift;
01214   xmax.x -= origin_shift;
01215   origin_shift = lattice.b_r() * lattice.origin();
01216   xmin.y -= origin_shift;
01217   xmax.y -= origin_shift;
01218   origin_shift = lattice.c_r() * lattice.origin();
01219   xmin.z -= origin_shift;
01220   xmax.z -= origin_shift;
01221 #endif
01222 
01223   // SimParameters default is -1 for unset
01224   int twoAwayX = params->twoAwayX;
01225   int twoAwayY = params->twoAwayY;
01226   int twoAwayZ = params->twoAwayZ;
01227 
01228   // SASA implementation is not compatible with twoAway patches
01229   if (params->LCPOOn && patchSize < 32.4) {
01230     if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
01231       iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
01232     }
01233     twoAwayX = twoAwayY = twoAwayZ = 0;
01234   }
01235 
01236   // if you think you know what you're doing go right ahead
01237   if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
01238   if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
01239   if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
01240   if ( params->maxPatches > 0 ) {
01241       maxNumPatches = params->maxPatches;
01242       iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
01243                                 maxNumPatches << "\n" << endi;
01244   }
01245 
01246   int numpes = CkNumPes();
01247   SimParameters *simparam = Node::Object()->simParameters;
01248   if(simparam->simulateInitialMapping) {
01249     numpes = simparam->simulatedPEs;
01250     delete [] patchMap->nPatchesOnNode;
01251     patchMap->nPatchesOnNode = new int[numpes];
01252     memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));    
01253   }
01254 
01255 #if defined(NAMD_CUDA) || defined(NAMD_MIC)
01256   // for CUDA be sure there are more patches than pes
01257 
01258   int numPatches = patchMap->sizeGrid(
01259         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01260         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01261   if ( numPatches < numpes && twoAwayX < 0 ) {
01262     twoAwayX = 1;
01263     numPatches = patchMap->sizeGrid(
01264         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01265         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01266   }
01267   if ( numPatches < numpes && twoAwayY < 0 ) {
01268     twoAwayY = 1;
01269     numPatches = patchMap->sizeGrid(
01270         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01271         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01272   }
01273   if ( numPatches < numpes && twoAwayZ < 0 ) {
01274     twoAwayZ = 1;
01275     numPatches = patchMap->sizeGrid(
01276         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01277         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01278   }
01279   if ( numPatches < numpes ) {
01280     #if defined(NAMD_MIC)
01281     NAMD_die("MIC-enabled NAMD requires at least one patch per thread.");
01282     #endif
01283   }
01284   if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
01285     int exactFit = numPatches - numPatches % numpes;
01286     int newNumPatches = patchMap->sizeGrid(
01287         xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
01288         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01289     if ( newNumPatches == exactFit ) {
01290       iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
01291       maxNumPatches = exactFit;
01292     }
01293   }
01294 
01295   patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
01296         params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
01297         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01298 
01299 #else
01300 
01301   int availPes = numpes;
01302   if ( params->noPatchesOnZero && numpes > 1 ) {
01303       availPes -= 1;
01304       if(params->noPatchesOnOne && numpes > 2)
01305         availPes -= 1;
01306   }
01307 #ifdef MEM_OPT_VERSION
01308   if(params->noPatchesOnOutputPEs && numpes - params->numoutputprocs >2)
01309     {
01310       availPes -= params->numoutputprocs;
01311       if ( params->noPatchesOnZero && numpes > 1 && isOutputProcessor(0)){
01312         availPes++;
01313       }
01314       if ( params->noPatchesOnOne && numpes > 2 && isOutputProcessor(1)){
01315         availPes++;
01316       }
01317     }
01318 #endif
01319 
01320   int numPatches = patchMap->sizeGrid(
01321         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01322         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01323   if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
01324        ) && twoAwayZ < 0 ) {
01325     twoAwayZ = 0;
01326     numPatches = patchMap->sizeGrid(
01327         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01328         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01329   }
01330   if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
01331        ) && twoAwayY < 0 ) {
01332     twoAwayY = 0;
01333     numPatches = patchMap->sizeGrid(
01334         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01335         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01336   }
01337   if ( ( numPatches > availPes || numPatches > maxNumPatches
01338        ) && twoAwayX < 0 ) {
01339     twoAwayX = 0;
01340     numPatches = patchMap->sizeGrid(
01341         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01342         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01343   }
01344   if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
01345     int newNumPatches = patchMap->sizeGrid(
01346         xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
01347         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01348     if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
01349       iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
01350       maxNumPatches = availPes;
01351     }
01352   }
01353 
01354   patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
01355         params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
01356         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01357 
01358 #endif
01359 
01360 }
01361 
01362 
01363 //----------------------------------------------------------------------
01364 void WorkDistrib::assignNodeToPatch()
01365 {
01366   PatchMap *patchMap = PatchMap::Object();
01367   int nNodes = Node::Object()->numNodes();
01368   SimParameters *simparam = Node::Object()->simParameters;
01369   if(simparam->simulateInitialMapping) {
01370           nNodes = simparam->simulatedPEs;
01371   }
01372 
01373 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP 
01374   TopoManager tmgr;
01375   int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
01376   if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
01377     CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
01378   }
01379   else
01380 #endif
01381   assignPatchesSpaceFillingCurve();       
01382   
01383   int *nAtoms = new int[nNodes];
01384   int numAtoms=0;
01385   int i;
01386   for(i=0; i < nNodes; i++)
01387     nAtoms[i] = 0;
01388 
01389   for(i=0; i < patchMap->numPatches(); i++)
01390   {
01391     //    iout << iINFO << "Patch " << i << " has " 
01392     //   << patchMap->patch(i)->getNumAtoms() << " atoms and "
01393     //   << patchMap->patch(i)->getNumAtoms() * 
01394     //            patchMap->patch(i)->getNumAtoms() 
01395     //   << " pairs.\n" << endi;         
01396 #ifdef MEM_OPT_VERSION
01397       numAtoms += patchMap->numAtoms(i);
01398       nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);         
01399 #else
01400     if (patchMap->patch(i)) {
01401       numAtoms += patchMap->patch(i)->getNumAtoms();
01402       nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();     
01403     }
01404 #endif
01405   }
01406 
01407   if ( numAtoms != Node::Object()->molecule->numAtoms ) {
01408     for(i=0; i < nNodes; i++)
01409       iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
01410     iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
01411     NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
01412   }
01413 
01414   delete [] nAtoms;
01415  
01416   //  PatchMap::Object()->printPatchMap();
01417 }
01418 
01419 //----------------------------------------------------------------------
01420 // void WorkDistrib::assignPatchesSlices() 
01421 // {
01422 //   int pid; 
01423 //   int assignedNode = 0;
01424 //   PatchMap *patchMap = PatchMap::Object();
01425 //   Node *node = CLocalBranch(Node, CkpvAccess(BOCclass_group).node);
01426 
01427 //   int *numAtoms = new int[node->numNodes()];
01428 //   for (int i=0; i<node->numNodes(); i++) {
01429 //     numAtoms[i] = 0;
01430 //   }
01431 
01432 //   // Assign patch to node with least atoms assigned.
01433 //   for(pid=0; pid < patchMap->numPatches(); pid++) {
01434 //     assignedNode = 0;
01435 //     for (i=1; i < node->numNodes(); i++) {
01436 //       if (numAtoms[i] < numAtoms[assignedNode]) assignedNode = i;
01437 //     }
01438 //     patchMap->assignNode(pid, assignedNode);
01439 //     numAtoms[assignedNode] += patchMap->patch(pid)->getNumAtoms();
01440 
01441 //     /*
01442 //     iout << iINFO << "Patch (" << pid << ") has " 
01443 //       << patchMap->patch(pid)->getNumAtoms() 
01444 //       << " atoms:  Assigned to Node(" << assignedNode << ")\n" 
01445 //       << endi;
01446 //     */
01447 //   }
01448 
01449 //   delete[] numAtoms;
01450 // }
01451 
01452 //----------------------------------------------------------------------
01453 void WorkDistrib::assignPatchesToLowestLoadNode() 
01454 {
01455   int pid; 
01456   int assignedNode = 0;
01457   PatchMap *patchMap = PatchMap::Object();
01458   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01459   Node *node = nd.ckLocalBranch();
01460   SimParameters *simParams = node->simParameters;
01461   int ncpus = node->numNodes();
01462   if(simParams->simulateInitialMapping) {
01463           ncpus = simParams->simulatedPEs;
01464   }
01465 
01466   int *load = new int[ncpus];
01467   int *assignedNodes = new int[patchMap->numPatches()];
01468   for (int i=0; i<ncpus; i++) {
01469     load[i] = 0;
01470   }
01471   CkPrintf("assignPatchesToLowestLoadNode\n");
01472   int defaultNode = 0;
01473   if ( simParams->noPatchesOnZero && ncpus > 1 ){
01474     defaultNode = 1;
01475     if( simParams->noPatchesOnOne && ncpus > 2)
01476       defaultNode = 2;
01477   }
01478   // Assign patch to node with least atoms assigned.
01479   for(pid=0; pid < patchMap->numPatches(); pid++) {
01480     assignedNode = defaultNode;
01481     for (int i=assignedNode + 1; i < ncpus; i++) {
01482       if (load[i] < load[assignedNode]) assignedNode = i;
01483     }
01484     assignedNodes[pid] = assignedNode;
01485 #ifdef MEM_OPT_VERSION
01486     load[assignedNode] += patchMap->numAtoms(pid) + 1;
01487 #else
01488     load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
01489 #endif
01490   }
01491 
01492   delete[] load;
01493   sortNodesAndAssign(assignedNodes);
01494   delete[] assignedNodes;
01495 }
01496 
01497 //----------------------------------------------------------------------
01498 void WorkDistrib::assignPatchesBitReversal() 
01499 {
01500   int pid; 
01501   PatchMap *patchMap = PatchMap::Object();
01502   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01503   Node *node = nd.ckLocalBranch();
01504   SimParameters *simparam = node->simParameters;
01505 
01506   int ncpus = node->numNodes();
01507   if(simparam->simulateInitialMapping) {
01508           ncpus = simparam->simulatedPEs;
01509   }
01510   int npatches = patchMap->numPatches();
01511   if ( ncpus <= npatches )
01512     NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");
01513 
01514   SortableResizeArray<int> seq(ncpus);
01515   // avoid using node 0 (reverse of 0 is 0 so start at 1)
01516   for ( int i = 1; i < ncpus; ++i ) {
01517     seq[i-1] = peDiffuseOrdering[i];
01518   }
01519 
01520   // extract and sort patch locations
01521   sortNodesAndAssign(seq.begin());
01522   if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
01523 }
01524 
01525 //----------------------------------------------------------------------
01526 struct nodesort {
01527   int node;
01528   int a_total;
01529   int b_total;
01530   int c_total;
01531   int npatches;
01532   nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }
01533   int operator==(const nodesort &o) const {
01534     float a1 = ((float)a_total)/((float)npatches);
01535     float a2 = ((float)o.a_total)/((float)o.npatches);
01536     float b1 = ((float)b_total)/((float)npatches);
01537     float b2 = ((float)o.b_total)/((float)o.npatches);
01538     float c1 = ((float)c_total)/((float)npatches);
01539     float c2 = ((float)o.c_total)/((float)o.npatches);
01540     return ((a1 == a2) && (b1 == b2) && (c1 == c2));
01541   }
01542   int operator<(const nodesort &o) const {
01543     float a1 = ((float)a_total)/((float)npatches);
01544     float a2 = ((float)o.a_total)/((float)o.npatches);
01545     float b1 = ((float)b_total)/((float)npatches);
01546     float b2 = ((float)o.b_total)/((float)o.npatches);
01547     float c1 = ((float)c_total)/((float)npatches);
01548     float c2 = ((float)o.c_total)/((float)o.npatches);
01549     return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
01550                 ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
01551   }
01552 };
01553 
01554 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
01555   // if baseNodes is zero (default) then set both nodes and basenodes
01556   // if baseNodes is nonzero then this is a second call to set basenodes only
01557   int i, pid; 
01558   PatchMap *patchMap = PatchMap::Object();
01559   int npatches = patchMap->numPatches();
01560   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01561   Node *node = nd.ckLocalBranch();
01562   int nnodes = node->numNodes();
01563   SimParameters *simparam = node->simParameters;
01564   if(simparam->simulateInitialMapping) {
01565           nnodes = simparam->simulatedPEs;
01566   }
01567 
01568   ResizeArray<nodesort> allnodes(nnodes);
01569   for ( i=0; i < nnodes; ++i ) {
01570     allnodes[i].node = i;
01571   }
01572   for ( pid=0; pid<npatches; ++pid ) {
01573     // iout << pid << " " << assignedNode[pid] << "\n" << endi;
01574     allnodes[assignedNode[pid]].npatches++;
01575     allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
01576     allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
01577     allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
01578   }
01579   SortableResizeArray<nodesort> usednodes(nnodes);
01580   usednodes.resize(0);
01581   for ( i=0; i < nnodes; ++i ) {
01582     if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
01583   }
01584   usednodes.sort();
01585   int i2 = 0;
01586   for ( i=0; i < nnodes; ++i ) {
01587     int pe = peCompactOrdering[i];
01588     if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
01589   }
01590 
01591   for ( pid=0; pid<npatches; ++pid ) {
01592     // iout << pid << " " <<  allnodes[assignedNode[pid]].node << "\n" << endi;
01593     if ( ! baseNodes ) {
01594       patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);      
01595     }
01596     patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);    
01597   }
01598 }
01599 
01600 //----------------------------------------------------------------------
01601 void WorkDistrib::assignPatchesRoundRobin() 
01602 {
01603   int pid; 
01604   PatchMap *patchMap = PatchMap::Object();
01605   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01606   Node *node = nd.ckLocalBranch();
01607   SimParameters *simparam = node->simParameters;
01608   int ncpus = node->numNodes();
01609   if(simparam->simulateInitialMapping) {
01610           ncpus = simparam->simulatedPEs;
01611   }
01612   int *assignedNode = new int[patchMap->numPatches()];
01613 
01614   for(pid=0; pid < patchMap->numPatches(); pid++) {
01615     assignedNode[pid] = pid % ncpus;
01616   }
01617 
01618   sortNodesAndAssign(assignedNode);
01619   delete [] assignedNode;
01620 }
01621 
01622 //----------------------------------------------------------------------
01623 void WorkDistrib::assignPatchesRecursiveBisection() 
01624 {
01625   PatchMap *patchMap = PatchMap::Object();
01626   int *assignedNode = new int[patchMap->numPatches()];
01627   SimParameters *simParams = Node::Object()->simParameters;
01628   int numNodes = Node::Object()->numNodes();
01629   if(simParams->simulateInitialMapping) {
01630           numNodes = simParams->simulatedPEs;
01631   }
01632   
01633   int usedNodes = numNodes;
01634   int unusedNodes = 0;
01635   CkPrintf("assignPatchesRecursiveBisection\n");
01636   if ( simParams->noPatchesOnZero && numNodes > 1 ){
01637     usedNodes -= 1;
01638     if(simParams->noPatchesOnOne && numNodes > 2)
01639       usedNodes -= 1;
01640   }
01641   unusedNodes = numNodes - usedNodes;
01642   RecBisection recBisec(usedNodes,PatchMap::Object());
01643   if ( recBisec.partition(assignedNode) ) {
01644     if ( unusedNodes !=0 ) {
01645       for ( int i=0; i<patchMap->numPatches(); ++i ) {
01646         assignedNode[i] += unusedNodes;
01647       }
01648     }
01649     sortNodesAndAssign(assignedNode);
01650     delete [] assignedNode;
01651   } else {
01652     //free the array here since a same array will be allocated
01653     //in assignPatchesToLowestLoadNode function, thus reducting
01654     //temporary memory usage
01655     delete [] assignedNode; 
01656     
01657     iout << iWARN 
01658          << "WorkDistrib: Recursive bisection fails, "
01659          << "invoking space-filling curve algorithm\n";
01660     assignPatchesSpaceFillingCurve();
01661   }
01662 }
01663 
01664 // class to re-order dimensions in decreasing size
01665 struct TopoManagerWrapper {
01666   TopoManager tmgr;
01667   int a_dim, b_dim, c_dim, d_dim, e_dim;
01668   int a_rot, b_rot, c_rot, d_rot, e_rot;
01669   int a_mod, b_mod, c_mod, d_mod, e_mod;
01670   int fixpe(int pe) {  // compensate for lame fallback topology information
01671     return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
01672   }
01673   TopoManagerWrapper() {
01674 #if CMK_BLUEGENEQ
01675     int na=tmgr.getDimNA();
01676     int nb=tmgr.getDimNB();
01677     int nc=tmgr.getDimNC();
01678     int nd=tmgr.getDimND();
01679     int ne=tmgr.getDimNE();
01680 #else
01681     int na=tmgr.getDimNX();
01682     int nb=tmgr.getDimNY();
01683     int nc=tmgr.getDimNZ();
01684     int nd=1;
01685     int ne=1;
01686 #endif
01687     ResizeArray<int> a_flags(na);
01688     ResizeArray<int> b_flags(nb);
01689     ResizeArray<int> c_flags(nc);
01690     ResizeArray<int> d_flags(nd);
01691     ResizeArray<int> e_flags(ne);
01692     for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
01693     for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
01694     for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
01695     for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
01696     for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
01697     int npes = CkNumPes();
01698     for ( int pe=0; pe<npes; ++pe ) {
01699       int a,b,c,d,e,t;
01700 #if CMK_BLUEGENEQ
01701       tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
01702 #else
01703       tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
01704       d=0; e=0;
01705 #endif
01706       if ( a < 0 || a >= na ) NAMD_bug("inconsistent torus topology!");
01707       if ( b < 0 || b >= nb ) NAMD_bug("inconsistent torus topology!");
01708       if ( c < 0 || c >= nc ) NAMD_bug("inconsistent torus topology!");
01709       if ( d < 0 || d >= nd ) NAMD_bug("inconsistent torus topology!");
01710       if ( e < 0 || e >= ne ) NAMD_bug("inconsistent torus topology!");
01711       a_flags[a] = 1;
01712       b_flags[b] = 1;
01713       c_flags[c] = 1;
01714       d_flags[d] = 1;
01715       e_flags[e] = 1;
01716     }
01717     iout << iINFO << "TORUS A SIZE " << na << " USING";
01718     for ( int i=0; i<na; ++i ) { if ( a_flags[i] ) iout << " " << i; }
01719     iout << "\n" << endi;
01720     iout << iINFO << "TORUS B SIZE " << nb << " USING";
01721     for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
01722     iout << "\n" << endi;
01723     iout << iINFO << "TORUS C SIZE " << nc << " USING";
01724     for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
01725     iout << "\n" << endi;
01726 #if CMK_BLUEGENEQ
01727     iout << iINFO << "TORUS D SIZE " << nd << " USING";
01728     for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
01729     iout << "\n" << endi;
01730     iout << iINFO << "TORUS E SIZE " << ne << " USING";
01731     for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
01732     iout << "\n" << endi;
01733 #endif
01734     // find most compact representation of our subset
01735     a_rot = b_rot = c_rot = d_rot = e_rot = 0;
01736     a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
01737 #if CMK_BLUEGENEQ
01738     if ( tmgr.absA(na) == 0 ) // torus
01739 #else
01740     if ( tmgr.absX(na) == 0 ) // torus
01741 #endif
01742       for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
01743         if ( a_flags[i%na] ) gapstart = i+1;
01744         else if ( i - gapstart >= gaplen ) {
01745           a_rot = 2*na-i-1; gaplen = i - gapstart;
01746         }
01747       }
01748 #if CMK_BLUEGENEQ
01749     if ( tmgr.absB(nb) == 0 ) // torus
01750 #else
01751     if ( tmgr.absY(nb) == 0 ) // torus
01752 #endif
01753       for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
01754         if ( b_flags[i%nb] ) gapstart = i+1;
01755         else if ( i - gapstart >= gaplen ) {
01756           b_rot = 2*nb-i-1; gaplen = i - gapstart;
01757         }
01758       }
01759 #if CMK_BLUEGENEQ
01760     if ( tmgr.absC(nc) == 0 ) // torus
01761 #else
01762     if ( tmgr.absZ(nc) == 0 ) // torus
01763 #endif
01764       for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
01765         if ( c_flags[i%nc] ) gapstart = i+1;
01766         else if ( i - gapstart >= gaplen ) {
01767           c_rot = 2*nc-i-1; gaplen = i - gapstart;
01768         }
01769       }
01770 #if CMK_BLUEGENEQ
01771     if ( tmgr.absD(nd) == 0 ) // torus
01772       for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
01773         if ( d_flags[i%nd] ) gapstart = i+1;
01774         else if ( i - gapstart >= gaplen ) {
01775           d_rot = 2*nd-i-1; gaplen = i - gapstart;
01776         }
01777       }
01778     if ( tmgr.absE(ne) == 0 ) // torus
01779       for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
01780         if ( e_flags[i%ne] ) gapstart = i+1;
01781         else if ( i - gapstart >= gaplen ) {
01782           e_rot = 2*ne-i-1; gaplen = i - gapstart;
01783         }
01784       }
01785 #endif
01786     // order dimensions by length
01787     int a_min=na, a_max=-1;
01788     int b_min=nb, b_max=-1;
01789     int c_min=nc, c_max=-1;
01790     int d_min=nd, d_max=-1;
01791     int e_min=ne, e_max=-1;
01792     for ( int pe=0; pe<npes; ++pe ) {
01793       int a,b,c,d,e,t;
01794 #if CMK_BLUEGENEQ
01795       tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
01796 #else
01797       tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
01798       d=0; e=0;
01799 #endif
01800       a = (a+a_rot)%a_mod;
01801       b = (b+b_rot)%b_mod;
01802       c = (c+c_rot)%c_mod;
01803       d = (d+d_rot)%d_mod;
01804       e = (e+e_rot)%e_mod;
01805       if ( a < a_min ) a_min = a;
01806       if ( b < b_min ) b_min = b;
01807       if ( c < c_min ) c_min = c;
01808       if ( d < d_min ) d_min = d;
01809       if ( e < e_min ) e_min = e;
01810       if ( a > a_max ) a_max = a;
01811       if ( b > b_max ) b_max = b;
01812       if ( c > c_max ) c_max = c;
01813       if ( d > d_max ) d_max = d;
01814       if ( e > e_max ) e_max = e;
01815     }
01816     int a_len = a_max - a_min + 1;
01817     int b_len = b_max - b_min + 1;
01818     int c_len = c_max - c_min + 1;
01819     int d_len = d_max - d_min + 1;
01820     int e_len = e_max - e_min + 1;
01821     int lensort[5];
01822     lensort[0] = (a_len << 3) + 0;
01823     lensort[1] = (b_len << 3) + 1;
01824     lensort[2] = (c_len << 3) + 2;
01825     lensort[3] = (d_len << 3) + 3;
01826     lensort[4] = (e_len << 3) + 4;
01827     // CkPrintf("TopoManagerWrapper lensort before %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
01828     std::sort(lensort, lensort+5);
01829     // CkPrintf("TopoManagerWrapper lensort after %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
01830     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) a_dim = 4-i; }
01831     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) b_dim = 4-i; }
01832     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
01833     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) d_dim = 4-i; }
01834     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) e_dim = 4-i; }
01835 #if 0
01836     if ( a_len >= b_len && a_len >= c_len ) {
01837       a_dim = 0;
01838       if ( b_len >= c_len ) {
01839         b_dim = 1; c_dim = 2;
01840       } else {
01841         b_dim = 2; c_dim = 1;
01842       }
01843     } else if ( b_len >= a_len && b_len >= c_len ) {
01844       b_dim = 0;
01845       if ( a_len >= c_len ) {
01846         a_dim = 1; c_dim = 2;
01847       } else {
01848         a_dim = 2; c_dim = 1;
01849       }
01850     } else { // c is longest
01851       c_dim = 0;
01852       if ( a_len >= b_len ) {
01853         a_dim = 1; b_dim = 2;
01854       } else {
01855         a_dim = 2; b_dim = 1;
01856       }
01857     }
01858 #endif
01859     iout << iINFO << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
01860 #if CMK_BLUEGENEQ
01861     << " BY " << d_len << " BY " << e_len
01862 #endif
01863     << "\n" << endi;
01864     // CkPrintf("TopoManagerWrapper dims %d %d %d %d %d\n", a_dim, b_dim, c_dim, d_dim, e_dim);
01865   }
01866   void coords(int pe, int *crds) {
01867     int a,b,c,d,e,t;
01868 #if CMK_BLUEGENEQ
01869     tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
01870 #else
01871     tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
01872     d=0; e=0;
01873 #endif
01874     if ( a_dim < 3 ) crds[a_dim] = (a+a_rot)%a_mod;
01875     if ( b_dim < 3 ) crds[b_dim] = (b+b_rot)%b_mod;
01876     if ( c_dim < 3 ) crds[c_dim] = (c+c_rot)%c_mod;
01877     if ( d_dim < 3 ) crds[d_dim] = (d+d_rot)%d_mod;
01878     if ( e_dim < 3 ) crds[e_dim] = (e+e_rot)%e_mod;
01879   }
01880   int coord(int pe, int dim) {
01881     int crds[3];
01882     coords(pe,crds);
01883     return crds[dim];
01884   }
01885   struct pe_sortop_topo {
01886     TopoManagerWrapper &tmgr;
01887     const int *sortdims;
01888     pe_sortop_topo(TopoManagerWrapper &t, int *d) : tmgr(t), sortdims(d) {}
01889     bool operator() (int pe1, int pe2) const {
01890       int crds1[3], crds2[3];
01891       tmgr.coords(pe1,crds1);
01892       tmgr.coords(pe2,crds2);
01893       for ( int i=0; i<3; ++i ) {
01894         int d = sortdims[i];
01895         if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
01896       }
01897       const int *index = WorkDistrib::peCompactOrderingIndex;
01898       return ( index[pe1] < index[pe2] );
01899     }
01900   };
01901   int* sortAndSplit(int *node_begin, int *node_end, int splitdim) {
01902     if ( node_begin == node_end ) return node_begin;
01903     int tmins[3], tmaxs[3], tlens[3], sortdims[3];
01904     coords(*node_begin, tmins);
01905     coords(*node_begin, tmaxs);
01906     for ( int *peitr = node_begin; peitr != node_end; ++peitr ) {
01907       int tvals[3];
01908       coords(*peitr, tvals);
01909       for ( int i=0; i<3; ++i ) {
01910         if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
01911         if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
01912       }
01913     }
01914     for ( int i=0; i<3; ++i ) {
01915       tlens[i] = tmaxs[i] - tmins[i];
01916     }
01917     sortdims[0] = splitdim;
01918     for ( int i=0, j=0; i<3; ++i ) {
01919       if ( i != splitdim ) sortdims[++j] = i;
01920     }
01921     if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
01922       int tmp = sortdims[1];
01923       sortdims[1] = sortdims[2];
01924       sortdims[2] = tmp;
01925     }
01926     std::sort(node_begin,node_end,pe_sortop_topo(*this,sortdims));
01927     int *nodes = node_begin;
01928     int nnodes = node_end - node_begin;
01929     int i_split = 0;
01930 #if 0
01931     int c_split = coord(nodes[0],splitdim);
01932     for ( int i=0; i<nnodes; ++i ) {
01933       if ( coord(nodes[i],splitdim) != c_split ) {
01934         int mid = (nnodes+1)/2;
01935         if ( abs(i-mid) < abs(i_split-mid) ) {
01936           i_split = i;
01937           c_split = coord(i,splitdim);
01938         }
01939         else break;
01940       }
01941     }
01942 #endif
01943     for ( int i=0; i<nnodes; ++i ) {
01944       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
01945         int mid = (nnodes+1)/2;
01946         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
01947         else break;
01948       }
01949     }
01950     return ( node_begin + i_split );
01951   }
01952 };
01953 
01954 struct patch_sortop_curve_a {
01955   PatchMap *pmap;
01956   patch_sortop_curve_a(PatchMap *m) : pmap(m) {}
01957   inline bool operator() (int p1, int p2) const {
01958     int a1 = pmap->index_a(p1);
01959     int a2 = pmap->index_a(p2);
01960     if ( a1 < a2 ) return true;
01961     if ( a1 > a2 ) return false;
01962     int dir = ( (a1 & 1) ? -1 : 1 );
01963     int b1 = pmap->index_b(p1);
01964     int b2 = pmap->index_b(p2);
01965     if ( b1 * dir < b2 * dir ) return true;
01966     if ( b1 * dir > b2 * dir ) return false;
01967     dir *= ( (b1 & 1) ? -1 : 1 );
01968     int c1 = pmap->index_c(p1);
01969     int c2 = pmap->index_c(p2);
01970     if ( c1 * dir < c2 * dir ) return true;
01971     return false;
01972   }
01973 };
01974 
01975 struct patch_sortop_curve_b {
01976   PatchMap *pmap;
01977   patch_sortop_curve_b(PatchMap *m) : pmap(m) {}
01978   inline bool operator() (int p1, int p2) const {
01979     int a1 = pmap->index_b(p1);
01980     int a2 = pmap->index_b(p2);
01981     if ( a1 < a2 ) return true;
01982     if ( a1 > a2 ) return false;
01983     int dir = ( (a1 & 1) ? -1 : 1 );
01984     int b1 = pmap->index_a(p1);
01985     int b2 = pmap->index_a(p2);
01986     if ( b1 * dir < b2 * dir ) return true;
01987     if ( b1 * dir > b2 * dir ) return false;
01988     dir *= ( (b1 & 1) ? -1 : 1 );
01989     int c1 = pmap->index_c(p1);
01990     int c2 = pmap->index_c(p2);
01991     if ( c1 * dir < c2 * dir ) return true;
01992     return false;
01993   }
01994 };
01995 
01996 struct patch_sortop_curve_c {
01997   PatchMap *pmap;
01998   patch_sortop_curve_c(PatchMap *m) : pmap(m) {}
01999   inline bool operator() (int p1, int p2) const {
02000     int a1 = pmap->index_c(p1);
02001     int a2 = pmap->index_c(p2);
02002     if ( a1 < a2 ) return true;
02003     if ( a1 > a2 ) return false;
02004     int dir = ( (a1 & 1) ? -1 : 1 );
02005     int b1 = pmap->index_a(p1);
02006     int b2 = pmap->index_a(p2);
02007     if ( b1 * dir < b2 * dir ) return true;
02008     if ( b1 * dir > b2 * dir ) return false;
02009     dir *= ( (b1 & 1) ? -1 : 1 );
02010     int c1 = pmap->index_b(p1);
02011     int c2 = pmap->index_b(p2);
02012     if ( c1 * dir < c2 * dir ) return true;
02013     return false;
02014   }
02015 };
02016 
02017 static void recursive_bisect_with_curve(
02018   int *patch_begin, int *patch_end,
02019   int *node_begin, int *node_end,
02020   double *patchLoads,
02021   double *sortedLoads,
02022   int *assignedNode,
02023   TopoManagerWrapper &tmgr
02024   ) {
02025 
02026   SimParameters *simParams = Node::Object()->simParameters;
02027   PatchMap *patchMap = PatchMap::Object();
02028   int *patches = patch_begin;
02029   int npatches = patch_end - patch_begin;
02030   int *nodes = node_begin;
02031   int nnodes = node_end - node_begin;
02032 
02033   // assign patch loads
02034   double totalRawLoad = 0;
02035   for ( int i=0; i<npatches; ++i ) {
02036     int pid=patches[i];
02037 #ifdef MEM_OPT_VERSION
02038     double load = patchMap->numAtoms(pid) + 10;      
02039 #else
02040     double load = patchMap->patch(pid)->getNumAtoms() + 10;
02041 #endif
02042     patchLoads[pid] = load;
02043     sortedLoads[i] = load;
02044     totalRawLoad += load;
02045   }
02046   std::sort(sortedLoads,sortedLoads+npatches);
02047 
02048   // limit maxPatchLoad to adjusted average load per node
02049   double sumLoad = 0;
02050   double maxPatchLoad = 1;
02051   for ( int i=0; i<npatches; ++i ) {
02052     double load = sortedLoads[i];
02053     double total = sumLoad + (npatches-i) * load;
02054     if ( nnodes * load > total ) break;
02055     sumLoad += load;
02056     maxPatchLoad = load;
02057   }
02058   double totalLoad = 0;
02059   for ( int i=0; i<npatches; ++i ) {
02060     int pid=patches[i];
02061     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
02062     totalLoad += patchLoads[pid];
02063   }
02064   if ( nnodes * maxPatchLoad > totalLoad )
02065     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
02066 
02067   int a_len, b_len, c_len;
02068   int a_min, b_min, c_min;
02069   { // find dimensions
02070     a_min = patchMap->index_a(patches[0]);
02071     b_min = patchMap->index_b(patches[0]);
02072     c_min = patchMap->index_c(patches[0]);
02073     int a_max = a_min;
02074     int b_max = b_min;
02075     int c_max = c_min;
02076     for ( int i=1; i<npatches; ++i ) {
02077       int a = patchMap->index_a(patches[i]);
02078       int b = patchMap->index_b(patches[i]);
02079       int c = patchMap->index_c(patches[i]);
02080       if ( a < a_min ) a_min = a;
02081       if ( b < b_min ) b_min = b;
02082       if ( c < c_min ) c_min = c;
02083       if ( a > a_max ) a_max = a;
02084       if ( b > b_max ) b_max = b;
02085       if ( c > c_max ) c_max = c;
02086     }
02087     a_len = a_max - a_min;
02088     b_len = b_max - b_min;
02089     c_len = c_max - c_min;
02090   }
02091 
02092   int *node_split = node_begin;
02093 
02094   if ( simParams->disableTopology ) ; else
02095   if ( a_len >= b_len && a_len >= c_len ) {
02096     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
02097   } else if ( b_len >= a_len && b_len >= c_len ) {
02098     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
02099   } else if ( c_len >= a_len && c_len >= b_len ) {
02100     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
02101   }
02102 
02103   if ( node_split == node_begin ) {  // unable to split torus
02104     // make sure physical nodes are together
02105     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
02106     // find physical node boundary to split on
02107     int i_split = 0;
02108     for ( int i=0; i<nnodes; ++i ) {
02109       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
02110         int mid = (nnodes+1)/2;
02111         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
02112         else break;
02113       }
02114     }
02115     node_split = node_begin + i_split;
02116   }
02117 
02118   if ( node_split == node_begin ) {
02119     if ( simParams->verboseTopology ) {
02120       int crds[3];
02121       tmgr.coords(*node_begin, crds);
02122       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
02123                CmiPhysicalNodeID(*node_begin), *node_begin,
02124                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
02125                a_min, b_min, c_min, npatches,
02126                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
02127     }
02128 
02129     // final sort along a to minimize pme message count
02130     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02131 
02132     // walk through patches in sorted order
02133     int *node = node_begin;
02134     sumLoad = 0;
02135     for ( int i=0; i < npatches; ++i ) {
02136       int pid = patches[i];
02137       assignedNode[pid] = *node;
02138       sumLoad += patchLoads[pid];
02139       double targetLoad = totalLoad *
02140         ((double)(node-node_begin+1) / (double)nnodes);
02141       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02142                 i, *node,
02143                 patchMap->index_a(pid),
02144                 patchMap->index_b(pid),
02145                 patchMap->index_c(pid),
02146                 sumLoad, targetLoad);
02147       double extra = ( i+1 < npatches ? 0.5 * patchLoads[patches[i+1]] : 0 );
02148       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02149     }
02150 
02151     return;
02152   }
02153 
02154   if ( a_len >= b_len && a_len >= c_len ) {
02155     if ( 0 ) CkPrintf("sort a\n");
02156     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02157   } else if ( b_len >= a_len && b_len >= c_len ) {
02158     if ( 0 ) CkPrintf("sort b\n");
02159     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
02160   } else if ( c_len >= a_len && c_len >= b_len ) {
02161     if ( 0 ) CkPrintf("sort c\n");
02162     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
02163   }
02164 
02165   int *patch_split;
02166   { // walk through patches in sorted order
02167     int *node = node_begin;
02168     sumLoad = 0;
02169     for ( patch_split = patch_begin;
02170           patch_split != patch_end && node != node_split;
02171           ++patch_split ) {
02172       sumLoad += patchLoads[*patch_split];
02173       double targetLoad = totalLoad *
02174         ((double)(node-node_begin+1) / (double)nnodes);
02175       if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02176                 patch_split - patch_begin, *node,
02177                 patchMap->index_a(*patch_split),
02178                 patchMap->index_b(*patch_split),
02179                 patchMap->index_c(*patch_split),
02180                 sumLoad, targetLoad);
02181       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
02182       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02183     }
02184     double targetLoad = totalLoad *
02185       ((double)(node_split-node_begin) / (double)nnodes);
02186     if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
02187               node_split-node_begin, nnodes,
02188               patch_split-patch_begin, npatches,
02189               sumLoad, targetLoad);
02190   }
02191 
02192   // recurse
02193   recursive_bisect_with_curve(
02194     patch_begin, patch_split, node_begin, node_split,
02195     patchLoads, sortedLoads, assignedNode, tmgr);
02196   recursive_bisect_with_curve(
02197     patch_split, patch_end, node_split, node_end,
02198     patchLoads, sortedLoads, assignedNode, tmgr);
02199 }
02200 
02201 //----------------------------------------------------------------------
02202 void WorkDistrib::assignPatchesSpaceFillingCurve() 
02203 {
02204   TopoManagerWrapper tmgr;
02205   PatchMap *patchMap = PatchMap::Object();
02206   const int numPatches = patchMap->numPatches();
02207   int *assignedNode = new int[numPatches];
02208   ResizeArray<double> patchLoads(numPatches);
02209   SortableResizeArray<double> sortedLoads(numPatches);
02210   int numNodes = Node::Object()->numNodes();
02211   SimParameters *simParams = Node::Object()->simParameters;
02212   if(simParams->simulateInitialMapping) {
02213           NAMD_die("simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
02214           numNodes = simParams->simulatedPEs;
02215   }
02216 
02217   ResizeArray<int> patchOrdering(numPatches);
02218   for ( int i=0; i<numPatches; ++i ) {
02219     patchOrdering[i] = i;
02220   }
02221 
02222   ResizeArray<int> nodeOrdering(numNodes);
02223   nodeOrdering.resize(0);
02224   for ( int i=0; i<numNodes; ++i ) {
02225     int pe = peDiffuseOrdering[(i+1)%numNodes];  // avoid 0 if possible
02226     if ( simParams->noPatchesOnZero && numNodes > 1 ) {
02227       if ( pe == 0 ) continue;
02228       if(simParams->noPatchesOnOne && numNodes > 2) {
02229         if ( pe == 1 ) continue;
02230       }
02231     }  
02232 #ifdef MEM_OPT_VERSION
02233     if(simParams->noPatchesOnOutputPEs && numNodes-simParams->numoutputprocs >2) {
02234       if ( isOutputProcessor(pe) ) continue;
02235     }
02236 #endif
02237     nodeOrdering.add(pe);
02238     if ( 0 ) CkPrintf("using pe %5d\n", pe);
02239   }
02240 
02241   int *node_begin = nodeOrdering.begin();
02242   int *node_end = nodeOrdering.end();
02243   if ( nodeOrdering.size() > numPatches ) {
02244     node_end = node_begin + numPatches;
02245   }
02246   std::sort(node_begin, node_end, pe_sortop_compact());
02247 
02248   int *basenode_begin = node_begin;
02249   int *basenode_end = node_end;
02250   if ( nodeOrdering.size() > 2*numPatches ) {
02251     basenode_begin = node_end;
02252     basenode_end = basenode_begin + numPatches;
02253     std::sort(basenode_begin, basenode_end, pe_sortop_compact());
02254   }
02255 
02256   if ( simParams->disableTopology ) {
02257     iout << iWARN << "IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << endi;
02258   }
02259 
02260   recursive_bisect_with_curve(
02261     patchOrdering.begin(), patchOrdering.end(),
02262     node_begin, node_end,
02263     patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
02264 
02265   std::sort(node_begin, node_end, pe_sortop_compact());
02266 
02267   int samenodecount = 0;
02268 
02269   for ( int pid=0; pid<numPatches; ++pid ) {
02270     int node = assignedNode[pid];
02271     patchMap->assignNode(pid, node);
02272     int nodeidx = std::lower_bound(node_begin, node_end, node,
02273                                    pe_sortop_compact()) - node_begin;
02274     int basenode = basenode_begin[nodeidx];
02275     patchMap->assignBaseNode(pid, basenode);
02276     if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
02277   }
02278 
02279   iout << iINFO << "Placed " << (samenodecount*100./numPatches) << "% of base nodes on same physical node as patch\n" << endi;
02280 
02281   delete [] assignedNode; 
02282 }
02283 
02284 //----------------------------------------------------------------------
02285 void WorkDistrib::mapComputes(void)
02286 {
02287   PatchMap *patchMap = PatchMap::Object();
02288   ComputeMap *computeMap = ComputeMap::Object();
02289   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02290   Node *node = nd.ckLocalBranch();
02291 
02292   DebugM(3,"Mapping computes\n");
02293 
02294   computeMap->allocateCids();
02295 
02296   // Handle full electrostatics
02297   if ( node->simParameters->fullDirectOn )
02298     mapComputeHomePatches(computeFullDirectType);
02299   if ( node->simParameters->FMAOn )
02300 #ifdef DPMTA
02301     mapComputeHomePatches(computeDPMTAType);
02302 #else
02303     NAMD_die("This binary does not include DPMTA (FMA).");
02304 #endif
02305   if ( node->simParameters->PMEOn ) {
02306 #ifdef DPME
02307     if ( node->simParameters->useDPME )
02308       mapComputeHomePatches(computeDPMEType);
02309     else {
02310       if (node->simParameters->useOptPME) {
02311         mapComputeHomePatches(optPmeType);
02312         if ( node->simParameters->pressureProfileEwaldOn )
02313           mapComputeHomePatches(computeEwaldType);
02314       }
02315       else {
02316         mapComputeHomePatches(computePmeType);
02317         if ( node->simParameters->pressureProfileEwaldOn )
02318           mapComputeHomePatches(computeEwaldType);
02319       }
02320     }
02321 #else
02322     if (node->simParameters->useOptPME) {
02323       mapComputeHomePatches(optPmeType);
02324       if ( node->simParameters->pressureProfileEwaldOn )
02325         mapComputeHomePatches(computeEwaldType);
02326     }
02327     else {      
02328 #ifdef NAMD_CUDA
02329       if (node->simParameters->usePMECUDA) {
02330         mapComputePatch(computePmeCUDAType);
02331       } else 
02332 #endif
02333       {
02334         mapComputePatch(computePmeType);
02335       }
02336       if ( node->simParameters->pressureProfileEwaldOn )
02337         mapComputeHomePatches(computeEwaldType);
02338     }
02339 #endif
02340   }
02341 
02342   if ( node->simParameters->globalForcesOn ) {
02343     DebugM(2,"adding ComputeGlobal\n");
02344     mapComputeHomePatches(computeGlobalType);
02345   }
02346 
02347   if ( node->simParameters->extForcesOn )
02348     mapComputeHomePatches(computeExtType);
02349 
02350   if ( node->simParameters->qmForcesOn )
02351     mapComputeHomePatches(computeQMType);
02352 
02353   if ( node->simParameters->GBISserOn )
02354     mapComputeHomePatches(computeGBISserType);
02355 
02356   if ( node->simParameters->MsmSerialOn )
02357     mapComputeHomePatches(computeMsmSerialType);
02358 #ifdef CHARM_HAS_MSA
02359   else if ( node->simParameters->MSMOn )
02360     mapComputeHomePatches(computeMsmMsaType);
02361 #else
02362   else if ( node->simParameters->MSMOn )
02363     mapComputeHomePatches(computeMsmType);
02364 #endif
02365 
02366   if ( node->simParameters->FMMOn )
02367     mapComputeHomePatches(computeFmmType);
02368 
02369 #ifdef NAMD_CUDA
02370 #ifdef BONDED_CUDA
02371   if (node->simParameters->bondedCUDA) {
02372     mapComputeNode(computeBondedCUDAType);
02373   }
02374 #endif
02375 #endif
02376 
02377 #ifdef NAMD_CUDA
02378   if (node->simParameters->useCUDA2) {
02379     mapComputeNode(computeNonbondedCUDA2Type);
02380   } else {
02381     mapComputeNode(computeNonbondedCUDAType);
02382   }
02383   mapComputeHomeTuples(computeExclsType);
02384   mapComputePatch(computeSelfExclsType);
02385 #endif
02386 
02387 #ifdef NAMD_MIC
02388   mapComputeNode(computeNonbondedMICType);
02389 #endif
02390 
02391   mapComputeNonbonded();
02392 
02393   if ( node->simParameters->LCPOOn ) {
02394     mapComputeLCPO();
02395   }
02396 
02397   // If we're doing true pair interactions, no need for bonded terms.
02398   // But if we're doing within-group interactions, we do need them.
02399   if ( !node->simParameters->pairInteractionOn || 
02400       node->simParameters->pairInteractionSelf) { 
02401     mapComputeHomeTuples(computeBondsType);
02402     mapComputeHomeTuples(computeAnglesType);
02403     mapComputeHomeTuples(computeDihedralsType);
02404     mapComputeHomeTuples(computeImpropersType);
02405     mapComputeHomeTuples(computeCrosstermsType);
02406     mapComputePatch(computeSelfBondsType);
02407     mapComputePatch(computeSelfAnglesType);
02408     mapComputePatch(computeSelfDihedralsType);
02409     mapComputePatch(computeSelfImpropersType);
02410     mapComputePatch(computeSelfCrosstermsType);
02411   }
02412 
02413   if ( node->simParameters->goGroPair ) {
02414       // JLai
02415       mapComputeHomeTuples(computeGromacsPairType);
02416       mapComputePatch(computeSelfGromacsPairType);
02417     // End of JLai
02418   }
02419 
02420   if ( node->simParameters->drudeOn ) {
02421     mapComputeHomeTuples(computeTholeType);
02422     mapComputePatch(computeSelfTholeType);
02423     mapComputeHomeTuples(computeAnisoType);
02424     mapComputePatch(computeSelfAnisoType);
02425   }
02426 
02427   if ( node->simParameters->eFieldOn )
02428     mapComputePatch(computeEFieldType);
02429   /* BEGIN gf */
02430   if ( node->simParameters->mgridforceOn )
02431     mapComputePatch(computeGridForceType);
02432   /* END gf */
02433   if ( node->simParameters->stirOn )
02434     mapComputePatch(computeStirType);
02435   if ( node->simParameters->sphericalBCOn )
02436     mapComputePatch(computeSphericalBCType);
02437   if ( node->simParameters->cylindricalBCOn )
02438     mapComputePatch(computeCylindricalBCType);
02439   if ( node->simParameters->tclBCOn ) {
02440     mapComputeHomePatches(computeTclBCType);
02441   }
02442   if ( node->simParameters->constraintsOn )
02443     mapComputePatch(computeRestraintsType);
02444   if ( node->simParameters->consForceOn )
02445     mapComputePatch(computeConsForceType);
02446   if ( node->simParameters->consTorqueOn )
02447     mapComputePatch(computeConsTorqueType);
02448 
02449     // store the latest compute map
02450   SimParameters *simParams = Node::Object()->simParameters;
02451   if (simParams->storeComputeMap) {
02452     computeMap->saveComputeMap(simParams->computeMapFilename);
02453   }
02454     // override mapping decision
02455   if (simParams->loadComputeMap) {
02456     computeMap->loadComputeMap(simParams->computeMapFilename);
02457     CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
02458   }
02459 }
02460 
02461 //----------------------------------------------------------------------
02462 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
02463 {
02464   PatchMap *patchMap = PatchMap::Object();
02465   ComputeMap *computeMap = ComputeMap::Object();
02466   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02467   Node *node = nd.ckLocalBranch();
02468 
02469   int numNodes = node->numNodes();
02470   SimParameters *simparam = node->simParameters;
02471   if(simparam->simulateInitialMapping) {
02472           numNodes = simparam->simulatedPEs;
02473   }
02474 
02475   char *isBaseNode = new char[numNodes];
02476   memset(isBaseNode,0,numNodes*sizeof(char));
02477 
02478   int numPatches = patchMap->numPatches();
02479   for(int j=0; j<numPatches; j++) {
02480     isBaseNode[patchMap->basenode(j)] = 1;
02481   }
02482 
02483   for(int i=0; i<numNodes; i++) {
02484     if ( isBaseNode[i] ) {
02485       computeMap->storeCompute(i,0,type);
02486     }
02487   }
02488 
02489   delete [] isBaseNode;
02490 }
02491 
02492 //----------------------------------------------------------------------
02493 void WorkDistrib::mapComputeHomePatches(ComputeType type)
02494 {
02495   PatchMap *patchMap = PatchMap::Object();
02496   ComputeMap *computeMap = ComputeMap::Object();
02497   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02498   Node *node = nd.ckLocalBranch();
02499 
02500   int numNodes = node->numNodes();
02501   SimParameters *simparam = node->simParameters;
02502   if(simparam->simulateInitialMapping) {
02503           numNodes = simparam->simulatedPEs;
02504   }
02505 
02506   for(int i=0; i<numNodes; i++) {
02507     if ( patchMap->numPatchesOnNode(i) ) {
02508       computeMap->storeCompute(i,0,type);
02509     }
02510   }
02511 }
02512 
02513 //----------------------------------------------------------------------
02514 void WorkDistrib::mapComputePatch(ComputeType type)
02515 {
02516   PatchMap *patchMap = PatchMap::Object();
02517   ComputeMap *computeMap = ComputeMap::Object();
02518 
02519   PatchID i;
02520   ComputeID cid;
02521 
02522   for(i=0; i<patchMap->numPatches(); i++)
02523   {
02524     cid=computeMap->storeCompute(patchMap->node(i),1,type);
02525     computeMap->newPid(cid,i);
02526     patchMap->newCid(i,cid);
02527   }
02528 
02529 }
02530 
02531 //----------------------------------------------------------------------
02532 void WorkDistrib::mapComputeNode(ComputeType type)
02533 {
02534   PatchMap *patchMap = PatchMap::Object();
02535   ComputeMap *computeMap = ComputeMap::Object();
02536 
02537   PatchID i;
02538   ComputeID cid;
02539 
02540   int ncpus = CkNumPes();
02541   SimParameters *simparam = Node::Object()->simParameters;
02542   if(simparam->simulateInitialMapping) {
02543           ncpus = simparam->simulatedPEs;
02544   }
02545 
02546   for(int i=0; i<ncpus; i++) {
02547     computeMap->storeCompute(i,0,type);
02548   }
02549 
02550 }
02551 
02552 //----------------------------------------------------------------------
02553 void WorkDistrib::mapComputeNonbonded(void)
02554 {
02555   // For each patch, create 1 electrostatic object for self-interaction.
02556   // Then create 1 for each 1-away and 2-away neighbor which has a larger
02557   // pid.
02558 
02559   PatchMap *patchMap = PatchMap::Object();
02560   ComputeMap *computeMap = ComputeMap::Object();
02561   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02562   Node *node = nd.ckLocalBranch();
02563   SimParameters *simParams = Node::Object()->simParameters;
02564   int ncpus = CkNumPes();
02565   int nodesize = CkMyNodeSize();
02566   if(simParams->simulateInitialMapping) {
02567           ncpus = simParams->simulatedPEs;
02568           nodesize = simParams->simulatedNodeSize;
02569   }
02570 
02571   PatchID oneAway[PatchMap::MaxOneOrTwoAway];
02572   PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
02573   int oneAwayTrans[PatchMap::MaxOneOrTwoAway];
02574 
02575   PatchID i;
02576   ComputeID cid;
02577   int numNeighbors;
02578   int j;
02579   double partScaling = 1.0;
02580   if ( ncpus < patchMap->numPatches() ) {
02581     partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
02582   }
02583 
02584   for(i=0; i<patchMap->numPatches(); i++) // do the self 
02585   {
02586 
02587    int numPartitions = 1;
02588 #if 0
02589    if ( simParams->ldBalancer == LDBAL_HYBRID ) {
02590 #ifdef  MEM_OPT_VERSION    
02591     int64 numFixed = patchMap->numFixedAtoms(i);
02592     int64 numAtoms = patchMap->numAtoms(i);
02593 #else
02594     int64 numFixed = patchMap->patch(i)->getNumFixedAtoms();  // avoid overflow
02595     int64 numAtoms = patchMap->patch(i)->getNumAtoms();
02596 #endif
02597 
02598     int divide = node->simParameters->numAtomsSelf;
02599     if (divide > 0) {
02600       numPartitions = (int) ( partScaling * ( 0.5 +
02601         (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
02602     }
02603     if (numPartitions < 1) numPartitions = 1;
02604     if ( numPartitions > node->simParameters->maxSelfPart )
02605                         numPartitions = node->simParameters->maxSelfPart;
02606     // self-interaction
02607     DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
02608 //    iout <<"Self numPartitions = " <<numPartitions <<" numAtoms " <<numAtoms <<std::endl;
02609    }
02610 #endif
02611 
02612     // DMK - NOTE - For MIC builds (i.e. NAMD_MIC is defined), it is assumed that self computes are
02613     //   mapped to the PE their associated patch is on. If the code below should change, making that
02614     //   untrue, MIC builds should be special cased so that assumption still holds (or the host vs
02615     //   device load balancing scheme should be modified).  (See the comment in the function
02616     //   mic_assignComputes() in ComputeNonbondedMIC.C for more details.)
02617     for(int partition=0; partition < numPartitions; partition++)
02618     {
02619       cid=computeMap->storeCompute(patchMap->node(i),1,
02620                                    computeNonbondedSelfType,
02621                                    partition,numPartitions);
02622       computeMap->newPid(cid,i);
02623       patchMap->newCid(i,cid);
02624     }
02625   }
02626 
02627   for(int p1=0; p1 <patchMap->numPatches(); p1++) // do the pairs
02628   {
02629     // this only returns half of neighbors, which is what we want
02630     numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
02631     for(j=0;j<numNeighbors;j++)
02632     {
02633         int p2 = oneAway[j];
02634         int dsp = oneAwayDownstream[j];
02635 
02636       int numPartitions = 1;
02637 #if 0
02638       if ( simParams->ldBalancer == LDBAL_HYBRID ) {
02639 #ifdef  MEM_OPT_VERSION        
02640         int64 numAtoms1 = patchMap->numAtoms(p1);
02641         int64 numAtoms2 = patchMap->numAtoms(p2);
02642         int64 numFixed1 = patchMap->numFixedAtoms(p1);
02643         int64 numFixed2 = patchMap->numFixedAtoms(p2);
02644 #else
02645         int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
02646         int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
02647         int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
02648         int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
02649 #endif
02650 
02651 
02652         const int t2 = oneAwayTrans[j];
02653         const int adim = patchMap->gridsize_a();
02654         const int bdim = patchMap->gridsize_b();
02655         const int cdim = patchMap->gridsize_c();
02656         const int nax = patchMap->numaway_a();  // 1 or 2
02657         const int nay = patchMap->numaway_b();  // 1 or 2
02658         const int naz = patchMap->numaway_c();  // 1 or 2
02659         const int ia1 = patchMap->index_a(p1);
02660         const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
02661         const int ib1 = patchMap->index_b(p1);
02662         const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
02663         const int ic1 = patchMap->index_c(p1);
02664         const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);
02665 
02666         if ( abs(ia2-ia1) > nax ||
02667              abs(ib2-ib1) > nay ||
02668              abs(ic2-ic1) > naz )
02669           NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");
02670 
02671         int distance = 3;
02672         if ( ia1 == ia2 ) --distance;
02673         else if ( ia1 == ia2 + nax - 1 ) --distance;
02674         else if ( ia1 + nax - 1 == ia2 ) --distance;
02675         if ( ib1 == ib2 ) --distance;
02676         else if ( ib1 == ib2 + nay - 1 ) --distance;
02677         else if ( ib1 + nay - 1 == ib2 ) --distance;
02678         if ( ic1 == ic2 ) --distance;
02679         else if ( ic1 == ic2 + naz - 1 ) --distance;
02680         else if ( ic1 + naz - 1 == ic2 ) --distance;
02681         int divide = 0;
02682         if ( distance == 0 ) {
02683           divide = node->simParameters->numAtomsSelf2;
02684         } else if (distance == 1) {
02685           divide = node->simParameters->numAtomsPair;
02686         } else {
02687           divide = node->simParameters->numAtomsPair2;
02688         }
02689         if (divide > 0) {
02690           numPartitions = (int) ( partScaling * ( 0.5 +
02691             (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
02692         }
02693         if ( numPartitions < 1 ) numPartitions = 1;
02694         if ( numPartitions > node->simParameters->maxPairPart )
02695                         numPartitions = node->simParameters->maxPairPart;
02696 //      if ( numPartitions > 1 ) iout << "Mapping " << numPartitions << " ComputeNonbondedPair objects for patches " << p1 << "(" << numAtoms1 << ") and " << p2 << "(" << numAtoms2 << ")\n" << endi;
02697       }
02698 #endif
02699                 for(int partition=0; partition < numPartitions; partition++)
02700                 {
02701                   cid=computeMap->storeCompute( patchMap->basenode(dsp),
02702                         2,computeNonbondedPairType,partition,numPartitions);
02703                   computeMap->newPid(cid,p1);
02704                   computeMap->newPid(cid,p2,oneAwayTrans[j]);
02705                   patchMap->newCid(p1,cid);
02706                   patchMap->newCid(p2,cid);
02707                 }
02708     }
02709   }
02710 }
02711 
02712 //----------------------------------------------------------------------
02713 void WorkDistrib::mapComputeLCPO(void) {
02714   //iterate over all needed objects
02715 
02716   PatchMap *patchMap = PatchMap::Object();
02717   ComputeMap *computeMap = ComputeMap::Object();
02718   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02719   Node *node = nd.ckLocalBranch();
02720   SimParameters *simParams = Node::Object()->simParameters;
02721   int ncpus = CkNumPes();
02722   int nodesize = CkMyNodeSize();
02723   const int maxPatches = 8;
02724 
02725   int numPatchesInOctet;
02726   PatchID patchesInOctet[maxPatches];
02727   int oneAwayTrans[maxPatches];
02728 
02729   //partitioned after 1st timestep
02730   int numPartitions = 1;
02731 
02732   PatchID i;
02733   ComputeID cid;
02734 
02735   // one octet per patch
02736   for(i=0; i<patchMap->numPatches(); i++) {
02737     numPatchesInOctet =
02738         patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);
02739 
02740                 for(int partition=0; partition < numPartitions; partition++) {
02741       cid=computeMap->storeCompute(patchMap->node(i),
02742           numPatchesInOctet,
02743                                   computeLCPOType,
02744                                   partition,
02745           numPartitions);
02746       for (int p = 0; p < numPatchesInOctet; p++) {
02747         computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
02748       }
02749       for (int p = 0; p < numPatchesInOctet; p++) {
02750         patchMap->newCid(patchesInOctet[p],cid);
02751       }
02752     } // for partitions 
02753   } // for patches
02754 } // mapComputeLCPO
02755 
02756 //----------------------------------------------------------------------
02757 void WorkDistrib::messageEnqueueWork(Compute *compute) {
02758   LocalWorkMsg *msg = compute->localWorkMsg;
02759   int seq = compute->sequence();
02760   int gbisPhase = compute->getGBISPhase();
02761 
02762   if ( seq < 0 ) {
02763     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
02764   } else {
02765     SET_PRIORITY(msg,seq,compute->priority());
02766   }
02767 
02768   msg->compute = compute; // pointer is valid since send is to local Pe
02769   int type = compute->type();
02770   int cid = compute->cid;
02771 
02772   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
02773   switch ( type ) {
02774   case computeExclsType:
02775   case computeSelfExclsType:
02776     wdProxy[CkMyPe()].enqueueExcls(msg);
02777     break;
02778   case computeBondsType:
02779   case computeSelfBondsType:
02780     wdProxy[CkMyPe()].enqueueBonds(msg);
02781     break;
02782   case computeAnglesType:
02783   case computeSelfAnglesType:
02784     wdProxy[CkMyPe()].enqueueAngles(msg);
02785     break;
02786   case computeDihedralsType:
02787   case computeSelfDihedralsType:
02788     wdProxy[CkMyPe()].enqueueDihedrals(msg);
02789     break;
02790   case computeImpropersType:
02791   case computeSelfImpropersType:
02792     wdProxy[CkMyPe()].enqueueImpropers(msg);
02793     break;
02794   case computeTholeType:
02795   case computeSelfTholeType:
02796     wdProxy[CkMyPe()].enqueueThole(msg);
02797     break;
02798   case computeAnisoType:
02799   case computeSelfAnisoType:
02800     wdProxy[CkMyPe()].enqueueAniso(msg);
02801     break;
02802   case computeCrosstermsType:
02803   case computeSelfCrosstermsType:
02804     wdProxy[CkMyPe()].enqueueCrossterms(msg);
02805     break;
02806   // JLai
02807   case computeGromacsPairType:
02808   case computeSelfGromacsPairType:
02809     wdProxy[CkMyPe()].enqueueGromacsPair(msg);
02810     break;    
02811   // End of JLai
02812   case computeLCPOType:
02813     wdProxy[CkMyPe()].enqueueLCPO(msg);
02814     break;
02815   case computeNonbondedSelfType:
02816     switch ( seq % 2 ) {
02817     case 0:
02818       //wdProxy[CkMyPe()].enqueueSelfA(msg);
02819       switch ( gbisPhase ) {
02820          case 1:
02821            wdProxy[CkMyPe()].enqueueSelfA1(msg);
02822            break;
02823          case 2:
02824            wdProxy[CkMyPe()].enqueueSelfA2(msg);
02825            break;
02826          case 3:
02827            wdProxy[CkMyPe()].enqueueSelfA3(msg);
02828            break;
02829       }
02830       break;
02831     case 1:
02832       //wdProxy[CkMyPe()].enqueueSelfB(msg);
02833       switch ( gbisPhase ) {
02834          case 1:
02835            wdProxy[CkMyPe()].enqueueSelfB1(msg);
02836            break;
02837          case 2:
02838            wdProxy[CkMyPe()].enqueueSelfB2(msg);
02839            break;
02840          case 3:
02841            wdProxy[CkMyPe()].enqueueSelfB3(msg);
02842            break;
02843       }
02844       break;
02845     default:
02846       NAMD_bug("WorkDistrib::messageEnqueueSelf case statement error!");
02847     }
02848     break;
02849   case computeNonbondedPairType:
02850     switch ( seq % 2 ) {
02851     case 0:
02852       //wdProxy[CkMyPe()].enqueueWorkA(msg);
02853       switch ( gbisPhase ) {
02854          case 1:
02855            wdProxy[CkMyPe()].enqueueWorkA1(msg);
02856            break;
02857          case 2:
02858            wdProxy[CkMyPe()].enqueueWorkA2(msg);
02859            break;
02860          case 3:
02861            wdProxy[CkMyPe()].enqueueWorkA3(msg);
02862            break;
02863       }
02864       break;
02865     case 1:
02866       //wdProxy[CkMyPe()].enqueueWorkB(msg);
02867       switch ( gbisPhase ) {
02868          case 1:
02869            wdProxy[CkMyPe()].enqueueWorkB1(msg);
02870            break;
02871          case 2:
02872            wdProxy[CkMyPe()].enqueueWorkB2(msg);
02873            break;
02874          case 3:
02875            wdProxy[CkMyPe()].enqueueWorkB3(msg);
02876            break;
02877       }
02878       break;
02879     case 2:
02880       wdProxy[CkMyPe()].enqueueWorkC(msg);
02881       break;
02882     default:
02883       NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
02884     }
02885     break;
02886   case computeNonbondedCUDAType:
02887 #ifdef NAMD_CUDA
02888   case computeNonbondedCUDA2Type:
02889 //     CkPrintf("WorkDistrib[%d]::CUDA seq=%d phase=%d\n", CkMyPe(), seq, gbisPhase);
02890     //wdProxy[CkMyPe()].enqueueCUDA(msg);
02891     switch ( gbisPhase ) {
02892        case 1:
02893          wdProxy[CkMyPe()].enqueueCUDA(msg);
02894          break;
02895        case 2:
02896          wdProxy[CkMyPe()].enqueueCUDAP2(msg);
02897          break;
02898        case 3:
02899          wdProxy[CkMyPe()].enqueueCUDAP3(msg);
02900          break;
02901     }
02902 #else
02903     msg->compute->doWork();  MACHINE_PROGRESS
02904 #endif
02905     break;
02906   case computeNonbondedMICType:
02907 #ifdef NAMD_MIC
02908     wdProxy[CkMyPe()].enqueueMIC(msg);
02909 #endif
02910     break;
02911   case computePmeType:
02912     // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
02913     wdProxy[CkMyPe()].enqueuePme(msg);
02914     break;
02915 #ifdef NAMD_CUDA
02916   case computePmeCUDAType:
02917     wdProxy[CkMyPe()].enqueuePme(msg);
02918     break;
02919 #endif
02920   case optPmeType:
02921     // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
02922 #ifdef NAMD_CUDA
02923     wdProxy[CkMyPe()].enqueuePme(msg);
02924 #else
02925     msg->compute->doWork();  MACHINE_PROGRESS
02926 #endif
02927     break;
02928   default:
02929     wdProxy[CkMyPe()].enqueueWork(msg);
02930   }
02931 }
02932 
02933 //----------------------------------------------------------------------
02934 void WorkDistrib::messageFinishCUDA(Compute *compute) {
02935   LocalWorkMsg *msg = compute->localWorkMsg;
02936   int seq = compute->sequence();
02937   int gbisPhase = compute->getGBISPhase();
02938 
02939   if ( seq < 0 ) {
02940     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
02941   } else {
02942     SET_PRIORITY(msg,seq,compute->priority());
02943   }
02944 
02945   msg->compute = compute; // pointer is valid since send is to local Pe
02946   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
02947 
02948 #ifdef NAMD_CUDA
02949     //wdProxy[CkMyPe()].finishCUDA(msg);
02950     switch ( gbisPhase ) {
02951        case 1:
02952          wdProxy[CkMyPe()].finishCUDA(msg);
02953          break;
02954        case 2:
02955          wdProxy[CkMyPe()].finishCUDAP2(msg);
02956          break;
02957        case 3:
02958          wdProxy[CkMyPe()].finishCUDAP3(msg);
02959          break;
02960     }
02961 #else
02962     msg->compute->doWork();  MACHINE_PROGRESS
02963 #endif
02964 }
02965 
02966 //----------------------------------------------------------------------
02967 void WorkDistrib::messageFinishMIC(Compute *compute) {
02968   LocalWorkMsg *msg = compute->localWorkMsg;
02969   int seq = compute->sequence();
02970   int gbisPhase = compute->getGBISPhase();
02971 
02972   if ( seq < 0 ) {
02973     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
02974   } else {
02975     SET_PRIORITY(msg,seq,compute->priority());
02976   }
02977 
02978   msg->compute = compute; // pointer is valid since send is to local Pe
02979   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
02980 
02981 #ifdef NAMD_MIC
02982     wdProxy[CkMyPe()].finishMIC(msg);
02983 #else
02984     msg->compute->doWork();  MACHINE_PROGRESS
02985 #endif
02986 }
02987 
02988 void WorkDistrib::enqueueWork(LocalWorkMsg *msg) {
02989   msg->compute->doWork();  MACHINE_PROGRESS
02990   if ( msg->compute->localWorkMsg != msg )
02991     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02992 }
02993 
02994 void WorkDistrib::enqueueExcls(LocalWorkMsg *msg) {
02995   msg->compute->doWork();  MACHINE_PROGRESS
02996   if ( msg->compute->localWorkMsg != msg )
02997     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02998 }
02999 
03000 void WorkDistrib::enqueueBonds(LocalWorkMsg *msg) {
03001   msg->compute->doWork();  MACHINE_PROGRESS
03002   if ( msg->compute->localWorkMsg != msg )
03003     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03004 }
03005 
03006 void WorkDistrib::enqueueAngles(LocalWorkMsg *msg) {
03007   msg->compute->doWork();  MACHINE_PROGRESS
03008   if ( msg->compute->localWorkMsg != msg )
03009     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03010 }
03011 
03012 void WorkDistrib::enqueueDihedrals(LocalWorkMsg *msg) {
03013   msg->compute->doWork();  MACHINE_PROGRESS
03014   if ( msg->compute->localWorkMsg != msg )
03015     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03016 }
03017 
03018 void WorkDistrib::enqueueImpropers(LocalWorkMsg *msg) {
03019   msg->compute->doWork();  MACHINE_PROGRESS
03020   if ( msg->compute->localWorkMsg != msg )
03021     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03022 }
03023 
03024 void WorkDistrib::enqueueThole(LocalWorkMsg *msg) {
03025   msg->compute->doWork();  MACHINE_PROGRESS
03026   if ( msg->compute->localWorkMsg != msg )
03027     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03028 }
03029 
03030 void WorkDistrib::enqueueAniso(LocalWorkMsg *msg) {
03031   msg->compute->doWork();  MACHINE_PROGRESS
03032   if ( msg->compute->localWorkMsg != msg )
03033     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03034 }
03035 
03036 void WorkDistrib::enqueueCrossterms(LocalWorkMsg *msg) {
03037   msg->compute->doWork();  MACHINE_PROGRESS
03038   if ( msg->compute->localWorkMsg != msg )
03039     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03040 }
03041 
03042 // JLai
03043 void WorkDistrib::enqueueGromacsPair(LocalWorkMsg *msg) {
03044   msg->compute->doWork();
03045   MACHINE_PROGRESS
03046   if ( msg->compute->localWorkMsg != msg )
03047     NAMD_bug("\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
03048 }
03049 // End of JLai
03050 
03051 void WorkDistrib::enqueuePme(LocalWorkMsg *msg) {
03052   msg->compute->doWork();  MACHINE_PROGRESS
03053   if ( msg->compute->localWorkMsg != msg )
03054     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03055 }
03056 
03057 void WorkDistrib::enqueueLCPO(LocalWorkMsg *msg) {
03058   msg->compute->doWork();
03059   if ( msg->compute->localWorkMsg != msg )
03060     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03061 }
03062 void WorkDistrib::enqueueSelfA1(LocalWorkMsg *msg) {
03063   msg->compute->doWork();  MACHINE_PROGRESS
03064   if ( msg->compute->localWorkMsg != msg )
03065     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03066 }
03067 void WorkDistrib::enqueueSelfA2(LocalWorkMsg *msg) {
03068   msg->compute->doWork();  MACHINE_PROGRESS
03069   if ( msg->compute->localWorkMsg != msg )
03070     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03071 }
03072 void WorkDistrib::enqueueSelfA3(LocalWorkMsg *msg) {
03073   msg->compute->doWork();  MACHINE_PROGRESS
03074   if ( msg->compute->localWorkMsg != msg )
03075     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03076 }
03077 
03078 void WorkDistrib::enqueueSelfB1(LocalWorkMsg *msg) {
03079   msg->compute->doWork();  MACHINE_PROGRESS
03080   if ( msg->compute->localWorkMsg != msg )
03081     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03082 }
03083 void WorkDistrib::enqueueSelfB2(LocalWorkMsg *msg) {
03084   msg->compute->doWork();  MACHINE_PROGRESS
03085   if ( msg->compute->localWorkMsg != msg )
03086     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03087 }
03088 void WorkDistrib::enqueueSelfB3(LocalWorkMsg *msg) {
03089   msg->compute->doWork();  MACHINE_PROGRESS
03090   if ( msg->compute->localWorkMsg != msg )
03091     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03092 }
03093 
03094 void WorkDistrib::enqueueWorkA1(LocalWorkMsg *msg) {
03095   msg->compute->doWork();  MACHINE_PROGRESS
03096   if ( msg->compute->localWorkMsg != msg )
03097     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03098 }
03099 void WorkDistrib::enqueueWorkA2(LocalWorkMsg *msg) {
03100   msg->compute->doWork();  MACHINE_PROGRESS
03101   if ( msg->compute->localWorkMsg != msg )
03102     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03103 }
03104 void WorkDistrib::enqueueWorkA3(LocalWorkMsg *msg) {
03105   msg->compute->doWork();  MACHINE_PROGRESS
03106   if ( msg->compute->localWorkMsg != msg )
03107     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03108 }
03109 
03110 void WorkDistrib::enqueueWorkB1(LocalWorkMsg *msg) {
03111   msg->compute->doWork();  MACHINE_PROGRESS
03112   if ( msg->compute->localWorkMsg != msg )
03113     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03114 }
03115 void WorkDistrib::enqueueWorkB2(LocalWorkMsg *msg) {
03116   msg->compute->doWork();  MACHINE_PROGRESS
03117   if ( msg->compute->localWorkMsg != msg )
03118     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03119 }
03120 void WorkDistrib::enqueueWorkB3(LocalWorkMsg *msg) {
03121   msg->compute->doWork();  MACHINE_PROGRESS
03122   if ( msg->compute->localWorkMsg != msg )
03123     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03124 }
03125 
03126 
03127 
03128 void WorkDistrib::enqueueWorkC(LocalWorkMsg *msg) {
03129   msg->compute->doWork();  MACHINE_PROGRESS
03130   if ( msg->compute->localWorkMsg != msg )
03131     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03132 }
03133 
03134 void WorkDistrib::enqueueCUDA(LocalWorkMsg *msg) {
03135   msg->compute->doWork();  MACHINE_PROGRESS
03136   // ComputeNonbondedCUDA *c = msg->compute;
03137   // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
03138   //   NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03139 }
03140 void WorkDistrib::enqueueCUDAP2(LocalWorkMsg *msg) {
03141   msg->compute->doWork();  MACHINE_PROGRESS
03142 }
03143 void WorkDistrib::enqueueCUDAP3(LocalWorkMsg *msg) {
03144   msg->compute->doWork();  MACHINE_PROGRESS
03145 }
03146 
03147 void WorkDistrib::finishCUDAPatch(FinishWorkMsg *msg) {
03148   msg->compute->finishPatch(msg->data);
03149 }
03150 
03151 void WorkDistrib::finishCUDA(LocalWorkMsg *msg) {
03152   msg->compute->doWork();  MACHINE_PROGRESS
03153   // ComputeNonbondedCUDA *c = msg->compute;
03154   // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
03155   //   NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03156 }
03157 void WorkDistrib::finishCUDAP2(LocalWorkMsg *msg) {
03158   msg->compute->doWork();  MACHINE_PROGRESS
03159 }
03160 void WorkDistrib::finishCUDAP3(LocalWorkMsg *msg) {
03161   msg->compute->doWork();  MACHINE_PROGRESS
03162 }
03163 
03164 void WorkDistrib::enqueueMIC(LocalWorkMsg *msg) {
03165   msg->compute->doWork();  MACHINE_PROGRESS
03166 }
03167 void WorkDistrib::finishMIC(LocalWorkMsg *msg) {
03168   msg->compute->doWork();  MACHINE_PROGRESS
03169 }
03170 
03171 
03172 //**********************************************************************
03173 //
03174 //                      FUNCTION velocities_from_PDB
03175 //
03176 //   INPUTS:
03177 //      v - Array of vectors to populate
03178 //      filename - name of the PDB filename to read in
03179 //
03180 //      This function reads in a set of initial velocities from a
03181 //      PDB file.  It places the velocities into the array of Vectors
03182 //      passed to it.
03183 //
03184 //***********************************************************************/
03185 
03186 void WorkDistrib::velocities_from_PDB(const char *filename, 
03187                                       Vector *v, int totalAtoms)
03188 {
03189   PDB *v_pdb;           //  PDB info from velocity PDB
03190   int i;
03191 
03192   //  Read the PDB
03193   v_pdb = new PDB(filename);
03194   if ( v_pdb == NULL )
03195   {
03196     NAMD_die("memory allocation failed in Node::velocities_from_PDB");
03197   }
03198 
03199   //  Make sure the number of velocities read in matches
03200   //  the number of atoms we have
03201   if (v_pdb->num_atoms() != totalAtoms)
03202   {
03203     char err_msg[129];
03204 
03205     sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
03206             v_pdb->num_atoms());
03207 
03208     NAMD_die(err_msg);
03209   }
03210 
03211   //  Get the entire list of atom info and loop through
03212   //  them assigning the velocity vector for each one
03213   v_pdb->get_all_positions(v);
03214 
03215   for (i=0; i<totalAtoms; i++)
03216   {
03217     v[i].x *= PDBVELINVFACTOR;
03218     v[i].y *= PDBVELINVFACTOR;
03219     v[i].z *= PDBVELINVFACTOR;
03220   }
03221 
03222   delete v_pdb;
03223 }
03224 //              END OF FUNCTION velocities_from_PDB
03225 
03226 //**********************************************************************
03227 //
03228 //                      FUNCTION velocities_from_binfile
03229 //
03230 //    INPUTS:
03231 //      fname - File name to write velocities to
03232 //      n - Number of atoms in system
03233 //      vels - Array of velocity vectors
03234 //                                      
03235 //      This function writes out the velocities in binary format.  This is
03236 //     done to preserve accuracy between restarts of namd.
03237 //
03238 //**********************************************************************
03239 
03240 void WorkDistrib::velocities_from_binfile(const char *fname, Vector *vels, int n)
03241 {
03242   read_binary_file(fname,vels,n);
03243 }
03244 //               END OF FUNCTION velocities_from_binfile
03245 
03246 //**********************************************************************
03247 //
03248 //                      FUNCTION random_velocities
03249 //
03250 //   INPUTS:
03251 //      v - array of vectors to populate
03252 //      Temp - Temperature to acheive
03253 //
03254 //      This function assigns a random velocity distribution to a
03255 //   simulation to achieve a desired initial temperature.  The method
03256 //   used here was stolen from the program X-PLOR.
03257 //
03258 //**********************************************************************
03259 
03260 void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
03261                                     Vector *v, int totalAtoms)
03262 {
03263   int i, j;             //  Loop counter
03264   BigReal kbT;          //  Boltzman constant * Temp
03265   BigReal randnum;      //  Random number from -6.0 to 6.0
03266   BigReal kbToverM;     //  sqrt(Kb*Temp/Mass)
03267   SimParameters *simParams = Node::Object()->simParameters;
03268   Bool lesOn = simParams->lesOn;
03269   Random vel_random(simParams->randomSeed);
03270 
03271   int lesReduceTemp = lesOn && simParams->lesReduceTemp;
03272   BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
03273 
03274   kbT = Temp*BOLTZMANN;
03275 
03276   //  Loop through all the atoms and assign velocities in
03277   //  the x, y and z directions for each one
03278   for (i=0; i<totalAtoms; i++)
03279   {
03280     if (structure->atommass(i) <= 0.) {
03281       kbToverM = 0.;
03282     } else {
03283       kbToverM = sqrt(kbT *
03284         ( lesOn && structure->get_fep_type(i) ? tempFactor : 1.0 ) /
03285                           structure->atommass(i) );
03286     }
03287 
03288     //  The following comment was stolen from X-PLOR where
03289     //  the following section of code was adapted from.
03290     
03291     //  This section generates a Gaussian random
03292     //  deviate of 0.0 mean and standard deviation RFD for
03293     //  each of the three spatial dimensions.
03294     //  The algorithm is a "sum of uniform deviates algorithm"
03295     //  which may be found in Abramowitz and Stegun,
03296     //  "Handbook of Mathematical Functions", pg 952.
03297     for (randnum=0.0, j=0; j<12; j++)
03298     {
03299       randnum += vel_random.uniform();
03300     }
03301 
03302     randnum -= 6.0;
03303 
03304     v[i].x = randnum*kbToverM;
03305 
03306     for (randnum=0.0, j=0; j<12; j++)
03307     {
03308       randnum += vel_random.uniform();
03309     }
03310 
03311     randnum -= 6.0;
03312 
03313     v[i].y = randnum*kbToverM;
03314 
03315     for (randnum=0.0, j=0; j<12; j++)
03316     {
03317       randnum += vel_random.uniform();
03318     }
03319 
03320     randnum -= 6.0;
03321     
03322     v[i].z = randnum*kbToverM;
03323   }
03324 
03325   if ( simParams->drudeOn ) for (i=0; i<totalAtoms; i++) {
03326     if ( structure->is_drude(i) ) {
03327       v[i] = v[structure->get_mother_atom(i)];  // zero is good enough
03328     }
03329   }
03330 }
03331 /*                      END OF FUNCTION random_velocities               */
03332 
03333 //**********************************************************************
03334 //
03335 //                      FUNCTION remove_com_motion
03336 //
03337 //   INPUTS:
03338 //      vel - Array of initial velocity vectors
03339 //
03340 //      This function removes the center of mass motion from a molecule.
03341 //
03342 //**********************************************************************
03343 
03344 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
03345 {
03346   Vector mv(0,0,0);             //  Sum of (mv)_i
03347   BigReal totalMass=0;  //  Total mass of system
03348   int i;                        //  Loop counter
03349 
03350   //  Loop through and compute the net momentum
03351   for (i=0; i<n; i++)
03352   {
03353     BigReal mass = structure->atommass(i);
03354     mv += mass * vel[i];
03355     totalMass += mass;
03356   }
03357 
03358   mv /= totalMass;
03359 
03360   iout << iINFO << "REMOVING COM VELOCITY "
03361         << ( PDBVELFACTOR * mv ) << "\n" << endi;
03362 
03363   for (i=0; i<n; i++) { vel[i] -= mv; }
03364 
03365 }
03366 /*                      END OF FUNCTION remove_com_motion               */
03367 
03368 #if USE_TOPOMAP 
03369 
03370 //Specifically designed for BGL and other 3d Tori architectures
03371 //Partition Torus and Patch grid together using recursive bisection.
03372 int WorkDistrib::assignPatchesTopoGridRecBisection() {
03373   
03374   PatchMap *patchMap = PatchMap::Object();
03375   int *assignedNode = new int[patchMap->numPatches()];
03376   int numNodes = Node::Object()->numNodes();
03377   SimParameters *simParams = Node::Object()->simParameters;
03378   if(simParams->simulateInitialMapping) {
03379           numNodes = simParams->simulatedPEs;
03380   }
03381 
03382   int usedNodes = numNodes;
03383   CkPrintf("assignPatchesTopoGridRecBisection\n");
03384   if ( simParams->noPatchesOnZero && numNodes > 1 ) {
03385     usedNodes -= 1;
03386     if ( simParams->noPatchesOnOne && numNodes > 2 )
03387       usedNodes -= 1;
03388   }
03389   RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());
03390   
03391   int xsize = 0, ysize = 0, zsize = 0;
03392   
03393   // Right now assumes a T*** (e.g. TXYZ) mapping
03394   TopoManager tmgr;
03395   xsize = tmgr.getDimNX();
03396   ysize = tmgr.getDimNY();
03397   zsize = tmgr.getDimNZ();
03398   
03399   //Fix to not assign patches to processor 0
03400   int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
03401  
03402   delete [] assignedNode;
03403 
03404   return rc;
03405 }
03406 #endif
03407 
03408 
03409 #if defined(NAMD_MIC)
03410   extern void mic_hostDeviceLDB();
03411   extern void mic_contributeHostDeviceLDB(int idLen, int * id);
03412   extern void mic_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2);
03413 #endif
03414 
03415 void WorkDistrib::send_initHostDeviceLDB() {
03416   #if defined(NAMD_MIC)
03417     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
03418     wdProxy.initHostDeviceLDB();
03419   #endif
03420 }
03421 
03422 void WorkDistrib::initHostDeviceLDB() {
03423   #if defined(NAMD_MIC)
03424     mic_hostDeviceLDB();
03425   #endif
03426 }
03427 
03428 void WorkDistrib::send_contributeHostDeviceLDB(int peSetLen, int * peSet) {
03429   #if defined(NAMD_MIC)
03430     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
03431     wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
03432   #endif
03433 }
03434 
03435 void WorkDistrib::contributeHostDeviceLDB(int peSetLen, int * peSet) {
03436   #if defined(NAMD_MIC)
03437     mic_contributeHostDeviceLDB(peSetLen, peSet);
03438   #endif
03439 }
03440 
03441 void WorkDistrib::send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
03442   #if defined(NAMD_MIC)
03443     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
03444     wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
03445   #endif
03446 }
03447 
03448 void WorkDistrib::setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
03449   #if defined(NAMD_MIC)
03450     mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
03451   #endif
03452 }
03453 
03454 
03455 #include "WorkDistrib.def.h"
03456 

Generated on Tue Nov 21 01:17:15 2017 for NAMD by  doxygen 1.4.7