WorkDistrib.C

00001 
00007 /*****************************************************************************
00008  * $Source: /home/cvs/namd/cvsroot/namd2/src/WorkDistrib.C,v $
00009  * $Author: jim $
00010  * $Date: 2017/03/30 20:06:17 $
00011  * $Revision: 1.1291 $
00012  *****************************************************************************/
00013 
00020 #include <stdio.h>
00021  
00022 #include "InfoStream.h"
00023 #include "Communicate.h"
00024 #include "ProcessorPrivate.h"
00025 #include "BOCgroup.h"
00026 #include "WorkDistrib.decl.h"
00027 #include "WorkDistrib.h"
00028 #include "Lattice.h"
00029 #include "ComputeMsmMsa.h"  // needed for MsmMsaData definition
00030 #include "main.decl.h"
00031 #include "main.h"
00032 #include "Node.h"
00033 #include "PatchMgr.h"
00034 #include "PatchMap.inl"
00035 #include "NamdTypes.h"
00036 #include "PDB.h"
00037 #include "SimParameters.h"
00038 #include "Molecule.h"
00039 #include "NamdOneTools.h"
00040 #include "Compute.h"
00041 #include "ComputeMap.h"
00042 #include "RecBisection.h"
00043 #include "Random.h"
00044 #include "varsizemsg.h"
00045 #include "ProxyMgr.h"
00046 #include "Priorities.h"
00047 #include "SortAtoms.h"
00048 #include <algorithm>
00049 #include "TopoManager.h"
00050 #include "ComputePmeCUDAMgr.h"
00051 
00052 #include "DeviceCUDA.h"
00053 #ifdef NAMD_CUDA
00054 #ifdef WIN32
00055 #define __thread __declspec(thread)
00056 #endif
00057 extern __thread DeviceCUDA *deviceCUDA;
00058 #endif
00059 
00060 //#define DEBUGM
00061 #define MIN_DEBUG_LEVEL 2
00062 #include "Debug.h"
00063 #ifdef MEM_OPT_VERSION
00064 extern int isOutputProcessor(int); 
00065 #endif
00066 class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
00067 {
00068 public:
00069 
00070   int numNewNodes;
00071   int numNewNumPartitions;
00072   int *newNodes;
00073   char *newNumPartitions;
00074 
00075 //  VARSIZE_DECL(ComputeMapChangeMsg);
00076 };
00077 
00078 /*
00079 VARSIZE_MSG(ComputeMapChangeMsg,
00080   VARSIZE_ARRAY(newNodes);
00081 )
00082 */
00083 
00084 static int randtopo;
00085 
00086 static void build_ordering(void *) {
00087   WorkDistrib::buildNodeAwarePeOrdering();
00088 }
00089 
00090 void topo_getargs(char **argv) {
00091   randtopo = CmiGetArgFlag(argv, "+randtopo");
00092   if ( CkMyPe() >= CkNumPes() ) return;
00093   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
00094 }
00095 
00096 static int eventMachineProgress;
00097 
00098 //======================================================================
00099 // Public functions
00100 //----------------------------------------------------------------------
00101 WorkDistrib::WorkDistrib()
00102 {
00103   CkpvAccess(BOCclass_group).workDistrib = thisgroup;
00104   patchMapArrived = false;
00105   computeMapArrived = false;
00106 
00107 #if CMK_SMP
00108 #define MACHINE_PROGRESS
00109 #else
00110 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress);  CmiMachineProgressImpl(); }
00111   if ( CkMyNodeSize() > 1 ) NAMD_bug("CkMyNodeSize() > 1 for non-smp build");
00112   eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
00113 #endif
00114 }
00115 
00116 //----------------------------------------------------------------------
00117 WorkDistrib::~WorkDistrib(void)
00118 { }
00119 
00120 static int compare_bit_reversed(int a, int b) {
00121   int d = a ^ b;
00122   int c = 1;
00123   if ( d ) while ( ! (d & c) ) {
00124     c = c << 1;
00125   }
00126   return (a & c) - (b & c);
00127 }
00128 
00129 static bool less_than_bit_reversed(int a, int b) {
00130   int d = a ^ b;
00131   int c = 1;
00132   if ( d ) while ( ! (d & c) ) {
00133     c = c << 1;
00134   }
00135   return d && (b & c);
00136 }
00137 
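// compare_bit_reversed() and less_than_bit_reversed() order integers by their
// bit-reversed binary value (e.g. 0..7 sorts as 0,4,2,6,1,5,3,7), so values
// that are close together in normal order end up far apart.  The sort operator
// below applies this ordering first to a PE's rank within its node, then to
// its node's rank within the physical node, then to the physical node id.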
00138 struct pe_sortop_bit_reversed {
00139   int *rankInPhysOfNode;
00140   pe_sortop_bit_reversed(int *r) : rankInPhysOfNode(r) {}
00141   inline bool operator() (int a, int b) const {
00142     int c = compare_bit_reversed(CmiRankOf(a),CmiRankOf(b));
00143     if ( c < 0 ) return true;
00144     if ( c > 0 ) return false;
00145     c = compare_bit_reversed(
00146         rankInPhysOfNode[CmiNodeOf(a)],rankInPhysOfNode[CmiNodeOf(b)]);
00147     if ( c < 0 ) return true;
00148     if ( c > 0 ) return false;
00149     c = compare_bit_reversed(CmiPhysicalNodeID(a),CmiPhysicalNodeID(b));
00150     return ( c < 0 );
00151   }
00152 };
00153 
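// peCompactOrdering lists PEs grouped by physical node (all PEs of one host
// contiguously), while peDiffuseOrdering lists them in bit-reversed order so
// that consecutive entries tend to fall on different physical nodes.  The
// corresponding ...Index arrays hold the inverse permutations, i.e.
// Index[Ordering[i]] == i.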
00154 int WorkDistrib::peOrderingInit;
00155 int* WorkDistrib::peDiffuseOrdering;
00156 int* WorkDistrib::peDiffuseOrderingIndex;
00157 int* WorkDistrib::peCompactOrdering;
00158 int* WorkDistrib::peCompactOrderingIndex;
00159 
00160 #ifdef NAMD_CUDA
00161 extern void cuda_initialize();
00162 #endif
00163 
00164 void mic_initialize();
00165 
00166 void WorkDistrib::peOrderingReady() {
00167   //CkPrintf("WorkDistrib::peOrderingReady on %d\n", CkMyPe());
00168 #ifdef NAMD_CUDA
00169   cuda_initialize();
00170 #endif
00171 #ifdef NAMD_MIC
00172   mic_initialize();
00173 #endif
00174 }
00175 
00176 void WorkDistrib::buildNodeAwarePeOrdering() {
00177 
00178  CmiMemLock();
00179  if ( ! peOrderingInit ) {
00180   //CkPrintf("WorkDistrib::buildNodeAwarePeOrdering on pe %d\n", CkMyPe());
00181 
00182   const int numPhys = CmiNumPhysicalNodes();
00183   const int numNode = CmiNumNodes();
00184   const int numPe = CmiNumPes();
00185   ResizeArray<int> numNodeInPhys(numPhys);
00186   ResizeArray<int> rankInPhysOfNode(numNode);
00187 
00188   peDiffuseOrdering = new int[numPe];
00189   peDiffuseOrderingIndex = new int[numPe];
00190   peCompactOrdering = new int[numPe];
00191   peCompactOrderingIndex = new int[numPe];
00192 
00193   int k = 0;
00194   for ( int ph=0; ph<numPhys; ++ph ) {
00195     int *pes, npes;
00196     CmiGetPesOnPhysicalNode(ph, &pes, &npes);
00197     for ( int i=0; i<npes; ++i, ++k ) {
00198       peCompactOrdering[k] = pes[i];
00199     }
00200     numNodeInPhys[ph] = 0;
00201     for ( int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
00202       rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
00203       numNodeInPhys[ph] += 1;
00204     }
00205   }
00206 
00207   if ( randtopo && numPhys > 2 ) {
00208     if ( ! CkMyNode() ) {
00209       iout << iWARN << "RANDOMIZING PHYSICAL NODE ORDERING\n" << endi;
00210     }
00211     ResizeArray<int> randPhysOrder(numPhys);
00212     for ( int j=0; j<numPhys; ++j ) {
00213       randPhysOrder[j] = j;
00214     }
00215     Random(314159265).reorder(randPhysOrder.begin()+2, numPhys-2);
00216     for ( int j=0, k=0; j<numPhys; ++j ) {
00217       const int ph = randPhysOrder[j];
00218       int *pes, npes;
00219       CmiGetPesOnPhysicalNode(ph, &pes, &npes);
00220       for ( int i=0; i<npes; ++i, ++k ) {
00221         peCompactOrdering[k] = pes[i];
00222       }
00223     }
00224   }
00225 
00226   for ( int i=0; i<numPe; ++i ) {
00227     peDiffuseOrdering[i] = i;
00228   }
00229   std::sort(peDiffuseOrdering, peDiffuseOrdering+numPe,
00230     pe_sortop_bit_reversed(rankInPhysOfNode.begin()));
00231 
00232   for ( int i=0; i<numPe; ++i ) {
00233     peDiffuseOrderingIndex[peDiffuseOrdering[i]] = i;
00234     peCompactOrderingIndex[peCompactOrdering[i]] = i;
00235   }
00236 
00237   if ( 0 && CmiMyNode() == 0 ) for ( int i=0; i<numPe; ++i ) {
00238     CkPrintf("order %5d %5d %5d %5d %5d\n", i,
00239       peDiffuseOrdering[i],
00240       peDiffuseOrderingIndex[i],
00241       peCompactOrdering[i],
00242       peCompactOrderingIndex[i]);
00243   }
00244 
00245   peOrderingInit = 1;
00246  }
00247  CmiMemUnlock();
00248  peOrderingReady();
00249 
00250 }
00251 
00252 struct pe_sortop_coord_x {
00253   ScaledPosition *spos;
00254   pe_sortop_coord_x(ScaledPosition *s) : spos(s) {}
00255   inline bool operator() (int a, int b) const {
00256     return ( spos[a].x < spos[b].x );
00257   }
00258 };
00259 
00260 struct pe_sortop_coord_y {
00261   ScaledPosition *spos;
00262   pe_sortop_coord_y(ScaledPosition *s) : spos(s) {}
00263   inline bool operator() (int a, int b) const {
00264     return ( spos[a].y < spos[b].y );
00265   }
00266 };
00267 
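// recursive_bisect_coord assigns one PE to each cell of the PME grid:
// at each level the longer axis is split in half, the candidate PEs are sorted
// by the matching coordinate of their average patch position, and each half
// recurses on its half of the PEs; result[x*ydim + y] receives the PE chosen
// for grid cell (x, y).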
00268 static void recursive_bisect_coord(
00269     int x_begin, int x_end, int y_begin, int y_end,
00270     int *pe_begin, ScaledPosition *coord,
00271     int *result, int ydim
00272   ) {
00273   int x_len = x_end - x_begin;
00274   int y_len = y_end - y_begin;
00275   if ( x_len == 1 && y_len == 1 ) {
00276     // done, now put this pe in the right place
00277     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
00278       coord[*pe_begin].x, coord[*pe_begin].y);
00279     result[x_begin*ydim + y_begin] = *pe_begin;
00280     return;
00281   }
00282   int *pe_end = pe_begin + x_len * y_len;
00283   if ( x_len >= y_len ) {
00284     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
00285     int x_split = x_begin + x_len / 2;
00286     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
00287     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
00288     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
00289     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
00290   } else {
00291     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
00292     int y_split = y_begin + y_len / 2;
00293     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
00294     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
00295     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
00296     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
00297   }
00298 }
00299 
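// sortPmePes computes an average patch center for every PE (falling back to
// averaging over the PE's node and then its physical node when the PE itself
// owns no patches), then reorders the xdim*ydim array of PME PEs with
// recursive_bisect_coord so the PME decomposition roughly follows the spatial
// distribution of the patches those PEs own.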
00300 void WorkDistrib::sortPmePes(int *pmepes, int xdim, int ydim) {
00301   int numpes = CkNumPes();
00302   ResizeArray<int> count(numpes);
00303   ResizeArray<ScaledPosition> sumPos(numpes);
00304   ResizeArray<ScaledPosition> avgPos(numpes);
00305   for ( int i=0; i<numpes; ++i ) {
00306     count[i] = 0;
00307     sumPos[i] = 0;
00308     avgPos[i] = 0;
00309   }
00310   PatchMap *patchMap = PatchMap::Object();
00311   for ( int i=0, npatches=patchMap->numPatches(); i<npatches; ++i ) {
00312     int pe = patchMap->node(i);
00313     count[pe] += 1;
00314     sumPos[pe] += patchMap->center(i);
00315   }
00316   const int npmepes = xdim*ydim;
00317   ResizeArray<int> sortpes(npmepes);
00318   for ( int i=0; i<npmepes; ++i ) {
00319     int pe = sortpes[i] = pmepes[i];
00320     int cnt = count[pe];
00321     ScaledPosition sum = sumPos[pe];
00322     if ( cnt == 0 ) {
00323       // average over node
00324       int node = CkNodeOf(pe);
00325       int nsize = CkNodeSize(node);
00326       int pe2 = CkNodeFirst(node);
00327       for ( int j=0; j<nsize; ++j, ++pe2 )  {
00328         cnt += count[pe2];
00329         sum += sumPos[pe2];
00330       }
00331     }
00332     if ( cnt == 0 ) {
00333       // average over physical node
00334       int node = CmiPhysicalNodeID(pe);
00335       int nsize, *nlist;
00336       CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
00337       for ( int j=0; j<nsize; ++j )  {
00338         int pe2 = nlist[j];
00339         cnt += count[pe2];
00340         sum += sumPos[pe2];
00341       }
00342     }
00343     if ( cnt ) {
00344       avgPos[pe] = sum / cnt;
00345     }
00346   }
00347   recursive_bisect_coord(0, xdim, 0, ydim, sortpes.begin(), avgPos.begin(), pmepes, ydim);
00348 }
00349 
00350 
00351 //----------------------------------------------------------------------
00352 void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
00353 {
00354   saveComputeMapReturnEP = ep;
00355   saveComputeMapReturnChareID = chareID;
00356 
00357   ComputeMapChangeMsg *mapMsg = new (0, 0, 0) ComputeMapChangeMsg;
00358   CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
00359 
00360 /*
00361     // store the latest compute map
00362   SimParameters *simParams = Node::Object()->simParameters;
00363   if (simParams->storeComputeMap) {
00364     computeMap->saveComputeMap(simParams->computeMapFilename);
00365     CkPrintf("ComputeMap has been stored in %s.\n", simParams->computeMapFilename);
00366   }
00367 */
00368 }
00369 
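// recvComputeMapChanges: PE 0 streams the updated newNode and newNumPartitions
// arrays through Communicate; the rank-0 PE of every other node receives and
// applies them, with each section framed by the compute count nc as a
// consistency check, and all PEs then contribute to a reduction that triggers
// doneSaveComputeMap().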
00370 void WorkDistrib::recvComputeMapChanges(ComputeMapChangeMsg *msg) {
00371 
00372   delete msg;
00373 
00374   ComputeMap *computeMap = ComputeMap::Object();
00375 
00376   int i;
00377   int nc = computeMap->numComputes();
00378   
00379   if ( ! CkMyPe() ) { // send
00380     // CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00381     MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
00382     msg->put(nc);
00383     for (i=0; i<nc; i++) {
00384       int data = computeMap->newNode(i);
00385       msg->put(data);
00386     }
00387     msg->put(nc);
00388     for (i=0; i<nc; i++) {
00389       char data = computeMap->newNumPartitions(i);
00390       msg->put(data);
00391     }
00392     msg->put(nc);
00393     msg->end();
00394     delete msg;
00395     // CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00396   } else if ( ! CkMyRank() ) { // receive
00397     // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00398     MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
00399     msg->get(i);
00400     if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 1 failed\n");
00401     for (i=0; i<nc; i++) {
00402       int data;
00403       msg->get(data);
00404       computeMap->setNewNode(i,data);
00405     }
00406     msg->get(i);
00407     if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 2 failed\n");
00408     for (i=0; i<nc; i++) {
00409       char data;
00410       msg->get(data);
00411       computeMap->setNewNumPartitions(i,data);
00412     }
00413     msg->get(i);
00414     if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 3 failed\n");
00415     delete msg;
00416     // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
00417   }
00418 
00419   CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
00420   contribute(0, NULL, CkReduction::random, cb);
00421 }
00422 
00423 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
00424   delete msg;
00425 
00426   CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
00427 }
00428 
00429 #ifdef MEM_OPT_VERSION
00430 //All basic info already exists for each atom inside the FullAtomList because
00431 //it is loaded when reading the binary per-atom file. This function fills in
00432 //the remaining fields such as transform and nonbondedGroupSize. Refer to
00433 //WorkDistrib::createAtomLists.
00434 void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
00435   PatchMap *patchMap = PatchMap::Object();
00436 
00437   ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
00438                           0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
00439                           0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));
00440 
00441     int n = alist.size();
00442     FullAtom *a = alist.begin();
00443 /* 
00444     //Those options are not supported in MEM_OPT_VERSION -Chao Mei
00445 //Modifications for alchemical fep
00446     Bool alchFepOn = params->alchFepOn;
00447     Bool alchThermIntOn = params->alchThermIntOn;
00448 //fepe
00449     Bool lesOn = params->lesOn;
00450     Bool pairInteractionOn = params->pairInteractionOn;
00451 
00452     Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
00453 */
00454     SimParameters *params = Node::Object()->simParameters;
00455     const Lattice lattice = params->lattice;
00456     Transform mother_transform;
00457     for(int j=0; j < n; j++)
00458     {
00459       int aid = a[j].id;
00460       a[j].nonbondedGroupSize = 0;  // must be set based on coordinates
00461       
00462       // a[j].fixedPosition = a[j].position;  ParallelIOMgr stores ref coord here.
00463 
00464       if ( a[j].migrationGroupSize ) {
00465        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
00466             Position pos = a[j].position;
00467             int mgs = a[j].migrationGroupSize;
00468             int c = 1;
00469 
00470             for ( int k=a[j].hydrogenGroupSize; k<mgs;
00471                                 k+=a[j+k].hydrogenGroupSize ) {
00472               pos += a[j+k].position;
00473               ++c;
00474             }
00475 
00476             pos *= 1./c;
00477             mother_transform = a[j].transform;  // should be 0,0,0
00478             pos = lattice.nearest(pos,center,&mother_transform);
00479             a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00480             a[j].transform = mother_transform;        
00481        } else {
00482         a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
00483         mother_transform = a[j].transform;
00484        }
00485       } else {
00486         a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00487         a[j].transform = mother_transform;
00488       }
00489 
00490 /* 
00491     //Those options are not supported in MEM_OPT_VERSION -Chao Mei
00492 //Modifications for alchemical fep
00493       if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
00494         a[j].partition = molecule->get_fep_type(aid);
00495       } 
00496       else {
00497         a[j].partition = 0;
00498       }
00499 //fepe
00500 */
00501       a[j].partition = 0;
00502 
00503       //set langevinParams based on atom status
00504       if(params->langevinOn) {
00505         BigReal bval = params->langevinDamping;
00506         if(!params->langevinHydrogen && 
00507            ((a[j].status & HydrogenAtom)!=0)) {
00508           bval = 0;
00509         }else if ((a[j].status & LonepairAtom)!=0) {
00510           bval = 0;
00511         }else if ((a[j].status & DrudeAtom)!=0) {
00512           bval = params->langevinDamping;
00513         }
00514         a[j].langevinParam = bval;
00515       }
00516 
00517     }
00518 
00519     int size, allfixed;
00520     for(int j=0; j < n; j+=size) {
00521       size = a[j].hydrogenGroupSize;
00522       if ( ! size ) {
00523         NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
00524       }
00525       allfixed = 1;
00526       for (int k = 0; k < size; ++k ) {
00527         allfixed = ( allfixed && (a[j+k].atomFixed) );
00528       }
00529       for (int k = 0; k < size; ++k ) {
00530         a[j+k].groupFixed = allfixed ? 1 : 0;
00531       }
00532     }
00533 
00534     if ( params->outputPatchDetails ) {
00535       int patchId = pid;
00536       int numAtomsInPatch = n;
00537       int numFixedAtomsInPatch = 0;
00538       int numAtomsInFixedGroupsInPatch = 0;
00539       for(int j=0; j < n; j++) {
00540         numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
00541         numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
00542       }
00543       iout << "PATCH_DETAILS:"
00544            << " on proc " << CkMyPe()
00545            << " patch " << patchId
00546            << " atoms " << numAtomsInPatch
00547            << " fixed_atoms " << numFixedAtomsInPatch
00548            << " fixed_groups " << numAtomsInFixedGroupsInPatch
00549            << "\n" << endi;
00550     }
00551 
00552 }
00553 
00554 void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
00555 {
00556   int i, j;             //  Loop counter
00557   BigReal kbT;          //  Boltzmann constant * Temp
00558   BigReal randnum;      //  Random number from -6.0 to 6.0
00559   BigReal kbToverM;     //  sqrt(Kb*Temp/Mass)
00560   SimParameters *simParams = Node::Object()->simParameters;
00561   Bool lesOn = simParams->lesOn;
00562   Random vel_random(simParams->randomSeed);
00563   int lesReduceTemp = lesOn && simParams->lesReduceTemp;
00564   BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
00565 
00566   kbT = Temp*BOLTZMANN;
00567   int count=0;
00568   int totalAtoms = inAtoms.size();
00569   for(i=0;i<totalAtoms;i++)
00570   {
00571     Real atomMs=inAtoms[i].mass;
00572 
00573     if (atomMs <= 0.) {
00574       kbToverM = 0.;
00575     } else {
00576       /*
00577        * lesOn is not supported in MEM_OPT_VERSION, so the original assignment
00578        * is simplified. --Chao Mei
00579        */
00580       //kbToverM = sqrt(kbT *
00581         //( lesOn && structure->get_fep_type(aid) ? tempFactor : 1.0 ) /
00582         //                  atomMs );
00583       kbToverM = sqrt(kbT * 1.0 / atomMs);
00584     }
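    // Each velocity component below is sampled by summing 12 uniform(0,1)
    // numbers and subtracting 6, which gives mean 0 and variance 1 (an
    // Irwin-Hall approximation to a standard normal), then scaling by
    // kbToverM = sqrt(kB*T/m).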
00585     for (randnum=0.0, j=0; j<12; j++)
00586     {
00587       randnum += vel_random.uniform();
00588     }
00589 
00590     randnum -= 6.0;
00591 
00592     inAtoms[i].velocity.x = randnum*kbToverM;
00593 
00594     for (randnum=0.0, j=0; j<12; j++)
00595     {
00596       randnum += vel_random.uniform();
00597     }
00598 
00599     randnum -= 6.0;
00600 
00601     inAtoms[i].velocity.y = randnum*kbToverM;
00602 
00603     for (randnum=0.0, j=0; j<12; j++)
00604     {
00605       randnum += vel_random.uniform();
00606     }
00607 
00608     randnum -= 6.0;
00609     
00610     inAtoms[i].velocity.z = randnum*kbToverM;
00611   }
00612 }
00613 #endif
00614 
00615 //----------------------------------------------------------------------
00616 // This should only be called on node 0.
00617 //----------------------------------------------------------------------
00618 FullAtomList *WorkDistrib::createAtomLists(const char *basename)
00619 {
00620   int i;
00621   StringList *current;  //  Pointer used to retrieve configuration items
00622   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00623   Node *node = nd.ckLocalBranch();
00624   PatchMap *patchMap = PatchMap::Object();
00625   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00626   PatchMgr *patchMgr = pm.ckLocalBranch();
00627   SimParameters *params = node->simParameters;
00628   Molecule *molecule = node->molecule;
00629   PDB *pdb = node->pdb;
00630 
00631   int numPatches = patchMap->numPatches();
00632   int numAtoms = pdb->num_atoms();
00633 
00634   Vector *positions = new Position[numAtoms];
00635   Vector *velocities = new Velocity[numAtoms];
00636 
00637  if ( basename ) {
00638   if ( params->binaryOutput ) {
00639     read_binary_file((std::string(basename)+".coor").c_str(), positions, numAtoms);
00640     read_binary_file((std::string(basename)+".vel").c_str(), velocities, numAtoms);
00641   } else {
00642     PDB coorpdb((std::string(basename)+".coor").c_str());
00643     if ( coorpdb.num_atoms() != numAtoms ) {
00644       NAMD_die("Incorrect atom count in coordinate pdb file");
00645     }
00646     coorpdb.get_all_positions(positions);
00647     velocities_from_PDB((std::string(basename)+".vel").c_str(), velocities, numAtoms);
00648   }
00649  } else {
00650   pdb->get_all_positions(positions);
00651 
00652   if ( params->initialTemp < 0.0 ) {
00653     Bool binvels=FALSE;
00654 
00655     //  Reading the velocities from a PDB
00656     current = node->configList->find("velocities");
00657 
00658     if (current == NULL) {
00659       current = node->configList->find("binvelocities");
00660       binvels = TRUE;
00661     }
00662 
00663     if (!binvels) {
00664       velocities_from_PDB(current->data, velocities, numAtoms);
00665     }
00666     else {
00667       velocities_from_binfile(current->data, velocities, numAtoms);
00668     }
00669   }
00670   else {
00671     // Random velocities for a given temperature
00672     random_velocities(params->initialTemp, molecule, velocities, numAtoms);
00673   }
00674  }
00675 
00676   //  If COMMotion == no, remove center of mass motion
00677   if (!(params->comMove)) {
00678     remove_com_motion(velocities, molecule, numAtoms);
00679   }
00680 
00681   FullAtomList *atoms = new FullAtomList[numPatches];
00682 
00683   const Lattice lattice = params->lattice;
00684 
00685     if ( params->staticAtomAssignment ) {
00686       FullAtomList sortAtoms;
00687       for ( i=0; i < numAtoms; i++ ) {
00688         HydrogenGroupID &h = molecule->hydrogenGroup[i];
00689         if ( ! h.isMP ) continue;
00690         FullAtom a;
00691         a.id = i;
00692         a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00693         a.position = positions[h.atomID];
00694         sortAtoms.add(a);
00695       } 
00696       int *order = new int[sortAtoms.size()];
00697       for ( i=0; i < sortAtoms.size(); i++ ) {
00698         order[i] = i;
00699       } 
00700       int *breaks = new int[numPatches];
00701       sortAtomsForPatches(order,breaks,sortAtoms.begin(),
00702                         sortAtoms.size(),numAtoms,
00703                         patchMap->gridsize_c(),
00704                         patchMap->gridsize_b(),
00705                         patchMap->gridsize_a());
00706 
00707       i = 0;
00708       for ( int pid = 0; pid < numPatches; ++pid ) {
00709         int iend = breaks[pid];
00710         for ( ; i<iend; ++i ) {
00711           FullAtom &sa = sortAtoms[order[i]];
00712           int mgs = sa.migrationGroupSize;
00713 /*
00714 CkPrintf("patch %d (%d %d %d) has group %d atom %d size %d at %.2f %.2f %.2f\n",
00715           pid, patchMap->index_a(pid), patchMap->index_b(pid), 
00716           patchMap->index_c(pid), order[i], sa.id, mgs,
00717           sa.position.x, sa.position.y, sa.position.z);
00718 */
00719           for ( int k=0; k<mgs; ++k ) {
00720             HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
00721             int aid = h.atomID;
00722             FullAtom a;
00723             a.id = aid;
00724             a.position = positions[aid];
00725             a.velocity = velocities[aid];
00726             a.vdwType = molecule->atomvdwtype(aid);
00727             a.status = molecule->getAtoms()[aid].status;
00728             a.langevinParam = molecule->langevin_param(aid);
00729             a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
00730             a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00731             if(params->rigidBonds != RIGID_NONE) {
00732               a.rigidBondLength = molecule->rigid_bond_length(aid);
00733             }else{
00734               a.rigidBondLength = 0.0;
00735             }
00736             atoms[pid].add(a);
00737           }
00738         }
00739 CkPrintf("patch %d (%d %d %d) has %d atoms\n",
00740           pid, patchMap->index_a(pid), patchMap->index_b(pid), 
00741           patchMap->index_c(pid), atoms[pid].size());
00742       }
00743       delete [] order;
00744       delete [] breaks;
00745     } else
00746     {
00747     // split atoms into patches based on migration group and position
00748     int aid, pid=0;
00749     for(i=0; i < numAtoms; i++)
00750       {
00751       // Assign atoms to patches without splitting hydrogen groups.
00752       // We know that the hydrogenGroup array is sorted with group parents
00753       // listed first.  Thus, only change the pid if an atom is a group parent.
00754       HydrogenGroupID &h = molecule->hydrogenGroup[i];
00755       aid = h.atomID;
00756       FullAtom a;
00757       a.id = aid;
00758       a.position = positions[aid];
00759       a.velocity = velocities[aid];
00760       a.vdwType = molecule->atomvdwtype(aid);
00761       a.status = molecule->getAtoms()[aid].status;
00762       a.langevinParam = molecule->langevin_param(aid);
00763       a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
00764       a.migrationGroupSize = h.isMP ? h.atomsInMigrationGroup : 0;
00765       if(params->rigidBonds != RIGID_NONE) {
00766         a.rigidBondLength = molecule->rigid_bond_length(aid);
00767       }else{
00768         a.rigidBondLength = 0.0;
00769       }
00770       if (h.isMP) {
00771         pid = patchMap->assignToPatch(positions[aid],lattice);
00772       } // else: don't change pid
00773       atoms[pid].add(a);
00774       }
00775     }
00776 
00777   delete [] positions;
00778   delete [] velocities;
00779 
00780   for(i=0; i < numPatches; i++)
00781   {
00782     ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
00783                           0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
00784                           0.5*(patchMap->min_c(i)+patchMap->max_c(i)));
00785 
00786     int n = atoms[i].size();
00787     FullAtom *a = atoms[i].begin();
00788     int j;
00789 //Modifications for alchemical fep
00790     Bool alchOn = params->alchOn;
00791 //fepe
00792     Bool lesOn = params->lesOn;
00793   
00794     Bool pairInteractionOn = params->pairInteractionOn;
00795 
00796     Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
00797 
00798     Transform mother_transform;
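    // For each migration group whose size differs from its hydrogen group, the
    // geometric center of the hydrogen-group parent atoms selects the periodic
    // image (transform) nearest the patch center; every atom in the group then
    // shares that same transform so the group migrates as a unit.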
00799     for(j=0; j < n; j++)
00800     {
00801       int aid = a[j].id;
00802 
00803       a[j].nonbondedGroupSize = 0;  // must be set based on coordinates
00804 
00805       a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
00806       a[j].fixedPosition = a[j].position;
00807 
00808       if ( a[j].migrationGroupSize ) {
00809        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
00810             Position pos = a[j].position;
00811             int mgs = a[j].migrationGroupSize;
00812             int c = 1;
00813             for ( int k=a[j].hydrogenGroupSize; k<mgs;
00814                                 k+=a[j+k].hydrogenGroupSize ) {
00815               pos += a[j+k].position;
00816               ++c;
00817             }
00818             pos *= 1./c;
00819             mother_transform = a[j].transform;  // should be 0,0,0
00820             pos = lattice.nearest(pos,center,&mother_transform);
00821             a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00822             a[j].transform = mother_transform;
00823        } else {
00824         a[j].position = lattice.nearest(
00825                 a[j].position, center, &(a[j].transform));
00826         mother_transform = a[j].transform;
00827        }
00828       } else {
00829         a[j].position = lattice.apply_transform(a[j].position,mother_transform);
00830         a[j].transform = mother_transform;
00831       }
00832 
00833       a[j].mass = molecule->atommass(aid);
00834       a[j].charge = molecule->atomcharge(aid);
00835 
00836 //Modifications for alchemical fep
00837       if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
00838         a[j].partition = molecule->get_fep_type(aid);
00839       } 
00840       else {
00841         a[j].partition = 0;
00842       }
00843 //fepe
00844 
00845     }
00846 
00847     int size, allfixed, k;
00848     for(j=0; j < n; j+=size) {
00849       size = a[j].hydrogenGroupSize;
00850       if ( ! size ) {
00851         NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
00852       }
00853       allfixed = 1;
00854       for ( k = 0; k < size; ++k ) {
00855         allfixed = ( allfixed && (a[j+k].atomFixed) );
00856       }
00857       for ( k = 0; k < size; ++k ) {
00858         a[j+k].groupFixed = allfixed ? 1 : 0;
00859       }
00860     }
00861 
00862     if ( params->outputPatchDetails ) {
00863       int patchId = i;
00864       int numAtomsInPatch = n;
00865       int numFixedAtomsInPatch = 0;
00866       int numAtomsInFixedGroupsInPatch = 0;
00867       for(j=0; j < n; j++) {
00868         numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
00869         numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
00870       }
00871       iout << "PATCH_DETAILS:"
00872            << " patch " << patchId
00873            << " atoms " << numAtomsInPatch
00874            << " fixed_atoms " << numFixedAtomsInPatch
00875            << " fixed_groups " << numAtomsInFixedGroupsInPatch
00876            << "\n" << endi;
00877     }
00878   }
00879 
00880   return atoms;
00881 
00882 }
00883 
00884 //----------------------------------------------------------------------
00885 // This should only be called on node 0.
00886 //----------------------------------------------------------------------
00887 void WorkDistrib::createHomePatches(void)
00888 {
00889   int i;
00890   PatchMap *patchMap = PatchMap::Object();
00891   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00892   PatchMgr *patchMgr = pm.ckLocalBranch();
00893 
00894   int numPatches = patchMap->numPatches();
00895 
00896   FullAtomList *atoms = createAtomLists();
00897     
00898 #ifdef MEM_OPT_VERSION
00899 /*  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00900   Node *node = nd.ckLocalBranch();
00901   node->molecule->delEachAtomSigs();
00902   node->molecule->delMassChargeSpace();
00903 */
00904 #endif
00905 
00906   int maxAtoms = -1;
00907   int maxPatch = -1;
00908   for(i=0; i < numPatches; i++) {
00909     int numAtoms = atoms[i].size();
00910     if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
00911   }
00912   iout << iINFO << "LARGEST PATCH (" << maxPatch <<
00913         ") HAS " << maxAtoms << " ATOMS\n" << endi;
00914 
00915   for(i=0; i < numPatches; i++)
00916   {
00917     if ( ! ( i % 100 ) )
00918     {
00919       DebugM(3,"Created " << i << " patches so far.\n");
00920     }
00921 
00922     patchMgr->createHomePatch(i,atoms[i]);
00923   }
00924 
00925   delete [] atoms;
00926 }
00927 
00928 void WorkDistrib::distributeHomePatches() {
00929   // ref BOC
00930   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00931   Node *node = nd.ckLocalBranch();
00932   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00933   PatchMgr *patchMgr = pm.ckLocalBranch();
00934   // ref singleton
00935   PatchMap *patchMap = PatchMap::Object();
00936 
00937   // Move patches to the proper node
00938   for(int i=0;i < patchMap->numPatches(); i++)
00939   {
00940     if (patchMap->node(i) != node->myid() )
00941     {
00942       DebugM(3,"patchMgr->movePatch("
00943         << i << "," << patchMap->node(i) << ")\n");
00944       patchMgr->movePatch(i,patchMap->node(i));
00945     }
00946   }
00947   patchMgr->sendMovePatches();
00948 }
00949 
00950 void WorkDistrib::reinitAtoms(const char *basename) {
00951 
00952   PatchMap *patchMap = PatchMap::Object();
00953   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
00954   PatchMgr *patchMgr = pm.ckLocalBranch();
00955 
00956   int numPatches = patchMap->numPatches();
00957 
00958   FullAtomList *atoms = createAtomLists(basename);
00959 
00960   for(int i=0; i < numPatches; i++) {
00961     patchMgr->sendAtoms(i,atoms[i]);
00962   }
00963 
00964   delete [] atoms;
00965 
00966 }
00967 
00968 
00969 //----------------------------------------------------------------------
00970 
00971 class PatchMapMsg : public CMessage_PatchMapMsg {
00972   public:
00973     char *patchMapData;
00974 };
00975 
00976 void WorkDistrib::sendPatchMap(void)
00977 {
00978   if ( CkNumPes() == 1 ) {
00979     patchMapArrived = true;
00980     return;
00981   }
00982 
00983   //Automatically enable spanning tree
00984   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00985   Node *node = nd.ckLocalBranch();
00986   SimParameters *params = node->simParameters;
00987   if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
00988 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
00989       || CkNumPes() > CkNumNodes()
00990       ) && ( CkNumNodes() > 1
00991 #endif
00992     ) && params->isSendSpanningTreeUnset() )
00993     ProxyMgr::Object()->setSendSpanning();
00994 
00995 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
00996   if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
00997         && params->isRecvSpanningTreeUnset() )
00998     ProxyMgr::Object()->setRecvSpanning();
00999 #endif
01000 
01001   int size = PatchMap::Object()->packSize();
01002 
01003   PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;
01004 
01005   PatchMap::Object()->pack(mapMsg->patchMapData, size);
01006 
01007   CProxy_WorkDistrib workProxy(thisgroup);
01008   workProxy[0].savePatchMap(mapMsg);
01009 }
01010 
01011 // savePatchMap() is called when the map message is received
01012 void WorkDistrib::savePatchMap(PatchMapMsg *msg)
01013 {
01014   // Use a resend to forward messages before processing.  Otherwise the
01015   // map distribution is slow on many CPUs.  We need to use a tree
01016   // rather than a broadcast because some implementations of broadcast
01017   // generate a copy of the message on the sender for each recipient.
01018   // This is because MPI doesn't allow re-use of an outstanding buffer.
01019 
01020   if ( CkMyRank() ) patchMapArrived = true;
01021 
01022   if ( patchMapArrived && CkMyPe() ) {
01023     PatchMap::Object()->unpack(msg->patchMapData);
01024 
01025     //Automatically enable spanning tree
01026     CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01027     Node *node = nd.ckLocalBranch();
01028     SimParameters *params = node->simParameters;
01029     if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
01030 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
01031         || CkNumPes() > CkNumNodes()
01032         ) && ( CkNumNodes() > 1
01033 #endif
01034       ) && params->isSendSpanningTreeUnset() )
01035       ProxyMgr::Object()->setSendSpanning();
01036 
01037 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
01038     if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
01039           && params->isRecvSpanningTreeUnset() )
01040       ProxyMgr::Object()->setRecvSpanning();
01041 #endif
01042   }
01043 
01044   if ( patchMapArrived ) {
01045     if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
01046       ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
01047     } else {
01048       delete msg;
01049     }
01050     return;
01051   }
01052 
01053   patchMapArrived = true;
01054 
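  // Forward the map along an implicit binary tree over nodes: narrow the node
  // range [range_begin, range_end) by repeated bisection until this node sits
  // at its front, then send to the first PE of the next node (send_near), to
  // the first PE of the node halfway through the remaining range (send_far),
  // and to this PE for local processing.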
01055   int self = CkMyNode();
01056   int range_begin = 0;
01057   int range_end = CkNumNodes();
01058   while ( self != range_begin ) {
01059     ++range_begin;
01060     int split = range_begin + ( range_end - range_begin ) / 2;
01061     if ( self < split ) { range_end = split; }
01062     else { range_begin = split; }
01063   }
01064   int send_near = self + 1;
01065   int send_far = send_near + ( range_end - send_near ) / 2;
01066 
01067   int pids[3];
01068   int npid = 0;
01069   if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
01070   if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
01071   pids[npid++] = CkMyPe();  // always send the message to ourselves
01072   CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
01073 }
01074 
01075 
01076 class ComputeMapMsg : public CMessage_ComputeMapMsg {
01077   public:
01078     int nComputes;
01079     ComputeMap::ComputeData *computeMapData;
01080 };
01081 
01082 void WorkDistrib::sendComputeMap(void)
01083 {
01084   if ( CkNumNodes() == 1 ) {
01085     computeMapArrived = true;
01086     ComputeMap::Object()->initPtrs();
01087     return;
01088   }
01089 
01090   int size = ComputeMap::Object()->numComputes();
01091 
01092   ComputeMapMsg *mapMsg = new (size, 0) ComputeMapMsg;
01093 
01094   mapMsg->nComputes = size;
01095   ComputeMap::Object()->pack(mapMsg->computeMapData);
01096 
01097   CProxy_WorkDistrib workProxy(thisgroup);
01098   workProxy[0].saveComputeMap(mapMsg);
01099 }
01100 
01101 // saveComputeMap() is called when the map message is received
01102 void WorkDistrib::saveComputeMap(ComputeMapMsg *msg)
01103 {
01104   // Use a resend to forward messages before processing.  Otherwise the
01105   // map distribution is slow on many CPUs.  We need to use a tree
01106   // rather than a broadcast because some implementations of broadcast
01107   // generate a copy of the message on the sender for each recipient.
01108   // This is because MPI doesn't allow re-use of an outstanding buffer.
01109 
01110   if ( CkMyRank() ) {
01111     NAMD_bug("WorkDistrib::saveComputeMap called on non-rank-zero pe");
01112   }
01113 
01114   if ( computeMapArrived && CkMyPe() ) {
01115     ComputeMap::Object()->unpack(msg->nComputes, msg->computeMapData);
01116   }
01117 
01118   if ( computeMapArrived ) {
01119     delete msg;
01120     ComputeMap::Object()->initPtrs();
01121     return;
01122   }
01123 
01124   computeMapArrived = true;
01125 
01126   int self = CkMyNode();
01127   int range_begin = 0;
01128   int range_end = CkNumNodes();
01129   while ( self != range_begin ) {
01130     ++range_begin;
01131     int split = range_begin + ( range_end - range_begin ) / 2;
01132     if ( self < split ) { range_end = split; }
01133     else { range_begin = split; }
01134   }
01135   int send_near = self + 1;
01136   int send_far = send_near + ( range_end - send_near ) / 2;
01137 
01138   int pids[3];
01139   int npid = 0;
01140   if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
01141   if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
01142   pids[npid++] = CkMyPe();  // always send the message to ourselves
01143   CProxy_WorkDistrib(thisgroup).saveComputeMap(msg,npid,pids);
01144 }
01145 
01146 
01147 //----------------------------------------------------------------------
01148 void WorkDistrib::patchMapInit(void)
01149 {
01150   PatchMap *patchMap = PatchMap::Object();
01151   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01152   Node *node = nd.ckLocalBranch();
01153   SimParameters *params = node->simParameters;
01154   Lattice lattice = params->lattice;
01155 
01156   BigReal patchSize = params->patchDimension;
01157 
01158 #ifndef MEM_OPT_VERSION
01159   const int totalAtoms = node->pdb->num_atoms();
01160 #else
01161   const int totalAtoms = node->molecule->numAtoms;
01162 #endif
01163 
01164   ScaledPosition xmin, xmax;
01165 
01166   double maxNumPatches = 1.e9;  // need to adjust fractional values
01167   if ( params->minAtomsPerPatch > 0 )
01168     maxNumPatches = totalAtoms / params->minAtomsPerPatch;
01169 
01170   DebugM(3,"Mapping patches\n");
01171   if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
01172     xmin = 0.;  xmax = 0.;
01173   }
01174   else if ( params->FMAOn || params->MSMOn || params->FMMOn ) {
01175   // Need to use full box for FMA to match NAMD 1.X results.
01176 #if 0
01177     node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
01178     node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
01179     node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
01180 #endif
01181     node->pdb->find_extremes(lattice);
01182     node->pdb->get_extremes(xmin, xmax);
01183 #if 0
01184     printf("+++ center=%.4f %.4f %.4f\n",
01185         lattice.origin().x, lattice.origin().y, lattice.origin().z);
01186     printf("+++ xmin=%.4f  xmax=%.4f\n", xmin.x, xmax.x);
01187     printf("+++ ymin=%.4f  ymax=%.4f\n", xmin.y, xmax.y);
01188     printf("+++ zmin=%.4f  zmax=%.4f\n", xmin.z, xmax.z);
01189 #endif
01190   // Otherwise, this allows a small number of stray atoms.
01191   }
01192   else {
01193 #if 0
01194     node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
01195     node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
01196     node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
01197 #endif
01198     node->pdb->find_extremes(lattice, 1.0);
01199     node->pdb->get_extremes(xmin, xmax);
01200     iout << iINFO << "ORIGINAL ATOMS MINMAX IS " << xmin << "  " << xmax << "\n" << endi;
01201     double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
01202     if ( frac < 0.9 ) { frac = 0.9; }
01203     node->pdb->find_extremes(lattice, frac);
01204     node->pdb->get_extremes(xmin, xmax);
01205     iout << iINFO << "ADJUSTED ATOMS MINMAX IS " << xmin << "  " << xmax << "\n" << endi;
01206   }
01207 
01208 #if 0
01209   BigReal origin_shift;
01210   origin_shift = lattice.a_r() * lattice.origin();
01211   xmin.x -= origin_shift;
01212   xmax.x -= origin_shift;
01213   origin_shift = lattice.b_r() * lattice.origin();
01214   xmin.y -= origin_shift;
01215   xmax.y -= origin_shift;
01216   origin_shift = lattice.c_r() * lattice.origin();
01217   xmin.z -= origin_shift;
01218   xmax.z -= origin_shift;
01219 #endif
01220 
01221   // SimParameters default is -1 for unset
01222   int twoAwayX = params->twoAwayX;
01223   int twoAwayY = params->twoAwayY;
01224   int twoAwayZ = params->twoAwayZ;
01225 
01226   // SASA implementation is not compatible with twoAway patches
01227   if (params->LCPOOn && patchSize < 32.4) {
01228     if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
01229       iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
01230     }
01231     twoAwayX = twoAwayY = twoAwayZ = 0;
01232   }
01233 
01234   // if you think you know what you're doing, go right ahead
01235   if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
01236   if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
01237   if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
01238   if ( params->maxPatches > 0 ) {
01239       maxNumPatches = params->maxPatches;
01240       iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
01241                                 maxNumPatches << "\n" << endi;
01242   }
01243 
01244   int numpes = CkNumPes();
01245   SimParameters *simparam = Node::Object()->simParameters;
01246   if(simparam->simulateInitialMapping) {
01247     numpes = simparam->simulatedPEs;
01248     delete [] patchMap->nPatchesOnNode;
01249     patchMap->nPatchesOnNode = new int[numpes];
01250     memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));    
01251   }
01252 
01253 #if defined(NAMD_CUDA) || defined(NAMD_MIC)
01254   // for CUDA be sure there are more patches than pes
01255 
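  // If the default grid yields fewer patches than PEs, enable twoAwayX, then
  // twoAwayY, then twoAwayZ (splitting patches in two along that axis) until
  // there is at least one patch per PE, but only for options the user left
  // unset.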
01256   int numPatches = patchMap->sizeGrid(
01257         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01258         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01259   if ( numPatches < numpes && twoAwayX < 0 ) {
01260     twoAwayX = 1;
01261     numPatches = patchMap->sizeGrid(
01262         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01263         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01264   }
01265   if ( numPatches < numpes && twoAwayY < 0 ) {
01266     twoAwayY = 1;
01267     numPatches = patchMap->sizeGrid(
01268         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01269         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01270   }
01271   if ( numPatches < numpes && twoAwayZ < 0 ) {
01272     twoAwayZ = 1;
01273     numPatches = patchMap->sizeGrid(
01274         xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
01275         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01276   }
01277   if ( numPatches < numpes ) {
01278     #if defined(NAMD_MIC)
01279     NAMD_die("MIC-enabled NAMD requires at least one patch per thread.");
01280     #endif
01281   }
01282   if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
01283     int exactFit = numPatches - numPatches % numpes;
01284     int newNumPatches = patchMap->sizeGrid(
01285         xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
01286         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01287     if ( newNumPatches == exactFit ) {
01288       iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
01289       maxNumPatches = exactFit;
01290     }
01291   }
01292 
01293   patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
01294         params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
01295         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
01296 
01297 #else
01298 
01299   int availPes = numpes;
01300   if ( params->noPatchesOnZero && numpes > 1 ) {
01301       availPes -= 1;
01302       if(params->noPatchesOnOne && numpes > 2)
01303         availPes -= 1;
01304   }
01305 #ifdef MEM_OPT_VERSION
01306   if(params->noPatchesOnOutputPEs && numpes - params->numoutputprocs >2)
01307     {
01308       availPes -= params->numoutputprocs;
01309       if ( params->noPatchesOnZero && numpes > 1 && isOutputProcessor(0)){
01310         availPes++;
01311       }
01312       if ( params->noPatchesOnOne && numpes > 2 && isOutputProcessor(1)){
01313         availPes++;
01314       }
01315     }
01316 #endif
01317 
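  // Non-accelerator builds start from the finest grid (twoAway on where unset)
  // and successively disable twoAwayZ, then twoAwayY, then twoAwayX whenever
  // the patch count exceeds the target fraction of available PEs or
  // maxNumPatches.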
01318   int numPatches = patchMap->sizeGrid(
01319         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01320         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01321   if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
01322        ) && twoAwayZ < 0 ) {
01323     twoAwayZ = 0;
01324     numPatches = patchMap->sizeGrid(
01325         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01326         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01327   }
01328   if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
01329        ) && twoAwayY < 0 ) {
01330     twoAwayY = 0;
01331     numPatches = patchMap->sizeGrid(
01332         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01333         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01334   }
01335   if ( ( numPatches > availPes || numPatches > maxNumPatches
01336        ) && twoAwayX < 0 ) {
01337     twoAwayX = 0;
01338     numPatches = patchMap->sizeGrid(
01339         xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
01340         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01341   }
01342   if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
01343     int newNumPatches = patchMap->sizeGrid(
01344         xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
01345         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01346     if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
01347       iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
01348       maxNumPatches = availPes;
01349     }
01350   }
01351 
01352   patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
01353         params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
01354         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
01355 
01356 #endif
01357 
01358 }
01359 
01360 
01361 //----------------------------------------------------------------------
01362 void WorkDistrib::assignNodeToPatch()
01363 {
01364   PatchMap *patchMap = PatchMap::Object();
01365   int nNodes = Node::Object()->numNodes();
01366   SimParameters *simparam = Node::Object()->simParameters;
01367   if(simparam->simulateInitialMapping) {
01368           nNodes = simparam->simulatedPEs;
01369   }
01370 
01371 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP 
01372   TopoManager tmgr;
01373   int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
01374   if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
01375     CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
01376   }
01377   else
01378 #endif
01379   assignPatchesSpaceFillingCurve();       
01380   
01381   int *nAtoms = new int[nNodes];
01382   int numAtoms=0;
01383   int i;
01384   for(i=0; i < nNodes; i++)
01385     nAtoms[i] = 0;
01386 
01387   for(i=0; i < patchMap->numPatches(); i++)
01388   {
01389     //    iout << iINFO << "Patch " << i << " has " 
01390     //   << patchMap->patch(i)->getNumAtoms() << " atoms and "
01391     //   << patchMap->patch(i)->getNumAtoms() * 
01392     //            patchMap->patch(i)->getNumAtoms() 
01393     //   << " pairs.\n" << endi;         
01394 #ifdef MEM_OPT_VERSION
01395       numAtoms += patchMap->numAtoms(i);
01396       nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);         
01397 #else
01398     if (patchMap->patch(i)) {
01399       numAtoms += patchMap->patch(i)->getNumAtoms();
01400       nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();     
01401     }
01402 #endif
01403   }
01404 
01405   if ( numAtoms != Node::Object()->molecule->numAtoms ) {
01406     for(i=0; i < nNodes; i++)
01407       iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
01408     iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
01409     NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
01410   }
01411 
01412   delete [] nAtoms;
01413  
01414   //  PatchMap::Object()->printPatchMap();
01415 }
01416 
01417 //----------------------------------------------------------------------
01418 // void WorkDistrib::assignPatchesSlices() 
01419 // {
01420 //   int pid; 
01421 //   int assignedNode = 0;
01422 //   PatchMap *patchMap = PatchMap::Object();
01423 //   Node *node = CLocalBranch(Node, CkpvAccess(BOCclass_group).node);
01424 
01425 //   int *numAtoms = new int[node->numNodes()];
01426 //   for (int i=0; i<node->numNodes(); i++) {
01427 //     numAtoms[i] = 0;
01428 //   }
01429 
01430 //   // Assign patch to node with least atoms assigned.
01431 //   for(pid=0; pid < patchMap->numPatches(); pid++) {
01432 //     assignedNode = 0;
01433 //     for (i=1; i < node->numNodes(); i++) {
01434 //       if (numAtoms[i] < numAtoms[assignedNode]) assignedNode = i;
01435 //     }
01436 //     patchMap->assignNode(pid, assignedNode);
01437 //     numAtoms[assignedNode] += patchMap->patch(pid)->getNumAtoms();
01438 
01439 //     /*
01440 //     iout << iINFO << "Patch (" << pid << ") has " 
01441 //       << patchMap->patch(pid)->getNumAtoms() 
01442 //       << " atoms:  Assigned to Node(" << assignedNode << ")\n" 
01443 //       << endi;
01444 //     */
01445 //   }
01446 
01447 //   delete[] numAtoms;
01448 // }
01449 
01450 //----------------------------------------------------------------------
01451 void WorkDistrib::assignPatchesToLowestLoadNode() 
01452 {
01453   int pid; 
01454   int assignedNode = 0;
01455   PatchMap *patchMap = PatchMap::Object();
01456   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01457   Node *node = nd.ckLocalBranch();
01458   SimParameters *simParams = node->simParameters;
01459   int ncpus = node->numNodes();
01460   if(simParams->simulateInitialMapping) {
01461           ncpus = simParams->simulatedPEs;
01462   }
01463 
01464   int *load = new int[ncpus];
01465   int *assignedNodes = new int[patchMap->numPatches()];
01466   for (int i=0; i<ncpus; i++) {
01467     load[i] = 0;
01468   }
01469   CkPrintf("assignPatchesToLowestLoadNode\n");
01470   int defaultNode = 0;
01471   if ( simParams->noPatchesOnZero && ncpus > 1 ){
01472     defaultNode = 1;
01473     if( simParams->noPatchesOnOne && ncpus > 2)
01474       defaultNode = 2;
01475   }
01476   // Assign patch to node with least atoms assigned.
01477   for(pid=0; pid < patchMap->numPatches(); pid++) {
01478     assignedNode = defaultNode;
01479     for (int i=assignedNode + 1; i < ncpus; i++) {
01480       if (load[i] < load[assignedNode]) assignedNode = i;
01481     }
01482     assignedNodes[pid] = assignedNode;
01483 #ifdef MEM_OPT_VERSION
01484     load[assignedNode] += patchMap->numAtoms(pid) + 1;
01485 #else
01486     load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
01487 #endif
01488   }
01489 
01490   delete[] load;
01491   sortNodesAndAssign(assignedNodes);
01492   delete[] assignedNodes;
01493 }
01494 
01495 //----------------------------------------------------------------------
01496 void WorkDistrib::assignPatchesBitReversal() 
01497 {
01498   int pid; 
01499   PatchMap *patchMap = PatchMap::Object();
01500   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01501   Node *node = nd.ckLocalBranch();
01502   SimParameters *simparam = node->simParameters;
01503 
01504   int ncpus = node->numNodes();
01505   if(simparam->simulateInitialMapping) {
01506           ncpus = simparam->simulatedPEs;
01507   }
01508   int npatches = patchMap->numPatches();
01509   if ( ncpus <= npatches )
01510     NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");
01511 
01512   SortableResizeArray<int> seq(ncpus);
01513   // avoid using node 0 (reverse of 0 is 0 so start at 1)
01514   for ( int i = 1; i < ncpus; ++i ) {
01515     seq[i-1] = peDiffuseOrdering[i];
01516   }
01517 
01518   // extract and sort patch locations
01519   sortNodesAndAssign(seq.begin());
01520   if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
01521 }
01522 
01523 //----------------------------------------------------------------------
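// nodesort orders nodes by the average patch grid index (a, then b, then c) of
// the patches assigned to them; sortNodesAndAssign() below uses this to map
// spatially contiguous groups of patches onto PEs taken in compact
// (node-grouped) order.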
01524 struct nodesort {
01525   int node;
01526   int a_total;
01527   int b_total;
01528   int c_total;
01529   int npatches;
01530   nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }
01531   int operator==(const nodesort &o) const {
01532     float a1 = ((float)a_total)/((float)npatches);
01533     float a2 = ((float)o.a_total)/((float)o.npatches);
01534     float b1 = ((float)b_total)/((float)npatches);
01535     float b2 = ((float)o.b_total)/((float)o.npatches);
01536     float c1 = ((float)c_total)/((float)npatches);
01537     float c2 = ((float)o.c_total)/((float)o.npatches);
01538     return ((a1 == a2) && (b1 == b2) && (c1 == c2));
01539   }
01540   int operator<(const nodesort &o) const {
01541     float a1 = ((float)a_total)/((float)npatches);
01542     float a2 = ((float)o.a_total)/((float)o.npatches);
01543     float b1 = ((float)b_total)/((float)npatches);
01544     float b2 = ((float)o.b_total)/((float)o.npatches);
01545     float c1 = ((float)c_total)/((float)npatches);
01546     float c2 = ((float)o.c_total)/((float)o.npatches);
01547     return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
01548                 ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
01549   }
01550 };
01551 
01552 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
01553   // if baseNodes is zero (default) then set both nodes and basenodes
01554   // if baseNodes is nonzero then this is a second call to set basenodes only
01555   int i, pid; 
01556   PatchMap *patchMap = PatchMap::Object();
01557   int npatches = patchMap->numPatches();
01558   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01559   Node *node = nd.ckLocalBranch();
01560   int nnodes = node->numNodes();
01561   SimParameters *simparam = node->simParameters;
01562   if(simparam->simulateInitialMapping) {
01563           nnodes = simparam->simulatedPEs;
01564   }
01565 
01566   ResizeArray<nodesort> allnodes(nnodes);
01567   for ( i=0; i < nnodes; ++i ) {
01568     allnodes[i].node = i;
01569   }
01570   for ( pid=0; pid<npatches; ++pid ) {
01571     // iout << pid << " " << assignedNode[pid] << "\n" << endi;
01572     allnodes[assignedNode[pid]].npatches++;
01573     allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
01574     allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
01575     allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
01576   }
01577   SortableResizeArray<nodesort> usednodes(nnodes);
01578   usednodes.resize(0);
01579   for ( i=0; i < nnodes; ++i ) {
01580     if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
01581   }
01582   usednodes.sort();
01583   int i2 = 0;
01584   for ( i=0; i < nnodes; ++i ) {
01585     int pe = peCompactOrdering[i];
01586     if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
01587   }
01588 
01589   for ( pid=0; pid<npatches; ++pid ) {
01590     // iout << pid << " " <<  allnodes[assignedNode[pid]].node << "\n" << endi;
01591     if ( ! baseNodes ) {
01592       patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);      
01593     }
01594     patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);    
01595   }
01596 }
01597 
01598 //----------------------------------------------------------------------
01599 void WorkDistrib::assignPatchesRoundRobin() 
01600 {
01601   int pid; 
01602   PatchMap *patchMap = PatchMap::Object();
01603   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
01604   Node *node = nd.ckLocalBranch();
01605   SimParameters *simparam = node->simParameters;
01606   int ncpus = node->numNodes();
01607   if(simparam->simulateInitialMapping) {
01608           ncpus = simparam->simulatedPEs;
01609   }
01610   int *assignedNode = new int[patchMap->numPatches()];
01611 
01612   for(pid=0; pid < patchMap->numPatches(); pid++) {
01613     assignedNode[pid] = pid % ncpus;
01614   }
01615 
01616   sortNodesAndAssign(assignedNode);
01617   delete [] assignedNode;
01618 }
01619 
01620 //----------------------------------------------------------------------
01621 void WorkDistrib::assignPatchesRecursiveBisection() 
01622 {
01623   PatchMap *patchMap = PatchMap::Object();
01624   int *assignedNode = new int[patchMap->numPatches()];
01625   SimParameters *simParams = Node::Object()->simParameters;
01626   int numNodes = Node::Object()->numNodes();
01627   if(simParams->simulateInitialMapping) {
01628           numNodes = simParams->simulatedPEs;
01629   }
01630   
01631   int usedNodes = numNodes;
01632   int unusedNodes = 0;
01633   CkPrintf("assignPatchesRecursiveBisection\n");
01634   if ( simParams->noPatchesOnZero && numNodes > 1 ){
01635     usedNodes -= 1;
01636     if(simParams->noPatchesOnOne && numNodes > 2)
01637       usedNodes -= 1;
01638   }
01639   unusedNodes = numNodes - usedNodes;
01640   RecBisection recBisec(usedNodes,PatchMap::Object());
01641   if ( recBisec.partition(assignedNode) ) {
01642     if ( unusedNodes !=0 ) {
01643       for ( int i=0; i<patchMap->numPatches(); ++i ) {
01644         assignedNode[i] += unusedNodes;
01645       }
01646     }
01647     sortNodesAndAssign(assignedNode);
01648     delete [] assignedNode;
01649   } else {
01650     // free the array here since the same array will be allocated
01651     // in the assignPatchesToLowestLoadNode function, thus reducing
01652     // temporary memory usage
01653     delete [] assignedNode; 
01654     
01655     iout << iWARN 
01656          << "WorkDistrib: Recursive bisection failed, "
01657          << "invoking space-filling curve algorithm\n";
01658     assignPatchesSpaceFillingCurve();
01659   }
01660 }
01661 
01662 // class to re-order dimensions in decreasing size
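// The constructor below records which torus coordinates are actually
// occupied by this job's PEs; for wrap-around (torus) dimensions it finds
// the largest unused gap and rotates coordinates so that gap falls at the
// end, then ranks the dimensions by the extent of the occupied range
// (a_dim..e_dim, 0 = longest).  coords() reports a PE's rotated position
// along the three longest dimensions only.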
01663 struct TopoManagerWrapper {
01664   TopoManager tmgr;
01665   int a_dim, b_dim, c_dim, d_dim, e_dim;
01666   int a_rot, b_rot, c_rot, d_rot, e_rot;
01667   int a_mod, b_mod, c_mod, d_mod, e_mod;
01668   int fixpe(int pe) {  // compensate for lame fallback topology information
01669     return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
01670   }
01671   TopoManagerWrapper() {
01672 #if CMK_BLUEGENEQ
01673     int na=tmgr.getDimNA();
01674     int nb=tmgr.getDimNB();
01675     int nc=tmgr.getDimNC();
01676     int nd=tmgr.getDimND();
01677     int ne=tmgr.getDimNE();
01678 #else
01679     int na=tmgr.getDimNX();
01680     int nb=tmgr.getDimNY();
01681     int nc=tmgr.getDimNZ();
01682     int nd=1;
01683     int ne=1;
01684 #endif
01685     ResizeArray<int> a_flags(na);
01686     ResizeArray<int> b_flags(nb);
01687     ResizeArray<int> c_flags(nc);
01688     ResizeArray<int> d_flags(nd);
01689     ResizeArray<int> e_flags(ne);
01690     for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
01691     for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
01692     for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
01693     for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
01694     for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
01695     int npes = CkNumPes();
01696     for ( int pe=0; pe<npes; ++pe ) {
01697       int a,b,c,d,e,t;
01698 #if CMK_BLUEGENEQ
01699       tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
01700 #else
01701       tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
01702       d=0; e=0;
01703 #endif
01704       if ( a < 0 || a >= na ) NAMD_bug("inconsistent torus topology!");
01705       if ( b < 0 || b >= nb ) NAMD_bug("inconsistent torus topology!");
01706       if ( c < 0 || c >= nc ) NAMD_bug("inconsistent torus topology!");
01707       if ( d < 0 || d >= nd ) NAMD_bug("inconsistent torus topology!");
01708       if ( e < 0 || e >= ne ) NAMD_bug("inconsistent torus topology!");
01709       a_flags[a] = 1;
01710       b_flags[b] = 1;
01711       c_flags[c] = 1;
01712       d_flags[d] = 1;
01713       e_flags[e] = 1;
01714     }
01715     iout << iINFO << "TORUS A SIZE " << na << " USING";
01716     for ( int i=0; i<na; ++i ) { if ( a_flags[i] ) iout << " " << i; }
01717     iout << "\n" << endi;
01718     iout << iINFO << "TORUS B SIZE " << nb << " USING";
01719     for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
01720     iout << "\n" << endi;
01721     iout << iINFO << "TORUS C SIZE " << nc << " USING";
01722     for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
01723     iout << "\n" << endi;
01724 #if CMK_BLUEGENEQ
01725     iout << iINFO << "TORUS D SIZE " << nd << " USING";
01726     for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
01727     iout << "\n" << endi;
01728     iout << iINFO << "TORUS E SIZE " << ne << " USING";
01729     for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
01730     iout << "\n" << endi;
01731 #endif
01732     // find most compact representation of our subset
01733     a_rot = b_rot = c_rot = d_rot = e_rot = 0;
01734     a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
01735 #if CMK_BLUEGENEQ
01736     if ( tmgr.absA(na) == 0 ) // torus
01737 #else
01738     if ( tmgr.absX(na) == 0 ) // torus
01739 #endif
01740       for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
01741         if ( a_flags[i%na] ) gapstart = i+1;
01742         else if ( i - gapstart >= gaplen ) {
01743           a_rot = 2*na-i-1; gaplen = i - gapstart;
01744         }
01745       }
01746 #if CMK_BLUEGENEQ
01747     if ( tmgr.absB(nb) == 0 ) // torus
01748 #else
01749     if ( tmgr.absY(nb) == 0 ) // torus
01750 #endif
01751       for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
01752         if ( b_flags[i%nb] ) gapstart = i+1;
01753         else if ( i - gapstart >= gaplen ) {
01754           b_rot = 2*nb-i-1; gaplen = i - gapstart;
01755         }
01756       }
01757 #if CMK_BLUEGENEQ
01758     if ( tmgr.absC(nc) == 0 ) // torus
01759 #else
01760     if ( tmgr.absZ(nc) == 0 ) // torus
01761 #endif
01762       for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
01763         if ( c_flags[i%nc] ) gapstart = i+1;
01764         else if ( i - gapstart >= gaplen ) {
01765           c_rot = 2*nc-i-1; gaplen = i - gapstart;
01766         }
01767       }
01768 #if CMK_BLUEGENEQ
01769     if ( tmgr.absD(nd) == 0 ) // torus
01770       for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
01771         if ( d_flags[i%nd] ) gapstart = i+1;
01772         else if ( i - gapstart >= gaplen ) {
01773           d_rot = 2*nd-i-1; gaplen = i - gapstart;
01774         }
01775       }
01776     if ( tmgr.absE(ne) == 0 ) // torus
01777       for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
01778         if ( e_flags[i%ne] ) gapstart = i+1;
01779         else if ( i - gapstart >= gaplen ) {
01780           e_rot = 2*ne-i-1; gaplen = i - gapstart;
01781         }
01782       }
01783 #endif
01784     // order dimensions by length
01785     int a_min=na, a_max=-1;
01786     int b_min=nb, b_max=-1;
01787     int c_min=nc, c_max=-1;
01788     int d_min=nd, d_max=-1;
01789     int e_min=ne, e_max=-1;
01790     for ( int pe=0; pe<npes; ++pe ) {
01791       int a,b,c,d,e,t;
01792 #if CMK_BLUEGENEQ
01793       tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
01794 #else
01795       tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
01796       d=0; e=0;
01797 #endif
01798       a = (a+a_rot)%a_mod;
01799       b = (b+b_rot)%b_mod;
01800       c = (c+c_rot)%c_mod;
01801       d = (d+d_rot)%d_mod;
01802       e = (e+e_rot)%e_mod;
01803       if ( a < a_min ) a_min = a;
01804       if ( b < b_min ) b_min = b;
01805       if ( c < c_min ) c_min = c;
01806       if ( d < d_min ) d_min = d;
01807       if ( e < e_min ) e_min = e;
01808       if ( a > a_max ) a_max = a;
01809       if ( b > b_max ) b_max = b;
01810       if ( c > c_max ) c_max = c;
01811       if ( d > d_max ) d_max = d;
01812       if ( e > e_max ) e_max = e;
01813     }
01814     int a_len = a_max - a_min + 1;
01815     int b_len = b_max - b_min + 1;
01816     int c_len = c_max - c_min + 1;
01817     int d_len = d_max - d_min + 1;
01818     int e_len = e_max - e_min + 1;
01819     int lensort[5];
01820     lensort[0] = (a_len << 3) + 0;
01821     lensort[1] = (b_len << 3) + 1;
01822     lensort[2] = (c_len << 3) + 2;
01823     lensort[3] = (d_len << 3) + 3;
01824     lensort[4] = (e_len << 3) + 4;
01825     // CkPrintf("TopoManagerWrapper lensort before %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
01826     std::sort(lensort, lensort+5);
01827     // CkPrintf("TopoManagerWrapper lensort after %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
01828     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) a_dim = 4-i; }
01829     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) b_dim = 4-i; }
01830     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
01831     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) d_dim = 4-i; }
01832     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) e_dim = 4-i; }
01833 #if 0
01834     if ( a_len >= b_len && a_len >= c_len ) {
01835       a_dim = 0;
01836       if ( b_len >= c_len ) {
01837         b_dim = 1; c_dim = 2;
01838       } else {
01839         b_dim = 2; c_dim = 1;
01840       }
01841     } else if ( b_len >= a_len && b_len >= c_len ) {
01842       b_dim = 0;
01843       if ( a_len >= c_len ) {
01844         a_dim = 1; c_dim = 2;
01845       } else {
01846         a_dim = 2; c_dim = 1;
01847       }
01848     } else { // c is longest
01849       c_dim = 0;
01850       if ( a_len >= b_len ) {
01851         a_dim = 1; b_dim = 2;
01852       } else {
01853         a_dim = 2; b_dim = 1;
01854       }
01855     }
01856 #endif
01857     iout << iINFO << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
01858 #if CMK_BLUEGENEQ
01859     << " BY " << d_len << " BY " << e_len
01860 #endif
01861     << "\n" << endi;
01862     // CkPrintf("TopoManagerWrapper dims %d %d %d %d %d\n", a_dim, b_dim, c_dim, d_dim, e_dim);
01863   }
01864   void coords(int pe, int *crds) {
01865     int a,b,c,d,e,t;
01866 #if CMK_BLUEGENEQ
01867     tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
01868 #else
01869     tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
01870     d=0; e=0;
01871 #endif
01872     if ( a_dim < 3 ) crds[a_dim] = (a+a_rot)%a_mod;
01873     if ( b_dim < 3 ) crds[b_dim] = (b+b_rot)%b_mod;
01874     if ( c_dim < 3 ) crds[c_dim] = (c+c_rot)%c_mod;
01875     if ( d_dim < 3 ) crds[d_dim] = (d+d_rot)%d_mod;
01876     if ( e_dim < 3 ) crds[e_dim] = (e+e_rot)%e_mod;
01877   }
01878   int coord(int pe, int dim) {
01879     int crds[3];
01880     coords(pe,crds);
01881     return crds[dim];
01882   }
01883   struct pe_sortop_topo {
01884     TopoManagerWrapper &tmgr;
01885     const int *sortdims;
01886     pe_sortop_topo(TopoManagerWrapper &t, int *d) : tmgr(t), sortdims(d) {}
01887     bool operator() (int pe1, int pe2) const {
01888       int crds1[3], crds2[3];
01889       tmgr.coords(pe1,crds1);
01890       tmgr.coords(pe2,crds2);
01891       for ( int i=0; i<3; ++i ) {
01892         int d = sortdims[i];
01893         if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
01894       }
01895       const int *index = WorkDistrib::peCompactOrderingIndex;
01896       return ( index[pe1] < index[pe2] );
01897     }
01898   };
01899   int* sortAndSplit(int *node_begin, int *node_end, int splitdim) {
01900     if ( node_begin == node_end ) return node_begin;
01901     int tmins[3], tmaxs[3], tlens[3], sortdims[3];
01902     coords(*node_begin, tmins);
01903     coords(*node_begin, tmaxs);
01904     for ( int *peitr = node_begin; peitr != node_end; ++peitr ) {
01905       int tvals[3];
01906       coords(*peitr, tvals);
01907       for ( int i=0; i<3; ++i ) {
01908         if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
01909         if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
01910       }
01911     }
01912     for ( int i=0; i<3; ++i ) {
01913       tlens[i] = tmaxs[i] - tmins[i];
01914     }
01915     sortdims[0] = splitdim;
01916     for ( int i=0, j=0; i<3; ++i ) {
01917       if ( i != splitdim ) sortdims[++j] = i;
01918     }
01919     if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
01920       int tmp = sortdims[1];
01921       sortdims[1] = sortdims[2];
01922       sortdims[2] = tmp;
01923     }
01924     std::sort(node_begin,node_end,pe_sortop_topo(*this,sortdims));
01925     int *nodes = node_begin;
01926     int nnodes = node_end - node_begin;
01927     int i_split = 0;
01928 #if 0
01929     int c_split = coord(nodes[0],splitdim);
01930     for ( int i=0; i<nnodes; ++i ) {
01931       if ( coord(nodes[i],splitdim) != c_split ) {
01932         int mid = (nnodes+1)/2;
01933         if ( abs(i-mid) < abs(i_split-mid) ) {
01934           i_split = i;
01935           c_split = coord(nodes[i],splitdim);  // coord() expects a PE, not an index
01936         }
01937         else break;
01938       }
01939     }
01940 #endif
01941     for ( int i=0; i<nnodes; ++i ) {
01942       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
01943         int mid = (nnodes+1)/2;
01944         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
01945         else break;
01946       }
01947     }
01948     return ( node_begin + i_split );
01949   }
01950 };
01951 
01952 struct patch_sortop_curve_a {
01953   PatchMap *pmap;
01954   patch_sortop_curve_a(PatchMap *m) : pmap(m) {}
01955   inline bool operator() (int p1, int p2) const {
01956     int a1 = pmap->index_a(p1);
01957     int a2 = pmap->index_a(p2);
01958     if ( a1 < a2 ) return true;
01959     if ( a1 > a2 ) return false;
01960     int dir = ( (a1 & 1) ? -1 : 1 );
01961     int b1 = pmap->index_b(p1);
01962     int b2 = pmap->index_b(p2);
01963     if ( b1 * dir < b2 * dir ) return true;
01964     if ( b1 * dir > b2 * dir ) return false;
01965     dir *= ( (b1 & 1) ? -1 : 1 );
01966     int c1 = pmap->index_c(p1);
01967     int c2 = pmap->index_c(p2);
01968     if ( c1 * dir < c2 * dir ) return true;
01969     return false;
01970   }
01971 };
01972 
01973 struct patch_sortop_curve_b {
01974   PatchMap *pmap;
01975   patch_sortop_curve_b(PatchMap *m) : pmap(m) {}
01976   inline bool operator() (int p1, int p2) const {
01977     int a1 = pmap->index_b(p1);
01978     int a2 = pmap->index_b(p2);
01979     if ( a1 < a2 ) return true;
01980     if ( a1 > a2 ) return false;
01981     int dir = ( (a1 & 1) ? -1 : 1 );
01982     int b1 = pmap->index_a(p1);
01983     int b2 = pmap->index_a(p2);
01984     if ( b1 * dir < b2 * dir ) return true;
01985     if ( b1 * dir > b2 * dir ) return false;
01986     dir *= ( (b1 & 1) ? -1 : 1 );
01987     int c1 = pmap->index_c(p1);
01988     int c2 = pmap->index_c(p2);
01989     if ( c1 * dir < c2 * dir ) return true;
01990     return false;
01991   }
01992 };
01993 
01994 struct patch_sortop_curve_c {
01995   PatchMap *pmap;
01996   patch_sortop_curve_c(PatchMap *m) : pmap(m) {}
01997   inline bool operator() (int p1, int p2) const {
01998     int a1 = pmap->index_c(p1);
01999     int a2 = pmap->index_c(p2);
02000     if ( a1 < a2 ) return true;
02001     if ( a1 > a2 ) return false;
02002     int dir = ( (a1 & 1) ? -1 : 1 );
02003     int b1 = pmap->index_a(p1);
02004     int b2 = pmap->index_a(p2);
02005     if ( b1 * dir < b2 * dir ) return true;
02006     if ( b1 * dir > b2 * dir ) return false;
02007     dir *= ( (b1 & 1) ? -1 : 1 );
02008     int c1 = pmap->index_b(p1);
02009     int c2 = pmap->index_b(p2);
02010     if ( c1 * dir < c2 * dir ) return true;
02011     return false;
02012   }
02013 };
02014 
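// Orthogonal recursive bisection of the patch grid onto a list of PEs.
// Patch loads are atom counts (capped near the average per-node load, as
// noted below), the PE range is sorted by torus coordinates and split
// along the dimension matching the longest patch-grid axis (falling back
// to a physical-node boundary when the torus cannot be split), patches are
// ordered by a space-filling curve along that same axis, and the split
// point is chosen so each half receives its proportional share of the
// load.  Once a PE range can no longer be split, its patches are walked in
// curve order and dealt out to the PEs in load-proportional chunks.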
02015 static void recursive_bisect_with_curve(
02016   int *patch_begin, int *patch_end,
02017   int *node_begin, int *node_end,
02018   double *patchLoads,
02019   double *sortedLoads,
02020   int *assignedNode,
02021   TopoManagerWrapper &tmgr
02022   ) {
02023 
02024   SimParameters *simParams = Node::Object()->simParameters;
02025   PatchMap *patchMap = PatchMap::Object();
02026   int *patches = patch_begin;
02027   int npatches = patch_end - patch_begin;
02028   int *nodes = node_begin;
02029   int nnodes = node_end - node_begin;
02030 
02031   // assign patch loads
02032   double totalRawLoad = 0;
02033   for ( int i=0; i<npatches; ++i ) {
02034     int pid=patches[i];
02035 #ifdef MEM_OPT_VERSION
02036     double load = patchMap->numAtoms(pid) + 10;      
02037 #else
02038     double load = patchMap->patch(pid)->getNumAtoms() + 10;
02039 #endif
02040     patchLoads[pid] = load;
02041     sortedLoads[i] = load;
02042     totalRawLoad += load;
02043   }
02044   std::sort(sortedLoads,sortedLoads+npatches);
02045 
02046   // limit maxPatchLoad to adjusted average load per node
02047   double sumLoad = 0;
02048   double maxPatchLoad = 1;
02049   for ( int i=0; i<npatches; ++i ) {
02050     double load = sortedLoads[i];
02051     double total = sumLoad + (npatches-i) * load;
02052     if ( nnodes * load > total ) break;
02053     sumLoad += load;
02054     maxPatchLoad = load;
02055   }
02056   double totalLoad = 0;
02057   for ( int i=0; i<npatches; ++i ) {
02058     int pid=patches[i];
02059     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
02060     totalLoad += patchLoads[pid];
02061   }
02062   if ( nnodes * maxPatchLoad > totalLoad )
02063     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
02064 
02065   int a_len, b_len, c_len;
02066   int a_min, b_min, c_min;
02067   { // find dimensions
02068     a_min = patchMap->index_a(patches[0]);
02069     b_min = patchMap->index_b(patches[0]);
02070     c_min = patchMap->index_c(patches[0]);
02071     int a_max = a_min;
02072     int b_max = b_min;
02073     int c_max = c_min;
02074     for ( int i=1; i<npatches; ++i ) {
02075       int a = patchMap->index_a(patches[i]);
02076       int b = patchMap->index_b(patches[i]);
02077       int c = patchMap->index_c(patches[i]);
02078       if ( a < a_min ) a_min = a;
02079       if ( b < b_min ) b_min = b;
02080       if ( c < c_min ) c_min = c;
02081       if ( a > a_max ) a_max = a;
02082       if ( b > b_max ) b_max = b;
02083       if ( c > c_max ) c_max = c;
02084     }
02085     a_len = a_max - a_min;
02086     b_len = b_max - b_min;
02087     c_len = c_max - c_min;
02088   }
02089 
02090   int *node_split = node_begin;
02091 
02092   if ( simParams->disableTopology ) ; else
02093   if ( a_len >= b_len && a_len >= c_len ) {
02094     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
02095   } else if ( b_len >= a_len && b_len >= c_len ) {
02096     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
02097   } else if ( c_len >= a_len && c_len >= b_len ) {
02098     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
02099   }
02100 
02101   if ( node_split == node_begin ) {  // unable to split torus
02102     // make sure physical nodes are together
02103     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
02104     // find physical node boundary to split on
02105     int i_split = 0;
02106     for ( int i=0; i<nnodes; ++i ) {
02107       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
02108         int mid = (nnodes+1)/2;
02109         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
02110         else break;
02111       }
02112     }
02113     node_split = node_begin + i_split;
02114   }
02115 
02116   if ( node_split == node_begin ) {
02117     if ( simParams->verboseTopology ) {
02118       int crds[3];
02119       tmgr.coords(*node_begin, crds);
02120       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
02121                CmiPhysicalNodeID(*node_begin), *node_begin,
02122                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
02123                a_min, b_min, c_min, npatches,
02124                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
02125     }
02126 
02127     // final sort along a to minimize pme message count
02128     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02129 
02130     // walk through patches in sorted order
02131     int *node = node_begin;
02132     sumLoad = 0;
02133     for ( int i=0; i < npatches; ++i ) {
02134       int pid = patches[i];
02135       assignedNode[pid] = *node;
02136       sumLoad += patchLoads[pid];
02137       double targetLoad = totalLoad *
02138         ((double)(node-node_begin+1) / (double)nnodes);
02139       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02140                 i, *node,
02141                 patchMap->index_a(pid),
02142                 patchMap->index_b(pid),
02143                 patchMap->index_c(pid),
02144                 sumLoad, targetLoad);
02145       double extra = ( i+1 < npatches ? 0.5 * patchLoads[patches[i+1]] : 0 );
02146       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02147     }
02148 
02149     return;
02150   }
02151 
02152   if ( a_len >= b_len && a_len >= c_len ) {
02153     if ( 0 ) CkPrintf("sort a\n");
02154     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02155   } else if ( b_len >= a_len && b_len >= c_len ) {
02156     if ( 0 ) CkPrintf("sort b\n");
02157     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
02158   } else if ( c_len >= a_len && c_len >= b_len ) {
02159     if ( 0 ) CkPrintf("sort c\n");
02160     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
02161   }
02162 
02163   int *patch_split;
02164   { // walk through patches in sorted order
02165     int *node = node_begin;
02166     sumLoad = 0;
02167     for ( patch_split = patch_begin;
02168           patch_split != patch_end && node != node_split;
02169           ++patch_split ) {
02170       sumLoad += patchLoads[*patch_split];
02171       double targetLoad = totalLoad *
02172         ((double)(node-node_begin+1) / (double)nnodes);
02173       if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02174                 patch_split - patch_begin, *node,
02175                 patchMap->index_a(*patch_split),
02176                 patchMap->index_b(*patch_split),
02177                 patchMap->index_c(*patch_split),
02178                 sumLoad, targetLoad);
02179       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
02180       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02181     }
02182     double targetLoad = totalLoad *
02183       ((double)(node_split-node_begin) / (double)nnodes);
02184     if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
02185               node_split-node_begin, nnodes,
02186               patch_split-patch_begin, npatches,
02187               sumLoad, targetLoad);
02188   }
02189 
02190   // recurse
02191   recursive_bisect_with_curve(
02192     patch_begin, patch_split, node_begin, node_split,
02193     patchLoads, sortedLoads, assignedNode, tmgr);
02194   recursive_bisect_with_curve(
02195     patch_split, patch_end, node_split, node_end,
02196     patchLoads, sortedLoads, assignedNode, tmgr);
02197 }
02198 
02199 //----------------------------------------------------------------------
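// Top-level driver for space-filling-curve placement: builds the list of
// usable PEs from the diffuse ordering (skipping PE 0, optionally PE 1,
// and output PEs when so configured), truncates it to the number of
// patches if longer, delegates patch-to-PE mapping to
// recursive_bisect_with_curve(), and assigns each patch a base node from a
// separate block of PEs when more than twice as many PEs are available.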
02200 void WorkDistrib::assignPatchesSpaceFillingCurve() 
02201 {
02202   TopoManagerWrapper tmgr;
02203   PatchMap *patchMap = PatchMap::Object();
02204   const int numPatches = patchMap->numPatches();
02205   int *assignedNode = new int[numPatches];
02206   ResizeArray<double> patchLoads(numPatches);
02207   SortableResizeArray<double> sortedLoads(numPatches);
02208   int numNodes = Node::Object()->numNodes();
02209   SimParameters *simParams = Node::Object()->simParameters;
02210   if(simParams->simulateInitialMapping) {
02211           NAMD_die("simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
02212           numNodes = simParams->simulatedPEs;
02213   }
02214 
02215   ResizeArray<int> patchOrdering(numPatches);
02216   for ( int i=0; i<numPatches; ++i ) {
02217     patchOrdering[i] = i;
02218   }
02219 
02220   ResizeArray<int> nodeOrdering(numNodes);
02221   nodeOrdering.resize(0);
02222   for ( int i=0; i<numNodes; ++i ) {
02223     int pe = peDiffuseOrdering[(i+1)%numNodes];  // avoid 0 if possible
02224     if ( simParams->noPatchesOnZero && numNodes > 1 ) {
02225       if ( pe == 0 ) continue;
02226       if(simParams->noPatchesOnOne && numNodes > 2) {
02227         if ( pe == 1 ) continue;
02228       }
02229     }  
02230 #ifdef MEM_OPT_VERSION
02231     if(simParams->noPatchesOnOutputPEs && numNodes-simParams->numoutputprocs >2) {
02232       if ( isOutputProcessor(pe) ) continue;
02233     }
02234 #endif
02235     nodeOrdering.add(pe);
02236     if ( 0 ) CkPrintf("using pe %5d\n", pe);
02237   }
02238 
02239   int *node_begin = nodeOrdering.begin();
02240   int *node_end = nodeOrdering.end();
02241   if ( nodeOrdering.size() > numPatches ) {
02242     node_end = node_begin + numPatches;
02243   }
02244   std::sort(node_begin, node_end, pe_sortop_compact());
02245 
02246   int *basenode_begin = node_begin;
02247   int *basenode_end = node_end;
02248   if ( nodeOrdering.size() > 2*numPatches ) {
02249     basenode_begin = node_end;
02250     basenode_end = basenode_begin + numPatches;
02251     std::sort(basenode_begin, basenode_end, pe_sortop_compact());
02252   }
02253 
02254   if ( simParams->disableTopology ) {
02255     iout << iWARN << "IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << endi;
02256   }
02257 
02258   recursive_bisect_with_curve(
02259     patchOrdering.begin(), patchOrdering.end(),
02260     node_begin, node_end,
02261     patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
02262 
02263   std::sort(node_begin, node_end, pe_sortop_compact());
02264 
02265   int samenodecount = 0;
02266 
02267   for ( int pid=0; pid<numPatches; ++pid ) {
02268     int node = assignedNode[pid];
02269     patchMap->assignNode(pid, node);
02270     int nodeidx = std::lower_bound(node_begin, node_end, node,
02271                                    pe_sortop_compact()) - node_begin;
02272     int basenode = basenode_begin[nodeidx];
02273     patchMap->assignBaseNode(pid, basenode);
02274     if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
02275   }
02276 
02277   iout << iINFO << "Placed " << (samenodecount*100./numPatches) << "% of base nodes on same physical node as patch\n" << endi;
02278 
02279   delete [] assignedNode; 
02280 }
02281 
02282 //----------------------------------------------------------------------
02283 void WorkDistrib::mapComputes(void)
02284 {
02285   PatchMap *patchMap = PatchMap::Object();
02286   ComputeMap *computeMap = ComputeMap::Object();
02287   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02288   Node *node = nd.ckLocalBranch();
02289 
02290   DebugM(3,"Mapping computes\n");
02291 
02292   computeMap->allocateCids();
02293 
02294   // Handle full electrostatics
02295   if ( node->simParameters->fullDirectOn )
02296     mapComputeHomePatches(computeFullDirectType);
02297   if ( node->simParameters->FMAOn )
02298 #ifdef DPMTA
02299     mapComputeHomePatches(computeDPMTAType);
02300 #else
02301     NAMD_die("This binary does not include DPMTA (FMA).");
02302 #endif
02303   if ( node->simParameters->PMEOn ) {
02304 #ifdef DPME
02305     if ( node->simParameters->useDPME )
02306       mapComputeHomePatches(computeDPMEType);
02307     else {
02308       if (node->simParameters->useOptPME) {
02309         mapComputeHomePatches(optPmeType);
02310         if ( node->simParameters->pressureProfileEwaldOn )
02311           mapComputeHomePatches(computeEwaldType);
02312       }
02313       else {
02314         mapComputeHomePatches(computePmeType);
02315         if ( node->simParameters->pressureProfileEwaldOn )
02316           mapComputeHomePatches(computeEwaldType);
02317       }
02318     }
02319 #else
02320     if (node->simParameters->useOptPME) {
02321       mapComputeHomePatches(optPmeType);
02322       if ( node->simParameters->pressureProfileEwaldOn )
02323         mapComputeHomePatches(computeEwaldType);
02324     }
02325     else {      
02326 #ifdef NAMD_CUDA
02327       if (node->simParameters->usePMECUDA) {
02328         mapComputePatch(computePmeCUDAType);
02329       } else 
02330 #endif
02331       {
02332         mapComputePatch(computePmeType);
02333       }
02334       if ( node->simParameters->pressureProfileEwaldOn )
02335         mapComputeHomePatches(computeEwaldType);
02336     }
02337 #endif
02338   }
02339 
02340   if ( node->simParameters->globalForcesOn ) {
02341     DebugM(2,"adding ComputeGlobal\n");
02342     mapComputeHomePatches(computeGlobalType);
02343   }
02344 
02345   if ( node->simParameters->extForcesOn )
02346     mapComputeHomePatches(computeExtType);
02347 
02348   if ( node->simParameters->qmForcesOn )
02349     mapComputeHomePatches(computeQMType);
02350 
02351   if ( node->simParameters->GBISserOn )
02352     mapComputeHomePatches(computeGBISserType);
02353 
02354   if ( node->simParameters->MsmSerialOn )
02355     mapComputeHomePatches(computeMsmSerialType);
02356 #ifdef CHARM_HAS_MSA
02357   else if ( node->simParameters->MSMOn )
02358     mapComputeHomePatches(computeMsmMsaType);
02359 #else
02360   else if ( node->simParameters->MSMOn )
02361     mapComputeHomePatches(computeMsmType);
02362 #endif
02363 
02364   if ( node->simParameters->FMMOn )
02365     mapComputeHomePatches(computeFmmType);
02366 
02367 #ifdef NAMD_CUDA
02368 #ifdef BONDED_CUDA
02369   if (node->simParameters->bondedCUDA) {
02370     mapComputeNode(computeBondedCUDAType);
02371   }
02372 #endif
02373 #endif
02374 
02375 #ifdef NAMD_CUDA
02376   if (node->simParameters->useCUDA2) {
02377     mapComputeNode(computeNonbondedCUDA2Type);
02378   } else {
02379     mapComputeNode(computeNonbondedCUDAType);
02380   }
02381   mapComputeHomeTuples(computeExclsType);
02382   mapComputePatch(computeSelfExclsType);
02383 #endif
02384 
02385 #ifdef NAMD_MIC
02386   mapComputeNode(computeNonbondedMICType);
02387 #endif
02388 
02389   mapComputeNonbonded();
02390 
02391   if ( node->simParameters->LCPOOn ) {
02392     mapComputeLCPO();
02393   }
02394 
02395   // If we're doing true pair interactions, no need for bonded terms.
02396   // But if we're doing within-group interactions, we do need them.
02397   if ( !node->simParameters->pairInteractionOn || 
02398       node->simParameters->pairInteractionSelf) { 
02399     mapComputeHomeTuples(computeBondsType);
02400     mapComputeHomeTuples(computeAnglesType);
02401     mapComputeHomeTuples(computeDihedralsType);
02402     mapComputeHomeTuples(computeImpropersType);
02403     mapComputeHomeTuples(computeCrosstermsType);
02404     mapComputePatch(computeSelfBondsType);
02405     mapComputePatch(computeSelfAnglesType);
02406     mapComputePatch(computeSelfDihedralsType);
02407     mapComputePatch(computeSelfImpropersType);
02408     mapComputePatch(computeSelfCrosstermsType);
02409   }
02410 
02411   if ( node->simParameters->goGroPair ) {
02412       // JLai
02413       mapComputeHomeTuples(computeGromacsPairType);
02414       mapComputePatch(computeSelfGromacsPairType);
02415     // End of JLai
02416   }
02417 
02418   if ( node->simParameters->drudeOn ) {
02419     mapComputeHomeTuples(computeTholeType);
02420     mapComputePatch(computeSelfTholeType);
02421     mapComputeHomeTuples(computeAnisoType);
02422     mapComputePatch(computeSelfAnisoType);
02423   }
02424 
02425   if ( node->simParameters->eFieldOn )
02426     mapComputePatch(computeEFieldType);
02427   /* BEGIN gf */
02428   if ( node->simParameters->mgridforceOn )
02429     mapComputePatch(computeGridForceType);
02430   /* END gf */
02431   if ( node->simParameters->stirOn )
02432     mapComputePatch(computeStirType);
02433   if ( node->simParameters->sphericalBCOn )
02434     mapComputePatch(computeSphericalBCType);
02435   if ( node->simParameters->cylindricalBCOn )
02436     mapComputePatch(computeCylindricalBCType);
02437   if ( node->simParameters->tclBCOn ) {
02438     mapComputeHomePatches(computeTclBCType);
02439   }
02440   if ( node->simParameters->constraintsOn )
02441     mapComputePatch(computeRestraintsType);
02442   if ( node->simParameters->consForceOn )
02443     mapComputePatch(computeConsForceType);
02444   if ( node->simParameters->consTorqueOn )
02445     mapComputePatch(computeConsTorqueType);
02446 
02447     // store the latest compute map
02448   SimParameters *simParams = Node::Object()->simParameters;
02449   if (simParams->storeComputeMap) {
02450     computeMap->saveComputeMap(simParams->computeMapFilename);
02451   }
02452     // override mapping decision
02453   if (simParams->loadComputeMap) {
02454     computeMap->loadComputeMap(simParams->computeMapFilename);
02455     CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
02456   }
02457 }
02458 
02459 //----------------------------------------------------------------------
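// Place one compute of the given type (with no patch dependencies) on
// every PE that serves as the base node for at least one patch.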
02460 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
02461 {
02462   PatchMap *patchMap = PatchMap::Object();
02463   ComputeMap *computeMap = ComputeMap::Object();
02464   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02465   Node *node = nd.ckLocalBranch();
02466 
02467   int numNodes = node->numNodes();
02468   SimParameters *simparam = node->simParameters;
02469   if(simparam->simulateInitialMapping) {
02470           numNodes = simparam->simulatedPEs;
02471   }
02472 
02473   char *isBaseNode = new char[numNodes];
02474   memset(isBaseNode,0,numNodes*sizeof(char));
02475 
02476   int numPatches = patchMap->numPatches();
02477   for(int j=0; j<numPatches; j++) {
02478     isBaseNode[patchMap->basenode(j)] = 1;
02479   }
02480 
02481   for(int i=0; i<numNodes; i++) {
02482     if ( isBaseNode[i] ) {
02483       computeMap->storeCompute(i,0,type);
02484     }
02485   }
02486 
02487   delete [] isBaseNode;
02488 }
02489 
02490 //----------------------------------------------------------------------
02491 void WorkDistrib::mapComputeHomePatches(ComputeType type)
02492 {
02493   PatchMap *patchMap = PatchMap::Object();
02494   ComputeMap *computeMap = ComputeMap::Object();
02495   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02496   Node *node = nd.ckLocalBranch();
02497 
02498   int numNodes = node->numNodes();
02499   SimParameters *simparam = node->simParameters;
02500   if(simparam->simulateInitialMapping) {
02501           numNodes = simparam->simulatedPEs;
02502   }
02503 
02504   for(int i=0; i<numNodes; i++) {
02505     if ( patchMap->numPatchesOnNode(i) ) {
02506       computeMap->storeCompute(i,0,type);
02507     }
02508   }
02509 }
02510 
02511 //----------------------------------------------------------------------
02512 void WorkDistrib::mapComputePatch(ComputeType type)
02513 {
02514   PatchMap *patchMap = PatchMap::Object();
02515   ComputeMap *computeMap = ComputeMap::Object();
02516 
02517   PatchID i;
02518   ComputeID cid;
02519 
02520   for(i=0; i<patchMap->numPatches(); i++)
02521   {
02522     cid=computeMap->storeCompute(patchMap->node(i),1,type);
02523     computeMap->newPid(cid,i);
02524     patchMap->newCid(i,cid);
02525   }
02526 
02527 }
02528 
02529 //----------------------------------------------------------------------
02530 void WorkDistrib::mapComputeNode(ComputeType type)
02531 {
02532   PatchMap *patchMap = PatchMap::Object();
02533   ComputeMap *computeMap = ComputeMap::Object();
02534 
02535   PatchID i;
02536   ComputeID cid;
02537 
02538   int ncpus = CkNumPes();
02539   SimParameters *simparam = Node::Object()->simParameters;
02540   if(simparam->simulateInitialMapping) {
02541           ncpus = simparam->simulatedPEs;
02542   }
02543 
02544   for(int i=0; i<ncpus; i++) {
02545     computeMap->storeCompute(i,0,type);
02546   }
02547 
02548 }
02549 
02550 //----------------------------------------------------------------------
02551 void WorkDistrib::mapComputeNonbonded(void)
02552 {
02553   // For each patch, create 1 electrostatic object for self-interaction.
02554   // Then create 1 for each 1-away and 2-away neighbor which has a larger
02555   // pid.
02556 
02557   PatchMap *patchMap = PatchMap::Object();
02558   ComputeMap *computeMap = ComputeMap::Object();
02559   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02560   Node *node = nd.ckLocalBranch();
02561   SimParameters *simParams = Node::Object()->simParameters;
02562   int ncpus = CkNumPes();
02563   int nodesize = CkMyNodeSize();
02564   if(simParams->simulateInitialMapping) {
02565           ncpus = simParams->simulatedPEs;
02566           nodesize = simParams->simulatedNodeSize;
02567   }
02568 
02569   PatchID oneAway[PatchMap::MaxOneOrTwoAway];
02570   PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
02571   int oneAwayTrans[PatchMap::MaxOneOrTwoAway];
02572 
02573   PatchID i;
02574   ComputeID cid;
02575   int numNeighbors;
02576   int j;
02577   double partScaling = 1.0;
02578   if ( ncpus < patchMap->numPatches() ) {
02579     partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
02580   }
02581 
02582   for(i=0; i<patchMap->numPatches(); i++) // do the self 
02583   {
02584 
02585    int numPartitions = 1;
02586 #if 0
02587    if ( simParams->ldBalancer == LDBAL_HYBRID ) {
02588 #ifdef  MEM_OPT_VERSION    
02589     int64 numFixed = patchMap->numFixedAtoms(i);
02590     int64 numAtoms = patchMap->numAtoms(i);
02591 #else
02592     int64 numFixed = patchMap->patch(i)->getNumFixedAtoms();  // avoid overflow
02593     int64 numAtoms = patchMap->patch(i)->getNumAtoms();
02594 #endif
02595 
02596     int divide = node->simParameters->numAtomsSelf;
02597     if (divide > 0) {
02598       numPartitions = (int) ( partScaling * ( 0.5 +
02599         (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
02600     }
02601     if (numPartitions < 1) numPartitions = 1;
02602     if ( numPartitions > node->simParameters->maxSelfPart )
02603                         numPartitions = node->simParameters->maxSelfPart;
02604     // self-interaction
02605     DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
02606 //    iout <<"Self numPartitions = " <<numPartitions <<" numAtoms " <<numAtoms <<std::endl;
02607    }
02608 #endif
02609 
02610     // DMK - NOTE - For MIC builds (i.e. NAMD_MIC is defined), it is assumed that self computes are
02611     //   mapped to the PE their associated patch is on. If the code below should change, making that
02612     //   untrue, MIC builds should be special cased so that assumption still holds (or the host vs
02613     //   device load balancing scheme should be modified).  (See the comment in the function
02614     //   mic_assignComputes() in ComputeNonbondedMIC.C for more details.)
02615     for(int partition=0; partition < numPartitions; partition++)
02616     {
02617       cid=computeMap->storeCompute(patchMap->node(i),1,
02618                                    computeNonbondedSelfType,
02619                                    partition,numPartitions);
02620       computeMap->newPid(cid,i);
02621       patchMap->newCid(i,cid);
02622     }
02623   }
02624 
02625   for(int p1=0; p1 <patchMap->numPatches(); p1++) // do the pairs
02626   {
02627     // this only returns half of neighbors, which is what we want
02628     numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
02629     for(j=0;j<numNeighbors;j++)
02630     {
02631         int p2 = oneAway[j];
02632         int dsp = oneAwayDownstream[j];
02633 
02634       int numPartitions = 1;
02635 #if 0
02636       if ( simParams->ldBalancer == LDBAL_HYBRID ) {
02637 #ifdef  MEM_OPT_VERSION        
02638         int64 numAtoms1 = patchMap->numAtoms(p1);
02639         int64 numAtoms2 = patchMap->numAtoms(p2);
02640         int64 numFixed1 = patchMap->numFixedAtoms(p1);
02641         int64 numFixed2 = patchMap->numFixedAtoms(p2);
02642 #else
02643         int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
02644         int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
02645         int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
02646         int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
02647 #endif
02648 
02649 
02650         const int t2 = oneAwayTrans[j];
02651         const int adim = patchMap->gridsize_a();
02652         const int bdim = patchMap->gridsize_b();
02653         const int cdim = patchMap->gridsize_c();
02654         const int nax = patchMap->numaway_a();  // 1 or 2
02655         const int nay = patchMap->numaway_b();  // 1 or 2
02656         const int naz = patchMap->numaway_c();  // 1 or 2
02657         const int ia1 = patchMap->index_a(p1);
02658         const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
02659         const int ib1 = patchMap->index_b(p1);
02660         const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
02661         const int ic1 = patchMap->index_c(p1);
02662         const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);
02663 
02664         if ( abs(ia2-ia1) > nax ||
02665              abs(ib2-ib1) > nay ||
02666              abs(ic2-ic1) > naz )
02667           NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");
02668 
02669         int distance = 3;
02670         if ( ia1 == ia2 ) --distance;
02671         else if ( ia1 == ia2 + nax - 1 ) --distance;
02672         else if ( ia1 + nax - 1 == ia2 ) --distance;
02673         if ( ib1 == ib2 ) --distance;
02674         else if ( ib1 == ib2 + nay - 1 ) --distance;
02675         else if ( ib1 + nay - 1 == ib2 ) --distance;
02676         if ( ic1 == ic2 ) --distance;
02677         else if ( ic1 == ic2 + naz - 1 ) --distance;
02678         else if ( ic1 + naz - 1 == ic2 ) --distance;
02679         int divide = 0;
02680         if ( distance == 0 ) {
02681           divide = node->simParameters->numAtomsSelf2;
02682         } else if (distance == 1) {
02683           divide = node->simParameters->numAtomsPair;
02684         } else {
02685           divide = node->simParameters->numAtomsPair2;
02686         }
02687         if (divide > 0) {
02688           numPartitions = (int) ( partScaling * ( 0.5 +
02689             (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
02690         }
02691         if ( numPartitions < 1 ) numPartitions = 1;
02692         if ( numPartitions > node->simParameters->maxPairPart )
02693                         numPartitions = node->simParameters->maxPairPart;
02694 //      if ( numPartitions > 1 ) iout << "Mapping " << numPartitions << " ComputeNonbondedPair objects for patches " << p1 << "(" << numAtoms1 << ") and " << p2 << "(" << numAtoms2 << ")\n" << endi;
02695       }
02696 #endif
02697       for(int partition=0; partition < numPartitions; partition++)
02698       {
02699         cid=computeMap->storeCompute( patchMap->basenode(dsp),
02700               2,computeNonbondedPairType,partition,numPartitions);
02701         computeMap->newPid(cid,p1);
02702         computeMap->newPid(cid,p2,oneAwayTrans[j]);
02703         patchMap->newCid(p1,cid);
02704         patchMap->newCid(p2,cid);
02705       }
02706     }
02707   }
02708 }
02709 
02710 //----------------------------------------------------------------------
02711 void WorkDistrib::mapComputeLCPO(void) {
02712   //iterate over all needed objects
02713 
02714   PatchMap *patchMap = PatchMap::Object();
02715   ComputeMap *computeMap = ComputeMap::Object();
02716   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02717   Node *node = nd.ckLocalBranch();
02718   SimParameters *simParams = Node::Object()->simParameters;
02719   int ncpus = CkNumPes();
02720   int nodesize = CkMyNodeSize();
02721   const int maxPatches = 8;
02722 
02723   int numPatchesInOctet;
02724   PatchID patchesInOctet[maxPatches];
02725   int oneAwayTrans[maxPatches];
02726 
02727   //partitioned after 1st timestep
02728   int numPartitions = 1;
02729 
02730   PatchID i;
02731   ComputeID cid;
02732 
02733   // one octet per patch
02734   for(i=0; i<patchMap->numPatches(); i++) {
02735     numPatchesInOctet =
02736         patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);
02737 
02738     for(int partition=0; partition < numPartitions; partition++) {
02739       cid=computeMap->storeCompute(patchMap->node(i),
02740                                    numPatchesInOctet,
02741                                    computeLCPOType,
02742                                    partition,
02743                                    numPartitions);
02744       for (int p = 0; p < numPatchesInOctet; p++) {
02745         computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
02746       }
02747       for (int p = 0; p < numPatchesInOctet; p++) {
02748         patchMap->newCid(patchesInOctet[p],cid);
02749       }
02750     } // for partitions 
02751   } // for patches
02752 } // mapComputeLCPO
02753 
02754 //----------------------------------------------------------------------
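// Stamps the compute's LocalWorkMsg with a priority derived from its
// sequence number and routes it to the matching enqueue* entry method on
// the local PE.  For nonbonded computes the sequence parity and GBIS phase
// select among the phase-specific A/B queues; CUDA and MIC computes use
// their own entry points, or run inline when the build has no device
// support.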
02755 void WorkDistrib::messageEnqueueWork(Compute *compute) {
02756   LocalWorkMsg *msg = compute->localWorkMsg;
02757   int seq = compute->sequence();
02758   int gbisPhase = compute->getGBISPhase();
02759 
02760   if ( seq < 0 ) {
02761     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
02762   } else {
02763     SET_PRIORITY(msg,seq,compute->priority());
02764   }
02765 
02766   msg->compute = compute; // pointer is valid since send is to local Pe
02767   int type = compute->type();
02768   int cid = compute->cid;
02769 
02770   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
02771   switch ( type ) {
02772   case computeExclsType:
02773   case computeSelfExclsType:
02774     wdProxy[CkMyPe()].enqueueExcls(msg);
02775     break;
02776   case computeBondsType:
02777   case computeSelfBondsType:
02778     wdProxy[CkMyPe()].enqueueBonds(msg);
02779     break;
02780   case computeAnglesType:
02781   case computeSelfAnglesType:
02782     wdProxy[CkMyPe()].enqueueAngles(msg);
02783     break;
02784   case computeDihedralsType:
02785   case computeSelfDihedralsType:
02786     wdProxy[CkMyPe()].enqueueDihedrals(msg);
02787     break;
02788   case computeImpropersType:
02789   case computeSelfImpropersType:
02790     wdProxy[CkMyPe()].enqueueImpropers(msg);
02791     break;
02792   case computeTholeType:
02793   case computeSelfTholeType:
02794     wdProxy[CkMyPe()].enqueueThole(msg);
02795     break;
02796   case computeAnisoType:
02797   case computeSelfAnisoType:
02798     wdProxy[CkMyPe()].enqueueAniso(msg);
02799     break;
02800   case computeCrosstermsType:
02801   case computeSelfCrosstermsType:
02802     wdProxy[CkMyPe()].enqueueCrossterms(msg);
02803     break;
02804   // JLai
02805   case computeGromacsPairType:
02806   case computeSelfGromacsPairType:
02807     wdProxy[CkMyPe()].enqueueGromacsPair(msg);
02808     break;    
02809   // End of JLai
02810   case computeLCPOType:
02811     wdProxy[CkMyPe()].enqueueLCPO(msg);
02812     break;
02813   case computeNonbondedSelfType:
02814     switch ( seq % 2 ) {
02815     case 0:
02816       //wdProxy[CkMyPe()].enqueueSelfA(msg);
02817       switch ( gbisPhase ) {
02818          case 1:
02819            wdProxy[CkMyPe()].enqueueSelfA1(msg);
02820            break;
02821          case 2:
02822            wdProxy[CkMyPe()].enqueueSelfA2(msg);
02823            break;
02824          case 3:
02825            wdProxy[CkMyPe()].enqueueSelfA3(msg);
02826            break;
02827       }
02828       break;
02829     case 1:
02830       //wdProxy[CkMyPe()].enqueueSelfB(msg);
02831       switch ( gbisPhase ) {
02832          case 1:
02833            wdProxy[CkMyPe()].enqueueSelfB1(msg);
02834            break;
02835          case 2:
02836            wdProxy[CkMyPe()].enqueueSelfB2(msg);
02837            break;
02838          case 3:
02839            wdProxy[CkMyPe()].enqueueSelfB3(msg);
02840            break;
02841       }
02842       break;
02843     default:
02844       NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
02845     }
02846     break;
02847   case computeNonbondedPairType:
02848     switch ( seq % 2 ) {
02849     case 0:
02850       //wdProxy[CkMyPe()].enqueueWorkA(msg);
02851       switch ( gbisPhase ) {
02852          case 1:
02853            wdProxy[CkMyPe()].enqueueWorkA1(msg);
02854            break;
02855          case 2:
02856            wdProxy[CkMyPe()].enqueueWorkA2(msg);
02857            break;
02858          case 3:
02859            wdProxy[CkMyPe()].enqueueWorkA3(msg);
02860            break;
02861       }
02862       break;
02863     case 1:
02864       //wdProxy[CkMyPe()].enqueueWorkB(msg);
02865       switch ( gbisPhase ) {
02866          case 1:
02867            wdProxy[CkMyPe()].enqueueWorkB1(msg);
02868            break;
02869          case 2:
02870            wdProxy[CkMyPe()].enqueueWorkB2(msg);
02871            break;
02872          case 3:
02873            wdProxy[CkMyPe()].enqueueWorkB3(msg);
02874            break;
02875       }
02876       break;
02877     case 2:
02878       wdProxy[CkMyPe()].enqueueWorkC(msg);
02879       break;
02880     default:
02881       NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
02882     }
02883     break;
02884   case computeNonbondedCUDAType:
02885 #ifdef NAMD_CUDA
02886   case computeNonbondedCUDA2Type:
02887 //     CkPrintf("WorkDistrib[%d]::CUDA seq=%d phase=%d\n", CkMyPe(), seq, gbisPhase);
02888     //wdProxy[CkMyPe()].enqueueCUDA(msg);
02889     switch ( gbisPhase ) {
02890        case 1:
02891          wdProxy[CkMyPe()].enqueueCUDA(msg);
02892          break;
02893        case 2:
02894          wdProxy[CkMyPe()].enqueueCUDAP2(msg);
02895          break;
02896        case 3:
02897          wdProxy[CkMyPe()].enqueueCUDAP3(msg);
02898          break;
02899     }
02900 #else
02901     msg->compute->doWork();  MACHINE_PROGRESS
02902 #endif
02903     break;
02904   case computeNonbondedMICType:
02905 #ifdef NAMD_MIC
02906     wdProxy[CkMyPe()].enqueueMIC(msg);
02907 #endif
02908     break;
02909   case computePmeType:
02910     // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
02911     wdProxy[CkMyPe()].enqueuePme(msg);
02912     break;
02913 #ifdef NAMD_CUDA
02914   case computePmeCUDAType:
02915     wdProxy[CkMyPe()].enqueuePme(msg);
02916     break;
02917 #endif
02918   case optPmeType:
02919     // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
02920 #ifdef NAMD_CUDA
02921     wdProxy[CkMyPe()].enqueuePme(msg);
02922 #else
02923     msg->compute->doWork();  MACHINE_PROGRESS
02924 #endif
02925     break;
02926   default:
02927     wdProxy[CkMyPe()].enqueueWork(msg);
02928   }
02929 }
02930 
02931 //----------------------------------------------------------------------
02932 void WorkDistrib::messageFinishCUDA(Compute *compute) {
02933   LocalWorkMsg *msg = compute->localWorkMsg;
02934   int seq = compute->sequence();
02935   int gbisPhase = compute->getGBISPhase();
02936 
02937   if ( seq < 0 ) {
02938     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
02939   } else {
02940     SET_PRIORITY(msg,seq,compute->priority());
02941   }
02942 
02943   msg->compute = compute; // pointer is valid since send is to local Pe
02944   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
02945 
02946 #ifdef NAMD_CUDA
02947     //wdProxy[CkMyPe()].finishCUDA(msg);
02948     switch ( gbisPhase ) {
02949        case 1:
02950          wdProxy[CkMyPe()].finishCUDA(msg);
02951          break;
02952        case 2:
02953          wdProxy[CkMyPe()].finishCUDAP2(msg);
02954          break;
02955        case 3:
02956          wdProxy[CkMyPe()].finishCUDAP3(msg);
02957          break;
02958     }
02959 #else
02960     msg->compute->doWork();  MACHINE_PROGRESS
02961 #endif
02962 }
02963 
02964 //----------------------------------------------------------------------
02965 void WorkDistrib::messageFinishMIC(Compute *compute) {
02966   LocalWorkMsg *msg = compute->localWorkMsg;
02967   int seq = compute->sequence();
02968   int gbisPhase = compute->getGBISPhase();
02969 
02970   if ( seq < 0 ) {
02971     NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
02972   } else {
02973     SET_PRIORITY(msg,seq,compute->priority());
02974   }
02975 
02976   msg->compute = compute; // pointer is valid since send is to local Pe
02977   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
02978 
02979 #ifdef NAMD_MIC
02980     wdProxy[CkMyPe()].finishMIC(msg);
02981 #else
02982     msg->compute->doWork();  MACHINE_PROGRESS
02983 #endif
02984 }
02985 
02986 void WorkDistrib::enqueueWork(LocalWorkMsg *msg) {
02987   msg->compute->doWork();  MACHINE_PROGRESS
02988   if ( msg->compute->localWorkMsg != msg )
02989     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02990 }
02991 
02992 void WorkDistrib::enqueueExcls(LocalWorkMsg *msg) {
02993   msg->compute->doWork();  MACHINE_PROGRESS
02994   if ( msg->compute->localWorkMsg != msg )
02995     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
02996 }
02997 
02998 void WorkDistrib::enqueueBonds(LocalWorkMsg *msg) {
02999   msg->compute->doWork();  MACHINE_PROGRESS
03000   if ( msg->compute->localWorkMsg != msg )
03001     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03002 }
03003 
03004 void WorkDistrib::enqueueAngles(LocalWorkMsg *msg) {
03005   msg->compute->doWork();  MACHINE_PROGRESS
03006   if ( msg->compute->localWorkMsg != msg )
03007     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03008 }
03009 
03010 void WorkDistrib::enqueueDihedrals(LocalWorkMsg *msg) {
03011   msg->compute->doWork();  MACHINE_PROGRESS
03012   if ( msg->compute->localWorkMsg != msg )
03013     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03014 }
03015 
03016 void WorkDistrib::enqueueImpropers(LocalWorkMsg *msg) {
03017   msg->compute->doWork();  MACHINE_PROGRESS
03018   if ( msg->compute->localWorkMsg != msg )
03019     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03020 }
03021 
03022 void WorkDistrib::enqueueThole(LocalWorkMsg *msg) {
03023   msg->compute->doWork();  MACHINE_PROGRESS
03024   if ( msg->compute->localWorkMsg != msg )
03025     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03026 }
03027 
03028 void WorkDistrib::enqueueAniso(LocalWorkMsg *msg) {
03029   msg->compute->doWork();  MACHINE_PROGRESS
03030   if ( msg->compute->localWorkMsg != msg )
03031     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03032 }
03033 
03034 void WorkDistrib::enqueueCrossterms(LocalWorkMsg *msg) {
03035   msg->compute->doWork();  MACHINE_PROGRESS
03036   if ( msg->compute->localWorkMsg != msg )
03037     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03038 }
03039 
03040 // JLai
03041 void WorkDistrib::enqueueGromacsPair(LocalWorkMsg *msg) {
03042   msg->compute->doWork();
03043   MACHINE_PROGRESS
03044   if ( msg->compute->localWorkMsg != msg )
03045     NAMD_bug("\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
03046 }
03047 // End of JLai
03048 
03049 void WorkDistrib::enqueuePme(LocalWorkMsg *msg) {
03050   msg->compute->doWork();  MACHINE_PROGRESS
03051   if ( msg->compute->localWorkMsg != msg )
03052     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03053 }
03054 
03055 void WorkDistrib::enqueueLCPO(LocalWorkMsg *msg) {
03056   msg->compute->doWork();
03057   if ( msg->compute->localWorkMsg != msg )
03058     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03059 }
03060 void WorkDistrib::enqueueSelfA1(LocalWorkMsg *msg) {
03061   msg->compute->doWork();  MACHINE_PROGRESS
03062   if ( msg->compute->localWorkMsg != msg )
03063     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03064 }
03065 void WorkDistrib::enqueueSelfA2(LocalWorkMsg *msg) {
03066   msg->compute->doWork();  MACHINE_PROGRESS
03067   if ( msg->compute->localWorkMsg != msg )
03068     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03069 }
03070 void WorkDistrib::enqueueSelfA3(LocalWorkMsg *msg) {
03071   msg->compute->doWork();  MACHINE_PROGRESS
03072   if ( msg->compute->localWorkMsg != msg )
03073     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03074 }
03075 
03076 void WorkDistrib::enqueueSelfB1(LocalWorkMsg *msg) {
03077   msg->compute->doWork();  MACHINE_PROGRESS
03078   if ( msg->compute->localWorkMsg != msg )
03079     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03080 }
03081 void WorkDistrib::enqueueSelfB2(LocalWorkMsg *msg) {
03082   msg->compute->doWork();  MACHINE_PROGRESS
03083   if ( msg->compute->localWorkMsg != msg )
03084     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03085 }
03086 void WorkDistrib::enqueueSelfB3(LocalWorkMsg *msg) {
03087   msg->compute->doWork();  MACHINE_PROGRESS
03088   if ( msg->compute->localWorkMsg != msg )
03089     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03090 }
03091 
03092 void WorkDistrib::enqueueWorkA1(LocalWorkMsg *msg) {
03093   msg->compute->doWork();  MACHINE_PROGRESS
03094   if ( msg->compute->localWorkMsg != msg )
03095     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03096 }
03097 void WorkDistrib::enqueueWorkA2(LocalWorkMsg *msg) {
03098   msg->compute->doWork();  MACHINE_PROGRESS
03099   if ( msg->compute->localWorkMsg != msg )
03100     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03101 }
03102 void WorkDistrib::enqueueWorkA3(LocalWorkMsg *msg) {
03103   msg->compute->doWork();  MACHINE_PROGRESS
03104   if ( msg->compute->localWorkMsg != msg )
03105     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03106 }
03107 
03108 void WorkDistrib::enqueueWorkB1(LocalWorkMsg *msg) {
03109   msg->compute->doWork();  MACHINE_PROGRESS
03110   if ( msg->compute->localWorkMsg != msg )
03111     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03112 }
03113 void WorkDistrib::enqueueWorkB2(LocalWorkMsg *msg) {
03114   msg->compute->doWork();  MACHINE_PROGRESS
03115   if ( msg->compute->localWorkMsg != msg )
03116     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03117 }
03118 void WorkDistrib::enqueueWorkB3(LocalWorkMsg *msg) {
03119   msg->compute->doWork();  MACHINE_PROGRESS
03120   if ( msg->compute->localWorkMsg != msg )
03121     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03122 }
03123 
03124 
03125 
03126 void WorkDistrib::enqueueWorkC(LocalWorkMsg *msg) {
03127   msg->compute->doWork();  MACHINE_PROGRESS
03128   if ( msg->compute->localWorkMsg != msg )
03129     NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03130 }
03131 
03132 void WorkDistrib::enqueueCUDA(LocalWorkMsg *msg) {
03133   msg->compute->doWork();  MACHINE_PROGRESS
03134   // ComputeNonbondedCUDA *c = msg->compute;
03135   // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
03136   //   NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03137 }
03138 void WorkDistrib::enqueueCUDAP2(LocalWorkMsg *msg) {
03139   msg->compute->doWork();  MACHINE_PROGRESS
03140 }
03141 void WorkDistrib::enqueueCUDAP3(LocalWorkMsg *msg) {
03142   msg->compute->doWork();  MACHINE_PROGRESS
03143 }
03144 
03145 void WorkDistrib::finishCUDAPatch(FinishWorkMsg *msg) {
03146   msg->compute->finishPatch(msg->data);
03147 }
03148 
03149 void WorkDistrib::finishCUDA(LocalWorkMsg *msg) {
03150   msg->compute->doWork();  MACHINE_PROGRESS
03151   // ComputeNonbondedCUDA *c = msg->compute;
03152   // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
03153   //   NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
03154 }
03155 void WorkDistrib::finishCUDAP2(LocalWorkMsg *msg) {
03156   msg->compute->doWork();  MACHINE_PROGRESS
03157 }
03158 void WorkDistrib::finishCUDAP3(LocalWorkMsg *msg) {
03159   msg->compute->doWork();  MACHINE_PROGRESS
03160 }
03161 
03162 void WorkDistrib::enqueueMIC(LocalWorkMsg *msg) {
03163   msg->compute->doWork();  MACHINE_PROGRESS
03164 }
03165 void WorkDistrib::finishMIC(LocalWorkMsg *msg) {
03166   msg->compute->doWork();  MACHINE_PROGRESS
03167 }
03168 
03169 
03170 //**********************************************************************
03171 //
03172 //                      FUNCTION velocities_from_PDB
03173 //
03174 //   INPUTS:
03175 //      v - Array of vectors to populate
03176 //      filename - name of the PDB file to read in
03177 //
03178 //      This function reads in a set of initial velocities from a
03179 //      PDB file.  It places the velocities into the array of Vectors
03180 //      passed to it.
03181 //
03182 //**********************************************************************
03183 
03184 void WorkDistrib::velocities_from_PDB(const char *filename, 
03185                                       Vector *v, int totalAtoms)
03186 {
03187   PDB *v_pdb;           //  PDB info from velocity PDB
03188   int i;
03189 
03190   //  Read the PDB
03191   v_pdb = new PDB(filename);
03192   if ( v_pdb == NULL )
03193   {
03194     NAMD_die("memory allocation failed in WorkDistrib::velocities_from_PDB");
03195   }
03196 
03197   //  Make sure the number of velocities read in matches
03198   //  the number of atoms we have
03199   if (v_pdb->num_atoms() != totalAtoms)
03200   {
03201     char err_msg[129];
03202 
03203     sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
03204             v_pdb->num_atoms());
03205 
03206     NAMD_die(err_msg);
03207   }
03208 
03209   //  Get the entire list of atom info and loop through
03210   //  them assigning the velocity vector for each one
03211   v_pdb->get_all_positions(v);
03212 
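  //  Velocities in a PDB file are scaled by PDBVELFACTOR when written out;
  //  multiplying by PDBVELINVFACTOR (assumed to be its reciprocal) converts
  //  them back to NAMD's internal velocity units.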
03213   for (i=0; i<totalAtoms; i++)
03214   {
03215     v[i].x *= PDBVELINVFACTOR;
03216     v[i].y *= PDBVELINVFACTOR;
03217     v[i].z *= PDBVELINVFACTOR;
03218   }
03219 
03220   delete v_pdb;
03221 }
03222 //              END OF FUNCTION velocities_from_PDB
03223 
03224 //**********************************************************************
03225 //
03226 //                      FUNCTION velocities_from_binfile
03227 //
03228 //    INPUTS:
03229 //      fname - File name to read velocities from
03230 //      n - Number of atoms in system
03231 //      vels - Array of velocity vectors
03232 //                                      
03233 //      This function reads in velocities stored in binary format.  Binary
03234 //      files are used to preserve accuracy between restarts of NAMD.
03235 //
03236 //**********************************************************************
03237 
03238 void WorkDistrib::velocities_from_binfile(const char *fname, Vector *vels, int n)
03239 {
03240   read_binary_file(fname,vels,n);
03241 }
03242 //               END OF FUNCTION velocities_from_binfile
03243 
03244 //**********************************************************************
03245 //
03246 //                      FUNCTION random_velocities
03247 //
03248 //   INPUTS:
03249 //      v - array of vectors to populate
03250 //      Temp - Temperature to achieve
03251 //
03252 //      This function assigns a random velocity distribution to a
03253 //   simulation to achieve a desired initial temperature.  The method
03254 //   used here was adapted from the program X-PLOR.
03255 //
03256 //**********************************************************************
03257 
03258 void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
03259                                     Vector *v, int totalAtoms)
03260 {
03261   int i, j;             //  Loop counter
03262   BigReal kbT;          //  Boltzmann constant * Temp
03263   BigReal randnum;      //  Random number from -6.0 to 6.0
03264   BigReal kbToverM;     //  sqrt(Kb*Temp/Mass)
03265   SimParameters *simParams = Node::Object()->simParameters;
03266   Bool lesOn = simParams->lesOn;
03267   Random vel_random(simParams->randomSeed);
03268 
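  //  With locally enhanced sampling (LES), replica atoms may optionally have
  //  their target temperature reduced by the LES factor; tempFactor below
  //  carries that scaling into the per-atom velocity width.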
03269   int lesReduceTemp = lesOn && simParams->lesReduceTemp;
03270   BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
03271 
03272   kbT = Temp*BOLTZMANN;
03273 
03274   //  Loop through all the atoms and assign velocities in
03275   //  the x, y and z directions for each one
03276   for (i=0; i<totalAtoms; i++)
03277   {
03278     if (structure->atommass(i) <= 0.) {
03279       kbToverM = 0.;
03280     } else {
03281       kbToverM = sqrt(kbT *
03282         ( lesOn && structure->get_fep_type(i) ? tempFactor : 1.0 ) /
03283                           structure->atommass(i) );
03284     }
03285 
03286     //  The following comment is taken from X-PLOR, from which
03287     //  this section of code was adapted.
03288     
03289     //  This section generates a Gaussian random
03290     //  deviate of 0.0 mean and standard deviation RFD for
03291     //  each of the three spatial dimensions.
03292     //  The algorithm is a "sum of uniform deviates algorithm"
03293     //  which may be found in Abramowitz and Stegun,
03294     //  "Handbook of Mathematical Functions", pg 952.
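    //  Why this works: a uniform deviate on [0,1) has mean 1/2 and variance
    //  1/12, so the sum of 12 of them has mean 6 and variance 1.  Subtracting
    //  6 therefore gives an approximately standard normal deviate, which is
    //  then scaled by kbToverM = sqrt(kB*T/m) to sample the Maxwell-Boltzmann
    //  distribution for each velocity component.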
03295     for (randnum=0.0, j=0; j<12; j++)
03296     {
03297       randnum += vel_random.uniform();
03298     }
03299 
03300     randnum -= 6.0;
03301 
03302     v[i].x = randnum*kbToverM;
03303 
03304     for (randnum=0.0, j=0; j<12; j++)
03305     {
03306       randnum += vel_random.uniform();
03307     }
03308 
03309     randnum -= 6.0;
03310 
03311     v[i].y = randnum*kbToverM;
03312 
03313     for (randnum=0.0, j=0; j<12; j++)
03314     {
03315       randnum += vel_random.uniform();
03316     }
03317 
03318     randnum -= 6.0;
03319     
03320     v[i].z = randnum*kbToverM;
03321   }
03322 
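  //  Drude particles are given the same initial velocity as their parent
  //  ("mother") atom, so the Drude-parent relative velocity starts out at
  //  zero.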
03323   if ( simParams->drudeOn ) for (i=0; i<totalAtoms; i++) {
03324     if ( structure->is_drude(i) ) {
03325       v[i] = v[structure->get_mother_atom(i)];  // zero is good enough
03326     }
03327   }
03328 }
03329 /*                      END OF FUNCTION random_velocities               */
03330 
03331 //**********************************************************************
03332 //
03333 //                      FUNCTION remove_com_motion
03334 //
03335 //   INPUTS:
03336 //      vel - Array of initial velocity vectors
03337 //
03338 //      This function removes the center of mass motion from a molecule.
03339 //
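//      The center-of-mass velocity
//          v_com = (sum_i m_i * v_i) / (sum_i m_i)
//      is computed and subtracted from every atom's velocity, leaving the
//      system with zero net momentum while preserving relative motion.
//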
03340 //**********************************************************************
03341 
03342 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
03343 {
03344   Vector mv(0,0,0);             //  Sum of (mv)_i
03345   BigReal totalMass=0;  //  Total mass of system
03346   int i;                        //  Loop counter
03347 
03348   //  Loop through and compute the net momentum
03349   for (i=0; i<n; i++)
03350   {
03351     BigReal mass = structure->atommass(i);
03352     mv += mass * vel[i];
03353     totalMass += mass;
03354   }
03355 
03356   mv /= totalMass;
03357 
03358   iout << iINFO << "REMOVING COM VELOCITY "
03359         << ( PDBVELFACTOR * mv ) << "\n" << endi;
03360 
03361   for (i=0; i<n; i++) { vel[i] -= mv; }
03362 
03363 }
03364 /*                      END OF FUNCTION remove_com_motion               */
03365 
03366 #if USE_TOPOMAP 
03367 
03368 //Specifically designed for BGL and other 3D torus architectures.
03369 //Partition the torus and the patch grid together using recursive bisection.
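//The patch grid and the processor torus are bisected together so that
//spatially adjacent patches land on nearby processors.  PE 0 (and optionally
//PE 1) can be excluded from hosting patches via the noPatchesOnZero and
//noPatchesOnOne options handled below.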
03370 int WorkDistrib::assignPatchesTopoGridRecBisection() {
03371   
03372   PatchMap *patchMap = PatchMap::Object();
03373   int *assignedNode = new int[patchMap->numPatches()];
03374   int numNodes = Node::Object()->numNodes();
03375   SimParameters *simParams = Node::Object()->simParameters;
03376   if(simParams->simulateInitialMapping) {
03377           numNodes = simParams->simulatedPEs;
03378   }
03379 
03380   int usedNodes = numNodes;
03381   CkPrintf("assignPatchesTopoGridRecBisection\n");
03382   if ( simParams->noPatchesOnZero && numNodes > 1 ) {
03383     usedNodes -= 1;
03384     if ( simParams->noPatchesOnOne && numNodes > 2 )
03385       usedNodes -= 1;
03386   }
03387   RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());
03388   
03389   int xsize = 0, ysize = 0, zsize = 0;
03390   
03391   // Right now assumes a T*** (e.g. TXYZ) mapping
03392   TopoManager tmgr;
03393   xsize = tmgr.getDimNX();
03394   ysize = tmgr.getDimNY();
03395   zsize = tmgr.getDimNZ();
03396   
03397   //Fix to not assign patches to processor 0
03398   int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
03399  
03400   delete [] assignedNode;
03401 
03402   return rc;
03403 }
03404 #endif
03405 
03406 
03407 #if defined(NAMD_MIC)
03408   extern void mic_hostDeviceLDB();
03409   extern void mic_contributeHostDeviceLDB(int idLen, int * id);
03410   extern void mic_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2);
03411 #endif
03412 
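// Host-device load-balancing plumbing for the MIC build.  The send_* methods
// go through the WorkDistrib group proxy (a broadcast, or a point send to
// PE 0 in the case of contributeHostDeviceLDB); the receiving entry methods
// forward to the mic_* functions declared above.  All of them compile to
// no-ops unless NAMD_MIC is defined.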
03413 void WorkDistrib::send_initHostDeviceLDB() {
03414   #if defined(NAMD_MIC)
03415     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
03416     wdProxy.initHostDeviceLDB();
03417   #endif
03418 }
03419 
03420 void WorkDistrib::initHostDeviceLDB() {
03421   #if defined(NAMD_MIC)
03422     mic_hostDeviceLDB();
03423   #endif
03424 }
03425 
03426 void WorkDistrib::send_contributeHostDeviceLDB(int peSetLen, int * peSet) {
03427   #if defined(NAMD_MIC)
03428     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
03429     wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
03430   #endif
03431 }
03432 
03433 void WorkDistrib::contributeHostDeviceLDB(int peSetLen, int * peSet) {
03434   #if defined(NAMD_MIC)
03435     mic_contributeHostDeviceLDB(peSetLen, peSet);
03436   #endif
03437 }
03438 
03439 void WorkDistrib::send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
03440   #if defined(NAMD_MIC)
03441     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
03442     wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
03443   #endif
03444 }
03445 
03446 void WorkDistrib::setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
03447   #if defined(NAMD_MIC)
03448     mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
03449   #endif
03450 }
03451 
03452 
03453 #include "WorkDistrib.def.h"
03454 
