Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

HomePatch.C

Go to the documentation of this file.
00001 
00008 /*
00009    HomePatch owns the actual atoms of a Patch of space
00010    Proxy(s) get messages via ProxyMgr from HomePatch(es)
00011    to update lists of atoms and their coordinates
00012    HomePatch(es) also have a Sequencer bound to them
00013 
00014    superclass:  Patch           
00015 */
00016 
00017 #include <math.h>
00018 #include "charm++.h"
00019 
00020 #include "SimParameters.h"
00021 #include "HomePatch.h"
00022 #include "AtomMap.h"
00023 #include "Node.h"
00024 #include "PatchMap.inl"
00025 #include "main.h"
00026 #include "ProxyMgr.decl.h"
00027 #include "ProxyMgr.h"
00028 #include "Migration.h"
00029 #include "Molecule.h"
00030 #include "PatchMgr.h"
00031 #include "Sequencer.h"
00032 #include "LdbCoordinator.h"
00033 #include "Settle.h"
00034 #include "ReductionMgr.h"
00035 #include "Sync.h"
00036 #include "Random.h"
00037 #include "Priorities.h"
00038 
00039 #define TINY 1.0e-20;
00040 #define MAXHGS 10
00041 #define MIN_DEBUG_LEVEL 2
00042 //#define DEBUGM
00043 #include "Debug.h"
00044 
00045 typedef int HGArrayInt[MAXHGS];
00046 typedef BigReal HGArrayBigReal[MAXHGS];
00047 typedef zVector HGArrayVector[MAXHGS];
00048 typedef BigReal HGMatrixBigReal[MAXHGS][MAXHGS];
00049 typedef zVector HGMatrixVector[MAXHGS][MAXHGS];
00050 
00051 int average(CompAtom *qtilde,const HGArrayVector &q,BigReal *lambda,const int n,const int m, const HGArrayBigReal &imass, const HGArrayBigReal &length2, const HGArrayInt &ial, const HGArrayInt &ibl, const HGArrayVector &refab, const BigReal tolf, const int ntrial);
00052 
00053 void mollify(CompAtom *qtilde,const HGArrayVector &q0,const BigReal *lambda, HGArrayVector &force,const int n, const int m, const HGArrayBigReal &imass,const HGArrayInt &ial,const HGArrayInt &ibl,const HGArrayVector &refab);
00054 
00055 
00056 // DMK - Atom Separation (water vs. non-water)
00057 #if NAMD_SeparateWaters != 0
00058 
00059 // Macro to test if a hydrogen group represents a water molecule.
00060 // NOTE: This test is the same test in Molecule.C for setting the
00061 //   OxygenAtom flag in status.
00062 // hgtype should be the number of atoms in a water hydrogen group
00063 // It must now be set based on simulation parameters because we might
00064 // be using tip4p
00065 
00066 // DJH: This will give false positive for full Drude model,
00067 //      e.g. O D H is not water but has hgs==3
00068 #define IS_HYDROGEN_GROUP_WATER(hgs, mass)                 \
00069   ((hgs >= 3) && ((mass >= 14.0) && (mass <= 18.0)))
00070 
00071 #endif
00072 
00073 
00074 HomePatch::HomePatch(PatchID pd, int atomCnt) : Patch(pd)
00075 // DMK - Atom Separation (water vs. non-water)
00076 #if NAMD_SeparateWaters != 0
00077   ,tempAtom()
00078 #endif
00079 {
00080   min.x = PatchMap::Object()->min_a(patchID);
00081   min.y = PatchMap::Object()->min_b(patchID);
00082   min.z = PatchMap::Object()->min_c(patchID);
00083   max.x = PatchMap::Object()->max_a(patchID);
00084   max.y = PatchMap::Object()->max_b(patchID);
00085   max.z = PatchMap::Object()->max_c(patchID);
00086   center = 0.5*(min+max);
00087 
00088   aAway = PatchMap::Object()->numaway_a();
00089   bAway = PatchMap::Object()->numaway_b();
00090   cAway = PatchMap::Object()->numaway_c();
00091 
00092   migrationSuspended = false;
00093   allMigrationIn = false;
00094   marginViolations = 0;
00095   patchMapRead = 0; // We delay read of PatchMap data
00096                     // to make sure it is really valid
00097   inMigration = false;
00098   numMlBuf = 0;
00099   flags.sequence = -1;
00100 
00101   numAtoms = atomCnt;
00102   replacementForces = 0;
00103 
00104   SimParameters *simParams = Node::Object()->simParameters;
00105   doPairlistCheck_newTolerance = 
00106         0.5 * ( simParams->pairlistDist - simParams->cutoff );
00107 
00108 
00109   numFixedAtoms = 0;
00110   //if ( simParams->fixedAtomsOn ) {
00111   //  for ( int i = 0; i < numAtoms; ++i ) {
00112   //    numFixedAtoms += ( atom[i].atomFixed ? 1 : 0 );
00113   //  }
00114   //}
00115 
00116 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00117   ptnTree.resize(0);
00118   /*children = NULL;
00119   numChild = 0;*/
00120 #else
00121   child =  new int[proxySpanDim];
00122   nChild = 0;   // number of proxy spanning tree children
00123 #endif
00124 
00125 #if CMK_PERSISTENT_COMM
00126   phsReady = 0;
00127   nphs = 0;
00128   localphs = new PersistentHandle[CkNumPes()];
00129   for (int i=0; i<CkNumPes(); i++) localphs[i] = 0;
00130 #endif
00131 
00132 
00133   // DMK - Atom Separation (water vs. non-water)
00134   #if NAMD_SeparateWaters != 0
00135 
00136     // Create the scratch memory for separating atoms
00137     tempAtom.resize(numAtoms);
00138     numWaterAtoms = -1;
00139 
00140   #endif
00141   // Handle unusual water models here
00142   if (simParams->watmodel == WAT_TIP4) init_tip4();
00143   else if (simParams->watmodel == WAT_SWM4) init_swm4();
00144 
00145 #ifdef MEM_OPT_VERSION
00146   isNewProxyAdded = 0;
00147 #endif
00148 }
00149 
00150 HomePatch::HomePatch(PatchID pd, FullAtomList al) : Patch(pd), atom(al)
00151 // DMK - Atom Separation (water vs. non-water)
00152 #if NAMD_SeparateWaters != 0
00153   ,tempAtom()
00154 #endif
00155 { 
00156   min.x = PatchMap::Object()->min_a(patchID);
00157   min.y = PatchMap::Object()->min_b(patchID);
00158   min.z = PatchMap::Object()->min_c(patchID);
00159   max.x = PatchMap::Object()->max_a(patchID);
00160   max.y = PatchMap::Object()->max_b(patchID);
00161   max.z = PatchMap::Object()->max_c(patchID);
00162   center = 0.5*(min+max);
00163 
00164   aAway = PatchMap::Object()->numaway_a();
00165   bAway = PatchMap::Object()->numaway_b();
00166   cAway = PatchMap::Object()->numaway_c();
00167 
00168   migrationSuspended = false;
00169   allMigrationIn = false;
00170   marginViolations = 0;
00171   patchMapRead = 0; // We delay read of PatchMap data
00172                     // to make sure it is really valid
00173   inMigration = false;
00174   numMlBuf = 0;
00175   flags.sequence = -1;
00176 
00177   numAtoms = atom.size();
00178   replacementForces = 0;
00179 
00180   SimParameters *simParams = Node::Object()->simParameters;
00181   doPairlistCheck_newTolerance = 
00182         0.5 * ( simParams->pairlistDist - simParams->cutoff );
00183 
00184 
00185   numFixedAtoms = 0;
00186   if ( simParams->fixedAtomsOn ) {
00187     for ( int i = 0; i < numAtoms; ++i ) {
00188       numFixedAtoms += ( atom[i].atomFixed ? 1 : 0 );
00189     }
00190   }
00191 
00192 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00193   ptnTree.resize(0);
00194   /*children = NULL;
00195   numChild = 0;*/
00196 #else
00197   child =  new int[proxySpanDim];
00198   nChild = 0;   // number of proxy spanning tree children
00199 #endif
00200 
00201 #if CMK_PERSISTENT_COMM
00202   phsReady = 0;
00203   nphs = 0;
00204   localphs = new PersistentHandle[CkNumPes()];
00205   for (int i=0; i<CkNumPes(); i++) localphs[i] = 0;
00206 #endif
00207 
00208 
00209   // DMK - Atom Separation (water vs. non-water)
00210   #if NAMD_SeparateWaters != 0
00211 
00212     // Create the scratch memory for separating atoms
00213     tempAtom.resize(numAtoms);
00214     numWaterAtoms = -1;
00215 
00216     // Separate the current list of atoms
00217     separateAtoms();
00218 
00219   #endif
00220     
00221   // Handle unusual water models here
00222   if (simParams->watmodel == WAT_TIP4) init_tip4();
00223   else if (simParams->watmodel == WAT_SWM4) init_swm4();
00224 
00225 #ifdef MEM_OPT_VERSION
00226   isNewProxyAdded = 0;
00227 #endif
00228 
00229 }
00230 
00231 void HomePatch::write_tip4_props() {
00232   printf("Writing r_om and r_ohc: %f | %f\n", r_om, r_ohc);
00233 }
00234 
00235 void HomePatch::init_tip4() {
00236   // initialize the distances needed for the tip4p water model
00237 
00238   Molecule *mol = Node::Object()->molecule;
00239   r_om = mol->r_om;
00240   r_ohc = mol->r_ohc;
00241 }
00242 
00243 
00244 void ::HomePatch::init_swm4() {
00245   // initialize the distances needed for the SWM4 water model
00246   SimParameters *simParams = Node::Object()->simParameters;
00247   Molecule *mol = Node::Object()->molecule;
00248   int ig;
00249   if (RIGID_NONE == simParams->rigidBonds) return;
00250   if (WAT_SWM4 != simParams->watmodel) return;
00251   for (ig = 0;  ig < numAtoms;  ig += atom[ig].hydrogenGroupSize ) {
00252     // find a water
00253     if (mol->rigid_bond_length(atom[ig].id) > 0) {
00254       // water is guaranteed by Molecule to have order:  O D LP H1 H2
00255       BigReal r_hh = mol->rigid_bond_length(atom[ig].id);
00256       BigReal r_oh = mol->rigid_bond_length(atom[ig+3].id);
00257       r_om = mol->rigid_bond_length(atom[ig+2].id);
00258       r_ohc = sqrt(r_oh * r_oh - 0.25 * r_hh * r_hh);
00259       //printf("r_om and r_ohc initialized to %f and %f\n", r_om, r_ohc);
00260       break;
00261     }
00262   }
00263 }
00264 
00265 void HomePatch::reinitAtoms(FullAtomList al) {
00266   atom = al;
00267   numAtoms = atom.size();
00268 
00269   // DMK - Atom Separation (water vs. non-water)
00270   #if NAMD_SeparateWaters != 0
00271 
00272     // Reset the numWaterAtoms value
00273     numWaterAtoms = -1;
00274 
00275     // Separate the atoms
00276     separateAtoms();
00277 
00278   #endif
00279 }
00280 
00281 // Bind a Sequencer to this HomePatch
00282 void HomePatch::useSequencer(Sequencer *sequencerPtr)
00283 { sequencer=sequencerPtr; }
00284 
00285 // start simulation over this Patch of atoms
00286 void HomePatch::runSequencer(void)
00287 { sequencer->run(); }
00288 
00289 void HomePatch::readPatchMap() {
00290   // iout << "Patch " << patchID << " has " << proxy.size() << " proxies.\n" << endi;
00291   PatchMap *p = PatchMap::Object();
00292   PatchID nnPatchID[PatchMap::MaxOneAway];
00293 
00294   patchMigrationCounter = numNeighbors 
00295     = PatchMap::Object()->oneAwayNeighbors(patchID, nnPatchID);
00296   DebugM( 1, "NumNeighbors for pid " <<patchID<<" is "<< numNeighbors << "\n");
00297   int n;
00298   for (n=0; n<numNeighbors; n++) {
00299     realInfo[n].destNodeID = p->node(realInfo[n].destPatchID = nnPatchID[n]);
00300      DebugM( 1, " nnPatchID=" <<nnPatchID[n]<<" nnNodeID="<< realInfo[n].destNodeID<< "\n");
00301     realInfo[n].mList.resize(0);
00302   }
00303 
00304   // Make mapping from the 3x3x3 cube of pointers to real migration info
00305   for (int i=0; i<3; i++)
00306     for (int j=0; j<3; j++)
00307       for (int k=0; k<3; k++)
00308       {
00309         int pid =  p->pid(p->index_a(patchID)+i-1, 
00310             p->index_b(patchID)+j-1, p->index_c(patchID)+k-1);
00311         if (pid < 0) {
00312            DebugM(5, "ERROR, for patchID " << patchID <<" I got neigh pid = " << pid << "\n");
00313         }
00314         if (pid == patchID && ! (
00315                 ( (i-1) && p->periodic_a() ) ||
00316                 ( (j-1) && p->periodic_b() ) ||
00317                 ( (k-1) && p->periodic_c() ) )) {
00318           mInfo[i][j][k] = NULL;
00319         }
00320         else {
00321           // Does not work as expected for periodic with only two patches.
00322           // Also need to check which image we want, but OK for now.  -JCP
00323           for (n = 0; n<numNeighbors; n++) {
00324             if (pid == realInfo[n].destPatchID) {
00325               mInfo[i][j][k] = &realInfo[n];
00326               break;
00327             }
00328           }
00329           if (n == numNeighbors) { // disaster! 
00330             DebugM(4,"BAD News, I could not find PID " << pid << "\n");
00331           }
00332         }
00333       }
00334 
00335   DebugM(1,"Patch("<<patchID<<") # of neighbors = " << numNeighbors << "\n");
00336 }
00337 
00338 HomePatch::~HomePatch()
00339 {
00340 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00341     ptnTree.resize(0);
00342     delete [] children;
00343     #ifdef USE_NODEPATCHMGR
00344     delete [] nodeChildren;    
00345     #endif
00346 #else
00347   delete [] child;
00348 #endif
00349 }
00350 
00351 
00352 void HomePatch::boxClosed(int)
00353 {
00354   if ( ! --boxesOpen )
00355   {
00356     if ( replacementForces ) {
00357       for ( int i = 0; i < numAtoms; ++i ) {
00358         if ( replacementForces[i].replace ) {
00359           for ( int j = 0; j < Results::maxNumForces; ++j ) { f[j][i] = 0; }
00360           f[Results::normal][i] = replacementForces[i].force;
00361         }
00362       }
00363       replacementForces = 0;
00364     }
00365     DebugM(1,patchID << ": " << CthSelf() << " awakening sequencer "
00366         << sequencer->thread << "(" << patchID << ") @" << CmiTimer() << "\n");
00367     // only awaken suspended threads.  Then say it is suspended.
00368     sequencer->awaken();
00369     return;
00370   }
00371   else
00372   {
00373     DebugM(1,patchID << ": " << boxesOpen << " boxes left to close.\n");
00374   }
00375 }
00376 
00377 void HomePatch::registerProxy(RegisterProxyMsg *msg) {
00378   DebugM(4, "registerProxy("<<patchID<<") - adding node " <<msg->node<<"\n");
00379   proxy.add(ProxyListElem(msg->node,forceBox.checkOut()));
00380 
00381 #ifdef MEM_OPT_VERSION
00382   isNewProxyAdded = 1;
00383 #endif
00384 
00385   Random((patchID + 37) * 137).reorder(proxy.begin(),proxy.size());
00386   delete msg;
00387 }
00388 
00389 void HomePatch::unregisterProxy(UnregisterProxyMsg *msg) {
00390   int n = msg->node;
00391   ProxyListElem *pe = proxy.begin();
00392   for ( ; pe->node != n; ++pe );
00393   forceBox.checkIn(pe->forceBox);
00394   proxy.del(pe - proxy.begin());
00395   delete msg;
00396 }
00397 
00398 #if USE_TOPOMAP && USE_SPANNING_TREE
00399 
00400 int HomePatch::findSubroots(int dim, int* subroots, int psize, int* pidscopy){
00401   int nChild = 0;
00402   int cones[6][proxySpanDim*proxySpanDim+proxySpanDim];
00403   int conesizes[6] = {0,0,0,0,0,0};
00404   int conecounters[6] = {0,0,0,0,0,0};
00405   int childcounter = 0;
00406   nChild = (psize>proxySpanDim)?proxySpanDim:psize;
00407   TopoManager tmgr;
00408   for(int i=0;i<psize;i++){
00409     int cone = tmgr.getConeNumberForRank(pidscopy[i]);
00410     cones[cone][conesizes[cone]++] = pidscopy[i];
00411   }
00412 
00413   while(childcounter<nChild){
00414     for(int i=0;i<6;i++){
00415       if(conecounters[i]<conesizes[i]){
00416         subroots[childcounter++] = cones[i][conecounters[i]++];
00417       }
00418     }
00419   }
00420   for(int i=nChild;i<proxySpanDim;i++)
00421     subroots[i] = -1;
00422   return nChild;
00423 }
00424 #endif // USE_TOPOMAP 
00425 
00426 static int compDistance(const void *a, const void *b)
00427 {
00428   int d1 = abs(*(int *)a - CkMyPe());
00429   int d2 = abs(*(int *)b - CkMyPe());
00430   if (d1 < d2) 
00431     return -1;
00432   else if (d1 == d2) 
00433     return 0;
00434   else 
00435     return 1;
00436 }
00437 
00438 void HomePatch::sendProxies()
00439 {
00440   ProxyListIter pli(proxy);
00441   NodeIDList list;
00442   for ( pli = pli.begin(); pli != pli.end(); ++pli )
00443   {
00444     list.add(pli->node);
00445   }
00446   ProxyMgr::Object()->sendProxies(patchID, &list[0], list.size());  
00447 }
00448 
00449 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00450 void HomePatch::buildNodeAwareSpanningTree(void){
00451     //build the naive spanning tree for this home patch    
00452     int *proxyNodeMap = new int[CkNumNodes()]; //each element indiates the number of proxies residing on this node 
00453     NodeIDList proxyPeList;
00454     int proxyCnt = proxy.size();
00455     if(proxyCnt==0) {
00456         //this case will not happen in practice.
00457         //In debugging state where spanning tree is enforced, then this could happen
00458         //Chao Mei        
00459         #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
00460         DebugFileTrace *dft = DebugFileTrace::Object();
00461         dft->openTrace();
00462         dft->writeTrace("HomePatch[%d] has 0 proxy on proc[%d] node[%d]\n", patchID, CkMyPe(), CkMyNode());
00463         dft->closeTrace();
00464         #endif
00465         return;
00466     }
00467     proxyPeList.resize(proxyCnt);
00468     ProxyListIter pli(proxy);
00469     int i=0;
00470     for ( pli = pli.begin(); pli != pli.end(); ++pli, i++ )
00471         proxyPeList[i] = pli->node;    
00472     ProxyMgr::buildSinglePatchNodeAwareSpanningTree(patchID, proxyPeList, ptnTree, proxyNodeMap);    
00473     delete [] proxyNodeMap;
00474     proxyPeList.resize(0);
00475 
00476     //optimize on the naive spanning tree
00477 
00478     //setup the children
00479     setupChildrenFromProxySpanningTree();
00480     //send down to children
00481     sendNodeAwareSpanningTree();
00482 }
00483 
00484 void HomePatch::setupChildrenFromProxySpanningTree(){
00485     if(ptnTree.size()==0) {
00486         numChild = 0;
00487         delete [] children;
00488         children = NULL;
00489         #ifdef USE_NODEPATCHMGR
00490         numNodeChild = 0;
00491         delete [] nodeChildren;
00492         nodeChildren = NULL;        
00493         #endif
00494         return;
00495     }
00496     proxyTreeNode *rootnode = &ptnTree.item(0);
00497     CmiAssert(rootnode->peIDs[0] == CkMyPe());
00498     //set up children
00499     //1. add external children (the first proc inside the proxy tree node)    
00500     //2. add internal children (with threshold that would enable spanning    
00501     int internalChild;
00502     int externalChild;
00503     internalChild = rootnode->numPes-1;
00504     numChild = internalChild;
00505     if(numChild > inNodeProxySpanDim) {        
00506         //tree construction within a node)
00507         CmiAbort("Enabling in-node spanning tree construction has not been implemented yet!\n");
00508     }else{
00509         //exclude the root node
00510         int treesize = ptnTree.size();
00511         externalChild = (proxySpanDim>(treesize-1))?(treesize-1):proxySpanDim;
00512         numChild += externalChild;        
00513 
00514         delete [] children;
00515         #ifdef USE_NODEPATCHMGR
00516         delete [] nodeChildren;
00517         #endif
00518         if(numChild==0){
00519             children = NULL;
00520             #ifdef USE_NODEPATCHMGR
00521             nodeChildren = NULL;
00522             numNodeChild = 0;
00523             #endif
00524             return;
00525         }
00526         children = new int[numChild];    
00527         for(int i=0; i<externalChild; i++) {
00528             children[i] = ptnTree.item(i+1).peIDs[0];
00529         }
00530         for(int i=externalChild, j=1; i<numChild; i++, j++) {
00531             children[i] = rootnode->peIDs[j];
00532         }
00533     }
00534 
00535     #ifdef USE_NODEPATCHMGR
00536     //only register the cores that have proxy patches. The HomePach's core
00537     //doesn't need to be registered.
00538     CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
00539     NodeProxyMgr *npm = pm[CkMyNode()].ckLocalBranch();
00540     if(rootnode->numPes==1){
00541         npm->registerPatch(patchID, 0, NULL);        
00542     }
00543     else{
00544         npm->registerPatch(patchID, rootnode->numPes-1, &rootnode->peIDs[1]);        
00545     }
00546 
00547     //set up childrens in terms of node ids
00548     numNodeChild = externalChild;
00549     if(internalChild) numNodeChild++;
00550     nodeChildren = new int[numNodeChild];    
00551     for(int i=0; i<externalChild; i++) {
00552         nodeChildren[i] = ptnTree.item(i+1).nodeID;        
00553     }
00554     //the last entry always stores this node id if there are proxies
00555     //on other cores of the same node for this patch.
00556     if(internalChild)
00557         nodeChildren[numNodeChild-1] = rootnode->nodeID;
00558     #endif
00559     
00560     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
00561     DebugFileTrace *dft = DebugFileTrace::Object();
00562     dft->openTrace();
00563     dft->writeTrace("HomePatch[%d] has %d children: ", patchID, numChild);
00564     for(int i=0; i<numChild; i++)
00565         dft->writeTrace("%d ", children[i]);
00566     dft->writeTrace("\n");
00567     dft->closeTrace();
00568     #endif
00569     
00570 }
00571 #endif
00572 
00573 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00574 //This is not an entry method, but takes an argument of message type
00575 void HomePatch::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){
00576     //set up the whole tree ptnTree
00577     int treesize = msg->numNodesWithProxies;    
00578     ptnTree.resize(treesize);    
00579     int *pAllPes = msg->allPes;
00580     for(int i=0; i<treesize; i++) {
00581         proxyTreeNode *oneNode = &ptnTree.item(i);
00582         delete oneNode->peIDs;
00583         oneNode->numPes = msg->numPesOfNode[i];
00584         oneNode->nodeID = CkNodeOf(*pAllPes);
00585         oneNode->peIDs = new int[oneNode->numPes];
00586         for(int j=0; j<oneNode->numPes; j++) {
00587             oneNode->peIDs[j] = *pAllPes;
00588             pAllPes++;
00589         }
00590     }
00591     //setup children
00592     setupChildrenFromProxySpanningTree();
00593     //send down to children
00594     sendNodeAwareSpanningTree();
00595 }
00596 
00597 void HomePatch::sendNodeAwareSpanningTree(){
00598     if(ptnTree.size()==0) return;    
00599     ProxyNodeAwareSpanningTreeMsg *msg = 
00600         ProxyNodeAwareSpanningTreeMsg::getANewMsg(patchID, CkMyPe(), ptnTree.begin(), ptnTree.size());
00601    
00602     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
00603     msg->printOut("HP::sendST");
00604     #endif
00605 
00606     CmiAssert(CkMyPe() == msg->allPes[0]);
00607     ProxyMgr::Object()->sendNodeAwareSpanningTree(msg);
00608 
00609 }
00610 #else
00611 void HomePatch::recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg){}
00612 void HomePatch::sendNodeAwareSpanningTree(){}
00613 #endif
00614 
00615 #ifndef NODEAWARE_PROXY_SPANNINGTREE
00616 // recv a spanning tree from load balancer
00617 void HomePatch::recvSpanningTree(int *t, int n)
00618 {
00619   int i;
00620   nChild=0;
00621   tree.resize(n);
00622   for (i=0; i<n; i++) {
00623     tree[i] = t[i];
00624   }
00625 
00626   for (i=1; i<=proxySpanDim; i++) {
00627     if (tree.size() <= i) break;
00628     child[i-1] = tree[i];
00629     nChild++;
00630   }
00631 
00632   // send down to children
00633   sendSpanningTree();
00634 }
00635 
00636 void HomePatch::sendSpanningTree()
00637 {
00638   if (tree.size() == 0) return;
00639   ProxySpanningTreeMsg *msg = new ProxySpanningTreeMsg;
00640   msg->patch = patchID;
00641   msg->node = CkMyPe();
00642   msg->tree = tree;
00643   ProxyMgr::Object()->sendSpanningTree(msg);  
00644 }
00645 #else
00646 void HomePatch::recvSpanningTree(int *t, int n){}
00647 void HomePatch::sendSpanningTree(){}
00648 #endif
00649 
00650 #ifndef NODEAWARE_PROXY_SPANNINGTREE
00651 void HomePatch::buildSpanningTree(void)
00652 {
00653   nChild = 0;
00654   int psize = proxy.size();
00655   if (psize == 0) return;
00656   NodeIDList oldtree = tree;
00657   int oldsize = oldtree.size();
00658   tree.resize(psize + 1);
00659   tree.setall(-1);
00660   tree[0] = CkMyPe();
00661   int s=1, e=psize+1;
00662   ProxyListIter pli(proxy);
00663   int patchNodesLast =
00664     ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
00665   int nNonPatch = 0;
00666 #if 0
00667   int* pelists = new int[psize];
00668   for (int i=0; i<psize; i++) pelists[i] = -1;
00669   for ( pli = pli.begin(); pli != pli.end(); ++pli ) {
00670     int idx = rand()%psize;
00671     while (pelists[idx] != -1) { idx++; if (idx == psize) idx = 0; }
00672     pelists[idx] = pli->node;
00673   }
00674   for ( int i=0; i<psize; i++)
00675   {
00676     if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(pelists[i]) ) {
00677       tree[--e] = pelists[i];
00678     } else {
00679       tree[s++] = pelists[i];
00680       nNonPatch++;
00681     }
00682   }
00683   delete [] pelists;
00684 #else
00685     // try to put it to the same old tree
00686   for ( pli = pli.begin(); pli != pli.end(); ++pli )
00687   {
00688     int oldindex = oldtree.find(pli->node);
00689     if (oldindex != -1 && oldindex < psize) {
00690       tree[oldindex] = pli->node;
00691     }
00692   }
00693   s=1; e=psize;
00694   for ( pli = pli.begin(); pli != pli.end(); ++pli )
00695   {
00696     if (tree.find(pli->node) != -1) continue;    // already assigned
00697     if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(pli->node) ) {
00698       while (tree[e] != -1) { e--; if (e==-1) e = psize; }
00699       tree[e] = pli->node;
00700     } else {
00701       while (tree[s] != -1) { s++; if (s==psize+1) s = 1; }
00702       tree[s] = pli->node;
00703       nNonPatch++;
00704     }
00705   }
00706 #if 1
00707   if (oldsize==0 && nNonPatch) {
00708     // first time, sort by distance
00709     qsort(&tree[1], nNonPatch, sizeof(int), compDistance);
00710   }
00711 #endif
00712 
00713   //CkPrintf("home: %d:(%d) %d %d %d %d %d\n", patchID, tree.size(),tree[0],tree[1],tree[2],tree[3],tree[4]);
00714 
00715 #if USE_TOPOMAP && USE_SPANNING_TREE
00716   
00717   //Right now only works for spanning trees with two levels
00718   int *treecopy = new int [psize];
00719   int subroots[proxySpanDim];
00720   int subsizes[proxySpanDim];
00721   int subtrees[proxySpanDim][proxySpanDim];
00722   int idxes[proxySpanDim];
00723   int i = 0;
00724 
00725   for(i=0;i<proxySpanDim;i++){
00726     subsizes[i] = 0;
00727     idxes[i] = i;
00728   }
00729   
00730   for(i=0;i<psize;i++){
00731     treecopy[i] = tree[i+1];
00732   }
00733   
00734   TopoManager tmgr;
00735   tmgr.sortRanksByHops(treecopy,nNonPatch);
00736   tmgr.sortRanksByHops(treecopy+nNonPatch,
00737                                                 psize-nNonPatch);  
00738   
00739   /* build tree and subtrees */
00740   nChild = findSubroots(proxySpanDim,subroots,psize,treecopy);
00741   delete [] treecopy;
00742   
00743   for(int i=1;i<psize+1;i++){
00744     int isSubroot=0;
00745     for(int j=0;j<nChild;j++)
00746       if(tree[i]==subroots[j]){
00747         isSubroot=1;
00748         break;
00749       }
00750     if(isSubroot) continue;
00751     
00752     int bAdded = 0;
00753     tmgr.sortIndexByHops(tree[i], subroots,
00754                                                   idxes, proxySpanDim);
00755     for(int j=0;j<proxySpanDim;j++){
00756       if(subsizes[idxes[j]]<proxySpanDim){
00757         subtrees[idxes[j]][(subsizes[idxes[j]])++] = tree[i];
00758         bAdded = 1; 
00759         break;
00760       }
00761     }
00762     if( psize > proxySpanDim && ! bAdded ) {
00763       NAMD_bug("HomePatch BGL Spanning Tree error: Couldn't find subtree for leaf\n");
00764     }
00765   }
00766 
00767 #else /* USE_TOPOMAP && USE_SPANNING_TREE */
00768   
00769   for (int i=1; i<=proxySpanDim; i++) {
00770     if (tree.size() <= i) break;
00771     child[i-1] = tree[i];
00772     nChild++;
00773   }
00774 #endif
00775 #endif
00776   
00777 #if 0
00778   // for debugging
00779   CkPrintf("[%d] Spanning tree for %d with %d children %d nNonPatch %d\n", CkMyPe(), patchID, psize, nNonPatch);
00780   for (int i=0; i<psize+1; i++) {
00781     CkPrintf("%d ", tree[i]);
00782   }
00783   CkPrintf("\n");
00784 #endif
00785   // send to children nodes
00786   sendSpanningTree();
00787 }
00788 #endif
00789 
00790 
00791 void HomePatch::receiveResults(ProxyResultVarsizeMsg *msg){
00792     DebugM(4, "patchID("<<patchID<<") receiveRes() nodeID("<<msg->node<<")\n");
00793     int n = msg->node;
00794     ProxyListElem *pe = proxy.begin();
00795     for ( ; pe->node != n; ++pe );
00796     Results *r = pe->forceBox->open();
00797 
00798     char *iszeroPtr = msg->isZero;
00799     Force *msgFPtr = msg->forceArr;
00800 
00801     for ( int k = 0; k < Results::maxNumForces; ++k )
00802     {
00803       Force *rfPtr = r->f[k];
00804       for(int i=0; i<msg->flLen[k]; i++, rfPtr++, iszeroPtr++) {
00805           if((*iszeroPtr)!=1) {
00806               *rfPtr += *msgFPtr;
00807               msgFPtr++;
00808           }
00809       }      
00810     }
00811     pe->forceBox->close(&r);
00812     delete msg;
00813 }
00814 
00815 void HomePatch::receiveResults(ProxyResultMsg *msg)
00816 {
00817   DebugM(4, "patchID("<<patchID<<") receiveRes() nodeID("<<msg->node<<")\n");
00818   int n = msg->node;
00819   ProxyListElem *pe = proxy.begin();
00820   for ( ; pe->node != n; ++pe );
00821   Results *r = pe->forceBox->open();
00822   for ( int k = 0; k < Results::maxNumForces; ++k )
00823   {
00824     Force *f = r->f[k];
00825     register ForceList::iterator f_i, f_e;
00826     f_i = msg->forceList[k].begin();
00827     f_e = msg->forceList[k].end();
00828     for ( ; f_i != f_e; ++f_i, ++f ) *f += *f_i;
00829   }
00830   pe->forceBox->close(&r);
00831   delete msg;
00832 }
00833 
00834 void HomePatch::receiveResults(ProxyCombinedResultMsg *msg)
00835 {
00836   DebugM(4, "patchID("<<patchID<<") receiveRes() #nodes("<<msg->nodes.size()<<")\n");
00837 //CkPrintf("[%d] Homepatch: %d receiveResults from %d nodes\n", CkMyPe(), patchID, n);
00838   for (int i=0; i<msg->nodes.size(); i++) {
00839     int node = msg->nodes[i];
00840     ProxyListElem *pe = proxy.begin();
00841     for ( ; pe->node != node; ++pe );
00842     Results *r = pe->forceBox->open();
00843     if (i==0) {
00844       for ( int k = 0; k < Results::maxNumForces; ++k )
00845       {
00846         Force *f = r->f[k];
00847         register ForceList::iterator f_i, f_e;
00848         f_i = msg->forceList[k].begin();
00849         f_e = msg->forceList[k].end();
00850         //for ( ; f_i != f_e; ++f_i, ++f ) *f += *f_i;
00851 
00852         int nf = f_e - f_i;
00853 #ifdef ARCH_POWERPC
00854 #pragma disjoint (*f_i, *f)
00855 #pragma unroll(4)
00856 #endif
00857         for (int count = 0; count < nf; count++) {
00858           f[count].x += f_i[count].x;      
00859           f[count].y += f_i[count].y;      
00860           f[count].z += f_i[count].z;
00861         }
00862       }
00863     }
00864     pe->forceBox->close(&r);
00865   }
00866   delete msg;
00867 }
00868 
00869 void HomePatch::positionsReady(int doMigration)
00870 {    
00871   flags.sequence++;
00872 
00873   if (!patchMapRead) { readPatchMap(); }
00874       
00875   if (numNeighbors) {
00876     if (doMigration) {
00877       doAtomMigration();
00878     } else {
00879       doMarginCheck();
00880     }
00881   }
00882   doMigration = (doMigration && numNeighbors) || ! patchMapRead;
00883 
00884   // Workaround for oversize groups
00885   doGroupSizeCheck();
00886 
00887   // Copy information needed by computes and proxys to Patch::p.
00888   p.resize(numAtoms);
00889   CompAtom *p_i = p.begin();
00890 #ifdef MEM_OPT_VERSION
00891   pExt.resize(numAtoms);
00892   CompAtomExt *pExt_i = pExt.begin();
00893 #endif
00894   FullAtom *a_i = atom.begin();
00895   int i; int n = numAtoms;
00896   for ( i=0; i<n; ++i ) { 
00897     p_i[i] = a_i[i]; 
00898     #ifdef MEM_OPT_VERSION
00899     pExt_i[i] = a_i[i];
00900     #endif
00901   }
00902 
00903   // Measure atom movement to test pairlist validity
00904   doPairlistCheck();
00905 
00906   if (flags.doMolly) mollyAverage();
00907 
00908   // Must Add Proxy Changes when migration completed!
00909   ProxyListIter pli(proxy);
00910   int *pids = NULL;
00911   int npid;
00912   if (proxySendSpanning == 0) {
00913     npid = proxy.size();
00914     pids = new int[npid];
00915     int *pidi = pids;
00916     int *pide = pids + proxy.size();
00917     int patchNodesLast =
00918       ( PatchMap::Object()->numNodesWithPatches() < ( 0.7 * CkNumPes() ) );
00919     for ( pli = pli.begin(); pli != pli.end(); ++pli )
00920     {
00921       if ( patchNodesLast && PatchMap::Object()->numPatchesOnNode(pli->node) ) {
00922         *(--pide) = pli->node;
00923       } else {
00924         *(pidi++) = pli->node;
00925       }
00926     }
00927   }
00928   else {
00929 #ifdef NODEAWARE_PROXY_SPANNINGTREE
00930     #ifdef USE_NODEPATCHMGR
00931     npid = numNodeChild;
00932     if(numNodeChild>0) {
00933         pids = new int[npid];
00934         memcpy(pids, nodeChildren, sizeof(int)*npid);
00935     }
00936     #else
00937     npid = numChild;
00938     if(numChild>0) {
00939         pids = new int[numChild];
00940         memcpy(pids, children, sizeof(int)*numChild);
00941     }
00942     #endif
00943 #else
00944     npid = nChild;
00945     pids = new int[proxySpanDim];
00946     for (int i=0; i<nChild; i++) pids[i] = child[i];
00947 #endif
00948   }
00949   if (npid) {
00950 #if CMK_PERSISTENT_COMM
00951     if (phsReady == 0)
00952       {
00953 //CmiPrintf("Build on %d phs0:%d\n", CkMyPe(), localphs[0]);
00954      for (int i=0; i<npid; i++) {
00955        localphs[i] = CmiCreatePersistent(pids[i], 30000);
00956      }
00957      nphs = npid;
00958      phsReady = 1;
00959     }
00960 #endif
00961     int seq = flags.sequence;
00962     int priority = PROXY_DATA_PRIORITY + PATCH_PRIORITY(patchID);
00963     //begin to prepare proxy msg and send it
00964     int pdMsgPLLen = p.size();
00965     int pdMsgAvgPLLen = 0;
00966     if(flags.doMolly) {
00967         pdMsgAvgPLLen = p_avg.size();
00968     }
00969     int pdMsgPLExtLen = 0;
00970 #ifdef MEM_OPT_VERSION
00971     if(doMigration || isNewProxyAdded) {
00972         pdMsgPLExtLen = pExt.size();
00973     }
00974 #endif
00975     ProxyDataMsg *nmsg = new (pdMsgPLLen, pdMsgAvgPLLen, pdMsgPLExtLen, PRIORITY_SIZE) ProxyDataMsg;
00976     SET_PRIORITY(nmsg,seq,priority);
00977     nmsg->patch = patchID;
00978     nmsg->flags = flags;
00979     nmsg->plLen = pdMsgPLLen;                
00980     //copying data to the newly created msg
00981     memcpy(nmsg->positionList, p.begin(), sizeof(CompAtom)*pdMsgPLLen);
00982     nmsg->avgPlLen = pdMsgAvgPLLen;        
00983     if(flags.doMolly) {
00984         memcpy(nmsg->avgPositionList, p_avg.begin(), sizeof(CompAtom)*pdMsgAvgPLLen);
00985     }
00986     nmsg->plExtLen = pdMsgPLExtLen;
00987 #ifdef MEM_OPT_VERSION
00988     if(doMigration || isNewProxyAdded){     
00989         memcpy(nmsg->positionExtList, pExt.begin(), sizeof(CompAtomExt)*pdMsgPLExtLen);
00990     }
00991 #endif
00992     
00993 #if NAMD_SeparateWaters != 0
00994     //DMK - Atom Separation (water vs. non-water)
00995     nmsg->numWaterAtoms = numWaterAtoms;
00996 #endif
00997 
00998     
00999     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)
01000     DebugFileTrace *dft = DebugFileTrace::Object();
01001     dft->openTrace();
01002     dft->writeTrace("HP::posReady: for HomePatch[%d], sending proxy msg to: ", patchID);
01003     for(int i=0; i<npid; i++) {
01004         dft->writeTrace("%d ", pids[i]);
01005     }
01006     dft->writeTrace("\n");
01007     dft->closeTrace();
01008     #endif
01009 
01010     if(doMigration) {
01011         ProxyMgr::Object()->sendProxyAll(nmsg,npid,pids);
01012     }else{
01013         ProxyMgr::Object()->sendProxyData(nmsg,npid,pids);
01014     }
01015 #if CMK_PERSISTENT_COMM
01016     CmiUsePersistentHandle(NULL, 0);
01017 #endif
01018 #ifdef MEM_OPT_VERSION
01019     isNewProxyAdded = 0;
01020 #endif
01021   }
01022   delete [] pids;
01023   DebugM(4, "patchID("<<patchID<<") doing positions Ready\n");
01024 
01025 #ifdef REMOVE_PROXYDATAMSG_EXTRACOPY
01026   positionPtrBegin = p.begin();
01027   positionPtrEnd = p.end();
01028 #endif
01029 
01030   if(flags.doMolly) {
01031       avgPositionPtrBegin = p_avg.begin();
01032       avgPositionPtrEnd = p_avg.end();
01033   }
01034   Patch::positionsReady(doMigration);
01035 
01036   patchMapRead = 1;
01037 
01038   // gzheng
01039   if (useSync) Sync::Object()->PatchReady();
01040 }
01041 
01042 void HomePatch::replaceForces(ExtForce *f)
01043 {
01044   replacementForces = f;
01045 }
01046 
01047 void HomePatch::saveForce(const int ftag)
01048 {
01049   f_saved[ftag].resize(numAtoms);
01050   for ( int i = 0; i < numAtoms; ++i )
01051   {
01052     f_saved[ftag][i] = f[ftag][i];
01053   }
01054 }
01055 
01056 #undef DEBUG_REDISTRIB_FORCE 
01057 #undef DEBUG_REDISTRIB_FORCE_VERBOSE
01058 /* Redistribute forces from the massless lonepair charge particle onto
01059  * the other atoms of the water.
01060  *
01061  * This is done using the same algorithm as charmm uses for TIP4P lonepairs.
01062  *
01063  * Pass by reference the forces (O H1 H2 LP) to be modified,
01064  * pass by constant reference the corresponding positions,
01065  * and a pointer to virial.
01066  */
01067 void HomePatch::redistrib_lp_force(
01068     Vector& f_ox, Vector& f_h1, Vector& f_h2, Vector& f_lp,
01069     const Vector& p_ox, const Vector& p_h1, const Vector& p_h2,
01070     const Vector& p_lp, Tensor *virial) {
01071 
01072   Tensor wc;  // accumulate virial contribution from force redistribution
01073 
01074 #ifdef DEBUG_REDISTRIB_FORCE 
01075   // Debug information to check against results at end
01076 
01077   // total force and torque relative to origin
01078   Vector totforce(0.0, 0.0, 0.0);
01079   Vector tottorque(0.0, 0.0, 0.0);
01080 
01081   totforce = f_ox + f_h1 + f_h2 + f_lp;
01082 
01083   tottorque += cross(f_ox, p_ox);
01084   tottorque += cross(f_h1, p_h1);
01085   tottorque += cross(f_h2, p_h2);
01086   //printf("Torque without LP is %f/%f/%f\n",
01087   //    tottorque.x, tottorque.y, tottorque.z);
01088   tottorque += cross(f_lp, p_lp);
01089   //printf("Torque with LP is %f/%f/%f\n",
01090   //    tottorque.x, tottorque.y, tottorque.z);
01091 #endif
01092 
01093   // Calculate the radial component of the force and add it to the oxygen
01094   Vector r_ox_lp = p_lp - p_ox;
01095   BigReal rad_factor = (f_lp * r_ox_lp) * r_ox_lp.rlength() * r_ox_lp.rlength();
01096   Vector f_rad = r_ox_lp * rad_factor;
01097 
01098   Tensor vir = outer(f_rad, p_ox);
01099   wc += vir;
01100 
01101   f_ox = f_ox + f_rad;
01102 
01103   // Calculate the angular component
01104   Vector r_hcom_ox = p_ox - ( (p_h1 + p_h2) * 0.5 );
01105   Vector r_h2_h1_2 = (p_h1 - p_h2) * 0.5; // half of r_h2_h1
01106 
01107   // deviation from collinearity of charge site
01108   Vector r_oop = cross(r_ox_lp, r_hcom_ox);
01109   //
01110   // vector out of o-h-h plane
01111   Vector r_perp = cross(r_hcom_ox, r_h2_h1_2);
01112 
01113   // Here we assume that Ox/Lp/Hcom are linear
01114   // If you want to correct for deviations, this is the place
01115 
01116 //  printf("Deviation from linearity for ox %i: %f/%f/%f\n", oxind, r_oop.x, r_oop.y, r_oop.z);
01117 
01118   Vector f_ang = f_lp - f_rad; // leave the angular component
01119 
01120   // now split this component onto the other atoms
01121   BigReal oxcomp = (r_hcom_ox.length() - r_ox_lp.length()) *
01122     r_hcom_ox.rlength();
01123   BigReal hydcomp = 0.5 * r_ox_lp.length() * r_hcom_ox.rlength();
01124 
01125   f_ox = f_ox + (f_ang * oxcomp);
01126   f_h1 = f_h1 + (f_ang * hydcomp);
01127   f_h2 = f_h2 + (f_ang * hydcomp);
01128 
01129   // Add virial contributions
01130   vir = outer(f_ang * oxcomp, p_ox);
01131   wc += vir;
01132   vir = outer(f_ang * hydcomp, p_h1);
01133   wc += vir;
01134   vir = outer(f_ang * hydcomp, p_h2);
01135   wc += vir;
01136   vir = outer(-1.0 * f_lp, p_lp);
01137   wc += vir;
01138 
01139   if ( virial ) *virial += wc;
01140 
01141   //Vector zerovec(0.0, 0.0, 0.0);
01142   f_lp = Vector(0.0, 0.0, 0.0);
01143 
01144 #ifdef DEBUG_REDISTRIB_FORCE 
01145   // Check that the total force and torque come out right
01146   Vector newforce(0.0, 0.0, 0.0);
01147   Vector newtorque(0.0, 0.0, 0.0);
01148   BigReal error = 0.0;
01149 
01150   newforce = f_ox + f_h1 + f_h2;
01151 
01152   newtorque += cross(f_ox, p_ox);
01153   newtorque += cross(f_h1, p_h1);
01154   newtorque += cross(f_h2, p_h2);
01155 
01156   error = fabs(newforce.length() - totforce.length());
01157   if (error > 0.0001) {
01158      printf("Error:  Force redistribution for water "
01159          "exceeded force tolerance (%f vs. %f)\n",
01160          newforce.length(), totforce.length());
01161   }
01162 #ifdef DEBUG_REDISTRIB_FORCE_VERBOSE
01163   printf("Error in force length:  %f\n", error);
01164 #endif
01165 
01166   error = fabs(newtorque.length() - tottorque.length());
01167   if (error > 0.0001) {
01168      printf("Error:  Force redistribution for water "
01169          "exceeded torque tolerance (%f vs. %f)\n",
01170          newtorque.length(), tottorque.length());
01171   }
01172 #ifdef DEBUG_REDISTRIB_FORCE_VERBOSE
01173   printf("Error in torque:  %f\n", error);
01174 #endif
01175 #endif /* DEBUG */
01176 }
01177 
01178 void HomePatch::swm4_omrepos(Vector *ref, Vector *pos, Vector *vel,
01179     BigReal invdt) {
01180   // Reposition lonepair (Om) particle of Drude SWM4 water.
01181   // Same comments apply as to tip4_omrepos(), but the ordering of atoms
01182   // is different: O, D, LP, H1, H2.
01183   pos[2] = pos[0] + (0.5 * (pos[3] + pos[4]) - pos[0]) * (r_om / r_ohc);
01184   // Now, adjust velocity of particle to get it to appropriate place
01185   if (invdt != 0) {
01186     vel[2] = (pos[2] - ref[2]) * invdt;
01187   }
01188   // No virial correction needed since lonepair is massless
01189 }
01190 
01191 void HomePatch::tip4_omrepos(Vector* ref, Vector* pos, Vector* vel, BigReal invdt) {
01192   /* Reposition the om particle of a tip4p water
01193    * A little geometry shows that the appropriate position is given by
01194    * R_O + (1 / 2 r_ohc) * ( 0.5 (R_H1 + R_H2) - R_O ) 
01195    * Here r_om is the distance from the oxygen to Om site, and r_ohc
01196    * is the altitude from the oxygen to the hydrogen center of mass
01197    * Those quantities are precalculated upon initialization of HomePatch
01198    *
01199    * Ordering of TIP4P atoms: O, H1, H2, LP.
01200    */
01201 
01202 //  printf("rom/rohc are %f %f\n", r_om, r_ohc);
01203 //  printf("Other positions are: \n  0: %f %f %f\n  1: %f %f %f\n  2: %f %f %f\n", pos[0].x, pos[0].y, pos[0].z, pos[1].x, pos[1].y, pos[1].z, pos[2].x, pos[2].y, pos[2].z);
01204   pos[3] = pos[0] + (0.5 * (pos[1] + pos[2]) - pos[0]) * (r_om / r_ohc); 
01205 //  printf("New position for lp is %f %f %f\n", pos[3].x, pos[3].y, pos[3].z);
01206 
01207   // Now, adjust the velocity of the particle to get it to the appropriate place
01208   if (invdt != 0) {
01209     vel[3] = (pos[3] - ref[3]) * invdt;
01210   }
01211 
01212   // No virial correction needed, since this is a massless particle
01213   return;
01214 }
01215 
01216 void HomePatch::redistrib_swm4_forces(const int ftag, Tensor *virial) {
01217   // Loop over the patch's atoms and apply the appropriate corrections
01218   // to get all forces off of lone pairs
01219   ForceList *f_mod = f;
01220   for (int i = 0;  i < numAtoms;  i++) {
01221     if (atom[i].mass < 0.01) {
01222       // found lonepair
01223       redistrib_lp_force(f_mod[ftag][i-2], f_mod[ftag][i+1],
01224           f_mod[ftag][i+2], f_mod[ftag][i],
01225           atom[i-2].position, atom[i+1].position,
01226           atom[i+2].position, atom[i].position, virial);
01227     }
01228   }
01229 }
01230 
01231 void HomePatch::redistrib_tip4p_forces(const int ftag, Tensor* virial) {
01232   // Loop over the patch's atoms and apply the appropriate corrections
01233   // to get all forces off of lone pairs
01234   // Atom ordering:  O H1 H2 LP
01235 
01236   ForceList *f_mod =f;
01237   for (int i=0; i<numAtoms; i++) {
01238     if (atom[i].mass < 0.01) {
01239       // found lonepair
01240       redistrib_lp_force(f_mod[ftag][i-3], f_mod[ftag][i-2],
01241           f_mod[ftag][i-1], f_mod[ftag][i],
01242           atom[i-3].position, atom[i-2].position,
01243           atom[i-1].position, atom[i].position, virial);
01244     }
01245   }
01246 }
01247 
01248 
01249 void HomePatch::addForceToMomentum(const BigReal timestep, const int ftag,
01250                                                         const int useSaved)
01251 {
01252   SimParameters *simParams = Node::Object()->simParameters;
01253   const BigReal dt = timestep / TIMEFACTOR;
01254   ForceList *f_use = (useSaved ? f_saved : f);
01255 
01256   if ( simParams->fixedAtomsOn ) {
01257     for ( int i = 0; i < numAtoms; ++i ) {
01258       if ( atom[i].atomFixed || atom[i].mass <= 0. ) atom[i].velocity = 0;
01259       else atom[i].velocity += f_use[ftag][i] * ( dt * namd_reciprocal (atom[i].mass) );
01260     }
01261   } else {
01262     FullAtom *atom_arr  = atom.begin();
01263     const Force    *force_arr = f_use[ftag].const_begin();
01264 #ifdef ARCH_POWERPC
01265 #pragma disjoint (*force_arr, *atom_arr)
01266 #endif
01267     for ( int i = 0; i < numAtoms; ++i ) {
01268       if (atom[i].mass == 0.) continue;
01269       BigReal recip_val = ( atom[i].mass > 0. ? dt * namd_reciprocal( atom[i].mass ) : 0.); 
01270       //printf("Taking reciprocal of mass %f\n", atom[i].mass);
01271       atom_arr[i].velocity.x += force_arr[i].x * recip_val;
01272       atom_arr[i].velocity.y += force_arr[i].y * recip_val;
01273       atom_arr[i].velocity.z += force_arr[i].z * recip_val;
01274     }
01275   }
01276 }
01277 
01278 void HomePatch::addVelocityToPosition(const BigReal timestep)
01279 {
01280   SimParameters *simParams = Node::Object()->simParameters;
01281   const BigReal dt = timestep / TIMEFACTOR;
01282   if ( simParams->fixedAtomsOn ) {
01283     for ( int i = 0; i < numAtoms; ++i ) {
01284       if ( ! atom[i].atomFixed ) atom[i].position += atom[i].velocity * dt;
01285     }
01286   } else {
01287     FullAtom *atom_arr  = atom.begin();
01288     for ( int i = 0; i < numAtoms; ++i ) {
01289       atom_arr[i].position.x  +=  atom_arr[i].velocity.x * dt;
01290       atom_arr[i].position.y  +=  atom_arr[i].velocity.y * dt;
01291       atom_arr[i].position.z  +=  atom_arr[i].velocity.z * dt;
01292     }
01293   }
01294 }
01295 
01296 //  RATTLE algorithm from Allen & Tildesley
01297 int HomePatch::rattle1(const BigReal timestep, Tensor *virial, 
01298     SubmitReduction *ppreduction)
01299 {
01300   Molecule *mol = Node::Object()->molecule;
01301   SimParameters *simParams = Node::Object()->simParameters;
01302   const int fixedAtomsOn = simParams->fixedAtomsOn;
01303   const int useSettle = simParams->useSettle;
01304   const BigReal dt = timestep / TIMEFACTOR;
01305   const BigReal invdt = (dt == 0.) ? 0. : 1.0 / dt; // precalc 1/dt
01306   BigReal tol2 = 2.0 * simParams->rigidTol;
01307   int maxiter = simParams->rigidIter;
01308   int dieOnError = simParams->rigidDie;
01309   int i, iter;
01310   BigReal dsq[10], tmp;
01311   int ial[10], ibl[10];
01312   Vector ref[10];  // reference position
01313   Vector refab[10];  // reference vector
01314   Vector pos[10];  // new position
01315   Vector vel[10];  // new velocity
01316   Vector netdp[10];  // total momentum change from constraint
01317   BigReal rmass[10];  // 1 / mass
01318   int fixed[10];  // is atom fixed?
01319   Tensor wc;  // constraint virial
01320   BigReal idz, zmin;
01321   int nslabs;
01322 
01323   // Initialize the settle algorithm with water parameters
01324   // settle1() assumes all waters are identical,
01325   // and will generate bad results if they are not.
01326   // XXX this will move to Molecule::build_atom_status when that 
01327   // version is debugged
01328   if (!settle1isinitted()) {
01329     for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
01330       // find a water
01331       if (mol->rigid_bond_length(atom[ig].id) > 0) {
01332         int oatm;
01333         if (simParams->watmodel == WAT_SWM4) {
01334           oatm = ig+3;  // skip over Drude and Lonepair
01335           //printf("ig=%d  mass_ig=%g  oatm=%d  mass_oatm=%g\n",
01336           //    ig, atom[ig].mass, oatm, atom[oatm].mass);
01337         }
01338         else {
01339           oatm = ig+1;
01340           // Avoid using the Om site to set this by mistake
01341           if (atom[ig].mass < 0.5 || atom[ig+1].mass < 0.5) {
01342             oatm += 1;
01343           }
01344         }
01345 
01346         // initialize settle water parameters
01347         settle1init(atom[ig].mass, atom[oatm].mass, 
01348                     mol->rigid_bond_length(atom[ig].id), 
01349                     mol->rigid_bond_length(atom[oatm].id));
01350         break; // done with init
01351       }
01352     }
01353   }
01354 
01355   if (ppreduction) {
01356     nslabs = simParams->pressureProfileSlabs;
01357     idz = nslabs/lattice.c().z;
01358     zmin = lattice.origin().z - 0.5*lattice.c().z;
01359   }
01360 
01361   // Size of a hydrogen group for water
01362   int wathgsize = 3;
01363   int watmodel = simParams->watmodel;
01364   if (watmodel == WAT_TIP4) wathgsize = 4;
01365   else if (watmodel == WAT_SWM4) wathgsize = 5;
01366   
01367   for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
01368     int hgs = atom[ig].hydrogenGroupSize;
01369     if ( hgs == 1 ) continue;  // only one atom in group
01370     // cache data in local arrays and integrate positions normally
01371     for ( i = 0; i < hgs; ++i ) {
01372       ref[i] = atom[ig+i].position;
01373       pos[i] = atom[ig+i].position;
01374       vel[i] = atom[ig+i].velocity;
01375       rmass[i] = (atom[ig+1].mass > 0. ? 1. / atom[ig+i].mass : 0.);
01376       //printf("rmass of %i is %f\n", ig+i, rmass[i]);
01377       fixed[i] = ( fixedAtomsOn && atom[ig+i].atomFixed );
01378       // undo addVelocityToPosition to get proper reference coordinates
01379       if ( fixed[i] ) rmass[i] = 0.; else pos[i] += vel[i] * dt;
01380     }
01381     int icnt = 0;
01382     if ( ( tmp = mol->rigid_bond_length(atom[ig].id) ) > 0 ) {  // for water
01383       if (hgs != wathgsize) {
01384         NAMD_bug("Hydrogen group error caught in rattle1().");
01385       }
01386       // Use SETTLE for water unless some of the water atoms are fixed,
01387       // for speed we test groupFixed rather than the individual atoms
01388       if (useSettle && !atom[ig].groupFixed) {
01389         if (simParams->watmodel == WAT_SWM4) {
01390           // SWM4 ordering:  O D LP H1 H2
01391           // do swap(O,LP) and call settle with subarray O H1 H2
01392           // swap back after we return
01393           Vector lp_ref = ref[2];
01394           Vector lp_pos = pos[2];
01395           Vector lp_vel = vel[2];
01396           ref[2] = ref[0];
01397           pos[2] = pos[0];
01398           vel[2] = vel[0];
01399           settle1(ref+2, pos+2, vel+2, invdt);
01400           ref[0] = ref[2];
01401           pos[0] = pos[2];
01402           vel[0] = vel[2];
01403           ref[2] = lp_ref;
01404           pos[2] = lp_pos;
01405           vel[2] = lp_vel;
01406           // determine for LP updated pos and vel
01407           swm4_omrepos(ref, pos, vel, invdt);
01408         }
01409         else {
01410           settle1(ref, pos, vel, invdt);
01411           if (simParams->watmodel == WAT_TIP4) {
01412             tip4_omrepos(ref, pos, vel, invdt);
01413           }
01414         }
01415 
01416         // which slab the hydrogen group will belong to
01417         // for pprofile calculations.
01418         int ppoffset, partition;
01419         if ( invdt == 0 ) for ( i = 0; i < wathgsize; ++i ) {
01420           atom[ig+i].position = pos[i];
01421         } else if ( virial == 0 ) for ( i = 0; i < wathgsize; ++i ) {
01422           atom[ig+i].velocity = vel[i];
01423         } else for ( i = 0; i < wathgsize; ++i ) {
01424           Force df = (vel[i] - atom[ig+i].velocity) * ( atom[ig+i].mass * invdt );
01425           Tensor vir = outer(df, ref[i]);
01426           wc += vir;
01427           f[Results::normal][ig+i] += df;
01428           atom[ig+i].velocity = vel[i];
01429           if (ppreduction) {
01430             // put all the atoms from a water in the same slab.  Atom 0
01431             // should be the parent atom.
01432             if (!i) {
01433               BigReal z = pos[i].z;
01434               partition = atom[ig].partition;
01435               int slab = (int)floor((z-zmin)*idz);
01436               if (slab < 0) slab += nslabs;
01437               else if (slab >= nslabs) slab -= nslabs;
01438               ppoffset = 3*(slab + nslabs*partition);
01439             }
01440             ppreduction->item(ppoffset  ) += vir.xx;
01441             ppreduction->item(ppoffset+1) += vir.yy;
01442             ppreduction->item(ppoffset+2) += vir.zz;
01443           }
01444         }
01445         continue;
01446       }
01447       if ( !(fixed[1] && fixed[2]) ) {
01448         dsq[icnt] = tmp * tmp;  ial[icnt] = 1;  ibl[icnt] = 2;  ++icnt;
01449       }
01450     }
01451     for ( i = 1; i < hgs; ++i ) {  // normal bonds to mother atom
01452       if ( ( tmp = mol->rigid_bond_length(atom[ig+i].id) ) > 0 ) {
01453         if ( !(fixed[0] && fixed[i]) ) {
01454           dsq[icnt] = tmp * tmp;  ial[icnt] = 0;  ibl[icnt] = i;  ++icnt;
01455         }
01456       }
01457     }
01458     if ( icnt == 0 ) continue;  // no constraints
01459     for ( i = 0; i < icnt; ++i ) {
01460       refab[i] = ref[ial[i]] - ref[ibl[i]];
01461     }
01462     for ( i = 0; i < hgs; ++i ) {
01463       netdp[i] = 0.;
01464     }
01465     int done;
01466     int consFailure;
01467     for ( iter = 0; iter < maxiter; ++iter ) {
01468 //if (iter > 0) CkPrintf("iteration %d\n", iter);
01469       done = 1;
01470       consFailure = 0;
01471       for ( i = 0; i < icnt; ++i ) {
01472         int a = ial[i];  int b = ibl[i];
01473         Vector pab = pos[a] - pos[b];
01474         BigReal pabsq = pab.x*pab.x + pab.y*pab.y + pab.z*pab.z;
01475         BigReal rabsq = dsq[i];
01476         BigReal diffsq = rabsq - pabsq;
01477         if ( fabs(diffsq) > (rabsq * tol2) ) {
01478           Vector &rab = refab[i];
01479           BigReal rpab = rab.x*pab.x + rab.y*pab.y + rab.z*pab.z;
01480           if ( rpab < ( rabsq * 1.0e-6 ) ) {
01481             done = 0;
01482             consFailure = 1;
01483             continue;
01484           }
01485           BigReal rma = rmass[a];
01486           BigReal rmb = rmass[b];
01487           BigReal gab = diffsq / ( 2.0 * ( rma + rmb ) * rpab );
01488           Vector dp = rab * gab;
01489           pos[a] += rma * dp;
01490           pos[b] -= rmb * dp;
01491           if ( invdt != 0. ) {
01492             dp *= invdt;
01493             netdp[a] += dp;
01494             netdp[b] -= dp;
01495           }
01496           done = 0;
01497         }
01498       }
01499       if ( done ) break;
01500     }
01501 
01502     if ( consFailure ) {
01503       if ( dieOnError ) {
01504         iout << iERROR << "Constraint failure in RATTLE algorithm for atom "
01505                         << (atom[ig].id + 1) << "!\n" << endi;
01506         return -1;  // triggers early exit
01507       } else {
01508         iout << iWARN << "Constraint failure in RATTLE algorithm for atom "
01509                         << (atom[ig].id + 1) << "!\n" << endi;
01510       }
01511     } else if ( ! done ) {
01512       if ( dieOnError ) {
01513         iout << iERROR << "Exceeded RATTLE iteration limit for atom "
01514                         << (atom[ig].id + 1) << "!\n" << endi;
01515         return -1;  // triggers early exit
01516       } else {
01517         iout << iWARN << "Exceeded RATTLE iteration limit for atom "
01518                         << (atom[ig].id + 1) << "!\n" << endi;
01519       }
01520     }
01521 
01522     // store data back to patch
01523     int ppoffset, partition;
01524     if ( invdt == 0 ) for ( i = 0; i < hgs; ++i ) {
01525       atom[ig+i].position = pos[i];
01526     } else if ( virial == 0 ) for ( i = 0; i < hgs; ++i ) {
01527       atom[ig+i].velocity = vel[i] + rmass[i] * netdp[i];
01528     } else for ( i = 0; i < hgs; ++i ) {
01529       Force df = netdp[i] * invdt;
01530       Tensor vir = outer(df, ref[i]);
01531       wc += vir;
01532       f[Results::normal][ig+i] += df;
01533       atom[ig+i].velocity = vel[i] + rmass[i] * netdp[i];
01534       if (ppreduction) {
01535         if (!i) {
01536           BigReal z = pos[i].z;
01537           int partition = atom[ig].partition;
01538           int slab = (int)floor((z-zmin)*idz);
01539           if (slab < 0) slab += nslabs;
01540           else if (slab >= nslabs) slab -= nslabs;
01541           ppoffset = 3*(slab + nslabs*partition);
01542         }
01543         ppreduction->item(ppoffset  ) += vir.xx;
01544         ppreduction->item(ppoffset+1) += vir.yy;
01545         ppreduction->item(ppoffset+2) += vir.zz;
01546       }
01547     }
01548   }
01549   if ( dt && virial ) *virial += wc;
01550 
01551   return 0;
01552 }
01553 
01554 //  RATTLE algorithm from Allen & Tildesley
01555 void HomePatch::rattle2(const BigReal timestep, Tensor *virial)
01556 {
01557   Molecule *mol = Node::Object()->molecule;
01558   SimParameters *simParams = Node::Object()->simParameters;
01559   const int fixedAtomsOn = simParams->fixedAtomsOn;
01560   const int useSettle = simParams->useSettle;
01561   const BigReal dt = timestep / TIMEFACTOR;
01562   Tensor wc;  // constraint virial
01563   BigReal tol = simParams->rigidTol;
01564   int maxiter = simParams->rigidIter;
01565   int dieOnError = simParams->rigidDie;
01566   int i, iter;
01567   BigReal dsqi[10], tmp;
01568   int ial[10], ibl[10];
01569   Vector ref[10];  // reference position
01570   Vector refab[10];  // reference vector
01571   Vector vel[10];  // new velocity
01572   BigReal rmass[10];  // 1 / mass
01573   BigReal redmass[10];  // reduced mass
01574   int fixed[10];  // is atom fixed?
01575   
01576   // Size of a hydrogen group for water
01577   int wathgsize = 3;
01578   if (simParams->watmodel == WAT_TIP4) wathgsize = 4;
01579   else if (simParams->watmodel == WAT_SWM4) wathgsize = 5;
01580 
01581   //  CkPrintf("In rattle2!\n");
01582   for ( int ig = 0; ig < numAtoms; ig += atom[ig].hydrogenGroupSize ) {
01583     //    CkPrintf("ig=%d\n",ig);
01584     int hgs = atom[ig].hydrogenGroupSize;
01585     if ( hgs == 1 ) continue;  // only one atom in group
01586     // cache data in lo