Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

ComputePmeMgr Class Reference

Inheritance diagram for ComputePmeMgr:

BOCclass List of all members.

Public Member Functions

 ComputePmeMgr ()
 ~ComputePmeMgr ()
void initialize (CkQdMsg *)
void initialize_pencils (CkQdMsg *)
void activate_pencils (CkQdMsg *)
void recvArrays (CProxy_PmeXPencil, CProxy_PmeYPencil, CProxy_PmeZPencil)
void initialize_computes ()
void sendData (Lattice &, int sequence)
void sendPencils (Lattice &, int sequence)
void recvGrid (PmeGridMsg *)
void gridCalc1 (void)
void sendTransBarrier (void)
void sendTrans (void)
void fwdSharedTrans (PmeTransMsg *)
void recvSharedTrans (PmeSharedTransMsg *)
void recvTrans (PmeTransMsg *)
void procTrans (PmeTransMsg *)
void gridCalc2 (void)
void gridCalc2R (void)
void fwdSharedUntrans (PmeUntransMsg *)
void recvSharedUntrans (PmeSharedUntransMsg *)
void sendUntrans (void)
void recvUntrans (PmeUntransMsg *)
void procUntrans (PmeUntransMsg *)
void gridCalc3 (void)
void sendUngrid (void)
void recvUngrid (PmeGridMsg *)
void recvAck (PmeAckMsg *)
void copyResults (PmeGridMsg *)
void copyPencils (PmeGridMsg *)
void ungridCalc (void)
void submitReductions ()
int isPmeProcessor (int p)

Static Public Attributes

CmiNodeLock fftw_plan_lock

Friends

class ComputePme
class NodePmeMgr

Constructor & Destructor Documentation

ComputePmeMgr::ComputePmeMgr  ) 
 

Definition at line 524 of file ComputePme.C.

References fftw_plan_lock.

00524                              : pmeProxy(thisgroup), 
00525                                  pmeProxyDir(thisgroup) {
00526 
00527   CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00528   pmeNodeProxy = CkpvAccess(BOCclass_group).nodePmeMgr;
00529 
00530   pmeNodeProxy.ckLocalBranch()->initialize();
00531 
00532   if ( CmiMyRank() == 0 ) {
00533     fftw_plan_lock = CmiCreateLock();
00534   }
00535 
00536   myKSpace = 0;
00537   kgrid = 0;
00538   work = 0;
00539   grid_count = 0;
00540   trans_count = 0;
00541   untrans_count = 0;
00542   ungrid_count = 0;
00543   gridmsg_reuse= new PmeGridMsg*[CkNumPes()];
00544   useBarrier = 0;
00545   sendTransBarrier_received = 0;
00546   usePencils = 0;
00547 }

ComputePmeMgr::~ComputePmeMgr  ) 
 

Definition at line 1532 of file ComputePme.C.

References fftw_plan_lock.

01532                               {
01533 
01534   if ( CmiMyRank() == 0 ) {
01535     CmiDestroyLock(fftw_plan_lock);
01536   }
01537 
01538   delete myKSpace;
01539   delete [] localInfo;
01540   delete [] gridNodeInfo;
01541   delete [] transNodeInfo;
01542   delete [] gridPeMap;
01543   delete [] transPeMap;
01544   delete [] recipPeDest;
01545   delete [] gridPeOrder;
01546   delete [] gridNodeOrder;
01547   delete [] transNodeOrder;
01548   delete [] isPmeFlag;
01549   delete [] qgrid;
01550   if ( kgrid != qgrid ) delete [] kgrid;
01551   delete [] work;
01552   delete [] gridmsg_reuse;
01553 
01554   for (int i=0; i<fsize*numGrids; ++i) {
01555     if ( q_arr[i] ) {
01556       delete [] q_arr[i];
01557     }
01558   }
01559   delete [] q_arr;
01560   delete [] q_list;
01561   delete [] f_arr;
01562   delete [] fz_arr;
01563 }


Member Function Documentation

void ComputePmeMgr::activate_pencils CkQdMsg *   ) 
 

Definition at line 1526 of file ComputePme.C.

01526                                                  {
01527   if ( ! usePencils ) return;
01528   if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
01529 }

void ComputePmeMgr::copyPencils PmeGridMsg  ) 
 

Definition at line 2724 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmeGrid::dim2, PmeGrid::dim3, PmeGridMsg::evir, j, PmeGrid::K1, PmeGrid::K2, PmeGridMsg::qgrid, PmeGridMsg::sourceNode, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by recvUngrid().

02724                                                {
02725 
02726   int K1 = myGrid.K1;
02727   int K2 = myGrid.K2;
02728   int dim2 = myGrid.dim2;
02729   int dim3 = myGrid.dim3;
02730   int block1 = myGrid.block1;
02731   int block2 = myGrid.block2;
02732 
02733   // msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
02734   int ib = msg->sourceNode / yBlocks;
02735   int jb = msg->sourceNode % yBlocks;
02736 
02737   int ibegin = ib*block1;
02738   int iend = ibegin + block1;  if ( iend > K1 ) iend = K1;
02739   int jbegin = jb*block2;
02740   int jend = jbegin + block2;  if ( jend > K2 ) jend = K2;
02741 
02742   int zlistlen = msg->zlistlen;
02743   int *zlist = msg->zlist;
02744   float *qmsg = msg->qgrid;
02745   int g;
02746   for ( g=0; g<numGrids; ++g ) {
02747     evir[g] += msg->evir[g];
02748     char *f = f_arr + g*fsize;
02749     float **q = q_arr + g*fsize;
02750     for ( int i=ibegin; i<iend; ++i ) {
02751      for ( int j=jbegin; j<jend; ++j ) {
02752       if( f[i*dim2+j] ) {
02753         f[i*dim2+j] = 0;
02754         for ( int k=0; k<zlistlen; ++k ) {
02755           q[i*dim2+j][zlist[k]] = *(qmsg++);
02756         }
02757       }
02758      }
02759     }
02760   }
02761 }

void ComputePmeMgr::copyResults PmeGridMsg  ) 
 

Definition at line 2861 of file ComputePme.C.

References PmeGrid::dim3, PmeGridMsg::evir, PmeGridMsg::fgrid, PmeGridMsg::len, PmeGridMsg::qgrid, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by recvUngrid().

02861                                                {
02862 
02863   int zdim = myGrid.dim3;
02864   int flen = msg->len;
02865   int fstart = msg->start;
02866   int zlistlen = msg->zlistlen;
02867   int *zlist = msg->zlist;
02868   float *qmsg = msg->qgrid;
02869   int g;
02870   for ( g=0; g<numGrids; ++g ) {
02871     evir[g] += msg->evir[g];
02872     char *f = msg->fgrid + g*flen;
02873     float **q = q_arr + fstart + g*fsize;
02874     for ( int i=0; i<flen; ++i ) {
02875       if ( f[i] ) {
02876         f[i] = 0;
02877         for ( int k=0; k<zlistlen; ++k ) {
02878           q[i][zlist[k]] = *(qmsg++);
02879         }
02880       }
02881     }
02882   }
02883 }

void ComputePmeMgr::fwdSharedTrans PmeTransMsg  ) 
 

Definition at line 1734 of file ComputePme.C.

References PmeSharedTransMsg::count, PmeSharedTransMsg::lock, PmeSharedTransMsg::msg, NodePmeInfo::npe, NodePmeInfo::pe_start, PME_TRANS_PRIORITY, PmeTransMsg::sequence, and SET_PRIORITY.

Referenced by NodePmeMgr::recvTrans(), and sendTrans().

01734                                                    {
01735   // CkPrintf("fwdSharedTrans on Pe(%d)\n",CkMyPe());
01736   int pe = transNodeInfo[myTransNode].pe_start;
01737   int npe = transNodeInfo[myTransNode].npe;
01738   CmiNodeLock lock = CmiCreateLock();
01739   int *count = new int; *count = npe;
01740   for (int i=0; i<npe; ++i, ++pe) {
01741     PmeSharedTransMsg *shmsg = new (PRIORITY_SIZE) PmeSharedTransMsg;
01742     SET_PRIORITY(shmsg,msg->sequence,PME_TRANS_PRIORITY)
01743     shmsg->msg = msg;
01744     shmsg->count = count;
01745     shmsg->lock = lock;
01746     pmeProxy[transPeMap[pe]].recvSharedTrans(shmsg);
01747   }
01748 }

void ComputePmeMgr::fwdSharedUntrans PmeUntransMsg  ) 
 

Definition at line 1957 of file ComputePme.C.

References PmeSharedUntransMsg::count, PmeSharedUntransMsg::lock, PmeSharedUntransMsg::msg, NodePmeInfo::npe, and NodePmeInfo::pe_start.

Referenced by NodePmeMgr::recvUntrans(), and sendUntrans().

01957                                                        {
01958   int pe = gridNodeInfo[myGridNode].pe_start;
01959   int npe = gridNodeInfo[myGridNode].npe;
01960   CmiNodeLock lock = CmiCreateLock();
01961   int *count = new int; *count = npe;
01962   for (int i=0; i<npe; ++i, ++pe) {
01963     PmeSharedUntransMsg *shmsg = new PmeSharedUntransMsg;
01964     shmsg->msg = msg;
01965     shmsg->count = count;
01966     shmsg->lock = lock;
01967     pmeProxy[gridPeMap[pe]].recvSharedUntrans(shmsg);
01968   }
01969 }

void ComputePmeMgr::gridCalc1 void   ) 
 

Definition at line 1644 of file ComputePme.C.

References PmeGrid::dim2, and PmeGrid::dim3.

01644                                   {
01645   // CkPrintf("gridCalc1 on Pe(%d)\n",CkMyPe());
01646 
01647 #ifdef NAMD_FFTW
01648   for ( int g=0; g<numGrids; ++g ) {
01649 #ifdef NAMD_FFTW_3
01650     fftwf_execute(forward_plan_yz[g]);
01651 #else
01652     rfftwnd_real_to_complex(forward_plan_yz, localInfo[myGridPe].nx,
01653         qgrid + qgrid_size * g, 1, myGrid.dim2 * myGrid.dim3, 0, 0, 0);
01654 #endif
01655 
01656   }
01657 #endif
01658 
01659   if ( ! useBarrier ) pmeProxyDir[CkMyPe()].sendTrans();
01660 }

void ComputePmeMgr::gridCalc2 void   ) 
 

Definition at line 1802 of file ComputePme.C.

References PmeGrid::dim3, gridCalc2R(), LocalPmeInfo::ny_after_transpose, and simParams.

01802                                   {
01803   // CkPrintf("gridCalc2 on Pe(%d)\n",CkMyPe());
01804 
01805 #if CMK_BLUEGENEL
01806   CmiNetworkProgressAfter (0);
01807 #endif
01808 
01809   int zdim = myGrid.dim3;
01810   // int y_start = localInfo[myTransPe].y_start_after_transpose;
01811   int ny = localInfo[myTransPe].ny_after_transpose;
01812 
01813   for ( int g=0; g<numGrids; ++g ) {
01814     // finish forward FFT (x dimension)
01815 #ifdef NAMD_FFTW
01816 #ifdef NAMD_FFTW_3
01817     fftwf_execute(forward_plan_x[g]);
01818 #else
01819     fftw(forward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01820         ny * zdim / 2, 1, work, 1, 0);
01821 #endif
01822 #endif
01823   }
01824 
01825 #ifdef OPENATOM_VERSION
01826     if ( ! simParams -> openatomOn ) { 
01827 #endif // OPENATOM_VERSION
01828       gridCalc2R();
01829 #ifdef OPENATOM_VERSION
01830     } else {
01831       gridCalc2Moa();
01832     }
01833 #endif // OPENATOM_VERSION
01834 }

void ComputePmeMgr::gridCalc2R void   ) 
 

Definition at line 1862 of file ComputePme.C.

References BigReal, PmeKSpace::compute_energy(), PmeGrid::dim3, and LocalPmeInfo::ny_after_transpose.

Referenced by gridCalc2().

01862                                    {
01863 
01864   int zdim = myGrid.dim3;
01865   // int y_start = localInfo[myTransPe].y_start_after_transpose;
01866   int ny = localInfo[myTransPe].ny_after_transpose;
01867 
01868   for ( int g=0; g<numGrids; ++g ) {
01869     // reciprocal space portion of PME
01870     BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
01871     recip_evir2[g][0] = myKSpace->compute_energy(kgrid+qgrid_size*g,
01872                         lattice, ewaldcof, &(recip_evir2[g][1]));
01873     // CkPrintf("Ewald reciprocal energy = %f\n", recip_evir2[g][0]);
01874 
01875     // start backward FFT (x dimension)
01876 
01877 #ifdef NAMD_FFTW
01878 #ifdef NAMD_FFTW_3
01879     fftwf_execute(backward_plan_x[g]);
01880 #else
01881     fftw(backward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01882         ny * zdim / 2, 1, work, 1, 0);
01883 #endif
01884 #endif
01885   }
01886   
01887   pmeProxyDir[CkMyPe()].sendUntrans();
01888 }

void ComputePmeMgr::gridCalc3 void   ) 
 

Definition at line 2039 of file ComputePme.C.

References PmeGrid::dim2, and PmeGrid::dim3.

02039                                   {
02040   // CkPrintf("gridCalc3 on Pe(%d)\n",CkMyPe());
02041 
02042   // finish backward FFT
02043 #ifdef NAMD_FFTW
02044 
02045   for ( int g=0; g<numGrids; ++g ) {
02046 #ifdef NAMD_FFTW_3
02047     fftwf_execute(backward_plan_yz[g]);
02048 #else
02049     rfftwnd_complex_to_real(backward_plan_yz, localInfo[myGridPe].nx,
02050         (fftw_complex *) (qgrid + qgrid_size * g),
02051         1, myGrid.dim2 * myGrid.dim3 / 2, 0, 0, 0);
02052 #endif
02053   }
02054 
02055 #endif
02056 
02057   pmeProxyDir[CkMyPe()].sendUngrid();
02058 }

void ComputePmeMgr::initialize CkQdMsg *   ) 
 

Definition at line 612 of file ComputePme.C.

References Lattice::a(), Lattice::a_r(), ResizeArray< Elem >::add(), SimParameters::alchDecouple, SimParameters::alchElecLambdaStart, SimParameters::alchFepOn, SimParameters::alchThermIntOn, ResizeArray< Elem >::begin(), BigReal, PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, CmiPeOnSamePhysicalNode, SimParameters::cutoff, PmeGrid::dim2, PmeGrid::dim3, ResizeArray< Elem >::end(), fftw_plan_lock, SimParameters::FFTWEstimate, SimParameters::FFTWPatient, generatePmePeList2(), PmePencilInitMsgData::grid, iINFO(), ResizeArray< Elem >::insert(), iout, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, SimParameters::lattice, SimParameters::lesFactor, SimParameters::lesOn, PatchMap::max_a(), PatchMap::min_a(), NAMD_bug(), NAMD_die(), PatchMap::node(), NodePmeInfo::npe, PatchMap::numNodesWithPatches(), PatchMap::numPatches(), PatchMap::numPatchesOnNode(), LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::pairInteractionOn, SimParameters::pairInteractionSelf, SimParameters::patchDimension, NodePmeInfo::pe_start, pencilPMEProcessors, SimParameters::PMEBarrier, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEMinPoints, SimParameters::PMEMinSlices, PmePencilInitMsgData::pmeNodeProxy, SimParameters::PMEPencils, SimParameters::PMEPencilsX, SimParameters::PMEPencilsXLayout, SimParameters::PMEPencilsY, SimParameters::PMEPencilsYLayout, SimParameters::PMEPencilsZ, SimParameters::PMEProcessors, PmePencilInitMsgData::pmeProxy, NodePmeInfo::real_node, Random::reorder(), ResizeArray< Elem >::resize(), Node::simParameters, simParams, ResizeArray< Elem >::size(), SortableResizeArray< Elem >::sort(), WorkDistrib::sortPmePes(), Vector::unit(), LocalPmeInfo::x_start, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::xm, PmePencilInitMsgData::xPencil, LocalPmeInfo::y_start_after_transpose, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, PmePencilInitMsgData::zBlocks, PmePencilInitMsgData::zm, and PmePencilInitMsgData::zPencil.

00612                                            {
00613   delete msg;
00614 
00615   localInfo = new LocalPmeInfo[CkNumPes()];
00616   gridNodeInfo = new NodePmeInfo[CkNumNodes()];
00617   transNodeInfo = new NodePmeInfo[CkNumNodes()];
00618   gridPeMap = new int[CkNumPes()];
00619   transPeMap = new int[CkNumPes()];
00620   recipPeDest = new int[CkNumPes()];
00621   gridPeOrder = new int[CkNumPes()];
00622   gridNodeOrder = new int[CkNumNodes()];
00623   transNodeOrder = new int[CkNumNodes()];
00624   isPmeFlag = new char[CkNumPes()];  
00625 
00626   SimParameters *simParams = Node::Object()->simParameters;
00627   PatchMap *patchMap = PatchMap::Object();
00628 
00629   alchFepOn = simParams->alchFepOn;
00630   alchThermIntOn = simParams->alchThermIntOn;
00631   alchDecouple = (alchFepOn || alchThermIntOn) && (simParams->alchDecouple);
00632   alchElecLambdaStart = (alchFepOn || alchThermIntOn) ? 
00633     simParams->alchElecLambdaStart : 0;
00634   if (alchFepOn || alchThermIntOn) {
00635     numGrids = 2;
00636     if (alchDecouple) numGrids += 2;
00637     if (alchElecLambdaStart || alchThermIntOn) numGrids ++;
00638   }
00639   else numGrids = 1;
00640   lesOn = simParams->lesOn;
00641   useBarrier = simParams->PMEBarrier;
00642   if ( lesOn ) {
00643     lesFactor = simParams->lesFactor;
00644     numGrids = lesFactor;
00645   }
00646   selfOn = 0;
00647   pairOn = simParams->pairInteractionOn;
00648   if ( pairOn ) {
00649     selfOn = simParams->pairInteractionSelf;
00650     if ( selfOn ) pairOn = 0;  // make pairOn and selfOn exclusive
00651     numGrids = selfOn ? 1 : 3;
00652   }
00653 
00654   if ( numGrids != 1 || simParams->PMEPencils == 0 ) usePencils = 0;
00655   else if ( simParams->PMEPencils > 0 ) usePencils = 1;
00656   else {
00657     int nrps = simParams->PMEProcessors;
00658     if ( nrps <= 0 ) nrps = CkNumPes();
00659     if ( nrps > CkNumPes() ) nrps = CkNumPes();
00660     int dimx = simParams->PMEGridSizeX;
00661     int dimy = simParams->PMEGridSizeY;
00662     int maxslabs = 1 + (dimx - 1) / simParams->PMEMinSlices;
00663     if ( maxslabs > nrps ) maxslabs = nrps;
00664     int maxpencils = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00665                 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00666     if ( maxpencils > nrps ) maxpencils = nrps;
00667     if ( maxpencils > 3 * maxslabs ) usePencils = 1;
00668     else usePencils = 0;
00669   }
00670 
00671   if ( usePencils ) {
00672     int nrps = simParams->PMEProcessors;
00673     if ( nrps <= 0 ) nrps = CkNumPes();
00674     if ( nrps > CkNumPes() ) nrps = CkNumPes();
00675     if ( simParams->PMEPencils > 1 &&
00676          simParams->PMEPencils * simParams->PMEPencils <= nrps ) {
00677       xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00678     } else {
00679       int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00680                 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00681       if ( nb2 > nrps ) nb2 = nrps;
00682       if ( nb2 < 1 ) nb2 = 1;
00683       int nb = (int) sqrt((float)nb2);
00684       if ( nb < 1 ) nb = 1;
00685       xBlocks = zBlocks = nb;
00686       yBlocks = nb2 / nb;
00687     }
00688 
00689     if ( simParams->PMEPencilsX > 0 ) xBlocks = simParams->PMEPencilsX;
00690     if ( simParams->PMEPencilsY > 0 ) yBlocks = simParams->PMEPencilsY;
00691     if ( simParams->PMEPencilsZ > 0 ) zBlocks = simParams->PMEPencilsZ;
00692 
00693     int dimx = simParams->PMEGridSizeX;
00694     int bx = 1 + ( dimx - 1 ) / xBlocks;
00695     xBlocks = 1 + ( dimx - 1 ) / bx;
00696 
00697     int dimy = simParams->PMEGridSizeY;
00698     int by = 1 + ( dimy - 1 ) / yBlocks;
00699     yBlocks = 1 + ( dimy - 1 ) / by;
00700 
00701     int dimz = simParams->PMEGridSizeZ / 2 + 1;  // complex
00702     int bz = 1 + ( dimz - 1 ) / zBlocks;
00703     zBlocks = 1 + ( dimz - 1 ) / bz;
00704 
00705     if ( xBlocks * yBlocks > CkNumPes() ) {
00706       NAMD_die("PME pencils xBlocks * yBlocks > numPes");
00707     }
00708     if ( xBlocks * zBlocks > CkNumPes() ) {
00709       NAMD_die("PME pencils xBlocks * zBlocks > numPes");
00710     }
00711     if ( yBlocks * zBlocks > CkNumPes() ) {
00712       NAMD_die("PME pencils yBlocks * zBlocks > numPes");
00713     }
00714 
00715     if ( ! CkMyPe() ) {
00716       iout << iINFO << "PME using " << xBlocks << " x " <<
00717         yBlocks << " x " << zBlocks <<
00718         " pencil grid for FFT and reciprocal sum.\n" << endi;
00719     }
00720   } else { // usePencils
00721 
00722   {  // decide how many pes to use for reciprocal sum
00723 
00724     // rules based on work available
00725     int minslices = simParams->PMEMinSlices;
00726     int dimx = simParams->PMEGridSizeX;
00727     int nrpx = ( dimx + minslices - 1 ) / minslices;
00728     int dimy = simParams->PMEGridSizeY;
00729     int nrpy = ( dimy + minslices - 1 ) / minslices;
00730 
00731     // rules based on processors available
00732     int nrpp = CkNumPes();
00733     // if ( nrpp > 32 ) nrpp = 32;  // cap to limit messages
00734     if ( nrpp < nrpx ) nrpx = nrpp;
00735     if ( nrpp < nrpy ) nrpy = nrpp;
00736 
00737     // user override
00738     int nrps = simParams->PMEProcessors;
00739     if ( nrps > CkNumPes() ) nrps = CkNumPes();
00740     if ( nrps > 0 ) nrpx = nrps;
00741     if ( nrps > 0 ) nrpy = nrps;
00742 
00743     // make sure there aren't any totally empty processors
00744     int bx = ( dimx + nrpx - 1 ) / nrpx;
00745     nrpx = ( dimx + bx - 1 ) / bx;
00746     int by = ( dimy + nrpy - 1 ) / nrpy;
00747     nrpy = ( dimy + by - 1 ) / by;
00748     if ( bx != ( dimx + nrpx - 1 ) / nrpx )
00749       NAMD_bug("Error in selecting number of PME processors.");
00750     if ( by != ( dimy + nrpy - 1 ) / nrpy )
00751       NAMD_bug("Error in selecting number of PME processors.");
00752 
00753     numGridPes = nrpx;
00754     numTransPes = nrpy;
00755   }
00756   if ( ! CkMyPe() ) {
00757     iout << iINFO << "PME using " << numGridPes << " and " << numTransPes <<
00758       " processors for FFT and reciprocal sum.\n" << endi;
00759   }
00760 
00761   int sum_npes = numTransPes + numGridPes;
00762   int max_npes = (numTransPes > numGridPes)?numTransPes:numGridPes;
00763 
00764 #if 0 // USE_TOPOMAP
00765   /* This code is being disabled permanently for slab PME on Blue Gene machines */
00766   PatchMap * pmap = PatchMap::Object();
00767   
00768   int patch_pes = pmap->numNodesWithPatches();
00769   TopoManager tmgr;
00770   if(tmgr.hasMultipleProcsPerNode())
00771     patch_pes *= 2;
00772 
00773   bool done = false;
00774   if(CkNumPes() > 2*sum_npes + patch_pes) {    
00775     done = generateBGLORBPmePeList(transPeMap, numTransPes);
00776     done &= generateBGLORBPmePeList(gridPeMap, numGridPes, transPeMap, numTransPes);    
00777   }
00778   else 
00779     if(CkNumPes() > 2 *max_npes + patch_pes) {
00780       done = generateBGLORBPmePeList(transPeMap, max_npes);
00781       gridPeMap = transPeMap;
00782     }
00783 
00784   if (!done)
00785 #endif
00786     {
00787       //generatePmePeList(transPeMap, max_npes);
00788       //gridPeMap = transPeMap;
00789       generatePmePeList2(gridPeMap, numGridPes, transPeMap, numTransPes);
00790     }
00791   
00792   if ( ! CkMyPe() ) {
00793     iout << iINFO << "PME GRID LOCATIONS:";
00794     int i;
00795     for ( i=0; i<numGridPes && i<10; ++i ) {
00796       iout << " " << gridPeMap[i];
00797     }
00798     if ( i < numGridPes ) iout << " ...";
00799     iout << "\n" << endi;
00800     iout << iINFO << "PME TRANS LOCATIONS:";
00801     for ( i=0; i<numTransPes && i<10; ++i ) {
00802       iout << " " << transPeMap[i];
00803     }
00804     if ( i < numTransPes ) iout << " ...";
00805     iout << "\n" << endi;
00806   }
00807 
00808   // sort based on nodes and physical nodes
00809   std::sort(gridPeMap,gridPeMap+numGridPes,WorkDistrib::pe_sortop_compact());
00810 
00811   myGridPe = -1;
00812   myGridNode = -1;
00813   int i = 0;
00814   for ( i=0; i<CkNumPes(); ++i )
00815     isPmeFlag[i] = 0;
00816   int node = -1;
00817   int real_node = -1;
00818   for ( i=0; i<numGridPes; ++i ) {
00819     if ( gridPeMap[i] == CkMyPe() ) myGridPe = i;
00820     isPmeFlag[gridPeMap[i]] |= 1;
00821     int real_node_i = CkNodeOf(gridPeMap[i]);
00822     if ( real_node_i == real_node ) {
00823       gridNodeInfo[node].npe += 1;
00824     } else {
00825       real_node = real_node_i;
00826       ++node;
00827       gridNodeInfo[node].real_node = real_node;
00828       gridNodeInfo[node].pe_start = i;
00829       gridNodeInfo[node].npe = 1;
00830     }
00831     if ( CkMyNode() == real_node_i ) myGridNode = node;
00832   }
00833   numGridNodes = node + 1;
00834   myTransPe = -1;
00835   myTransNode = -1;
00836   node = -1;
00837   real_node = -1;
00838   for ( i=0; i<numTransPes; ++i ) {
00839     if ( transPeMap[i] == CkMyPe() ) myTransPe = i;
00840     isPmeFlag[transPeMap[i]] |= 2;
00841     int real_node_i = CkNodeOf(transPeMap[i]);
00842     if ( real_node_i == real_node ) {
00843       transNodeInfo[node].npe += 1;
00844     } else {
00845       real_node = real_node_i;
00846       ++node;
00847       transNodeInfo[node].real_node = real_node;
00848       transNodeInfo[node].pe_start = i;
00849       transNodeInfo[node].npe = 1;
00850     }
00851     if ( CkMyNode() == real_node_i ) myTransNode = node;
00852   }
00853   numTransNodes = node + 1;
00854 
00855   if ( ! CkMyPe() ) {
00856     iout << iINFO << "PME USING " << numGridNodes << " GRID NODES AND "
00857          << numTransNodes << " TRANS NODES\n" << endi;
00858   }
00859 
00860   { // generate random orderings for grid and trans messages
00861     int i;
00862     for ( i = 0; i < numGridPes; ++i ) {
00863       gridPeOrder[i] = i;
00864     }
00865     Random rand(CkMyPe());
00866     if ( myGridPe < 0 ) {
00867       rand.reorder(gridPeOrder,numGridPes);
00868     } else {  // self last
00869       gridPeOrder[myGridPe] = numGridPes-1;
00870       gridPeOrder[numGridPes-1] = myGridPe;
00871       rand.reorder(gridPeOrder,numGridPes-1);
00872     } 
00873     for ( i = 0; i < numGridNodes; ++i ) {
00874       gridNodeOrder[i] = i;
00875     }
00876     if ( myGridNode < 0 ) {
00877       rand.reorder(gridNodeOrder,numGridNodes);
00878     } else {  // self last
00879       gridNodeOrder[myGridNode] = numGridNodes-1;
00880       gridNodeOrder[numGridNodes-1] = myGridNode;
00881       rand.reorder(gridNodeOrder,numGridNodes-1);
00882     }
00883     for ( i = 0; i < numTransNodes; ++i ) {
00884       transNodeOrder[i] = i;
00885     }
00886     if ( myTransNode < 0 ) {
00887       rand.reorder(transNodeOrder,numTransNodes);
00888     } else {  // self last
00889       transNodeOrder[myTransNode] = numTransNodes-1;
00890       transNodeOrder[numTransNodes-1] = myTransNode;
00891       rand.reorder(transNodeOrder,numTransNodes-1);
00892     }
00893   }
00894   
00895   } // ! usePencils
00896 
00897   myGrid.K1 = simParams->PMEGridSizeX;
00898   myGrid.K2 = simParams->PMEGridSizeY;
00899   myGrid.K3 = simParams->PMEGridSizeZ;
00900   myGrid.order = simParams->PMEInterpOrder;
00901   myGrid.dim2 = myGrid.K2;
00902   myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
00903 
00904   if ( ! usePencils ) {
00905     myGrid.block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
00906     myGrid.block2 = ( myGrid.K2 + numTransPes - 1 ) / numTransPes;
00907     myGrid.block3 = myGrid.dim3 / 2;  // complex
00908   }
00909 
00910   if ( usePencils ) {
00911     myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
00912     myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
00913     myGrid.block3 = ( myGrid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;  // complex
00914 
00915 
00916       int pe = 0;
00917       int x,y,z;
00918 
00919                 SortableResizeArray<int> zprocs(xBlocks*yBlocks);
00920                 SortableResizeArray<int> yprocs(xBlocks*zBlocks);
00921                 SortableResizeArray<int> xprocs(yBlocks*zBlocks);
00922                 {
00923                 int basepe = 0;  int npe = CkNumPes();
00924                         if ( npe > xBlocks*yBlocks &&
00925                                 npe > xBlocks*zBlocks &&
00926                                 npe > yBlocks*zBlocks ) {
00927                         // avoid node 0
00928                         ++basepe;
00929                         --npe;
00930                 }
00931 
00932       
00933                 // decide which pes to use by bit reversal and patch use
00934                 int i;
00935                 int ncpus = CkNumPes();
00936                 SortableResizeArray<int> patches, nopatches, pmeprocs;
00937                 PatchMap *pmap = PatchMap::Object();
00938                 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00939                         int ri = WorkDistrib::peDiffuseOrdering[icpu];
00940                         if ( ri ) { // keep 0 for special case
00941                                 if ( pmap->numPatchesOnNode(ri) ) patches.add(ri);
00942                                 else nopatches.add(ri);
00943                         }
00944                 }
00945 
00946 #if USE_RANDOM_TOPO
00947             Random rand(CkMyPe());
00948             int *tmp = new int[patches.size()];
00949             int nn = patches.size();
00950             for (i=0;i<nn;i++)  tmp[i] = patches[i];
00951             rand.reorder(tmp, nn);
00952             patches.resize(0);
00953             for (i=0;i<nn;i++)  patches.add(tmp[i]);
00954             delete [] tmp;
00955             tmp = new int[nopatches.size()];
00956             nn = nopatches.size();
00957             for (i=0;i<nn;i++)  tmp[i] = nopatches[i];
00958             rand.reorder(tmp, nn);
00959             nopatches.resize(0);
00960             for (i=0;i<nn;i++)  nopatches.add(tmp[i]);
00961             delete [] tmp;
00962 #endif
00963 
00964                 // only use zero if it eliminates overloading or has patches
00965                 int useZero = 0;
00966                 int npens = xBlocks*yBlocks;
00967                 if ( npens % ncpus == 0 ) useZero = 1;
00968                 if ( npens == nopatches.size() + 1 ) useZero = 1;
00969                 npens += xBlocks*zBlocks;
00970                 if ( npens % ncpus == 0 ) useZero = 1;
00971                 if ( npens == nopatches.size() + 1 ) useZero = 1;
00972                 npens += yBlocks*zBlocks;
00973                 if ( npens % ncpus == 0 ) useZero = 1;
00974                 if ( npens == nopatches.size() + 1 ) useZero = 1;
00975 
00976                 // add nopatches then patches in reversed order
00977                 for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00978                 if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00979                 for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00980                 if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00981   
00982                 int npes = pmeprocs.size();
00983                 for ( i=0; i<xBlocks*yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00984                 if ( i>1 && zprocs[0] == zprocs[i-1] ) zprocs[0] = 0;
00985 #if !USE_RANDOM_TOPO
00986                 zprocs.sort();
00987 #endif
00988                 for ( i=0; i<xBlocks*zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00989                 if ( i>1 && yprocs[0] == yprocs[i-1] ) yprocs[0] = 0;
00990 #if !USE_RANDOM_TOPO
00991                 yprocs.sort();
00992 #endif
00993       for ( i=0; i<yBlocks*zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00994       if ( i>1 && xprocs[0] == xprocs[i-1] ) xprocs[0] = 0;
00995 #if !USE_RANDOM_TOPO
00996       xprocs.sort();
00997 #endif
00998 
00999 #if USE_TOPO_SFC
01000   CmiLock(tmgr_lock);
01001   //{
01002   TopoManager tmgr;
01003   int xdim = tmgr.getDimNX();
01004   int ydim = tmgr.getDimNY();
01005   int zdim = tmgr.getDimNZ();
01006   int xdim1 = find_level_grid(xdim);
01007   int ydim1 = find_level_grid(ydim);
01008   int zdim1 = find_level_grid(zdim);
01009   if(CkMyPe() == 0)
01010       printf("xdim: %d %d %d, %d %d %d\n", xdim, ydim, zdim, xdim1, ydim1, zdim1);
01011 
01012   vector<Coord> result;
01013   SFC_grid(xdim, ydim, zdim, xdim1, ydim1, zdim1, result);
01014   sort_sfc(xprocs, tmgr, result);
01015   sort_sfc(yprocs, tmgr, result);
01016   sort_sfc(zprocs, tmgr, result);
01017   //}
01018   CmiUnlock(tmgr_lock);
01019 #endif
01020 
01021       pencilPMEProcessors = new char [CkNumPes()];
01022       memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
01023 
01024 
01025                 if(CkMyPe() == 0){  
01026               iout << iINFO << "PME Z PENCIL LOCATIONS:";
01027           for ( i=0; i<zprocs.size() && i<10; ++i ) {
01028 #if USE_TOPO_SFC
01029               int x,y,z,t;
01030               tmgr.rankToCoordinates(zprocs[i], x,y, z, t);
01031               iout << " " << zprocs[i] << "(" << x << " " << y << " " << z << ")";
01032 #else
01033               iout << " " << zprocs[i];
01034 #endif
01035           }
01036           if ( i < zprocs.size() ) iout << " ...";
01037               iout << "\n" << endi;
01038                 }
01039 
01040       for (pe=0, x = 0; x < xBlocks; ++x)
01041         for (y = 0; y < yBlocks; ++y, ++pe ) {
01042           pencilPMEProcessors[zprocs[pe]] = 1;
01043         }
01044      
01045                 if(CkMyPe() == 0){  
01046               iout << iINFO << "PME Y PENCIL LOCATIONS:";
01047           for ( i=0; i<yprocs.size() && i<10; ++i ) {
01048 #if USE_TOPO_SFC
01049               int x,y,z,t;
01050               tmgr.rankToCoordinates(yprocs[i], x,y, z, t);
01051               iout << " " << yprocs[i] << "(" << x << " " << y << " " << z << ")";
01052 #else
01053               iout << " " << yprocs[i];
01054 #endif
01055           }
01056           if ( i < yprocs.size() ) iout << " ...";
01057               iout << "\n" << endi;
01058                 }
01059 
01060       for (pe=0, z = 0; z < zBlocks; ++z )
01061         for (x = 0; x < xBlocks; ++x, ++pe ) {
01062           pencilPMEProcessors[yprocs[pe]] = 1;
01063         }
01064     
01065                 if(CkMyPe() == 0){  
01066                 iout << iINFO << "PME X PENCIL LOCATIONS:";
01067                     for ( i=0; i<xprocs.size() && i<10; ++i ) {
01068 #if USE_TOPO_SFC
01069                 int x,y,z,t;
01070                 tmgr.rankToCoordinates(xprocs[i], x,y, z, t);
01071                 iout << " " << xprocs[i] << "(" << x << "  " << y << " " << z << ")";
01072 #else
01073                 iout << " " << xprocs[i];
01074 #endif
01075             }
01076                 if ( i < xprocs.size() ) iout << " ...";
01077                 iout << "\n" << endi;
01078                 }
01079 
01080       for (pe=0, y = 0; y < yBlocks; ++y )      
01081         for (z = 0; z < zBlocks; ++z, ++pe ) {
01082           pencilPMEProcessors[xprocs[pe]] = 1;
01083         }
01084         
01085     }
01086 
01087 
01088         // creating the pencil arrays
01089         if ( CkMyPe() == 0 ){
01090 #if !USE_RANDOM_TOPO
01091         // std::sort(zprocs.begin(),zprocs.end(),WorkDistrib::pe_sortop_compact());
01092         WorkDistrib::sortPmePes(zprocs.begin(),xBlocks,yBlocks);
01093         std::sort(yprocs.begin(),yprocs.end(),WorkDistrib::pe_sortop_compact());
01094         std::sort(xprocs.begin(),xprocs.end(),WorkDistrib::pe_sortop_compact());
01095 #endif
01096 #if 1
01097         CProxy_PmePencilMap zm = CProxy_PmePencilMap::ckNew(0,1,yBlocks,xBlocks*yBlocks,zprocs.begin());
01098         CProxy_PmePencilMap ym;
01099         if ( simParams->PMEPencilsYLayout )
01100           ym = CProxy_PmePencilMap::ckNew(0,2,zBlocks,zBlocks*xBlocks,yprocs.begin()); // new
01101         else
01102           ym = CProxy_PmePencilMap::ckNew(2,0,xBlocks,zBlocks*xBlocks,yprocs.begin()); // old
01103         CProxy_PmePencilMap xm;
01104         if ( simParams->PMEPencilsXLayout )
01105           xm = CProxy_PmePencilMap::ckNew(2,1,yBlocks,yBlocks*zBlocks,xprocs.begin()); // new
01106         else
01107           xm = CProxy_PmePencilMap::ckNew(1,2,zBlocks,yBlocks*zBlocks,xprocs.begin()); // old
01108         pmeNodeProxy.recvPencilMapProxies(xm,ym,zm);
01109         CkArrayOptions zo(xBlocks,yBlocks,1);  zo.setMap(zm);
01110         CkArrayOptions yo(xBlocks,1,zBlocks);  yo.setMap(ym);
01111         CkArrayOptions xo(1,yBlocks,zBlocks);  xo.setMap(xm);
01112 #if CHARM_VERSION > 60301
01113         zo.setAnytimeMigration(false);  zo.setStaticInsertion(true);
01114         yo.setAnytimeMigration(false);  yo.setStaticInsertion(true);
01115         xo.setAnytimeMigration(false);  xo.setStaticInsertion(true);
01116 #endif
01117         zPencil = CProxy_PmeZPencil::ckNew(zo);  // (xBlocks,yBlocks,1);
01118         yPencil = CProxy_PmeYPencil::ckNew(yo);  // (xBlocks,1,zBlocks);
01119         xPencil = CProxy_PmeXPencil::ckNew(xo);  // (1,yBlocks,zBlocks);
01120 #else
01121         zPencil = CProxy_PmeZPencil::ckNew();  // (xBlocks,yBlocks,1);
01122         yPencil = CProxy_PmeYPencil::ckNew();  // (xBlocks,1,zBlocks);
01123         xPencil = CProxy_PmeXPencil::ckNew();  // (1,yBlocks,zBlocks);
01124 
01125                 for (pe=0, x = 0; x < xBlocks; ++x)
01126                         for (y = 0; y < yBlocks; ++y, ++pe ) {
01127                                 zPencil(x,y,0).insert(zprocs[pe]);
01128                         }
01129         zPencil.doneInserting();
01130 
01131                 for (pe=0, x = 0; x < xBlocks; ++x)
01132                         for (z = 0; z < zBlocks; ++z, ++pe ) {
01133                                 yPencil(x,0,z).insert(yprocs[pe]);
01134                         }
01135         yPencil.doneInserting();
01136 
01137 
01138                 for (pe=0, y = 0; y < yBlocks; ++y )    
01139                         for (z = 0; z < zBlocks; ++z, ++pe ) {
01140                                 xPencil(0,y,z).insert(xprocs[pe]);
01141                         }
01142                 xPencil.doneInserting();     
01143 #endif
01144 
01145                 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
01146                 PmePencilInitMsgData msgdata;
01147                 msgdata.grid = myGrid;
01148                 msgdata.xBlocks = xBlocks;
01149                 msgdata.yBlocks = yBlocks;
01150                 msgdata.zBlocks = zBlocks;
01151                 msgdata.xPencil = xPencil;
01152                 msgdata.yPencil = yPencil;
01153                 msgdata.zPencil = zPencil;
01154                 msgdata.pmeProxy = pmeProxyDir;
01155         msgdata.pmeNodeProxy = pmeNodeProxy;
01156         msgdata.xm = xm;
01157         msgdata.ym = ym;
01158         msgdata.zm = zm;
01159                 xPencil.init(new PmePencilInitMsg(msgdata));
01160                 yPencil.init(new PmePencilInitMsg(msgdata));
01161                 zPencil.init(new PmePencilInitMsg(msgdata));
01162         }
01163 
01164     return;  // continue in initialize_pencils() at next startup stage
01165   }
01166 
01167 
01168   int pe;
01169   int nx = 0;
01170   for ( pe = 0; pe < numGridPes; ++pe ) {
01171     localInfo[pe].x_start = nx;
01172     nx += myGrid.block1;
01173     if ( nx > myGrid.K1 ) nx = myGrid.K1;
01174     localInfo[pe].nx = nx - localInfo[pe].x_start;
01175   }
01176   int ny = 0;
01177   for ( pe = 0; pe < numTransPes; ++pe ) {
01178     localInfo[pe].y_start_after_transpose = ny;
01179     ny += myGrid.block2;
01180     if ( ny > myGrid.K2 ) ny = myGrid.K2;
01181     localInfo[pe].ny_after_transpose =
01182                         ny - localInfo[pe].y_start_after_transpose;
01183   }
01184 
01185   {  // decide how many pes this node exchanges charges with
01186 
01187   PatchMap *patchMap = PatchMap::Object();
01188   Lattice lattice = simParams->lattice;
01189   BigReal sysdima = lattice.a_r().unit() * lattice.a();
01190   BigReal cutoff = simParams->cutoff;
01191   BigReal patchdim = simParams->patchDimension;
01192   int numPatches = patchMap->numPatches();
01193   int numNodes = CkNumPes();
01194   int *source_flags = new int[numNodes];
01195   int node;
01196   for ( node=0; node<numNodes; ++node ) {
01197     source_flags[node] = 0;
01198     recipPeDest[node] = 0;
01199   }
01200 
01201   // // make sure that we don't get ahead of ourselves on this node
01202   // if ( CkMyPe() < numPatches && myRecipPe >= 0 ) {
01203   //   source_flags[CkMyPe()] = 1;
01204   //   recipPeDest[myRecipPe] = 1;
01205   // }
01206 
01207   for ( int pid=0; pid < numPatches; ++pid ) {
01208     int pnode = patchMap->node(pid);
01209     int shift1 = (myGrid.K1 + myGrid.order - 1)/2;
01210     BigReal minx = patchMap->min_a(pid);
01211     BigReal maxx = patchMap->max_a(pid);
01212     BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
01213     // min1 (max1) is smallest (largest) grid line for this patch
01214     int min1 = ((int) floor(myGrid.K1 * (minx - margina))) + shift1 - myGrid.order + 1;
01215     int max1 = ((int) floor(myGrid.K1 * (maxx + margina))) + shift1;
01216     for ( int i=min1; i<=max1; ++i ) {
01217       int ix = i;
01218       while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
01219       while ( ix < 0 ) ix += myGrid.K1;
01220       // set source_flags[pnode] if this patch sends to our node
01221       if ( myGridPe >= 0 && ix >= localInfo[myGridPe].x_start &&
01222            ix < localInfo[myGridPe].x_start + localInfo[myGridPe].nx ) {
01223         source_flags[pnode] = 1;
01224       }
01225       // set dest_flags[] for node that our patch sends to
01226       if ( pnode == CkMyPe() ) {
01227         recipPeDest[ix / myGrid.block1] = 1;
01228       }
01229     }
01230   }
01231 
01232   int numSourcesSamePhysicalNode = 0;
01233   numSources = 0;
01234   numDestRecipPes = 0;
01235   for ( node=0; node<numNodes; ++node ) {
01236     if ( source_flags[node] ) ++numSources;
01237     if ( recipPeDest[node] ) ++numDestRecipPes;
01238     if ( source_flags[node] && CmiPeOnSamePhysicalNode(node,CkMyPe()) ) ++numSourcesSamePhysicalNode;
01239   }
01240 
01241 #if 0
01242   if ( numSources ) {
01243     CkPrintf("pe %5d pme %5d of %5d on same physical node\n",
01244             CkMyPe(), numSourcesSamePhysicalNode, numSources);
01245     iout << iINFO << "PME " << CkMyPe() << " sources:";
01246     for ( node=0; node<numNodes; ++node ) {
01247       if ( source_flags[node] ) iout << " " << node;
01248     }
01249     iout << "\n" << endi;
01250   }
01251 #endif
01252 
01253   delete [] source_flags;
01254 
01255   // CkPrintf("PME on node %d has %d sources and %d destinations\n",
01256   //           CkMyPe(), numSources, numDestRecipPes);
01257 
01258   }  // decide how many pes this node exchanges charges with (end)
01259 
01260   ungrid_count = numDestRecipPes;
01261 
01262   sendTransBarrier_received = 0;
01263 
01264   if ( myGridPe < 0 && myTransPe < 0 ) return;
01265   // the following only for nodes doing reciprocal sum
01266 
01267   if ( myTransPe >= 0 ) {
01268       int k2_start = localInfo[myTransPe].y_start_after_transpose;
01269       int k2_end = k2_start + localInfo[myTransPe].ny_after_transpose;
01270       #ifdef OPENATOM_VERSION
01271       if ( simParams->openatomOn ) { 
01272         CProxy_ComputeMoaMgr moaProxy(CkpvAccess(BOCclass_group).computeMoaMgr);
01273         myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2, moaProxy);
01274       } else {
01275         myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
01276       }
01277       #else  // OPENATOM_VERSION
01278       myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
01279       #endif // OPENATOM_VERSION
01280   }
01281 
01282   int local_size = myGrid.block1 * myGrid.K2 * myGrid.dim3;
01283   int local_size_2 = myGrid.block2 * myGrid.K1 * myGrid.dim3;
01284   if ( local_size < local_size_2 ) local_size = local_size_2;
01285   qgrid = new float[local_size*numGrids];
01286   if ( numGridPes > 1 || numTransPes > 1 ) {
01287     kgrid = new float[local_size*numGrids];
01288   } else {
01289     kgrid = qgrid;
01290   }
01291   qgrid_size = local_size;
01292 
01293   if ( myGridPe >= 0 ) {
01294   qgrid_start = localInfo[myGridPe].x_start * myGrid.K2 * myGrid.dim3;
01295   qgrid_len = localInfo[myGridPe].nx * myGrid.K2 * myGrid.dim3;
01296   fgrid_start = localInfo[myGridPe].x_start * myGrid.K2;
01297   fgrid_len = localInfo[myGridPe].nx * myGrid.K2;
01298   }
01299 
01300   int n[3]; n[0] = myGrid.K1; n[1] = myGrid.K2; n[2] = myGrid.K3;
01301 #ifdef NAMD_FFTW
01302   CmiLock(fftw_plan_lock);
01303 #ifdef NAMD_FFTW_3
01304   work = new fftwf_complex[n[0]];
01305   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
01306   if ( myGridPe >= 0 ) {
01307     forward_plan_yz=new fftwf_plan[numGrids];
01308     backward_plan_yz=new fftwf_plan[numGrids];
01309   }
01310   if ( myTransPe >= 0 ) {
01311     forward_plan_x=new fftwf_plan[numGrids];
01312     backward_plan_x=new fftwf_plan[numGrids];
01313   }
01314   /* need one plan per grid */
01315   if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps.  1..." << endi;
01316   if ( myGridPe >= 0 ) {
01317     for( int g=0; g<numGrids; g++)
01318       {
01319         forward_plan_yz[g] = fftwf_plan_many_dft_r2c(2, n+1, 
01320                                                      localInfo[myGridPe].nx,
01321                                                      qgrid + qgrid_size * g,
01322                                                      NULL,
01323                                                      1,
01324                                                      myGrid.dim2 * myGrid.dim3,
01325                                                      (fftwf_complex *) 
01326                                                      qgrid + qgrid_size * g,
01327                                                      NULL,
01328                                                      1,
01329                                                      myGrid.dim2 * (myGrid.dim3/2),
01330                                                      fftwFlags);
01331       }
01332   }
01333   int zdim = myGrid.dim3;
01334   int xStride=localInfo[myTransPe].ny_after_transpose *( myGrid.dim3 / 2);
01335   if ( ! CkMyPe() ) iout << " 2..." << endi;
01336   if ( myTransPe >= 0 ) {
01337     for( int g=0; g<numGrids; g++)
01338       {
01339 
01340         forward_plan_x[g] = fftwf_plan_many_dft(1, n, xStride,
01341                                                 (fftwf_complex *)
01342                                                 (kgrid+qgrid_size*g),
01343                                                 NULL,
01344                                                 xStride,
01345                                                 1,
01346                                                 (fftwf_complex *)
01347                                                 (kgrid+qgrid_size*g),
01348                                                 NULL,
01349                                                 xStride,
01350                                                 1,
01351                                                 FFTW_FORWARD,fftwFlags);
01352         
01353       }
01354   }
01355   if ( ! CkMyPe() ) iout << " 3..." << endi;
01356   if ( myTransPe >= 0 ) {
01357     for( int g=0; g<numGrids; g++)
01358       {
01359         backward_plan_x[g] = fftwf_plan_many_dft(1, n, xStride,
01360                                                  (fftwf_complex *)
01361                                                  (kgrid+qgrid_size*g),
01362                                                  NULL,
01363                                                  xStride,
01364                                                  1,
01365                                                  (fftwf_complex *)
01366                                                  (kgrid+qgrid_size*g),
01367                                                  NULL,
01368                                                  xStride,
01369                                                  1,
01370                                                  FFTW_BACKWARD, fftwFlags);
01371 
01372       }
01373   }
01374   if ( ! CkMyPe() ) iout << " 4..." << endi;
01375   if ( myGridPe >= 0 ) {
01376     for( int g=0; g<numGrids; g++)
01377       {
01378         backward_plan_yz[g] = fftwf_plan_many_dft_c2r(2, n+1, 
01379                                                       localInfo[myGridPe].nx,
01380                                                       (fftwf_complex *)
01381                                                       qgrid + qgrid_size * g,
01382                                                       NULL,
01383                                                       1,
01384                                                       myGrid.dim2*(myGrid.dim3/2),
01385                                                       qgrid + qgrid_size * g,
01386                                                       NULL,
01387                                                       1,
01388                                                       myGrid.dim2 * myGrid.dim3,
01389                                                       fftwFlags);
01390       }
01391   }
01392   if ( ! CkMyPe() ) iout << "   Done.\n" << endi;
01393 
01394 #else
01395   work = new fftw_complex[n[0]];
01396 
01397   if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps.  1..." << endi;
01398   if ( myGridPe >= 0 ) {
01399   forward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_REAL_TO_COMPLEX,
01400         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01401         | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
01402   }
01403   if ( ! CkMyPe() ) iout << " 2..." << endi;
01404   if ( myTransPe >= 0 ) {
01405       forward_plan_x = fftw_create_plan_specific(n[0], FFTW_FORWARD,
01406         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01407         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
01408         localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
01409   }
01410   if ( ! CkMyPe() ) iout << " 3..." << endi;
01411   if ( myTransPe >= 0 ) {
01412   backward_plan_x = fftw_create_plan_specific(n[0], FFTW_BACKWARD,
01413         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01414         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
01415         localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
01416   }
01417   if ( ! CkMyPe() ) iout << " 4..." << endi;
01418   if ( myGridPe >= 0 ) {
01419   backward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_COMPLEX_TO_REAL,
01420         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01421         | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
01422   }
01423   if ( ! CkMyPe() ) iout << "   Done.\n" << endi;
01424 #endif
01425   CmiUnlock(fftw_plan_lock);
01426 #else
01427   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
01428 #endif
01429 
01430   if ( myGridPe >= 0 && numSources == 0 )
01431                 NAMD_bug("PME grid elements exist without sources.");
01432   grid_count = numSources;
01433   memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
01434   trans_count = numGridPes;
01435 }

void ComputePmeMgr::initialize_computes  ) 
 

Definition at line 2204 of file ComputePme.C.

References SimParameters::accelMDOn, PmeGrid::dim2, PmeGrid::dim3, ijpair::i, j, ijpair::j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), ReductionMgr::Object(), PmeGrid::order, REDUCTIONS_AMD, REDUCTIONS_BASIC, Node::simParameters, simParams, and ReductionMgr::willSubmit().

02204                                         {
02205 
02206   noWorkCount = 0;
02207   doWorkCount = 0;
02208   ungridForcesCount = 0;
02209 
02210   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
02211 
02212   SimParameters *simParams = Node::Object()->simParameters;
02213 
02214   strayChargeErrors = 0;
02215 
02216   if (simParams->accelMDOn) {
02217      amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
02218   } else {
02219      amd_reduction = NULL;
02220   }
02221 
02222   qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
02223   fsize = myGrid.K1 * myGrid.dim2;
02224   q_arr = new float*[fsize*numGrids];
02225   memset( (void*) q_arr, 0, fsize*numGrids * sizeof(float*) );
02226   q_list = new float*[fsize*numGrids];
02227   memset( (void*) q_list, 0, fsize*numGrids * sizeof(float*) );
02228   q_count = 0;
02229   f_arr = new char[fsize*numGrids];
02230   // memset to non-zero value has race condition on BlueGene/Q
02231   // memset( (void*) f_arr, 2, fsize*numGrids * sizeof(char) );
02232   for ( int n=fsize*numGrids, i=0; i<n; ++i ) f_arr[i] = 2;
02233   fz_arr = new char[myGrid.K3+myGrid.order-1];
02234 
02235   for ( int g=0; g<numGrids; ++g ) {
02236     char *f = f_arr + g*fsize;
02237     if ( usePencils ) {
02238       int K1 = myGrid.K1;
02239       int K2 = myGrid.K2;
02240       int block1 = ( K1 + xBlocks - 1 ) / xBlocks;
02241       int block2 = ( K2 + yBlocks - 1 ) / yBlocks;
02242       int dim2 = myGrid.dim2;
02243       for (int ap=0; ap<numPencilsActive; ++ap) {
02244         int ib = activePencils[ap].i;
02245         int jb = activePencils[ap].j;
02246         int ibegin = ib*block1;
02247         int iend = ibegin + block1;  if ( iend > K1 ) iend = K1;
02248         int jbegin = jb*block2;
02249         int jend = jbegin + block2;  if ( jend > K2 ) jend = K2;
02250         int flen = numGrids * (iend - ibegin) * (jend - jbegin);
02251         for ( int i=ibegin; i<iend; ++i ) {
02252           for ( int j=jbegin; j<jend; ++j ) {
02253             f[i*dim2+j] = 0;
02254           }
02255         }
02256       }
02257     } else {
02258       int block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
02259       bsize = block1 * myGrid.dim2 * myGrid.dim3;
02260       for (int pe=0; pe<numGridPes; pe++) {
02261         if ( ! recipPeDest[pe] ) continue;
02262         int start = pe * bsize;
02263         int len = bsize;
02264         if ( start >= qsize ) { start = 0; len = 0; }
02265         if ( start + len > qsize ) { len = qsize - start; }
02266         int zdim = myGrid.dim3;
02267         int fstart = start / zdim;
02268         int flen = len / zdim;
02269         memset(f + fstart, 0, flen*sizeof(char));
02270       }
02271     }
02272   }
02273 
02274 #if USE_PERSISTENT
02275   recvGrid_handle = NULL;
02276 #endif
02277 }

void ComputePmeMgr::initialize_pencils CkQdMsg *   ) 
 

Definition at line 1439 of file ComputePme.C.

References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), BigReal, PmeGrid::block1, PmeGrid::block2, SimParameters::cutoff, j, PmeGrid::K1, PmeGrid::K2, SimParameters::lattice, PatchMap::max_a(), PatchMap::max_b(), PatchMap::min_a(), PatchMap::min_b(), PatchMap::node(), PatchMap::numPatches(), PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::patchDimension, SimParameters::PMESendOrder, Random::reorder(), Node::simParameters, simParams, and Vector::unit().

01439                                                    {
01440   delete msg;
01441   if ( ! usePencils ) return;
01442 
01443   SimParameters *simParams = Node::Object()->simParameters;
01444 
01445   PatchMap *patchMap = PatchMap::Object();
01446   Lattice lattice = simParams->lattice;
01447   BigReal sysdima = lattice.a_r().unit() * lattice.a();
01448   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
01449   BigReal cutoff = simParams->cutoff;
01450   BigReal patchdim = simParams->patchDimension;
01451   int numPatches = patchMap->numPatches();
01452 
01453   pencilActive = new char[xBlocks*yBlocks];
01454   for ( int i=0; i<xBlocks; ++i ) {
01455     for ( int j=0; j<yBlocks; ++j ) {
01456       pencilActive[i*yBlocks+j] = 0;
01457     }
01458   }
01459 
01460   for ( int pid=0; pid < numPatches; ++pid ) {
01461     int pnode = patchMap->node(pid);
01462     if ( pnode != CkMyPe() ) continue;
01463 
01464     int shift1 = (myGrid.K1 + myGrid.order - 1)/2;
01465     int shift2 = (myGrid.K2 + myGrid.order - 1)/2;
01466 
01467     BigReal minx = patchMap->min_a(pid);
01468     BigReal maxx = patchMap->max_a(pid);
01469     BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
01470     // min1 (max1) is smallest (largest) grid line for this patch
01471     int min1 = ((int) floor(myGrid.K1 * (minx - margina))) + shift1 - myGrid.order + 1;
01472     int max1 = ((int) floor(myGrid.K1 * (maxx + margina))) + shift1;
01473 
01474     BigReal miny = patchMap->min_b(pid);
01475     BigReal maxy = patchMap->max_b(pid);
01476     BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
01477     // min2 (max2) is smallest (largest) grid line for this patch
01478     int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) + shift2 - myGrid.order + 1;
01479     int max2 = ((int) floor(myGrid.K2 * (maxy + marginb))) + shift2;
01480 
01481     for ( int i=min1; i<=max1; ++i ) {
01482       int ix = i;
01483       while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
01484       while ( ix < 0 ) ix += myGrid.K1;
01485       for ( int j=min2; j<=max2; ++j ) {
01486         int jy = j;
01487         while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
01488         while ( jy < 0 ) jy += myGrid.K2;
01489         pencilActive[(ix / myGrid.block1)*yBlocks + (jy / myGrid.block2)] = 1;
01490       }
01491     }
01492   }
01493 
01494   numPencilsActive = 0;
01495   for ( int i=0; i<xBlocks; ++i ) {
01496     for ( int j=0; j<yBlocks; ++j ) {
01497       if ( pencilActive[i*yBlocks+j] ) {
01498         ++numPencilsActive;
01499         zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
01500       }
01501     }
01502   }
01503   activePencils = new ijpair[numPencilsActive];
01504   numPencilsActive = 0;
01505   for ( int i=0; i<xBlocks; ++i ) {
01506     for ( int j=0; j<yBlocks; ++j ) {
01507       if ( pencilActive[i*yBlocks+j] ) {
01508         activePencils[numPencilsActive++] = ijpair(i,j);
01509       }
01510     }
01511   }
01512   if ( simParams->PMESendOrder ) {
01513     std::sort(activePencils,activePencils+numPencilsActive,ijpair_sortop_bit_reversed());
01514   } else {
01515     Random rand(CkMyPe());
01516     rand.reorder(activePencils,numPencilsActive);
01517   }
01518   //if ( numPencilsActive ) {
01519   //  CkPrintf("node %d sending to %d pencils\n", CkMyPe(), numPencilsActive);
01520   //}
01521 
01522   ungrid_count = numPencilsActive;
01523 }

int ComputePmeMgr::isPmeProcessor int  p  ) 
 

Definition at line 439 of file ComputePme.C.

References Node::Object(), pencilPMEProcessors, SimParameters::PMEPencils, Node::simParameters, and simParams.

00439                                       { 
00440   SimParameters *simParams = Node::Object()->simParameters;
00441   return ( usePencils || (simParams->PMEPencils>0) ? pencilPMEProcessors[p] : isPmeFlag[p] );
00442 }

void ComputePmeMgr::procTrans PmeTransMsg  ) 
 

Definition at line 1768 of file ComputePme.C.

References PmeGrid::dim3, PmeTransMsg::lattice, NodePmeInfo::npe, PmeTransMsg::nx, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PmeTransMsg::qgrid, PmeTransMsg::sequence, PmeTransMsg::x_start, and LocalPmeInfo::y_start_after_transpose.

Referenced by recvSharedTrans(), and recvTrans().

01768                                               {
01769   // CkPrintf("procTrans on Pe(%d)\n",CkMyPe());
01770   if ( trans_count == numGridPes ) {
01771     lattice = msg->lattice;
01772     grid_sequence = msg->sequence;
01773   }
01774 
01775  if ( msg->nx ) {
01776   int zdim = myGrid.dim3;
01777   NodePmeInfo &nodeInfo(transNodeInfo[myTransNode]);
01778   int first_pe = nodeInfo.pe_start;
01779   int last_pe = first_pe+nodeInfo.npe-1;
01780   int y_skip = localInfo[myTransPe].y_start_after_transpose
01781              - localInfo[first_pe].y_start_after_transpose;
01782   int ny_msg = localInfo[last_pe].y_start_after_transpose
01783              + localInfo[last_pe].ny_after_transpose
01784              - localInfo[first_pe].y_start_after_transpose;
01785   int ny = localInfo[myTransPe].ny_after_transpose;
01786   int x_start = msg->x_start;
01787   int nx = msg->nx;
01788   for ( int g=0; g<numGrids; ++g ) {
01789     CmiMemcpy((void*)(kgrid + qgrid_size * g + x_start*ny*zdim),
01790         (void*)(msg->qgrid + nx*(ny_msg*g+y_skip)*zdim),
01791         nx*ny*zdim*sizeof(float));
01792   }
01793  }
01794 
01795   --trans_count;
01796 
01797   if ( trans_count == 0 ) {
01798     pmeProxyDir[CkMyPe()].gridCalc2();
01799   }
01800 }

void ComputePmeMgr::procUntrans PmeUntransMsg  ) 
 

Definition at line 1989 of file ComputePme.C.

References PmeGrid::dim3, PmeUntransMsg::evir, PmeGrid::K2, NodePmeInfo::npe, LocalPmeInfo::nx, PmeUntransMsg::ny, NodePmeInfo::pe_start, PmeUntransMsg::qgrid, LocalPmeInfo::x_start, and PmeUntransMsg::y_start.

Referenced by recvSharedUntrans(), and recvUntrans().

01989                                                   {
01990   // CkPrintf("recvUntrans on Pe(%d)\n",CkMyPe());
01991   if ( untrans_count == numTransPes ) {
01992     for ( int g=0; g<numGrids; ++g ) {
01993       recip_evir[g] = 0.;
01994     }
01995   }
01996 
01997 #if CMK_BLUEGENEL
01998   CmiNetworkProgressAfter (0);
01999 #endif
02000 
02001   NodePmeInfo &nodeInfo(gridNodeInfo[myGridNode]);
02002   int first_pe = nodeInfo.pe_start;
02003   int g;
02004   if ( myGridPe == first_pe ) for ( g=0; g<numGrids; ++g ) {
02005     recip_evir[g] += msg->evir[g];
02006   }
02007 
02008  if ( msg->ny ) {
02009   int zdim = myGrid.dim3;
02010   int last_pe = first_pe+nodeInfo.npe-1;
02011   int x_skip = localInfo[myGridPe].x_start
02012              - localInfo[first_pe].x_start;
02013   int nx_msg = localInfo[last_pe].x_start
02014              + localInfo[last_pe].nx
02015              - localInfo[first_pe].x_start;
02016   int nx = localInfo[myGridPe].nx;
02017   int y_start = msg->y_start;
02018   int ny = msg->ny;
02019   int slicelen = myGrid.K2 * zdim;
02020   int cpylen = ny * zdim;
02021   for ( g=0; g<numGrids; ++g ) {
02022     float *q = qgrid + qgrid_size * g + y_start * zdim;
02023     float *qmsg = msg->qgrid + (nx_msg*g+x_skip) * cpylen;
02024     for ( int x = 0; x < nx; ++x ) {
02025       CmiMemcpy((void*)q, (void*)qmsg, cpylen*sizeof(float));
02026       q += slicelen;
02027       qmsg += cpylen;
02028     }
02029   }
02030  }
02031 
02032   --untrans_count;
02033 
02034   if ( untrans_count == 0 ) {
02035     pmeProxyDir[CkMyPe()].gridCalc3();
02036   }
02037 }

void ComputePmeMgr::recvAck PmeAckMsg  ) 
 

Definition at line 2116 of file ComputePme.C.

Referenced by recvUngrid().

02116                                           {
02117   if ( msg ) delete msg;
02118   --ungrid_count;
02119 
02120   if ( ungrid_count == 0 ) {
02121     pmeProxyDir[CkMyPe()].ungridCalc();
02122   }
02123 }

void ComputePmeMgr::recvArrays CProxy_PmeXPencil  ,
CProxy_PmeYPencil  ,
CProxy_PmeZPencil 
 

Definition at line 550 of file ComputePme.C.

00551                                                                        {
00552   xPencil = x;  yPencil = y;  zPencil = z;
00553   
00554     if(CmiMyRank()==0)
00555     {
00556       pmeNodeProxy.ckLocalBranch()->xPencil=x;
00557       pmeNodeProxy.ckLocalBranch()->yPencil=y;
00558       pmeNodeProxy.ckLocalBranch()->zPencil=z;
00559     }
00560 }

void ComputePmeMgr::recvGrid PmeGridMsg  ) 
 

Definition at line 1565 of file ComputePme.C.

References PmeGrid::dim3, PmeGridMsg::fgrid, PmeGridMsg::lattice, NAMD_bug(), PmeGridMsg::qgrid, PmeGridMsg::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

01565                                             {
01566   // CkPrintf("recvGrid from %d on Pe(%d)\n",msg->sourceNode,CkMyPe());
01567   if ( grid_count == 0 ) {
01568     NAMD_bug("Message order failure in ComputePmeMgr::recvGrid\n");
01569   }
01570   if ( grid_count == numSources ) {
01571     lattice = msg->lattice;
01572     grid_sequence = msg->sequence;
01573   }
01574 
01575   int zdim = myGrid.dim3;
01576   int zlistlen = msg->zlistlen;
01577   int *zlist = msg->zlist;
01578   float *qmsg = msg->qgrid;
01579   for ( int g=0; g<numGrids; ++g ) {
01580     char *f = msg->fgrid + fgrid_len * g;
01581     float *q = qgrid + qgrid_size * g;
01582     for ( int i=0; i<fgrid_len; ++i ) {
01583       if ( f[i] ) {
01584         for ( int k=0; k<zlistlen; ++k ) {
01585           q[zlist[k]] += *(qmsg++);
01586         }
01587       }
01588       q += zdim;
01589     }
01590   }
01591 
01592   gridmsg_reuse[numSources-grid_count] = msg;
01593   --grid_count;
01594 
01595   if ( grid_count == 0 ) {
01596     pmeProxyDir[CkMyPe()].gridCalc1();
01597     if ( useBarrier ) pmeProxyDir[0].sendTransBarrier();
01598   }
01599 }

void ComputePmeMgr::recvSharedTrans PmeSharedTransMsg  ) 
 

Definition at line 1750 of file ComputePme.C.

References PmeSharedTransMsg::count, PmeSharedTransMsg::lock, PmeSharedTransMsg::msg, and procTrans().

01750                                                           {
01751   procTrans(msg->msg);
01752   CmiLock(msg->lock);
01753   int count = --(*msg->count);
01754   CmiUnlock(msg->lock);
01755   if ( count == 0 ) {
01756     CmiDestroyLock(msg->lock);
01757     delete msg->count;
01758     delete msg->msg;
01759   }
01760   delete msg;
01761 }

void ComputePmeMgr::recvSharedUntrans PmeSharedUntransMsg  ) 
 

Definition at line 1971 of file ComputePme.C.

References PmeSharedUntransMsg::count, PmeSharedUntransMsg::lock, PmeSharedUntransMsg::msg, and procUntrans().

01971                                                               {
01972   procUntrans(msg->msg);
01973   CmiLock(msg->lock);
01974   int count = --(*msg->count);
01975   CmiUnlock(msg->lock);
01976   if ( count == 0 ) {
01977     CmiDestroyLock(msg->lock);
01978     delete msg->count;
01979     delete msg->msg;
01980   }
01981   delete msg;
01982 }

void ComputePmeMgr::recvTrans PmeTransMsg  ) 
 

Definition at line 1763 of file ComputePme.C.

References procTrans().

01763                                               {
01764   procTrans(msg);
01765   delete msg;
01766 }

void ComputePmeMgr::recvUngrid PmeGridMsg  ) 
 

Definition at line 2104 of file ComputePme.C.

References copyPencils(), copyResults(), NAMD_bug(), and recvAck().

02104                                               {
02105   // CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
02106   if ( ungrid_count == 0 ) {
02107     NAMD_bug("Message order failure in ComputePmeMgr::recvUngrid\n");
02108   }
02109 
02110   if ( usePencils ) copyPencils(msg);
02111   else copyResults(msg);
02112   delete msg;
02113   recvAck(0);
02114 }

void ComputePmeMgr::recvUntrans PmeUntransMsg  ) 
 

Definition at line 1984 of file ComputePme.C.

References procUntrans().

01984                                                   {
01985   procUntrans(msg);
01986   delete msg;
01987 }

void ComputePmeMgr::sendData Lattice ,
int  sequence
 

Definition at line 2764 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::dim2, PmeGrid::dim3, PmeGridMsg::fgrid, iERROR(), iout, j, PmeGrid::K2, PmeGrid::K3, PmeGridMsg::lattice, PmeGridMsg::len, PME_GRID_PRIORITY, PmeGridMsg::qgrid, PmeGridMsg::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by ComputePme::doWork().

02764                                                            {
02765 
02766   // iout << "Sending charge grid for " << numLocalAtoms << " atoms to FFT on " << iPE << ".\n" << endi;
02767 
02768   bsize = myGrid.block1 * myGrid.dim2 * myGrid.dim3;
02769 
02770   CProxy_ComputePmeMgr pmeProxy(CkpvAccess(BOCclass_group).computePmeMgr);
02771   for (int j=0; j<numGridPes; j++) {
02772     int pe = gridPeOrder[j];  // different order
02773     if ( ! recipPeDest[pe] && ! strayChargeErrors ) continue;
02774     int start = pe * bsize;
02775     int len = bsize;
02776     if ( start >= qsize ) { start = 0; len = 0; }
02777     if ( start + len > qsize ) { len = qsize - start; }
02778     int zdim = myGrid.dim3;
02779     int fstart = start / zdim;
02780     int flen = len / zdim;
02781     int fcount = 0;
02782     int i;
02783 
02784     int g;
02785     for ( g=0; g<numGrids; ++g ) {
02786       char *f = f_arr + fstart + g*fsize;
02787       for ( i=0; i<flen; ++i ) {
02788         fcount += f[i];
02789       }
02790       if ( ! recipPeDest[pe] ) {
02791         int errfound = 0;
02792         for ( i=0; i<flen; ++i ) {
02793           if ( f[i] == 3 ) {
02794             errfound = 1;
02795             break;
02796           }
02797         }
02798         if ( errfound ) {
02799           iout << iERROR << "Stray PME grid charges detected: "
02800                 << CkMyPe() << " sending to " << gridPeMap[pe] << " for planes";
02801           int iz = -1;
02802           for ( i=0; i<flen; ++i ) {
02803             if ( f[i] == 3 ) {
02804               f[i] = 2;
02805               int jz = (i+fstart)/myGrid.K2;
02806               if ( iz != jz ) { iout << " " << jz;  iz = jz; }
02807             }
02808           }
02809           iout << "\n" << endi;
02810         }
02811       }
02812     }
02813 
02814 #ifdef NETWORK_PROGRESS
02815     CmiNetworkProgress();
02816 #endif
02817 
02818     if ( ! recipPeDest[pe] ) continue;
02819 
02820     int zlistlen = 0;
02821     for ( i=0; i<myGrid.K3; ++i ) {
02822       if ( fz_arr[i] ) ++zlistlen;
02823     }
02824 
02825     PmeGridMsg *msg = new (numGrids, zlistlen, flen*numGrids,
02826                                 fcount*zlistlen, PRIORITY_SIZE) PmeGridMsg;
02827 
02828     msg->sourceNode = CkMyPe();
02829     msg->lattice = lattice;
02830     msg->start = fstart;
02831     msg->len = flen;
02832     msg->zlistlen = zlistlen;
02833     int *zlist = msg->zlist;
02834     zlistlen = 0;
02835     for ( i=0; i<myGrid.K3; ++i ) {
02836       if ( fz_arr[i] ) zlist[zlistlen++] = i;
02837     }
02838     float *qmsg = msg->qgrid;
02839     for ( g=0; g<numGrids; ++g ) {
02840       char *f = f_arr + fstart + g*fsize;
02841       CmiMemcpy((void*)(msg->fgrid+g*flen),(void*)f,flen*sizeof(char));
02842       float **q = q_arr + fstart + g*fsize;
02843       for ( i=0; i<flen; ++i ) {
02844         if ( f[i] ) {
02845           for ( int k=0; k<zlistlen; ++k ) {
02846             *(qmsg++) = q[i][zlist[k]];
02847           }
02848         }
02849       }
02850     }
02851 
02852     msg->sequence = compute_sequence;
02853     SET_PRIORITY(msg,compute_sequence,PME_GRID_PRIORITY)
02854     pmeProxy[gridPeMap[pe]].recvGrid(msg);
02855   }
02856  
02857   strayChargeErrors = 0;
02858 
02859 }

void ComputePmeMgr::sendPencils Lattice ,
int  sequence
 

Definition at line 2569 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmeGridMsg::destElem, PmeGrid::dim2, PmeGrid::dim3, PmeGridMsg::fgrid, PmeGridMsg::hasData, ijpair::i, iERROR(), iout, j, ijpair::j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PmeGridMsg::lattice, PmeGridMsg::len, NAMD_bug(), PME_GRID_PRIORITY, PmeGridMsg::qgrid, NodePmeMgr::recvZGrid(), PmeGridMsg::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, PmeGridMsg::zlistlen, and NodePmeMgr::zm.

Referenced by ComputePme::doWork().

02569                                                               {
02570 
02571   // iout << "Sending charge grid for " << numLocalAtoms << " atoms to FFT on " << iPE << ".\n" << endi;
02572 
02573 #if  USE_PERSISTENT
02574     if (recvGrid_handle== NULL) setup_recvgrid_persistent();
02575 #endif
02576   int K1 = myGrid.K1;
02577   int K2 = myGrid.K2;
02578   int dim2 = myGrid.dim2;
02579   int dim3 = myGrid.dim3;
02580   int block1 = myGrid.block1;
02581   int block2 = myGrid.block2;
02582 
02583   // int savedMessages = 0;
02584   NodePmeMgr *npMgr = pmeNodeProxy[CkMyNode()].ckLocalBranch();
02585 
02586   for (int ap=0; ap<numPencilsActive; ++ap) {
02587     int ib = activePencils[ap].i;
02588     int jb = activePencils[ap].j;
02589     int ibegin = ib*block1;
02590     int iend = ibegin + block1;  if ( iend > K1 ) iend = K1;
02591     int jbegin = jb*block2;
02592     int jend = jbegin + block2;  if ( jend > K2 ) jend = K2;
02593     int flen = numGrids * (iend - ibegin) * (jend - jbegin);
02594 
02595     int fcount = 0;
02596     for ( int g=0; g<numGrids; ++g ) {
02597       char *f = f_arr + g*fsize;
02598       for ( int i=ibegin; i<iend; ++i ) {
02599        for ( int j=jbegin; j<jend; ++j ) {
02600         fcount += f[i*dim2+j];
02601        }
02602       }
02603     }
02604 
02605 #ifdef NETWORK_PROGRESS
02606     CmiNetworkProgress();
02607 #endif
02608 
02609     if ( ! pencilActive[ib*yBlocks+jb] )
02610       NAMD_bug("PME activePencils list inconsistent");
02611 
02612     int zlistlen = 0;
02613     for ( int i=0; i<myGrid.K3; ++i ) {
02614       if ( fz_arr[i] ) ++zlistlen;
02615     }
02616 
02617     int hd = ( fcount? 1 : 0 );  // has data?
02618     // if ( ! hd ) ++savedMessages;
02619 
02620     
02621     PmeGridMsg *msg = new ( hd*numGrids, hd*zlistlen, hd*flen,
02622         hd*fcount*zlistlen, PRIORITY_SIZE) PmeGridMsg;
02623     msg->sourceNode = CkMyPe();
02624     msg->hasData = hd;
02625     msg->lattice = lattice;
02626    if ( hd ) {
02627 #if 0
02628     msg->start = fstart;
02629     msg->len = flen;
02630 #else
02631     msg->start = -1;   // obsolete?
02632     msg->len = -1;   // obsolete?
02633 #endif
02634     msg->zlistlen = zlistlen;
02635     int *zlist = msg->zlist;
02636     zlistlen = 0;
02637     for ( int i=0; i<myGrid.K3; ++i ) {
02638       if ( fz_arr[i] ) zlist[zlistlen++] = i;
02639     }
02640     char *fmsg = msg->fgrid;
02641     float *qmsg = msg->qgrid;
02642     for ( int g=0; g<numGrids; ++g ) {
02643       char *f = f_arr + g*fsize;
02644       float **q = q_arr + g*fsize;
02645       for ( int i=ibegin; i<iend; ++i ) {
02646        for ( int j=jbegin; j<jend; ++j ) {
02647         *(fmsg++) = f[i*dim2+j];
02648         if( f[i*dim2+j] ) {
02649           for ( int k=0; k<zlistlen; ++k ) {
02650             *(qmsg++) = q[i*dim2+j][zlist[k]];
02651           }
02652         }
02653        }
02654       }
02655     }
02656    }
02657 
02658     msg->sequence = compute_sequence;
02659     SET_PRIORITY(msg,compute_sequence,PME_GRID_PRIORITY)
02660     CmiEnableUrgentSend(1);
02661 #if USE_NODE_PAR_RECEIVE
02662     msg->destElem=CkArrayIndex3D(ib,jb,0);
02663     CProxy_PmePencilMap lzm = npMgr->zm;
02664     int destproc = lzm.ckLocalBranch()->procNum(0, msg->destElem);
02665     int destnode = CmiNodeOf(destproc);
02666     
02667 #if  0 
02668     CmiUsePersistentHandle(&recvGrid_handle[ap], 1);
02669 #endif
02670     pmeNodeProxy[destnode].recvZGrid(msg);
02671 #if 0 
02672     CmiUsePersistentHandle(NULL, 0);
02673 #endif
02674 #else
02675 #if 0 
02676     CmiUsePersistentHandle(&recvGrid_handle[ap], 1);
02677 #endif
02678     zPencil(ib,jb,0).recvGrid(msg);
02679 #if 0 
02680     CmiUsePersistentHandle(NULL, 0);
02681 #endif
02682 #endif
02683     CmiEnableUrgentSend(0);
02684   }
02685 
02686 
02687   if ( strayChargeErrors ) {
02688    strayChargeErrors = 0;
02689    iout << iERROR << "Stray PME grid charges detected: "
02690         << CkMyPe() << " sending to (x,y)";
02691    for (int ib=0; ib<xBlocks; ++ib) {
02692     for (int jb=0; jb<yBlocks; ++jb) {
02693      int ibegin = ib*block1;
02694      int iend = ibegin + block1;  if ( iend > K1 ) iend = K1;
02695      int jbegin = jb*block2;
02696      int jend = jbegin + block2;  if ( jend > K2 ) jend = K2;
02697      int flen = numGrids * (iend - ibegin) * (jend - jbegin);
02698 
02699      for ( int g=0; g<numGrids; ++g ) {
02700        char *f = f_arr + g*fsize;
02701        if ( ! pencilActive[ib*yBlocks+jb] ) {
02702            for ( int i=ibegin; i<iend; ++i ) {
02703             for ( int j=jbegin; j<jend; ++j ) {
02704              if ( f[i*dim2+j] == 3 ) {
02705                f[i*dim2+j] = 2;
02706                iout << " (" << i << "," << j << ")";
02707              }
02708             }
02709            }
02710        }
02711      }
02712     }
02713    }
02714    iout << "\n" << endi;
02715   }
02716 
02717   // if ( savedMessages ) {
02718   //   CkPrintf("Pe %d eliminated %d PME messages\n",CkMyPe(),savedMessages);
02719   // }
02720 
02721 }

void ComputePmeMgr::sendTrans void   ) 
 

Definition at line 1672 of file ComputePme.C.

References PmeGrid::dim3, fwdSharedTrans(), j, PmeGrid::K2, kgrid, PmeTransMsg::lattice, NodePmeInfo::npe, PmeTransMsg::nx, LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PME_TRANS_PRIORITY, PmeTransMsg::qgrid, qgrid_size, NodePmeInfo::real_node, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmeTransMsg::x_start, LocalPmeInfo::x_start, and LocalPmeInfo::y_start_after_transpose.

01672                                   {
01673   // CkPrintf("sendTrans on Pe(%d)\n",CkMyPe());
01674 
01675   // send data for transpose
01676   int zdim = myGrid.dim3;
01677   int nx = localInfo[myGridPe].nx;
01678   int x_start = localInfo[myGridPe].x_start;
01679   int slicelen = myGrid.K2 * zdim;
01680 
01681   ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects;
01682 
01683 #if CMK_BLUEGENEL
01684   CmiNetworkProgressAfter (0);
01685 #endif
01686 
01687   for (int j=0; j<numTransNodes; j++) {
01688     int node = transNodeOrder[j];  // different order on each node
01689     int pe = transNodeInfo[node].pe_start;
01690     int npe = transNodeInfo[node].npe;
01691     int totlen = 0;
01692     if ( node != myTransNode ) for (int i=0; i<npe; ++i, ++pe) {
01693       LocalPmeInfo &li = localInfo[pe];
01694       int cpylen = li.ny_after_transpose * zdim;
01695       totlen += cpylen;
01696     }
01697     PmeTransMsg *newmsg = new (nx * totlen * numGrids,
01698                                 PRIORITY_SIZE) PmeTransMsg;
01699     newmsg->sourceNode = myGridPe;
01700     newmsg->lattice = lattice;
01701     newmsg->x_start = x_start;
01702     newmsg->nx = nx;
01703     for ( int g=0; g<numGrids; ++g ) {
01704       float *qmsg = newmsg->qgrid + nx * totlen * g;
01705       pe = transNodeInfo[node].pe_start;
01706       for (int i=0; i<npe; ++i, ++pe) {
01707         LocalPmeInfo &li = localInfo[pe];
01708         int cpylen = li.ny_after_transpose * zdim;
01709         if ( node == myTransNode ) {
01710           ComputePmeMgr *m = mgrObjects[CkRankOf(transPeMap[pe])];
01711           qmsg = m->kgrid + m->qgrid_size * g + x_start*cpylen;
01712         }
01713         float *q = qgrid + qgrid_size * g + li.y_start_after_transpose * zdim;
01714         for ( int x = 0; x < nx; ++x ) {
01715           CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01716           q += slicelen;
01717           qmsg += cpylen;
01718         }
01719       }
01720     }
01721     newmsg->sequence = grid_sequence;
01722     SET_PRIORITY(newmsg,grid_sequence,PME_TRANS_PRIORITY)
01723     if ( node == myTransNode ) newmsg->nx = 0;
01724     if ( npe > 1 ) {
01725       if ( node == myTransNode ) fwdSharedTrans(newmsg);
01726       else pmeNodeProxy[transNodeInfo[node].real_node].recvTrans(newmsg);
01727     } else pmeProxy[transPeMap[transNodeInfo[node].pe_start]].recvTrans(newmsg);
01728   }
01729  
01730   untrans_count = numTransPes;
01731 
01732 }

void ComputePmeMgr::sendTransBarrier void   ) 
 

Definition at line 1662 of file ComputePme.C.

01662                                          {
01663   sendTransBarrier_received += 1;
01664   // CkPrintf("sendTransBarrier on %d %d\n",myGridPe,numGridPes-sendTransBarrier_received);
01665   if ( sendTransBarrier_received < numGridPes ) return;
01666   sendTransBarrier_received = 0;
01667   for ( int i=0; i<numGridPes; ++i ) {
01668     pmeProxyDir[gridPeMap[i]].sendTrans();
01669   }
01670 }

void ComputePmeMgr::sendUngrid void   ) 
 

Definition at line 2060 of file ComputePme.C.

References PmeGrid::dim3, PmeGridMsg::evir, PmeGridMsg::fgrid, j, PmeGridMsg::len, PME_UNGRID_PRIORITY, PmeGridMsg::qgrid, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

02060                                    {
02061 
02062   for ( int j=0; j<numSources; ++j ) {
02063     // int msglen = qgrid_len;
02064     PmeGridMsg *newmsg = gridmsg_reuse[j];
02065     int pe = newmsg->sourceNode;
02066     if ( j == 0 ) {  // only need these once
02067       for ( int g=0; g<numGrids; ++g ) {
02068         newmsg->evir[g] = recip_evir[g];
02069       }
02070     } else {
02071       for ( int g=0; g<numGrids; ++g ) {
02072         newmsg->evir[g] = 0.;
02073       }
02074     }
02075     int zdim = myGrid.dim3;
02076     int flen = newmsg->len;
02077     int fstart = newmsg->start;
02078     int zlistlen = newmsg->zlistlen;
02079     int *zlist = newmsg->zlist;
02080     float *qmsg = newmsg->qgrid;
02081     for ( int g=0; g<numGrids; ++g ) {
02082       char *f = newmsg->fgrid + fgrid_len * g;
02083       float *q = qgrid + qgrid_size * g + (fstart-fgrid_start) * zdim;
02084       for ( int i=0; i<flen; ++i ) {
02085         if ( f[i] ) {
02086           for ( int k=0; k<zlistlen; ++k ) {
02087             *(qmsg++) = q[zlist[k]];
02088           }
02089         }
02090         q += zdim;
02091       }
02092     }
02093     newmsg->sourceNode = myGridPe;
02094 
02095     SET_PRIORITY(newmsg,grid_sequence,PME_UNGRID_PRIORITY)
02096     CmiEnableUrgentSend(1);
02097     pmeProxyDir[pe].recvUngrid(newmsg);
02098     CmiEnableUrgentSend(0);
02099   }
02100   grid_count = numSources;
02101   memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
02102 }

void ComputePmeMgr::sendUntrans void   ) 
 

Definition at line 1890 of file ComputePme.C.

References PmeGrid::dim3, PmeUntransMsg::evir, fwdSharedUntrans(), j, PmeGrid::K2, NodePmeInfo::npe, LocalPmeInfo::nx, PmeUntransMsg::ny, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PME_UNTRANS_PRIORITY, qgrid, PmeUntransMsg::qgrid, qgrid_size, NodePmeInfo::real_node, SET_PRIORITY, PmeUntransMsg::sourceNode, LocalPmeInfo::x_start, PmeUntransMsg::y_start, and LocalPmeInfo::y_start_after_transpose.

01890                                     {
01891 
01892   int zdim = myGrid.dim3;
01893   int y_start = localInfo[myTransPe].y_start_after_transpose;
01894   int ny = localInfo[myTransPe].ny_after_transpose;
01895   int slicelen = myGrid.K2 * zdim;
01896 
01897   ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects;
01898 
01899 #if CMK_BLUEGENEL
01900   CmiNetworkProgressAfter (0);
01901 #endif
01902 
01903   // send data for reverse transpose
01904   for (int j=0; j<numGridNodes; j++) {
01905     int node = gridNodeOrder[j];  // different order on each node
01906     int pe = gridNodeInfo[node].pe_start;
01907     int npe = gridNodeInfo[node].npe;
01908     int totlen = 0;
01909     if ( node != myGridNode ) for (int i=0; i<npe; ++i, ++pe) {
01910       LocalPmeInfo &li = localInfo[pe];
01911       int cpylen = li.nx * zdim;
01912       totlen += cpylen;
01913     }
01914     PmeUntransMsg *newmsg = new (numGrids, ny * totlen * numGrids, PRIORITY_SIZE) PmeUntransMsg;
01915     newmsg->sourceNode = myTransPe;
01916     newmsg->y_start = y_start;
01917     newmsg->ny = ny;
01918     for ( int g=0; g<numGrids; ++g ) {
01919       if ( j == 0 ) {  // only need these once
01920         newmsg->evir[g] = recip_evir2[g];
01921       } else {
01922         newmsg->evir[g] = 0.;
01923       }
01924       float *qmsg = newmsg->qgrid + ny * totlen * g;
01925       pe = gridNodeInfo[node].pe_start;
01926       for (int i=0; i<npe; ++i, ++pe) {
01927         LocalPmeInfo &li = localInfo[pe];
01928         if ( node == myGridNode ) {
01929           ComputePmeMgr *m = mgrObjects[CkRankOf(gridPeMap[pe])];
01930           qmsg = m->qgrid + m->qgrid_size * g + y_start * zdim;
01931           float *q = kgrid + qgrid_size*g + li.x_start*ny*zdim;
01932           int cpylen = ny * zdim;
01933           for ( int x = 0; x < li.nx; ++x ) {
01934             CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01935             q += cpylen;
01936             qmsg += slicelen;
01937           }
01938         } else {
01939           CmiMemcpy((void*)qmsg,
01940                 (void*)(kgrid + qgrid_size*g + li.x_start*ny*zdim),
01941                 li.nx*ny*zdim*sizeof(float));
01942           qmsg += li.nx*ny*zdim;
01943         }
01944       }
01945     }
01946     SET_PRIORITY(newmsg,grid_sequence,PME_UNTRANS_PRIORITY)
01947     if ( node == myGridNode ) newmsg->ny = 0;
01948     if ( npe > 1 ) {
01949       if ( node == myGridNode ) fwdSharedUntrans(newmsg);
01950       else pmeNodeProxy[gridNodeInfo[node].real_node].recvUntrans(newmsg);
01951     } else pmeProxy[gridPeMap[gridNodeInfo[node].pe_start]].recvUntrans(newmsg);
01952   }
01953 
01954   trans_count = numGridPes;
01955 }

void ComputePmeMgr::submitReductions  ) 
 

Definition at line 3060 of file ComputePme.C.

References SimParameters::alchElecLambda, SimParameters::alchFepElecOn, SimParameters::alchFepWhamOn, SimParameters::alchLambda2, BigReal, SubmitReduction::item(), WorkDistrib::messageEnqueueWork(), Node::Object(), REDUCTION_ELECT_ENERGY_PME_TI_1, REDUCTION_ELECT_ENERGY_PME_TI_2, REDUCTION_ELECT_ENERGY_SLOW, REDUCTION_ELECT_ENERGY_SLOW_F, REDUCTION_STRAY_CHARGE_ERRORS, ResizeArray< Elem >::resize(), Node::simParameters, simParams, ResizeArray< Elem >::size(), and SubmitReduction::submit().

Referenced by ComputePme::doWork().

03060                                      {
03061 
03062     for ( int g=0; g<numGrids; ++g ) {
03063       float scale = 1.;
03064       if ( alchFepOn || alchThermIntOn ) {
03065         if ( g == 0 ) scale = elecLambdaUp;
03066         else if ( g == 1 ) scale = elecLambdaDown;
03067         else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
03068         if (alchDecouple) {
03069           if ( g == 2 ) scale = 1-elecLambdaUp;
03070           else if ( g == 3 ) scale = 1-elecLambdaDown;
03071           else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
03072         }
03073       } else if ( lesOn ) {
03074         scale = 1.0 / lesFactor;
03075       } else if ( pairOn ) {
03076         scale = ( g == 0 ? 1. : -1. );
03077       }
03078       reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[g][0] * scale;
03079       reduction->item(REDUCTION_VIRIAL_SLOW_XX) += evir[g][1] * scale;
03080       reduction->item(REDUCTION_VIRIAL_SLOW_XY) += evir[g][2] * scale;
03081       reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += evir[g][3] * scale;
03082       reduction->item(REDUCTION_VIRIAL_SLOW_YX) += evir[g][2] * scale;
03083       reduction->item(REDUCTION_VIRIAL_SLOW_YY) += evir[g][4] * scale;
03084       reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += evir[g][5] * scale;
03085       reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += evir[g][3] * scale;
03086       reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += evir[g][5] * scale;
03087       reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += evir[g][6] * scale;
03088 
03089       if (amd_reduction) {
03090       amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[g][0] * scale;
03091       amd_reduction->item(REDUCTION_VIRIAL_SLOW_XX) += evir[g][1] * scale;
03092       amd_reduction->item(REDUCTION_VIRIAL_SLOW_XY) += evir[g][2] * scale;
03093       amd_reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += evir[g][3] * scale;
03094       amd_reduction->item(REDUCTION_VIRIAL_SLOW_YX) += evir[g][2] * scale;
03095       amd_reduction->item(REDUCTION_VIRIAL_SLOW_YY) += evir[g][4] * scale;
03096       amd_reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += evir[g][5] * scale;
03097       amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += evir[g][3] * scale;
03098       amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += evir[g][5] * scale;
03099       amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += evir[g][6] * scale;
03100       }
03101 
03102       float scale2 = 0.;
03103 
03104       //alchElecLambdaStart = (alchFepOn || alchThermIntOn) ? simParams->alchElecLambdaStart : 0;
03105       SimParameters *simParams = Node::Object()->simParameters;
03106 
03107       if (alchFepOn) {
03108         BigReal elecLambda2Up=0.0, elecLambda2Down=0.0;
03109         if(simParams->alchFepWhamOn)    {
03110                 if(simParams->alchFepElecOn) {
03111                 elecLambda2Up = simParams->alchElecLambda;
03112                   elecLambda2Down =  1.0 - simParams->alchElecLambda;
03113                 }
03114                 else {
03115                 elecLambda2Up = 0.0;
03116                   elecLambda2Down =  1.0;
03117                 }
03118         }
03119         else    {
03120                 elecLambda2Up =  (simParams->alchLambda2 <= alchElecLambdaStart)? 0. : \
03121                       (simParams->alchLambda2 - alchElecLambdaStart) / (1. - alchElecLambdaStart);
03122                 elecLambda2Down =  ((1-simParams->alchLambda2) <= alchElecLambdaStart)? 0. : \
03123                       ((1-simParams->alchLambda2) - alchElecLambdaStart) / (1. - alchElecLambdaStart);
03124         }
03125         
03126         if ( g == 0 ) scale2 = elecLambda2Up;
03127         else if ( g == 1 ) scale2 = elecLambda2Down;
03128         else if ( g == 2 ) scale2 = (elecLambda2Up + elecLambda2Down - 1)*(-1);
03129         if (alchDecouple && g == 2 ) scale2 = 1 - elecLambda2Up;
03130         else if (alchDecouple && g == 3 ) scale2 = 1 - elecLambda2Down;
03131         else if (alchDecouple && g == 4 ) scale2 = (elecLambda2Up + elecLambda2Down - 1)*(-1);
03132       }
03133       if(simParams->alchFepWhamOn && simParams->alchFepElecOn)  {       // FEP with wham post-process
03134         if( g==0 )      scale2 = scale + 1.0;
03135         else if( g==1 ) scale2 = scale - 1.0;
03136         else if( g==2 ) scale2 = scale - 1.0;
03137         else if( g==3 ) scale2 = scale + 1.0;
03138       }
03139       reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += evir[g][0] * scale2;
03140       if (amd_reduction) amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += evir[g][0] * scale2;
03141       
03142       if (alchThermIntOn) {
03143         
03144         // no decoupling:
03145         // part. 1 <-> all of system except partition 2: g[0] - g[2] 
03146         // (interactions between all atoms [partition 0 OR partition 1], 
03147         // minus all [within partition 0])
03148         // U = elecLambdaUp * (U[0] - U[2])
03149         // dU/dl = U[0] - U[2];
03150         
03151         // part. 2 <-> all of system except partition 1: g[1] - g[2] 
03152         // (interactions between all atoms [partition 0 OR partition 2], 
03153         // minus all [within partition 0])
03154         // U = elecLambdaDown * (U[1] - U[2])
03155         // dU/dl = U[1] - U[2];
03156 
03157         // alchDecouple:
03158         // part. 1 <-> part. 0: g[0] - g[2] - g[4] 
03159         // (interactions between all atoms [partition 0 OR partition 1]
03160         // minus all [within partition 1] minus all [within partition 0]
03161         // U = elecLambdaUp * (U[0] - U[4]) + (1-elecLambdaUp)* U[2]
03162         // dU/dl = U[0] - U[2] - U[4];
03163 
03164         // part. 2 <-> part. 0: g[1] - g[3] - g[4] 
03165         // (interactions between all atoms [partition 0 OR partition 2]
03166         // minus all [within partition 2] minus all [within partition 0]
03167         // U = elecLambdaDown * (U[1] - U[4]) + (1-elecLambdaDown)* U[3]
03168         // dU/dl = U[1] - U[3] - U[4];
03169         
03170         
03171         if ( g == 0 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) += evir[g][0];
03172         if ( g == 1 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) += evir[g][0];
03173         if (!alchDecouple) {
03174           if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
03175           if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
03176         }
03177         else {  // alchDecouple
03178           if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
03179           if ( g == 3 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
03180           if ( g == 4 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
03181           if ( g == 4 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
03182         }
03183       }
03184     }
03185     reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
03186     reduction->submit();
03187     if (amd_reduction) amd_reduction->submit();
03188 
03189 
03190   for ( int i=0; i<heldComputes.size(); ++i ) {
03191     WorkDistrib::messageEnqueueWork(heldComputes[i]);
03192   }
03193   heldComputes.resize(0);
03194 }

void ComputePmeMgr::ungridCalc void   ) 
 

Definition at line 2125 of file ComputePme.C.

References j, PmeGrid::K3, WorkDistrib::messageEnqueueWork(), PmeGrid::order, and ResizeArray< Elem >::size().

02125                                    {
02126   // CkPrintf("ungridCalc on Pe(%d)\n",CkMyPe());
02127 
02128   // for (int j=0; j<myGrid.order-1; ++j) {
02129   //   fz_arr[myGrid.K3+j] = fz_arr[j];
02130   // }
02131   for (int i=0; i<q_count; ++i) {
02132     for (int j=0; j<myGrid.order-1; ++j) {
02133       q_list[i][myGrid.K3+j] = q_list[i][j];
02134     }
02135   }
02136 
02137   ungridForcesCount = pmeComputes.size();
02138 
02139   for ( int i=0; i<pmeComputes.size(); ++i ) {
02140     WorkDistrib::messageEnqueueWork(pmeComputes[i]);
02141     // pmeComputes[i]->ungridForces();
02142   }
02143   // submitReductions();  // must follow all ungridForces()
02144 
02145   ungrid_count = (usePencils ? numPencilsActive : numDestRecipPes );
02146 }


Friends And Related Function Documentation

friend class ComputePme [friend]
 

Definition at line 278 of file ComputePme.C.

friend class NodePmeMgr [friend]
 

Definition at line 279 of file ComputePme.C.


Member Data Documentation

CmiNodeLock ComputePmeMgr::fftw_plan_lock [static]
 

Definition at line 433 of file ComputePme.C.

Referenced by ComputePmeMgr(), initialize(), and ~ComputePmeMgr().


The documentation for this class was generated from the following file:
Generated on Sat May 25 04:07:25 2013 for NAMD by  doxygen 1.3.9.1