Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputePmeMgr Class Reference

Inheritance diagram for ComputePmeMgr:

BOCclass List of all members.

Public Member Functions

 ComputePmeMgr ()
 ~ComputePmeMgr ()
void initialize (CkQdMsg *)
void initialize_pencils (CkQdMsg *)
void activate_pencils (CkQdMsg *)
void recvArrays (CProxy_PmeXPencil, CProxy_PmeYPencil, CProxy_PmeZPencil)
void sendGrid (void)
void recvGrid (PmeGridMsg *)
void gridCalc1 (void)
void sendTransBarrier (void)
void sendTrans (void)
void fwdSharedTrans (PmeTransMsg *)
void recvSharedTrans (PmeSharedTransMsg *)
void recvTrans (PmeTransMsg *)
void procTrans (PmeTransMsg *)
void gridCalc2 (void)
void gridCalc2R (void)
void fwdSharedUntrans (PmeUntransMsg *)
void recvSharedUntrans (PmeSharedUntransMsg *)
void sendUntrans (void)
void recvUntrans (PmeUntransMsg *)
void procUntrans (PmeUntransMsg *)
void gridCalc3 (void)
void sendUngrid (void)
void recvUngrid (PmeGridMsg *)
void recvAck (PmeAckMsg *)
void ungridCalc (void)
void setCompute (ComputePme *c)
int isPmeProcessor (int p)

Static Public Attributes

CmiNodeLock fftw_plan_lock

Friends

class ComputePme
class NodePmeMgr

Constructor & Destructor Documentation

ComputePmeMgr::ComputePmeMgr  ) 
 

Definition at line 527 of file ComputePme.C.

References fftw_plan_lock.

00527                              : pmeProxy(thisgroup), 
00528                                  pmeProxyDir(thisgroup), pmeCompute(0) {
00529 
00530   CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00531   pmeNodeProxy = CkpvAccess(BOCclass_group).nodePmeMgr;
00532 
00533   pmeNodeProxy.ckLocalBranch()->initialize();
00534 
00535 #ifdef NAMD_FFTW
00536   if ( CmiMyRank() == 0 ) {
00537     fftw_plan_lock = CmiCreateLock();
00538   }
00539 #endif
00540 
00541   myKSpace = 0;
00542   kgrid = 0;
00543   work = 0;
00544   grid_count = 0;
00545   trans_count = 0;
00546   untrans_count = 0;
00547   ungrid_count = 0;
00548   gridmsg_reuse= new PmeGridMsg*[CkNumPes()];
00549   useBarrier = 0;
00550   sendTransBarrier_received = 0;
00551   usePencils = 0;
00552 }

ComputePmeMgr::~ComputePmeMgr  ) 
 

Definition at line 1506 of file ComputePme.C.

References fftw_plan_lock.

01506                               {
01507 
01508 #ifdef NAMD_FFTW
01509   if ( CmiMyRank() == 0 ) {
01510     CmiDestroyLock(fftw_plan_lock);
01511   }
01512 #endif
01513 
01514   delete myKSpace;
01515   delete [] localInfo;
01516   delete [] gridNodeInfo;
01517   delete [] transNodeInfo;
01518   delete [] gridPeMap;
01519   delete [] transPeMap;
01520   delete [] recipPeDest;
01521   delete [] gridPeOrder;
01522   delete [] gridNodeOrder;
01523   delete [] transNodeOrder;
01524   delete [] isPmeFlag;
01525   delete [] qgrid;
01526   if ( kgrid != qgrid ) delete [] kgrid;
01527   delete [] work;
01528   delete [] gridmsg_reuse;
01529 }


Member Function Documentation

void ComputePmeMgr::activate_pencils CkQdMsg *   ) 
 

Definition at line 1500 of file ComputePme.C.

01500                                                  {
01501   if ( ! usePencils ) return;
01502   if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
01503 }

void ComputePmeMgr::fwdSharedTrans PmeTransMsg  ) 
 

Definition at line 1704 of file ComputePme.C.

References PmeSharedTransMsg::count, PmeSharedTransMsg::lock, PmeSharedTransMsg::msg, NodePmeInfo::npe, NodePmeInfo::pe_start, PME_TRANS_PRIORITY, PmeTransMsg::sequence, and SET_PRIORITY.

Referenced by NodePmeMgr::recvTrans(), and sendTrans().

01704                                                    {
01705   // CkPrintf("fwdSharedTrans on Pe(%d)\n",CkMyPe());
01706   int pe = transNodeInfo[myTransNode].pe_start;
01707   int npe = transNodeInfo[myTransNode].npe;
01708   CmiNodeLock lock = CmiCreateLock();
01709   int *count = new int; *count = npe;
01710   for (int i=0; i<npe; ++i, ++pe) {
01711     PmeSharedTransMsg *shmsg = new (PRIORITY_SIZE) PmeSharedTransMsg;
01712     SET_PRIORITY(shmsg,msg->sequence,PME_TRANS_PRIORITY)
01713     shmsg->msg = msg;
01714     shmsg->count = count;
01715     shmsg->lock = lock;
01716     pmeProxy[transPeMap[pe]].recvSharedTrans(shmsg);
01717   }
01718 }

void ComputePmeMgr::fwdSharedUntrans PmeUntransMsg  ) 
 

Definition at line 1928 of file ComputePme.C.

References PmeSharedUntransMsg::count, PmeSharedUntransMsg::lock, PmeSharedUntransMsg::msg, NodePmeInfo::npe, and NodePmeInfo::pe_start.

Referenced by NodePmeMgr::recvUntrans(), and sendUntrans().

01928                                                        {
01929   int pe = gridNodeInfo[myGridNode].pe_start;
01930   int npe = gridNodeInfo[myGridNode].npe;
01931   CmiNodeLock lock = CmiCreateLock();
01932   int *count = new int; *count = npe;
01933   for (int i=0; i<npe; ++i, ++pe) {
01934     PmeSharedUntransMsg *shmsg = new PmeSharedUntransMsg;
01935     shmsg->msg = msg;
01936     shmsg->count = count;
01937     shmsg->lock = lock;
01938     pmeProxy[gridPeMap[pe]].recvSharedUntrans(shmsg);
01939   }
01940 }

void ComputePmeMgr::gridCalc1 void   ) 
 

Definition at line 1614 of file ComputePme.C.

References PmeGrid::dim2, and PmeGrid::dim3.

01614                                   {
01615   // CkPrintf("gridCalc1 on Pe(%d)\n",CkMyPe());
01616 
01617 #ifdef NAMD_FFTW
01618   for ( int g=0; g<numGrids; ++g ) {
01619 #ifdef NAMD_FFTW_3
01620     fftwf_execute(forward_plan_yz[g]);
01621 #else
01622     rfftwnd_real_to_complex(forward_plan_yz, localInfo[myGridPe].nx,
01623         qgrid + qgrid_size * g, 1, myGrid.dim2 * myGrid.dim3, 0, 0, 0);
01624 #endif
01625 
01626   }
01627 #endif
01628 
01629   if ( ! useBarrier ) pmeProxyDir[CkMyPe()].sendTrans();
01630 }

void ComputePmeMgr::gridCalc2 void   ) 
 

Definition at line 1772 of file ComputePme.C.

References PmeGrid::dim3, gridCalc2R(), LocalPmeInfo::ny_after_transpose, and simParams.

01772                                   {
01773   // CkPrintf("gridCalc2 on Pe(%d)\n",CkMyPe());
01774 
01775 #if CMK_BLUEGENEL
01776   CmiNetworkProgressAfter (0);
01777 #endif
01778 
01779   int zdim = myGrid.dim3;
01780   // int y_start = localInfo[myTransPe].y_start_after_transpose;
01781   int ny = localInfo[myTransPe].ny_after_transpose;
01782 
01783   for ( int g=0; g<numGrids; ++g ) {
01784     // finish forward FFT (x dimension)
01785 #ifdef NAMD_FFTW
01786 #ifdef NAMD_FFTW_3
01787     fftwf_execute(forward_plan_x[g]);
01788 #else
01789     fftw(forward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01790         ny * zdim / 2, 1, work, 1, 0);
01791 #endif
01792 #endif
01793   }
01794 
01795 #ifdef OPENATOM_VERSION
01796     if ( ! simParams -> openatomOn ) { 
01797 #endif // OPENATOM_VERSION
01798       gridCalc2R();
01799 #ifdef OPENATOM_VERSION
01800     } else {
01801       gridCalc2Moa();
01802     }
01803 #endif // OPENATOM_VERSION
01804 }

void ComputePmeMgr::gridCalc2R void   ) 
 

Definition at line 1832 of file ComputePme.C.

References BigReal, PmeKSpace::compute_energy(), PmeGrid::dim3, and LocalPmeInfo::ny_after_transpose.

Referenced by gridCalc2().

01832                                    {
01833 
01834   int zdim = myGrid.dim3;
01835   // int y_start = localInfo[myTransPe].y_start_after_transpose;
01836   int ny = localInfo[myTransPe].ny_after_transpose;
01837 
01838   for ( int g=0; g<numGrids; ++g ) {
01839     // reciprocal space portion of PME
01840     BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
01841     recip_evir2[g][0] = myKSpace->compute_energy(kgrid+qgrid_size*g,
01842                         lattice, ewaldcof, &(recip_evir2[g][1]));
01843     // CkPrintf("Ewald reciprocal energy = %f\n", recip_evir2[g][0]);
01844 
01845     // start backward FFT (x dimension)
01846 
01847 #ifdef NAMD_FFTW
01848 #ifdef NAMD_FFTW_3
01849     fftwf_execute(backward_plan_x[g]);
01850 #else
01851     fftw(backward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01852         ny * zdim / 2, 1, work, 1, 0);
01853 #endif
01854 #endif
01855   }
01856   
01857   pmeProxyDir[CkMyPe()].sendUntrans();
01858 }

void ComputePmeMgr::gridCalc3 void   ) 
 

Definition at line 2010 of file ComputePme.C.

References PmeGrid::dim2, and PmeGrid::dim3.

02010                                   {
02011   // CkPrintf("gridCalc3 on Pe(%d)\n",CkMyPe());
02012 
02013   // finish backward FFT
02014 #ifdef NAMD_FFTW
02015 
02016   for ( int g=0; g<numGrids; ++g ) {
02017 #ifdef NAMD_FFTW_3
02018     fftwf_execute(backward_plan_yz[g]);
02019 #else
02020     rfftwnd_complex_to_real(backward_plan_yz, localInfo[myGridPe].nx,
02021         (fftw_complex *) (qgrid + qgrid_size * g),
02022         1, myGrid.dim2 * myGrid.dim3 / 2, 0, 0, 0);
02023 #endif
02024   }
02025 
02026 #endif
02027 
02028   pmeProxyDir[CkMyPe()].sendUngrid();
02029 }

void ComputePmeMgr::initialize CkQdMsg *   ) 
 

Definition at line 617 of file ComputePme.C.

References Lattice::a(), Lattice::a_r(), ResizeArray< Elem >::add(), SimParameters::alchDecouple, SimParameters::alchElecLambdaStart, SimParameters::alchFepOn, SimParameters::alchThermIntOn, ResizeArray< Elem >::begin(), BigReal, PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, SimParameters::cutoff, PmeGrid::dim2, PmeGrid::dim3, fftw_plan_lock, SimParameters::FFTWEstimate, SimParameters::FFTWPatient, generatePmePeList2(), PmePencilInitMsgData::grid, iINFO(), ResizeArray< Elem >::insert(), iout, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, SimParameters::lattice, SimParameters::lesFactor, SimParameters::lesOn, PatchMap::max_a(), PatchMap::min_a(), NAMD_bug(), NAMD_die(), PatchMap::node(), NodePmeInfo::npe, PatchMap::numNodesWithPatches(), PatchMap::numPatches(), PatchMap::numPatchesOnNode(), LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::pairInteractionOn, SimParameters::pairInteractionSelf, SimParameters::patchDimension, NodePmeInfo::pe_start, pencilPMEProcessors, SimParameters::PMEBarrier, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEMinPoints, SimParameters::PMEMinSlices, PmePencilInitMsgData::pmeNodeProxy, SimParameters::PMEPencils, SimParameters::PMEProcessors, PmePencilInitMsgData::pmeProxy, NodePmeInfo::real_node, Random::reorder(), ResizeArray< Elem >::resize(), Node::simParameters, simParams, ResizeArray< Elem >::size(), SortableResizeArray< Elem >::sort(), Vector::unit(), LocalPmeInfo::x_start, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::xm, PmePencilInitMsgData::xPencil, LocalPmeInfo::y_start_after_transpose, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, PmePencilInitMsgData::zBlocks, PmePencilInitMsgData::zm, and PmePencilInitMsgData::zPencil.

00617                                            {
00618   delete msg;
00619 
00620   localInfo = new LocalPmeInfo[CkNumPes()];
00621   gridNodeInfo = new NodePmeInfo[CkNumNodes()];
00622   transNodeInfo = new NodePmeInfo[CkNumNodes()];
00623   gridPeMap = new int[CkNumPes()];
00624   transPeMap = new int[CkNumPes()];
00625   recipPeDest = new int[CkNumPes()];
00626   gridPeOrder = new int[CkNumPes()];
00627   gridNodeOrder = new int[CkNumNodes()];
00628   transNodeOrder = new int[CkNumNodes()];
00629   isPmeFlag = new char[CkNumPes()];  
00630 
00631   SimParameters *simParams = Node::Object()->simParameters;
00632   PatchMap *patchMap = PatchMap::Object();
00633 
00634   alchFepOn = simParams->alchFepOn;
00635   alchThermIntOn = simParams->alchThermIntOn;
00636   alchDecouple = (alchFepOn || alchThermIntOn) && (simParams->alchDecouple);
00637   alchElecLambdaStart = (alchFepOn || alchThermIntOn) ? 
00638     simParams->alchElecLambdaStart : 0;
00639   if (alchFepOn || alchThermIntOn) {
00640     numGrids = 2;
00641     if (alchDecouple) numGrids += 2;
00642     if (alchElecLambdaStart || alchThermIntOn) numGrids ++;
00643   }
00644   else numGrids = 1;
00645   lesOn = simParams->lesOn;
00646   useBarrier = simParams->PMEBarrier;
00647   if ( lesOn ) {
00648     lesFactor = simParams->lesFactor;
00649     numGrids = lesFactor;
00650   }
00651   selfOn = 0;
00652   pairOn = simParams->pairInteractionOn;
00653   if ( pairOn ) {
00654     selfOn = simParams->pairInteractionSelf;
00655     if ( selfOn ) pairOn = 0;  // make pairOn and selfOn exclusive
00656     numGrids = selfOn ? 1 : 3;
00657   }
00658 
00659   if ( numGrids != 1 || simParams->PMEPencils == 0 ) usePencils = 0;
00660   else if ( simParams->PMEPencils > 0 ) usePencils = 1;
00661   else {
00662     int nrps = simParams->PMEProcessors;
00663     if ( nrps <= 0 ) nrps = CkNumPes();
00664     if ( nrps > CkNumPes() ) nrps = CkNumPes();
00665     int dimx = simParams->PMEGridSizeX;
00666     int dimy = simParams->PMEGridSizeY;
00667     int maxslabs = 1 + (dimx - 1) / simParams->PMEMinSlices;
00668     if ( maxslabs > nrps ) maxslabs = nrps;
00669     int maxpencils = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00670                 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00671     if ( maxpencils > nrps ) maxpencils = nrps;
00672     if ( maxpencils > 3 * maxslabs ) usePencils = 1;
00673     else usePencils = 0;
00674   }
00675 
00676   if ( usePencils ) {
00677     int nrps = simParams->PMEProcessors;
00678     if ( nrps <= 0 ) nrps = CkNumPes();
00679     if ( nrps > CkNumPes() ) nrps = CkNumPes();
00680     if ( simParams->PMEPencils > 1 &&
00681          simParams->PMEPencils * simParams->PMEPencils <= nrps ) {
00682       xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00683     } else {
00684       int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00685                 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00686       if ( nb2 > nrps ) nb2 = nrps;
00687       int nb = (int) sqrt((float)nb2);
00688       xBlocks = zBlocks = nb;
00689       yBlocks = nb2 / nb;
00690     }
00691 
00692     int dimx = simParams->PMEGridSizeX;
00693     int bx = 1 + ( dimx - 1 ) / xBlocks;
00694     xBlocks = 1 + ( dimx - 1 ) / bx;
00695 
00696     int dimy = simParams->PMEGridSizeY;
00697     int by = 1 + ( dimy - 1 ) / yBlocks;
00698     yBlocks = 1 + ( dimy - 1 ) / by;
00699 
00700     int dimz = simParams->PMEGridSizeZ / 2 + 1;  // complex
00701     int bz = 1 + ( dimz - 1 ) / zBlocks;
00702     zBlocks = 1 + ( dimz - 1 ) / bz;
00703 
00704     if ( ! CkMyPe() ) {
00705       iout << iINFO << "PME using " << xBlocks << " x " <<
00706         yBlocks << " x " << zBlocks <<
00707         " pencil grid for FFT and reciprocal sum.\n" << endi;
00708     }
00709   } else { // usePencils
00710 
00711   {  // decide how many pes to use for reciprocal sum
00712 
00713     // rules based on work available
00714     int minslices = simParams->PMEMinSlices;
00715     int dimx = simParams->PMEGridSizeX;
00716     int nrpx = ( dimx + minslices - 1 ) / minslices;
00717     int dimy = simParams->PMEGridSizeY;
00718     int nrpy = ( dimy + minslices - 1 ) / minslices;
00719 
00720     // rules based on processors available
00721     int nrpp = CkNumPes();
00722     // if ( nrpp > 32 ) nrpp = 32;  // cap to limit messages
00723     if ( nrpp < nrpx ) nrpx = nrpp;
00724     if ( nrpp < nrpy ) nrpy = nrpp;
00725 
00726     // user override
00727     int nrps = simParams->PMEProcessors;
00728     if ( nrps > CkNumPes() ) nrps = CkNumPes();
00729     if ( nrps > 0 ) nrpx = nrps;
00730     if ( nrps > 0 ) nrpy = nrps;
00731 
00732     // make sure there aren't any totally empty processors
00733     int bx = ( dimx + nrpx - 1 ) / nrpx;
00734     nrpx = ( dimx + bx - 1 ) / bx;
00735     int by = ( dimy + nrpy - 1 ) / nrpy;
00736     nrpy = ( dimy + by - 1 ) / by;
00737     if ( bx != ( dimx + nrpx - 1 ) / nrpx )
00738       NAMD_bug("Error in selecting number of PME processors.");
00739     if ( by != ( dimy + nrpy - 1 ) / nrpy )
00740       NAMD_bug("Error in selecting number of PME processors.");
00741 
00742     numGridPes = nrpx;
00743     numTransPes = nrpy;
00744   }
00745   if ( ! CkMyPe() ) {
00746     iout << iINFO << "PME using " << numGridPes << " and " << numTransPes <<
00747       " processors for FFT and reciprocal sum.\n" << endi;
00748   }
00749 
00750   int sum_npes = numTransPes + numGridPes;
00751   int max_npes = (numTransPes > numGridPes)?numTransPes:numGridPes;
00752 
00753 #if 0 // USE_TOPOMAP
00754   /* This code is being disabled permanently for slab PME on Blue Gene machines */
00755   PatchMap * pmap = PatchMap::Object();
00756   
00757   int patch_pes = pmap->numNodesWithPatches();
00758   TopoManager tmgr;
00759   if(tmgr.hasMultipleProcsPerNode())
00760     patch_pes *= 2;
00761 
00762   bool done = false;
00763   if(CkNumPes() > 2*sum_npes + patch_pes) {    
00764     done = generateBGLORBPmePeList(transPeMap, numTransPes);
00765     done &= generateBGLORBPmePeList(gridPeMap, numGridPes, transPeMap, numTransPes);    
00766   }
00767   else 
00768     if(CkNumPes() > 2 *max_npes + patch_pes) {
00769       done = generateBGLORBPmePeList(transPeMap, max_npes);
00770       gridPeMap = transPeMap;
00771     }
00772 
00773   if (!done)
00774 #endif
00775     {
00776       //generatePmePeList(transPeMap, max_npes);
00777       //gridPeMap = transPeMap;
00778       generatePmePeList2(gridPeMap, numGridPes, transPeMap, numTransPes);
00779     }
00780   
00781   myGridPe = -1;
00782   myGridNode = -1;
00783   int i = 0;
00784   for ( i=0; i<CkNumPes(); ++i )
00785     isPmeFlag[i] = 0;
00786   int node = -1;
00787   int real_node = -1;
00788   for ( i=0; i<numGridPes; ++i ) {
00789     if ( gridPeMap[i] == CkMyPe() ) myGridPe = i;
00790     isPmeFlag[gridPeMap[i]] |= 1;
00791     int real_node_i = CkNodeOf(gridPeMap[i]);
00792     if ( real_node_i == real_node ) {
00793       gridNodeInfo[node].npe += 1;
00794     } else {
00795       real_node = real_node_i;
00796       ++node;
00797       gridNodeInfo[node].real_node = real_node;
00798       gridNodeInfo[node].pe_start = i;
00799       gridNodeInfo[node].npe = 1;
00800     }
00801     if ( CkMyNode() == real_node_i ) myGridNode = node;
00802   }
00803   numGridNodes = node + 1;
00804   myTransPe = -1;
00805   myTransNode = -1;
00806   node = -1;
00807   real_node = -1;
00808   for ( i=0; i<numTransPes; ++i ) {
00809     if ( transPeMap[i] == CkMyPe() ) myTransPe = i;
00810     isPmeFlag[transPeMap[i]] |= 2;
00811     int real_node_i = CkNodeOf(transPeMap[i]);
00812     if ( real_node_i == real_node ) {
00813       transNodeInfo[node].npe += 1;
00814     } else {
00815       real_node = real_node_i;
00816       ++node;
00817       transNodeInfo[node].real_node = real_node;
00818       transNodeInfo[node].pe_start = i;
00819       transNodeInfo[node].npe = 1;
00820     }
00821     if ( CkMyNode() == real_node_i ) myTransNode = node;
00822   }
00823   numTransNodes = node + 1;
00824 
00825   if ( ! CkMyPe() ) {
00826     iout << iINFO << "PME USING " << numGridNodes << " GRID NODES AND "
00827          << numTransNodes << " TRANS NODES\n" << endi;
00828     iout << iINFO << "PME GRID LOCATIONS:";
00829     int i;
00830     for ( i=0; i<numGridPes && i<10; ++i ) {
00831       iout << " " << gridPeMap[i];
00832     }
00833     if ( i < numGridPes ) iout << " ...";
00834     iout << "\n" << endi;
00835     iout << iINFO << "PME TRANS LOCATIONS:";
00836     for ( i=0; i<numTransPes && i<10; ++i ) {
00837       iout << " " << transPeMap[i];
00838     }
00839     if ( i < numTransPes ) iout << " ...";
00840     iout << "\n" << endi;
00841   }
00842 
00843   { // generate random orderings for grid and trans messages
00844     int i;
00845     for ( i = 0; i < numGridPes; ++i ) {
00846       gridPeOrder[i] = i;
00847     }
00848     Random rand(CkMyPe());
00849     if ( myGridPe < 0 ) {
00850       rand.reorder(gridPeOrder,numGridPes);
00851     } else {  // self last
00852       gridPeOrder[myGridPe] = numGridPes-1;
00853       gridPeOrder[numGridPes-1] = myGridPe;
00854       rand.reorder(gridPeOrder,numGridPes-1);
00855     } 
00856     for ( i = 0; i < numGridNodes; ++i ) {
00857       gridNodeOrder[i] = i;
00858     }
00859     if ( myGridNode < 0 ) {
00860       rand.reorder(gridNodeOrder,numGridNodes);
00861     } else {  // self last
00862       gridNodeOrder[myGridNode] = numGridNodes-1;
00863       gridNodeOrder[numGridNodes-1] = myGridNode;
00864       rand.reorder(gridNodeOrder,numGridNodes-1);
00865     }
00866     for ( i = 0; i < numTransNodes; ++i ) {
00867       transNodeOrder[i] = i;
00868     }
00869     if ( myTransNode < 0 ) {
00870       rand.reorder(transNodeOrder,numTransNodes);
00871     } else {  // self last
00872       transNodeOrder[myTransNode] = numTransNodes-1;
00873       transNodeOrder[numTransNodes-1] = myTransNode;
00874       rand.reorder(transNodeOrder,numTransNodes-1);
00875     }
00876   }
00877   
00878   } // ! usePencils
00879 
00880   myGrid.K1 = simParams->PMEGridSizeX;
00881   myGrid.K2 = simParams->PMEGridSizeY;
00882   myGrid.K3 = simParams->PMEGridSizeZ;
00883   myGrid.order = simParams->PMEInterpOrder;
00884   myGrid.dim2 = myGrid.K2;
00885   myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
00886 
00887   if ( ! usePencils ) {
00888     myGrid.block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
00889     myGrid.block2 = ( myGrid.K2 + numTransPes - 1 ) / numTransPes;
00890     myGrid.block3 = myGrid.dim3 / 2;  // complex
00891   }
00892 
00893   if ( usePencils ) {
00894     myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
00895     myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
00896     myGrid.block3 = ( myGrid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;  // complex
00897 
00898 
00899       int pe = 0;
00900       int x,y,z;
00901 
00902                 SortableResizeArray<int> zprocs(xBlocks*yBlocks);
00903                 SortableResizeArray<int> yprocs(xBlocks*zBlocks);
00904                 SortableResizeArray<int> xprocs(yBlocks*zBlocks);
00905                 {
00906                 int basepe = 0;  int npe = CkNumPes();
00907                         if ( npe > xBlocks*yBlocks &&
00908                                 npe > xBlocks*zBlocks &&
00909                                 npe > yBlocks*zBlocks ) {
00910                         // avoid node 0
00911                         ++basepe;
00912                         --npe;
00913                 }
00914 
00915       
00916                 // decide which pes to use by bit reversal and patch use
00917                 int i;
00918                 int ncpus = CkNumPes();
00919   
00920                 // find next highest power of two
00921                 int npow2 = 1;  int nbits = 0;
00922                 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00923   
00924                 // build bit reversal sequence
00925                 SortableResizeArray<int> patches, nopatches, pmeprocs;
00926                 PatchMap *pmap = PatchMap::Object();
00927                 i = 0;
00928                 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00929                         int ri;
00930                         for ( ri = ncpus; ri >= ncpus; ++i ) {
00931                                 ri = 0;
00932                                 int pow2 = 1;
00933                                 int rpow2 = npow2 / 2;
00934                                 for ( int j=0; j<nbits; ++j ) {
00935                                 ri += rpow2 * ( ( i / pow2 ) % 2 );
00936                                 pow2 *= 2;  rpow2 /= 2;
00937                                 }
00938                         }
00939                         // seq[icpu] = ri;
00940                         if ( ri ) { // keep 0 for special case
00941                                 if ( pmap->numPatchesOnNode(ri) ) patches.add(ri);
00942                                 else nopatches.add(ri);
00943                         }
00944                 }
00945 
00946 #if USE_RANDOM_TOPO
00947             Random rand(CkMyPe());
00948             int *tmp = new int[patches.size()];
00949             int nn = patches.size();
00950             for (i=0;i<nn;i++)  tmp[i] = patches[i];
00951             rand.reorder(tmp, nn);
00952             patches.resize(0);
00953             for (i=0;i<nn;i++)  patches.add(tmp[i]);
00954             delete [] tmp;
00955             tmp = new int[nopatches.size()];
00956             nn = nopatches.size();
00957             for (i=0;i<nn;i++)  tmp[i] = nopatches[i];
00958             rand.reorder(tmp, nn);
00959             nopatches.resize(0);
00960             for (i=0;i<nn;i++)  nopatches.add(tmp[i]);
00961             delete [] tmp;
00962 #endif
00963 
00964                 // only use zero if it eliminates overloading or has patches
00965                 int useZero = 0;
00966                 int npens = xBlocks*yBlocks;
00967                 if ( npens % ncpus == 0 ) useZero = 1;
00968                 if ( npens == nopatches.size() + 1 ) useZero = 1;
00969                 npens += xBlocks*zBlocks;
00970                 if ( npens % ncpus == 0 ) useZero = 1;
00971                 if ( npens == nopatches.size() + 1 ) useZero = 1;
00972                 npens += yBlocks*zBlocks;
00973                 if ( npens % ncpus == 0 ) useZero = 1;
00974                 if ( npens == nopatches.size() + 1 ) useZero = 1;
00975 
00976                 // add nopatches then patches in reversed order
00977                 for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00978                 if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00979                 for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00980                 if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00981   
00982                 int npes = pmeprocs.size();
00983                 for ( i=0; i<xBlocks*yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00984                 if ( i>1 && zprocs[0] == zprocs[i-1] ) zprocs[0] = 0;
00985 #if !USE_RANDOM_TOPO
00986                 zprocs.sort();
00987 #endif
00988                 for ( i=0; i<xBlocks*zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00989                 if ( i>1 && yprocs[0] == yprocs[i-1] ) yprocs[0] = 0;
00990 #if !USE_RANDOM_TOPO
00991                 yprocs.sort();
00992 #endif
00993       for ( i=0; i<yBlocks*zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00994       if ( i>1 && xprocs[0] == xprocs[i-1] ) xprocs[0] = 0;
00995 #if !USE_RANDOM_TOPO
00996       xprocs.sort();
00997 #endif
00998 
00999 #if USE_TOPO_SFC
01000   CmiLock(tmgr_lock);
01001   //{
01002   TopoManager tmgr;
01003   int xdim = tmgr.getDimNX();
01004   int ydim = tmgr.getDimNY();
01005   int zdim = tmgr.getDimNZ();
01006   int xdim1 = find_level_grid(xdim);
01007   int ydim1 = find_level_grid(ydim);
01008   int zdim1 = find_level_grid(zdim);
01009   if(CkMyPe() == 0)
01010       printf("xdim: %d %d %d, %d %d %d\n", xdim, ydim, zdim, xdim1, ydim1, zdim1);
01011 
01012   vector<Coord> result;
01013   SFC_grid(xdim, ydim, zdim, xdim1, ydim1, zdim1, result);
01014   sort_sfc(xprocs, tmgr, result);
01015   sort_sfc(yprocs, tmgr, result);
01016   sort_sfc(zprocs, tmgr, result);
01017   //}
01018   CmiUnlock(tmgr_lock);
01019 #endif
01020 
01021       pencilPMEProcessors = new char [CkNumPes()];
01022       memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
01023 
01024 
01025                 if(CkMyPe() == 0){  
01026               iout << iINFO << "PME Z PENCIL LOCATIONS:";
01027           for ( i=0; i<zprocs.size() && i<10; ++i ) {
01028 #if USE_TOPO_SFC
01029               int x,y,z,t;
01030               tmgr.rankToCoordinates(zprocs[i], x,y, z, t);
01031               iout << " " << zprocs[i] << "(" << x << " " << y << " " << z << ")";
01032 #else
01033               iout << " " << zprocs[i];
01034 #endif
01035           }
01036           if ( i < zprocs.size() ) iout << " ...";
01037               iout << "\n" << endi;
01038                 }
01039 
01040       for (pe=0, x = 0; x < xBlocks; ++x)
01041         for (y = 0; y < yBlocks; ++y, ++pe ) {
01042           pencilPMEProcessors[zprocs[pe]] = 1;
01043         }
01044      
01045                 if(CkMyPe() == 0){  
01046               iout << iINFO << "PME Y PENCIL LOCATIONS:";
01047           for ( i=0; i<yprocs.size() && i<10; ++i ) {
01048 #if USE_TOPO_SFC
01049               int x,y,z,t;
01050               tmgr.rankToCoordinates(yprocs[i], x,y, z, t);
01051               iout << " " << yprocs[i] << "(" << x << " " << y << " " << z << ")";
01052 #else
01053               iout << " " << yprocs[i];
01054 #endif
01055           }
01056           if ( i < yprocs.size() ) iout << " ...";
01057               iout << "\n" << endi;
01058                 }
01059 
01060       for (pe=0, z = 0; z < zBlocks; ++z )
01061         for (x = 0; x < xBlocks; ++x, ++pe ) {
01062           pencilPMEProcessors[yprocs[pe]] = 1;
01063         }
01064     
01065                 if(CkMyPe() == 0){  
01066                 iout << iINFO << "PME X PENCIL LOCATIONS:";
01067                     for ( i=0; i<xprocs.size() && i<10; ++i ) {
01068 #if USE_TOPO_SFC
01069                 int x,y,z,t;
01070                 tmgr.rankToCoordinates(xprocs[i], x,y, z, t);
01071                 iout << " " << xprocs[i] << "(" << x << "  " << y << " " << z << ")";
01072 #else
01073                 iout << " " << xprocs[i];
01074 #endif
01075             }
01076                 if ( i < xprocs.size() ) iout << " ...";
01077                 iout << "\n" << endi;
01078                 }
01079 
01080       for (pe=0, y = 0; y < yBlocks; ++y )      
01081         for (z = 0; z < zBlocks; ++z, ++pe ) {
01082           pencilPMEProcessors[xprocs[pe]] = 1;
01083         }
01084         
01085     }
01086 
01087 
01088         // creating the pencil arrays
01089         if ( CkMyPe() == 0 ){
01090 #if 1
01091         CProxy_PmePencilMap zm = CProxy_PmePencilMap::ckNew(0,1,yBlocks,xBlocks*yBlocks,zprocs.begin());
01092         CProxy_PmePencilMap ym = CProxy_PmePencilMap::ckNew(2,0,xBlocks,zBlocks*xBlocks,yprocs.begin());
01093         CProxy_PmePencilMap xm = CProxy_PmePencilMap::ckNew(1,2,zBlocks,yBlocks*zBlocks,xprocs.begin());
01094         pmeNodeProxy.recvPencilMapProxies(xm,ym,zm);
01095         CkArrayOptions zo(xBlocks,yBlocks,1);  zo.setMap(zm);
01096         CkArrayOptions yo(xBlocks,1,zBlocks);  yo.setMap(ym);
01097         CkArrayOptions xo(1,yBlocks,zBlocks);  xo.setMap(xm);
01098 #if CHARM_VERSION > 60301
01099         zo.setAnytimeMigration(false);  zo.setStaticInsertion(true);
01100         yo.setAnytimeMigration(false);  yo.setStaticInsertion(true);
01101         xo.setAnytimeMigration(false);  xo.setStaticInsertion(true);
01102 #endif
01103         zPencil = CProxy_PmeZPencil::ckNew(zo);  // (xBlocks,yBlocks,1);
01104         yPencil = CProxy_PmeYPencil::ckNew(yo);  // (xBlocks,1,zBlocks);
01105         xPencil = CProxy_PmeXPencil::ckNew(xo);  // (1,yBlocks,zBlocks);
01106 #else
01107         zPencil = CProxy_PmeZPencil::ckNew();  // (xBlocks,yBlocks,1);
01108         yPencil = CProxy_PmeYPencil::ckNew();  // (xBlocks,1,zBlocks);
01109         xPencil = CProxy_PmeXPencil::ckNew();  // (1,yBlocks,zBlocks);
01110 
01111                 for (pe=0, x = 0; x < xBlocks; ++x)
01112                         for (y = 0; y < yBlocks; ++y, ++pe ) {
01113                                 zPencil(x,y,0).insert(zprocs[pe]);
01114                         }
01115         zPencil.doneInserting();
01116 
01117                 for (pe=0, z = 0; z < zBlocks; ++z )
01118                         for (x = 0; x < xBlocks; ++x, ++pe ) {
01119                                 yPencil(x,0,z).insert(yprocs[pe]);
01120                         }
01121         yPencil.doneInserting();
01122 
01123 
01124                 for (pe=0, y = 0; y < yBlocks; ++y )    
01125                         for (z = 0; z < zBlocks; ++z, ++pe ) {
01126                                 xPencil(0,y,z).insert(xprocs[pe]);
01127                         }
01128                 xPencil.doneInserting();     
01129 #endif
01130 
01131                 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
01132                 PmePencilInitMsgData msgdata;
01133                 msgdata.grid = myGrid;
01134                 msgdata.xBlocks = xBlocks;
01135                 msgdata.yBlocks = yBlocks;
01136                 msgdata.zBlocks = zBlocks;
01137                 msgdata.xPencil = xPencil;
01138                 msgdata.yPencil = yPencil;
01139                 msgdata.zPencil = zPencil;
01140                 msgdata.pmeProxy = pmeProxyDir;
01141         msgdata.pmeNodeProxy = pmeNodeProxy;
01142         msgdata.xm = xm;
01143         msgdata.ym = ym;
01144         msgdata.zm = zm;
01145                 xPencil.init(new PmePencilInitMsg(msgdata));
01146                 yPencil.init(new PmePencilInitMsg(msgdata));
01147                 zPencil.init(new PmePencilInitMsg(msgdata));
01148         }
01149 
01150     return;  // continue in initialize_pencils() at next startup stage
01151   }
01152 
01153 
01154   int pe;
01155   int nx = 0;
01156   for ( pe = 0; pe < numGridPes; ++pe ) {
01157     localInfo[pe].x_start = nx;
01158     nx += myGrid.block1;
01159     if ( nx > myGrid.K1 ) nx = myGrid.K1;
01160     localInfo[pe].nx = nx - localInfo[pe].x_start;
01161   }
01162   int ny = 0;
01163   for ( pe = 0; pe < numTransPes; ++pe ) {
01164     localInfo[pe].y_start_after_transpose = ny;
01165     ny += myGrid.block2;
01166     if ( ny > myGrid.K2 ) ny = myGrid.K2;
01167     localInfo[pe].ny_after_transpose =
01168                         ny - localInfo[pe].y_start_after_transpose;
01169   }
01170 
01171   {  // decide how many pes this node exchanges charges with
01172 
01173   PatchMap *patchMap = PatchMap::Object();
01174   Lattice lattice = simParams->lattice;
01175   BigReal sysdima = lattice.a_r().unit() * lattice.a();
01176   BigReal cutoff = simParams->cutoff;
01177   BigReal patchdim = simParams->patchDimension;
01178   int numPatches = patchMap->numPatches();
01179   int numNodes = CkNumPes();
01180   int *source_flags = new int[numNodes];
01181   int node;
01182   for ( node=0; node<numNodes; ++node ) {
01183     source_flags[node] = 0;
01184     recipPeDest[node] = 0;
01185   }
01186 
01187   // // make sure that we don't get ahead of ourselves on this node
01188   // if ( CkMyPe() < numPatches && myRecipPe >= 0 ) {
01189   //   source_flags[CkMyPe()] = 1;
01190   //   recipPeDest[myRecipPe] = 1;
01191   // }
01192 
01193   for ( int pid=0; pid < numPatches; ++pid ) {
01194     int pnode = patchMap->node(pid);
01195     BigReal minx = patchMap->min_a(pid);
01196     BigReal maxx = patchMap->max_a(pid);
01197     BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
01198     // min1 (max1) is smallest (largest) grid line for this patch
01199     int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
01200     int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
01201     for ( int i=min1; i<=max1; ++i ) {
01202       int ix = i;
01203       while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
01204       while ( ix < 0 ) ix += myGrid.K1;
01205       // set source_flags[pnode] if this patch sends to our node
01206       if ( myGridPe >= 0 && ix >= localInfo[myGridPe].x_start &&
01207            ix < localInfo[myGridPe].x_start + localInfo[myGridPe].nx ) {
01208         source_flags[pnode] = 1;
01209       }
01210       // set dest_flags[] for node that our patch sends to
01211       if ( pnode == CkMyPe() ) {
01212         recipPeDest[ix / myGrid.block1] = 1;
01213       }
01214     }
01215   }
01216 
01217   numSources = 0;
01218   numDestRecipPes = 0;
01219   for ( node=0; node<numNodes; ++node ) {
01220     if ( source_flags[node] ) ++numSources;
01221     if ( recipPeDest[node] ) ++numDestRecipPes;
01222   }
01223 
01224 #if 0
01225   if ( numSources ) {
01226     iout << iINFO << "PME " << CkMyPe() << " sources:";
01227     for ( node=0; node<numNodes; ++node ) {
01228       if ( source_flags[node] ) iout << " " << node;
01229     }
01230     iout << "\n" << endi;
01231   }
01232 #endif
01233 
01234   delete [] source_flags;
01235 
01236   // CkPrintf("PME on node %d has %d sources and %d destinations\n",
01237   //           CkMyPe(), numSources, numDestRecipPes);
01238 
01239   }  // decide how many pes this node exchanges charges with (end)
01240 
01241   ungrid_count = numDestRecipPes;
01242 
01243   sendTransBarrier_received = 0;
01244 
01245   if ( myGridPe < 0 && myTransPe < 0 ) return;
01246   // the following only for nodes doing reciprocal sum
01247 
01248   if ( myTransPe >= 0 ) {
01249       int k2_start = localInfo[myTransPe].y_start_after_transpose;
01250       int k2_end = k2_start + localInfo[myTransPe].ny_after_transpose;
01251       #ifdef OPENATOM_VERSION
01252       if ( simParams->openatomOn ) { 
01253         CProxy_ComputeMoaMgr moaProxy(CkpvAccess(BOCclass_group).computeMoaMgr);
01254         myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2, moaProxy);
01255       } else {
01256         myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
01257       }
01258       #else  // OPENATOM_VERSION
01259       myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
01260       #endif // OPENATOM_VERSION
01261   }
01262 
01263   int local_size = myGrid.block1 * myGrid.K2 * myGrid.dim3;
01264   int local_size_2 = myGrid.block2 * myGrid.K1 * myGrid.dim3;
01265   if ( local_size < local_size_2 ) local_size = local_size_2;
01266   qgrid = new float[local_size*numGrids];
01267   if ( numGridPes > 1 || numTransPes > 1 ) {
01268     kgrid = new float[local_size*numGrids];
01269   } else {
01270     kgrid = qgrid;
01271   }
01272   qgrid_size = local_size;
01273 
01274   if ( myGridPe >= 0 ) {
01275   qgrid_start = localInfo[myGridPe].x_start * myGrid.K2 * myGrid.dim3;
01276   qgrid_len = localInfo[myGridPe].nx * myGrid.K2 * myGrid.dim3;
01277   fgrid_start = localInfo[myGridPe].x_start * myGrid.K2;
01278   fgrid_len = localInfo[myGridPe].nx * myGrid.K2;
01279   }
01280 
01281   int n[3]; n[0] = myGrid.K1; n[1] = myGrid.K2; n[2] = myGrid.K3;
01282 #ifdef NAMD_FFTW
01283   CmiLock(fftw_plan_lock);
01284 #ifdef NAMD_FFTW_3
01285   work = new fftwf_complex[n[0]];
01286   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
01287   if ( myGridPe >= 0 ) {
01288     forward_plan_yz=new fftwf_plan[numGrids];
01289     backward_plan_yz=new fftwf_plan[numGrids];
01290   }
01291   if ( myTransPe >= 0 ) {
01292     forward_plan_x=new fftwf_plan[numGrids];
01293     backward_plan_x=new fftwf_plan[numGrids];
01294   }
01295   /* need one plan per grid */
01296   if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps.  1..." << endi;
01297   if ( myGridPe >= 0 ) {
01298     for( int g=0; g<numGrids; g++)
01299       {
01300         forward_plan_yz[g] = fftwf_plan_many_dft_r2c(2, n+1, 
01301                                                      localInfo[myGridPe].nx,
01302                                                      qgrid + qgrid_size * g,
01303                                                      NULL,
01304                                                      1,
01305                                                      myGrid.dim2 * myGrid.dim3,
01306                                                      (fftwf_complex *) 
01307                                                      qgrid + qgrid_size * g,
01308                                                      NULL,
01309                                                      1,
01310                                                      myGrid.dim2 * (myGrid.dim3/2),
01311                                                      fftwFlags);
01312       }
01313   }
01314   int zdim = myGrid.dim3;
01315   int xStride=localInfo[myTransPe].ny_after_transpose *( myGrid.dim3 / 2);
01316   if ( ! CkMyPe() ) iout << " 2..." << endi;
01317   if ( myTransPe >= 0 ) {
01318     for( int g=0; g<numGrids; g++)
01319       {
01320 
01321         forward_plan_x[g] = fftwf_plan_many_dft(1, n, xStride,
01322                                                 (fftwf_complex *)
01323                                                 (kgrid+qgrid_size*g),
01324                                                 NULL,
01325                                                 xStride,
01326                                                 1,
01327                                                 (fftwf_complex *)
01328                                                 (kgrid+qgrid_size*g),
01329                                                 NULL,
01330                                                 xStride,
01331                                                 1,
01332                                                 FFTW_FORWARD,fftwFlags);
01333         
01334       }
01335   }
01336   if ( ! CkMyPe() ) iout << " 3..." << endi;
01337   if ( myTransPe >= 0 ) {
01338     for( int g=0; g<numGrids; g++)
01339       {
01340         backward_plan_x[g] = fftwf_plan_many_dft(1, n, xStride,
01341                                                  (fftwf_complex *)
01342                                                  (kgrid+qgrid_size*g),
01343                                                  NULL,
01344                                                  xStride,
01345                                                  1,
01346                                                  (fftwf_complex *)
01347                                                  (kgrid+qgrid_size*g),
01348                                                  NULL,
01349                                                  xStride,
01350                                                  1,
01351                                                  FFTW_BACKWARD, fftwFlags);
01352 
01353       }
01354   }
01355   if ( ! CkMyPe() ) iout << " 4..." << endi;
01356   if ( myGridPe >= 0 ) {
01357     for( int g=0; g<numGrids; g++)
01358       {
01359         backward_plan_yz[g] = fftwf_plan_many_dft_c2r(2, n+1, 
01360                                                       localInfo[myGridPe].nx,
01361                                                       (fftwf_complex *)
01362                                                       qgrid + qgrid_size * g,
01363                                                       NULL,
01364                                                       1,
01365                                                       myGrid.dim2*(myGrid.dim3/2),
01366                                                       qgrid + qgrid_size * g,
01367                                                       NULL,
01368                                                       1,
01369                                                       myGrid.dim2 * myGrid.dim3,
01370                                                       fftwFlags);
01371       }
01372   }
01373   if ( ! CkMyPe() ) iout << "   Done.\n" << endi;
01374 
01375 #else
01376   work = new fftw_complex[n[0]];
01377 
01378   if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps.  1..." << endi;
01379   if ( myGridPe >= 0 ) {
01380   forward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_REAL_TO_COMPLEX,
01381         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01382         | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
01383   }
01384   if ( ! CkMyPe() ) iout << " 2..." << endi;
01385   if ( myTransPe >= 0 ) {
01386       forward_plan_x = fftw_create_plan_specific(n[0], FFTW_FORWARD,
01387         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01388         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
01389         localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
01390   }
01391   if ( ! CkMyPe() ) iout << " 3..." << endi;
01392   if ( myTransPe >= 0 ) {
01393   backward_plan_x = fftw_create_plan_specific(n[0], FFTW_BACKWARD,
01394         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01395         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
01396         localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
01397   }
01398   if ( ! CkMyPe() ) iout << " 4..." << endi;
01399   if ( myGridPe >= 0 ) {
01400   backward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_COMPLEX_TO_REAL,
01401         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01402         | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
01403   }
01404   if ( ! CkMyPe() ) iout << "   Done.\n" << endi;
01405 #endif
01406   CmiUnlock(fftw_plan_lock);
01407 #else
01408   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
01409 #endif
01410 
01411   if ( myGridPe >= 0 && numSources == 0 )
01412                 NAMD_bug("PME grid elements exist without sources.");
01413   grid_count = numSources;
01414   memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
01415   trans_count = numGridPes;
01416 }

void ComputePmeMgr::initialize_pencils CkQdMsg *   ) 
 

Definition at line 1420 of file ComputePme.C.

References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), BigReal, PmeGrid::block1, PmeGrid::block2, SimParameters::cutoff, j, PmeGrid::K1, PmeGrid::K2, SimParameters::lattice, PatchMap::max_a(), PatchMap::max_b(), PatchMap::min_a(), PatchMap::min_b(), PatchMap::node(), PatchMap::numPatches(), PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::patchDimension, Random::reorder(), Node::simParameters, simParams, and Vector::unit().

01420                                                    {
01421   delete msg;
01422   if ( ! usePencils ) return;
01423 
01424   SimParameters *simParams = Node::Object()->simParameters;
01425 
01426   PatchMap *patchMap = PatchMap::Object();
01427   Lattice lattice = simParams->lattice;
01428   BigReal sysdima = lattice.a_r().unit() * lattice.a();
01429   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
01430   BigReal cutoff = simParams->cutoff;
01431   BigReal patchdim = simParams->patchDimension;
01432   int numPatches = patchMap->numPatches();
01433 
01434   pencilActive = new char[xBlocks*yBlocks];
01435   for ( int i=0; i<xBlocks; ++i ) {
01436     for ( int j=0; j<yBlocks; ++j ) {
01437       pencilActive[i*yBlocks+j] = 0;
01438     }
01439   }
01440 
01441   for ( int pid=0; pid < numPatches; ++pid ) {
01442     int pnode = patchMap->node(pid);
01443     if ( pnode != CkMyPe() ) continue;
01444 
01445     BigReal minx = patchMap->min_a(pid);
01446     BigReal maxx = patchMap->max_a(pid);
01447     BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
01448     // min1 (max1) is smallest (largest) grid line for this patch
01449     int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
01450     int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
01451 
01452     BigReal miny = patchMap->min_b(pid);
01453     BigReal maxy = patchMap->max_b(pid);
01454     BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
01455     // min2 (max2) is smallest (largest) grid line for this patch
01456     int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
01457     int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
01458 
01459     for ( int i=min1; i<=max1; ++i ) {
01460       int ix = i;
01461       while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
01462       while ( ix < 0 ) ix += myGrid.K1;
01463       for ( int j=min2; j<=max2; ++j ) {
01464         int jy = j;
01465         while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
01466         while ( jy < 0 ) jy += myGrid.K2;
01467         pencilActive[(ix / myGrid.block1)*yBlocks + (jy / myGrid.block2)] = 1;
01468       }
01469     }
01470   }
01471 
01472   numPencilsActive = 0;
01473   for ( int i=0; i<xBlocks; ++i ) {
01474     for ( int j=0; j<yBlocks; ++j ) {
01475       if ( pencilActive[i*yBlocks+j] ) {
01476         ++numPencilsActive;
01477         zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
01478       }
01479     }
01480   }
01481   activePencils = new ijpair[numPencilsActive];
01482   numPencilsActive = 0;
01483   for ( int i=0; i<xBlocks; ++i ) {
01484     for ( int j=0; j<yBlocks; ++j ) {
01485       if ( pencilActive[i*yBlocks+j] ) {
01486         activePencils[numPencilsActive++] = ijpair(i,j);
01487       }
01488     }
01489   }
01490   Random rand(CkMyPe());
01491   rand.reorder(activePencils,numPencilsActive);
01492   //if ( numPencilsActive ) {
01493   //  CkPrintf("node %d sending to %d pencils\n", CkMyPe(), numPencilsActive);
01494   //}
01495 
01496   ungrid_count = numPencilsActive;
01497 }

int ComputePmeMgr::isPmeProcessor int  p  ) 
 

Definition at line 443 of file ComputePme.C.

References pencilPMEProcessors.

00443                                       { 
00444   return ( usePencils ? pencilPMEProcessors[p] : isPmeFlag[p] );
00445 }

void ComputePmeMgr::procTrans PmeTransMsg  ) 
 

Definition at line 1738 of file ComputePme.C.

References PmeGrid::dim3, PmeTransMsg::lattice, NodePmeInfo::npe, PmeTransMsg::nx, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PmeTransMsg::qgrid, PmeTransMsg::sequence, PmeTransMsg::x_start, and LocalPmeInfo::y_start_after_transpose.

Referenced by recvSharedTrans(), and recvTrans().

01738                                               {
01739   // CkPrintf("procTrans on Pe(%d)\n",CkMyPe());
01740   if ( trans_count == numGridPes ) {
01741     lattice = msg->lattice;
01742     sequence = msg->sequence;
01743   }
01744 
01745  if ( msg->nx ) {
01746   int zdim = myGrid.dim3;
01747   NodePmeInfo &nodeInfo(transNodeInfo[myTransNode]);
01748   int first_pe = nodeInfo.pe_start;
01749   int last_pe = first_pe+nodeInfo.npe-1;
01750   int y_skip = localInfo[myTransPe].y_start_after_transpose
01751              - localInfo[first_pe].y_start_after_transpose;
01752   int ny_msg = localInfo[last_pe].y_start_after_transpose
01753              + localInfo[last_pe].ny_after_transpose
01754              - localInfo[first_pe].y_start_after_transpose;
01755   int ny = localInfo[myTransPe].ny_after_transpose;
01756   int x_start = msg->x_start;
01757   int nx = msg->nx;
01758   for ( int g=0; g<numGrids; ++g ) {
01759     CmiMemcpy((void*)(kgrid + qgrid_size * g + x_start*ny*zdim),
01760         (void*)(msg->qgrid + nx*(ny_msg*g+y_skip)*zdim),
01761         nx*ny*zdim*sizeof(float));
01762   }
01763  }
01764 
01765   --trans_count;
01766 
01767   if ( trans_count == 0 ) {
01768     pmeProxyDir[CkMyPe()].gridCalc2();
01769   }
01770 }

void ComputePmeMgr::procUntrans PmeUntransMsg  ) 
 

Definition at line 1960 of file ComputePme.C.

References PmeGrid::dim3, PmeUntransMsg::evir, PmeGrid::K2, NodePmeInfo::npe, LocalPmeInfo::nx, PmeUntransMsg::ny, NodePmeInfo::pe_start, PmeUntransMsg::qgrid, LocalPmeInfo::x_start, and PmeUntransMsg::y_start.

Referenced by recvSharedUntrans(), and recvUntrans().

01960                                                   {
01961   // CkPrintf("recvUntrans on Pe(%d)\n",CkMyPe());
01962   if ( untrans_count == numTransPes ) {
01963     for ( int g=0; g<numGrids; ++g ) {
01964       recip_evir[g] = 0.;
01965     }
01966   }
01967 
01968 #if CMK_BLUEGENEL
01969   CmiNetworkProgressAfter (0);
01970 #endif
01971 
01972   NodePmeInfo &nodeInfo(gridNodeInfo[myGridNode]);
01973   int first_pe = nodeInfo.pe_start;
01974   int g;
01975   if ( myGridPe == first_pe ) for ( g=0; g<numGrids; ++g ) {
01976     recip_evir[g] += msg->evir[g];
01977   }
01978 
01979  if ( msg->ny ) {
01980   int zdim = myGrid.dim3;
01981   int last_pe = first_pe+nodeInfo.npe-1;
01982   int x_skip = localInfo[myGridPe].x_start
01983              - localInfo[first_pe].x_start;
01984   int nx_msg = localInfo[last_pe].x_start
01985              + localInfo[last_pe].nx
01986              - localInfo[first_pe].x_start;
01987   int nx = localInfo[myGridPe].nx;
01988   int y_start = msg->y_start;
01989   int ny = msg->ny;
01990   int slicelen = myGrid.K2 * zdim;
01991   int cpylen = ny * zdim;
01992   for ( g=0; g<numGrids; ++g ) {
01993     float *q = qgrid + qgrid_size * g + y_start * zdim;
01994     float *qmsg = msg->qgrid + (nx_msg*g+x_skip) * cpylen;
01995     for ( int x = 0; x < nx; ++x ) {
01996       CmiMemcpy((void*)q, (void*)qmsg, cpylen*sizeof(float));
01997       q += slicelen;
01998       qmsg += cpylen;
01999     }
02000   }
02001  }
02002 
02003   --untrans_count;
02004 
02005   if ( untrans_count == 0 ) {
02006     pmeProxyDir[CkMyPe()].gridCalc3();
02007   }
02008 }

void ComputePmeMgr::recvAck PmeAckMsg  ) 
 

Definition at line 2087 of file ComputePme.C.

Referenced by recvUngrid().

02087                                           {
02088   if ( msg ) delete msg;
02089   --ungrid_count;
02090 
02091   if ( ungrid_count == 0 ) {
02092     pmeProxyDir[CkMyPe()].ungridCalc();
02093   }
02094 }

void ComputePmeMgr::recvArrays CProxy_PmeXPencil  ,
CProxy_PmeYPencil  ,
CProxy_PmeZPencil 
 

Definition at line 555 of file ComputePme.C.

00556                                                                        {
00557   xPencil = x;  yPencil = y;  zPencil = z;
00558   
00559     if(CmiMyRank()==0)
00560     {
00561       pmeNodeProxy.ckLocalBranch()->xPencil=x;
00562       pmeNodeProxy.ckLocalBranch()->yPencil=y;
00563       pmeNodeProxy.ckLocalBranch()->zPencil=z;
00564     }
00565 }

void ComputePmeMgr::recvGrid PmeGridMsg  ) 
 

Definition at line 1535 of file ComputePme.C.

References PmeGrid::dim3, PmeGridMsg::fgrid, PmeGridMsg::lattice, NAMD_bug(), PmeGridMsg::qgrid, PmeGridMsg::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by ComputePme::sendPencils().

01535                                             {
01536   // CkPrintf("recvGrid from %d on Pe(%d)\n",msg->sourceNode,CkMyPe());
01537   if ( grid_count == 0 ) {
01538     NAMD_bug("Message order failure in ComputePmeMgr::recvGrid\n");
01539   }
01540   if ( grid_count == numSources ) {
01541     lattice = msg->lattice;
01542     sequence = msg->sequence;
01543   }
01544 
01545   int zdim = myGrid.dim3;
01546   int zlistlen = msg->zlistlen;
01547   int *zlist = msg->zlist;
01548   float *qmsg = msg->qgrid;
01549   for ( int g=0; g<numGrids; ++g ) {
01550     char *f = msg->fgrid + fgrid_len * g;
01551     float *q = qgrid + qgrid_size * g;
01552     for ( int i=0; i<fgrid_len; ++i ) {
01553       if ( f[i] ) {
01554         for ( int k=0; k<zlistlen; ++k ) {
01555           q[zlist[k]] += *(qmsg++);
01556         }
01557       }
01558       q += zdim;
01559     }
01560   }
01561 
01562   gridmsg_reuse[numSources-grid_count] = msg;
01563   --grid_count;
01564 
01565   if ( grid_count == 0 ) {
01566     pmeProxyDir[CkMyPe()].gridCalc1();
01567     if ( useBarrier ) pmeProxyDir[0].sendTransBarrier();
01568   }
01569 }

void ComputePmeMgr::recvSharedTrans PmeSharedTransMsg  ) 
 

Definition at line 1720 of file ComputePme.C.

References PmeSharedTransMsg::count, PmeSharedTransMsg::lock, PmeSharedTransMsg::msg, and procTrans().

01720                                                           {
01721   procTrans(msg->msg);
01722   CmiLock(msg->lock);
01723   int count = --(*msg->count);
01724   CmiUnlock(msg->lock);
01725   if ( count == 0 ) {
01726     CmiDestroyLock(msg->lock);
01727     delete msg->count;
01728     delete msg->msg;
01729   }
01730   delete msg;
01731 }

void ComputePmeMgr::recvSharedUntrans PmeSharedUntransMsg  ) 
 

Definition at line 1942 of file ComputePme.C.

References PmeSharedUntransMsg::count, PmeSharedUntransMsg::lock, PmeSharedUntransMsg::msg, and procUntrans().

01942                                                               {
01943   procUntrans(msg->msg);
01944   CmiLock(msg->lock);
01945   int count = --(*msg->count);
01946   CmiUnlock(msg->lock);
01947   if ( count == 0 ) {
01948     CmiDestroyLock(msg->lock);
01949     delete msg->count;
01950     delete msg->msg;
01951   }
01952   delete msg;
01953 }

void ComputePmeMgr::recvTrans PmeTransMsg  ) 
 

Definition at line 1733 of file ComputePme.C.

References procTrans().

01733                                               {
01734   procTrans(msg);
01735   delete msg;
01736 }

void ComputePmeMgr::recvUngrid PmeGridMsg  ) 
 

Definition at line 2075 of file ComputePme.C.

References ComputePme::copyPencils(), ComputePme::copyResults(), NAMD_bug(), and recvAck().

02075                                               {
02076   // CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
02077   if ( ungrid_count == 0 ) {
02078     NAMD_bug("Message order failure in ComputePmeMgr::recvUngrid\n");
02079   }
02080 
02081   if ( usePencils ) pmeCompute->copyPencils(msg);
02082   else pmeCompute->copyResults(msg);
02083   delete msg;
02084   recvAck(0);
02085 }

void ComputePmeMgr::recvUntrans PmeUntransMsg  ) 
 

Definition at line 1955 of file ComputePme.C.

References procUntrans().

01955                                                   {
01956   procUntrans(msg);
01957   delete msg;
01958 }

void ComputePmeMgr::sendGrid void   ) 
 

Definition at line 1531 of file ComputePme.C.

References ComputePme::sendData().

01531                                  {
01532   pmeCompute->sendData(numGridPes,gridPeOrder,recipPeDest,gridPeMap);
01533 }

void ComputePmeMgr::sendTrans void   ) 
 

Definition at line 1642 of file ComputePme.C.

References PmeGrid::dim3, fwdSharedTrans(), j, PmeGrid::K2, kgrid, PmeTransMsg::lattice, NodePmeInfo::npe, PmeTransMsg::nx, LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PME_TRANS_PRIORITY, PmeTransMsg::qgrid, qgrid_size, NodePmeInfo::real_node, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmeTransMsg::x_start, LocalPmeInfo::x_start, and LocalPmeInfo::y_start_after_transpose.

01642                                   {
01643   // CkPrintf("sendTrans on Pe(%d)\n",CkMyPe());
01644 
01645   // send data for transpose
01646   int zdim = myGrid.dim3;
01647   int nx = localInfo[myGridPe].nx;
01648   int x_start = localInfo[myGridPe].x_start;
01649   int slicelen = myGrid.K2 * zdim;
01650 
01651   ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects;
01652 
01653 #if CMK_BLUEGENEL
01654   CmiNetworkProgressAfter (0);
01655 #endif
01656 
01657   for (int j=0; j<numTransNodes; j++) {
01658     int node = transNodeOrder[j];  // different order on each node
01659     int pe = transNodeInfo[node].pe_start;
01660     int npe = transNodeInfo[node].npe;
01661     int totlen = 0;
01662     if ( node != myTransNode ) for (int i=0; i<npe; ++i, ++pe) {
01663       LocalPmeInfo &li = localInfo[pe];
01664       int cpylen = li.ny_after_transpose * zdim;
01665       totlen += cpylen;
01666     }
01667     PmeTransMsg *newmsg = new (nx * totlen * numGrids,
01668                                 PRIORITY_SIZE) PmeTransMsg;
01669     newmsg->sourceNode = myGridPe;
01670     newmsg->lattice = lattice;
01671     newmsg->x_start = x_start;
01672     newmsg->nx = nx;
01673     for ( int g=0; g<numGrids; ++g ) {
01674       float *qmsg = newmsg->qgrid + nx * totlen * g;
01675       pe = transNodeInfo[node].pe_start;
01676       for (int i=0; i<npe; ++i, ++pe) {
01677         LocalPmeInfo &li = localInfo[pe];
01678         int cpylen = li.ny_after_transpose * zdim;
01679         if ( node == myTransNode ) {
01680           ComputePmeMgr *m = mgrObjects[CkRankOf(transPeMap[pe])];
01681           qmsg = m->kgrid + m->qgrid_size * g + x_start*cpylen;
01682         }
01683         float *q = qgrid + qgrid_size * g + li.y_start_after_transpose * zdim;
01684         for ( int x = 0; x < nx; ++x ) {
01685           CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01686           q += slicelen;
01687           qmsg += cpylen;
01688         }
01689       }
01690     }
01691     newmsg->sequence = sequence;
01692     SET_PRIORITY(newmsg,sequence,PME_TRANS_PRIORITY)
01693     if ( node == myTransNode ) newmsg->nx = 0;
01694     if ( npe > 1 ) {
01695       if ( node == myTransNode ) fwdSharedTrans(newmsg);
01696       else pmeNodeProxy[transNodeInfo[node].real_node].recvTrans(newmsg);
01697     } else pmeProxy[transPeMap[transNodeInfo[node].pe_start]].recvTrans(newmsg);
01698   }
01699  
01700   untrans_count = numTransPes;
01701 
01702 }

void ComputePmeMgr::sendTransBarrier void   ) 
 

Definition at line 1632 of file ComputePme.C.

01632                                          {
01633   sendTransBarrier_received += 1;
01634   // CkPrintf("sendTransBarrier on %d %d\n",myGridPe,numGridPes-sendTransBarrier_received);
01635   if ( sendTransBarrier_received < numGridPes ) return;
01636   sendTransBarrier_received = 0;
01637   for ( int i=0; i<numGridPes; ++i ) {
01638     pmeProxyDir[gridPeMap[i]].sendTrans();
01639   }
01640 }

void ComputePmeMgr::sendUngrid void   ) 
 

Definition at line 2031 of file ComputePme.C.

References PmeGrid::dim3, PmeGridMsg::evir, PmeGridMsg::fgrid, j, PmeGridMsg::len, PME_UNGRID_PRIORITY, PmeGridMsg::qgrid, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

02031                                    {
02032 
02033   for ( int j=0; j<numSources; ++j ) {
02034     // int msglen = qgrid_len;
02035     PmeGridMsg *newmsg = gridmsg_reuse[j];
02036     int pe = newmsg->sourceNode;
02037     if ( j == 0 ) {  // only need these once
02038       for ( int g=0; g<numGrids; ++g ) {
02039         newmsg->evir[g] = recip_evir[g];
02040       }
02041     } else {
02042       for ( int g=0; g<numGrids; ++g ) {
02043         newmsg->evir[g] = 0.;
02044       }
02045     }
02046     int zdim = myGrid.dim3;
02047     int flen = newmsg->len;
02048     int fstart = newmsg->start;
02049     int zlistlen = newmsg->zlistlen;
02050     int *zlist = newmsg->zlist;
02051     float *qmsg = newmsg->qgrid;
02052     for ( int g=0; g<numGrids; ++g ) {
02053       char *f = newmsg->fgrid + fgrid_len * g;
02054       float *q = qgrid + qgrid_size * g + (fstart-fgrid_start) * zdim;
02055       for ( int i=0; i<flen; ++i ) {
02056         if ( f[i] ) {
02057           for ( int k=0; k<zlistlen; ++k ) {
02058             *(qmsg++) = q[zlist[k]];
02059           }
02060         }
02061         q += zdim;
02062       }
02063     }
02064     newmsg->sourceNode = myGridPe;
02065 
02066     SET_PRIORITY(newmsg,sequence,PME_UNGRID_PRIORITY)
02067     CmiEnableUrgentSend(1);
02068     pmeProxyDir[pe].recvUngrid(newmsg);
02069     CmiEnableUrgentSend(0);
02070   }
02071   grid_count = numSources;
02072   memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
02073 }

void ComputePmeMgr::sendUntrans void   ) 
 

Definition at line 1860 of file ComputePme.C.

References PmeGrid::dim3, PmeUntransMsg::evir, fwdSharedUntrans(), j, PmeGrid::K2, NodePmeInfo::npe, LocalPmeInfo::nx, PmeUntransMsg::ny, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PME_UNTRANS_PRIORITY, qgrid, PmeUntransMsg::qgrid, qgrid_size, NodePmeInfo::real_node, SET_PRIORITY, PmeUntransMsg::sourceNode, LocalPmeInfo::x_start, PmeUntransMsg::y_start, and LocalPmeInfo::y_start_after_transpose.

01860                                     {
01861 
01862   int zdim = myGrid.dim3;
01863   int y_start = localInfo[myTransPe].y_start_after_transpose;
01864   int ny = localInfo[myTransPe].ny_after_transpose;
01865   int slicelen = myGrid.K2 * zdim;
01866 
01867   ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects;
01868 
01869 #if CMK_BLUEGENEL
01870   CmiNetworkProgressAfter (0);
01871 #endif
01872 
01873   // send data for reverse transpose
01874   for (int j=0; j<numGridNodes; j++) {
01875     int node = gridNodeOrder[j];  // different order on each node
01876     int pe = gridNodeInfo[node].pe_start;
01877     int npe = gridNodeInfo[node].npe;
01878     int totlen = 0;
01879     if ( node != myGridNode ) for (int i=0; i<npe; ++i, ++pe) {
01880       LocalPmeInfo &li = localInfo[pe];
01881       int cpylen = li.nx * zdim;
01882       totlen += cpylen;
01883     }
01884     PmeUntransMsg *newmsg = new (ny * totlen * numGrids,numGrids,
01885                                 PRIORITY_SIZE) PmeUntransMsg;
01886     newmsg->sourceNode = myTransPe;
01887     newmsg->y_start = y_start;
01888     newmsg->ny = ny;
01889     for ( int g=0; g<numGrids; ++g ) {
01890       if ( j == 0 ) {  // only need these once
01891         newmsg->evir[g] = recip_evir2[g];
01892       } else {
01893         newmsg->evir[g] = 0.;
01894       }
01895       float *qmsg = newmsg->qgrid + ny * totlen * g;
01896       pe = gridNodeInfo[node].pe_start;
01897       for (int i=0; i<npe; ++i, ++pe) {
01898         LocalPmeInfo &li = localInfo[pe];
01899         if ( node == myGridNode ) {
01900           ComputePmeMgr *m = mgrObjects[CkRankOf(gridPeMap[pe])];
01901           qmsg = m->qgrid + m->qgrid_size * g + y_start * zdim;
01902           float *q = kgrid + qgrid_size*g + li.x_start*ny*zdim;
01903           int cpylen = ny * zdim;
01904           for ( int x = 0; x < li.nx; ++x ) {
01905             CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01906             q += cpylen;
01907             qmsg += slicelen;
01908           }
01909         } else {
01910           CmiMemcpy((void*)qmsg,
01911                 (void*)(kgrid + qgrid_size*g + li.x_start*ny*zdim),
01912                 li.nx*ny*zdim*sizeof(float));
01913           qmsg += li.nx*ny*zdim;
01914         }
01915       }
01916     }
01917     SET_PRIORITY(newmsg,sequence,PME_UNTRANS_PRIORITY)
01918     if ( node == myGridNode ) newmsg->ny = 0;
01919     if ( npe > 1 ) {
01920       if ( node == myGridNode ) fwdSharedUntrans(newmsg);
01921       else pmeNodeProxy[gridNodeInfo[node].real_node].recvUntrans(newmsg);
01922     } else pmeProxy[gridPeMap[gridNodeInfo[node].pe_start]].recvUntrans(newmsg);
01923   }
01924 
01925   trans_count = numGridPes;
01926 }

void ComputePmeMgr::setCompute ComputePme c  )  [inline]
 

Definition at line 347 of file ComputePme.C.

References ComputePme::setMgr().

00347 { pmeCompute = c; c->setMgr(this); }

void ComputePmeMgr::ungridCalc void   ) 
 

Definition at line 2096 of file ComputePme.C.

References ComputePme::ungridForces().

02096                                    {
02097   // CkPrintf("ungridCalc on Pe(%d)\n",CkMyPe());
02098 
02099   pmeCompute->ungridForces();
02100 
02101   ungrid_count = (usePencils ? numPencilsActive : numDestRecipPes );
02102 }


Friends And Related Function Documentation

friend class ComputePme [friend]
 

Definition at line 312 of file ComputePme.C.

friend class NodePmeMgr [friend]
 

Definition at line 313 of file ComputePme.C.


Member Data Documentation

CmiNodeLock ComputePmeMgr::fftw_plan_lock [static]
 

Definition at line 436 of file ComputePme.C.

Referenced by ComputePmeMgr(), initialize(), and ~ComputePmeMgr().


The documentation for this class was generated from the following file:
Generated on Fri May 25 04:07:21 2012 for NAMD by  doxygen 1.3.9.1