PmeXPencil Class Reference

Inheritance diagram for PmeXPencil:

PmePencil< CBase_PmeXPencil > CBase_PmeXPencil List of all members.

Public Member Functions

PmeXPencil_SDAG_CODE PmeXPencil ()
 PmeXPencil (CkMigrateMessage *)
 ~PmeXPencil ()
void fft_init ()
void recv_trans (const PmeTransMsg *)
void forward_fft ()
void pme_kspace ()
void backward_fft ()
void send_untrans ()
void send_subset_untrans (int fromIdx, int toIdx, int evirIdx)
void node_process_trans (PmeTransMsg *)
void evir_init ()

Public Attributes

fftw_plan forward_plan
fftw_plan backward_plan
int ny
int nz
int recipEvirPe
PmeKSpacemyKSpace

Detailed Description

Definition at line 4735 of file ComputePme.C.


Constructor & Destructor Documentation

PmeXPencil_SDAG_CODE PmeXPencil::PmeXPencil (  )  [inline]

Definition at line 4738 of file ComputePme.C.

References PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::imsgb, myKSpace, and recipEvirPe.

04738 { __sdag_init();  myKSpace = 0; setMigratable(false); imsg=imsgb=0; recipEvirPe = -999; }

PmeXPencil::PmeXPencil ( CkMigrateMessage *   )  [inline]

Definition at line 4739 of file ComputePme.C.

04739 { __sdag_init(); }

PmeXPencil::~PmeXPencil (  )  [inline]

Definition at line 4740 of file ComputePme.C.

04740                       {
04741         #ifdef NAMD_FFTW
04742         #ifdef NAMD_FFTW_3
04743                 delete [] forward_plans;
04744                 delete [] backward_plans;
04745         #endif
04746         #endif
04747         }


Member Function Documentation

void PmeXPencil::backward_fft (  ) 

Definition at line 5741 of file ComputePme.C.

References backward_plan, CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeXPencil >::data, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeXPencil >::work, PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

05741                               {
05742 #ifdef NAMD_FFTW
05743 #ifdef MANUAL_DEBUG_FFTW3
05744   dumpMatrixFloat3("bw_x_b", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
05745 #endif
05746 
05747 #ifdef NAMD_FFTW_3
05748 #if     CMK_SMP && USE_CKLOOP
05749   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05750   if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
05751      && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
05752           //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
05753           //transform the above loop
05754           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05755           return;
05756   }
05757 #endif
05758   fftwf_execute(backward_plan);
05759 #else
05760   fftw(backward_plan, ny*nz,
05761         ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
05762 #endif
05763 #ifdef MANUAL_DEBUG_FFTW3
05764   dumpMatrixFloat3("bw_x_a", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
05765 #endif
05766 #endif
05767 }

void PmeXPencil::evir_init (  ) 

Definition at line 4793 of file ComputePme.C.

References findRecipEvirPe(), PmePencil< CBase_PmeXPencil >::initdata, PmePencilInitMsgData::pmeProxy, and recipEvirPe.

04793                            {
04794   recipEvirPe = findRecipEvirPe();
04795   initdata.pmeProxy[recipEvirPe].addRecipEvirClient();
04796 }

void PmeXPencil::fft_init (  ) 

Definition at line 5064 of file ComputePme.C.

References backward_plan, PmeGrid::block2, PmeGrid::block3, PmePencil< CBase_PmeXPencil >::data, PmeGrid::dim3, fftwf_malloc, forward_plan, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmeGrid::K2, myKSpace, NAMD_die(), PmePencil< CBase_PmeXPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeXPencil >::work, and PmePencilInitMsgData::xBlocks.

05064                           {
05065   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
05066   Node *node = nd.ckLocalBranch();
05067   SimParameters *simParams = node->simParameters;
05068 #if USE_NODE_PAR_RECEIVE
05069   ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerXPencil(thisIndex,this);
05070 #endif
05071 
05072   int K1 = initdata.grid.K1;
05073   int K2 = initdata.grid.K2;
05074   int dim3 = initdata.grid.dim3;
05075   int block2 = initdata.grid.block2;
05076   int block3 = initdata.grid.block3;
05077 
05078   ny = block2;
05079   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
05080   nz = block3;
05081   if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
05082 
05083 #ifdef NAMD_FFTW
05084   CmiLock(ComputePmeMgr::fftw_plan_lock);
05085 
05086   data = (float *) fftwf_malloc( sizeof(float) * K1*ny*nz*2);
05087   work = new float[2*K1];
05088 
05089   order_init(initdata.xBlocks);
05090 
05091 #ifdef NAMD_FFTW_3
05092   /* need array of sizes for the how many */
05093   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
05094   int sizeLines=ny*nz;
05095   int planLineSizes[1];
05096   planLineSizes[0]=K1;
05097   forward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
05098                                      (fftwf_complex *) data, NULL, sizeLines, 1,
05099                                      (fftwf_complex *) data, NULL, sizeLines, 1,
05100                                    FFTW_FORWARD,
05101                                      fftwFlags);
05102   backward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
05103                                      (fftwf_complex *) data, NULL, sizeLines, 1,
05104                                      (fftwf_complex *) data, NULL, sizeLines, 1,
05105                                           FFTW_BACKWARD,
05106                                       fftwFlags);
05107 
05108 #if     CMK_SMP && USE_CKLOOP
05109   if(simParams->useCkLoop) {
05110           //How many FFT plans to be created? The grain-size issue!!.
05111           //Currently, I am choosing the min(nx, ny) to be coarse-grain
05112           numPlans = (ny<=nz?ny:nz);
05113           // limit attempted parallelism due to false sharing
05114           //if ( numPlans < CkMyNodeSize() ) numPlans = (ny>=nz?ny:nz);
05115           //if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
05116           if ( sizeLines/numPlans < 4 ) numPlans = 1;
05117           int howmany = sizeLines/numPlans;
05118           forward_plans = new fftwf_plan[numPlans];
05119           backward_plans = new fftwf_plan[numPlans];
05120           for(int i=0; i<numPlans; i++) {
05121                   int curStride = i*howmany;              
05122                   forward_plans[i] = fftwf_plan_many_dft(1, planLineSizes, howmany,
05123                                                                                                          ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
05124                                                                                                          ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
05125                                                                                                         FFTW_FORWARD,
05126                                                                                                          fftwFlags);
05127 
05128                   backward_plans[i] = fftwf_plan_many_dft(1, planLineSizes, howmany,
05129                                                                                                          ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
05130                                                                                                          ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
05131                                                                                                           FFTW_BACKWARD,
05132                                                                                                          fftwFlags);
05133           }
05134   }else
05135 #endif
05136   {
05137           forward_plans = NULL;
05138           backward_plans = NULL;
05139   }
05140 #else
05141   forward_plan = fftw_create_plan_specific(K1, FFTW_FORWARD,
05142         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
05143         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
05144         ny*nz, (fftw_complex *) work, 1);
05145   backward_plan = fftw_create_plan_specific(K1, FFTW_BACKWARD,
05146         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
05147         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
05148         ny*nz, (fftw_complex *) work, 1);
05149 #endif
05150   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
05151 #else
05152   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
05153 #endif
05154 
05155   myKSpace = new PmeKSpace(initdata.grid,
05156                 thisIndex.y*block2, thisIndex.y*block2 + ny,
05157                 thisIndex.z*block3, thisIndex.z*block3 + nz);
05158 
05159 }

void PmeXPencil::forward_fft (  ) 

Definition at line 5682 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeXPencil >::data, forward_plan, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeXPencil >::work, PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

05682                              {
05683 #ifdef NAMD_FFTW
05684 
05685 #ifdef MANUAL_DEBUG_FFTW3
05686   dumpMatrixFloat3("fw_x_b", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
05687 #endif
05688 
05689 #ifdef NAMD_FFTW_3
05690 #if     CMK_SMP && USE_CKLOOP
05691   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05692   if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
05693      && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
05694           //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
05695           //transform the above loop
05696           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05697           return;
05698   }
05699 #endif
05700   fftwf_execute(forward_plan);
05701 #else
05702   fftw(forward_plan, ny*nz,
05703         ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
05704 #endif
05705 #ifdef MANUAL_DEBUG_FFTW3
05706   dumpMatrixFloat3("fw_x_a", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
05707 #endif
05708 
05709 #endif
05710 }

void PmeXPencil::node_process_trans ( PmeTransMsg  ) 

Definition at line 5625 of file ComputePme.C.

References backward_fft(), forward_fft(), PmePencil< CBase_PmeXPencil >::hasData, PmeTransMsg::hasData, PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::initdata, PmePencil< CBase_PmeXPencil >::needs_reply, pme_kspace(), recv_trans(), send_untrans(), PmeTransMsg::sourceNode, and PmePencilInitMsgData::xBlocks.

Referenced by NodePmeMgr::recvXTrans().

05626 {
05627   if(msg->hasData) hasData=1;
05628   needs_reply[msg->sourceNode] = msg->hasData;
05629   recv_trans(msg);
05630   int limsg;
05631   CmiMemoryAtomicFetchAndInc(imsg,limsg);
05632   if(limsg+1 == initdata.xBlocks)
05633     {
05634       if(hasData){
05635         forward_fft();
05636         pme_kspace();
05637         backward_fft();
05638       }
05639       send_untrans();
05640       imsg=0;
05641       CmiMemoryWriteFence();
05642     }
05643 }

void PmeXPencil::pme_kspace (  ) 

Definition at line 5712 of file ComputePme.C.

References CKLOOP_CTRL_PME_KSPACE, PmeKSpace::compute_energy(), PmePencil< CBase_PmeXPencil >::data, PmePencil< CBase_PmeXPencil >::evir, ComputeNonbondedUtil::ewaldcof, PmePencil< CBase_PmeXPencil >::initdata, PmePencil< CBase_PmeXPencil >::lattice, myKSpace, Node::Object(), PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

05712                             {
05713 
05714   evir = 0.;
05715 
05716 #ifdef FFTCHECK
05717   return;
05718 #endif
05719 
05720   BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
05721 
05722   int useCkLoop = 0;
05723 #if CMK_SMP && USE_CKLOOP
05724   if ( Node::Object()->simParameters->useCkLoop >= CKLOOP_CTRL_PME_KSPACE
05725        && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks ) {
05726     useCkLoop = 1;
05727   }
05728 #endif
05729 
05730   int numGrids = 1;
05731   for ( int g=0; g<numGrids; ++g ) {
05732     evir[0] = myKSpace->compute_energy(data+0*g,
05733                 lattice, ewaldcof, &(evir[1]), useCkLoop);
05734   }
05735   
05736 #if USE_NODE_PAR_RECEIVE
05737     CmiMemoryWriteFence();
05738 #endif
05739 }

void PmeXPencil::recv_trans ( const PmeTransMsg  ) 

Definition at line 5645 of file ComputePme.C.

References PmeGrid::block1, PmePencil< CBase_PmeXPencil >::data, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::initdata, j, PmeGrid::K1, PmeTransMsg::lattice, PmePencil< CBase_PmeXPencil >::lattice, PmeTransMsg::nx, PmeTransMsg::qgrid, PmeTransMsg::sequence, PmePencil< CBase_PmeXPencil >::sequence, and PmeTransMsg::sourceNode.

Referenced by node_process_trans().

05645                                                   {
05646   if ( imsg == 0 ) {
05647     lattice = msg->lattice;
05648     sequence = msg->sequence;
05649   }
05650   int block1 = initdata.grid.block1;
05651   int K1 = initdata.grid.K1;
05652   int ib = msg->sourceNode;
05653   int nx = msg->nx;
05654  if ( msg->hasData ) {
05655   const float *md = msg->qgrid;
05656   for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
05657    float *d = data + i*ny*nz*2;
05658    for ( int j=0; j<ny; ++j, d += nz*2 ) {
05659     for ( int k=0; k<nz; ++k ) {
05660 #ifdef ZEROCHECK
05661       if ( (*md) == 0. ) CkPrintf("0 in YX at %d %d %d %d %d %d %d %d %d\n",
05662         ib, thisIndex.y, thisIndex.z, i, j, k, nx, ny, nz);
05663 #endif
05664       d[2*k] = *(md++);
05665       d[2*k+1] = *(md++);
05666     }
05667    }
05668   }
05669  } else {
05670   for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
05671    float *d = data + i*ny*nz*2;
05672    for ( int j=0; j<ny; ++j, d += nz*2 ) {
05673     for ( int k=0; k<nz; ++k ) {
05674       d[2*k] = 0;
05675       d[2*k+1] = 0;
05676     }
05677    }
05678   }
05679  }
05680 }

void PmeXPencil::send_subset_untrans ( int  fromIdx,
int  toIdx,
int  evirIdx 
)

Definition at line 5775 of file ComputePme.C.

References PmeGrid::block1, PmePencil< CBase_PmeXPencil >::data, PmeUntransMsg::destElem, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, j, PmeGrid::K1, PmeUntransMsg::ny, PME_UNTRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeUntransMsg::qgrid, PmePencil< CBase_PmeXPencil >::send_order, PmePencil< CBase_PmeXPencil >::sequence, SET_PRIORITY, PmeUntransMsg::sourceNode, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::ym, and PmePencilInitMsgData::yPencil.

Referenced by PmeXPencilSendUntrans().

05775                                                                        {
05776         int xBlocks = initdata.xBlocks;
05777         int block1 = initdata.grid.block1;      
05778         int K1 = initdata.grid.K1;
05779 
05780         int ackL=0, ackH=-1;
05781         int unL=0, unH=-1;
05782         int send_evir=0;
05783         if(fromIdx >= evirIdx+1) {
05784                 //send PmeUntransMsg with has_evir=0
05785                 unL = fromIdx;
05786                 unH = toIdx;            
05787         } else if(toIdx <= evirIdx-1) {
05788                 //send PmeAckMsg
05789                 ackL=fromIdx;
05790                 ackH=toIdx;             
05791         } else {
05792                 //partially send PmeAckMsg and partially send PmeUntransMsg
05793                 ackL=fromIdx;
05794                 ackH=evirIdx-1;
05795                 send_evir=1;
05796                 unL=evirIdx+1;
05797                 unH=toIdx;
05798         }
05799 
05800         for(int isend=ackL; isend<=ackH; isend++) {
05801                 //send PmeAckMsg
05802         CmiEnableUrgentSend(1);
05803                 int ib = send_order[isend];
05804                 PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
05805                 SET_PRIORITY(msg,sequence,PME_UNTRANS_PRIORITY)
05806                 initdata.yPencil(ib,0,thisIndex.z).recvAck(msg);
05807         CmiEnableUrgentSend(0);
05808     }
05809 
05810     CmiEnableUrgentSend(1);
05811         //send PmeUntransMsg with has_evir=1
05812         if(send_evir) {
05813                 int ib = send_order[evirIdx];
05814                 int nx = block1;
05815                 if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
05816                 PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;              
05817                 msg->sourceNode = thisIndex.y;
05818                 msg->ny = ny;
05819                 float *md = msg->qgrid;
05820                 for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
05821                         float *d = data + i*ny*nz*2;
05822                         for ( int j=0; j<ny; ++j, d += nz*2 ) {
05823                                 for ( int k=0; k<nz; ++k ) {
05824                                         *(md++) = d[2*k];
05825                                         *(md++) = d[2*k+1];
05826                                 }
05827                         }
05828                 }
05829                 SET_PRIORITY(msg,sequence,PME_UNTRANS_PRIORITY)
05830 #if USE_NODE_PAR_RECEIVE
05831         msg->destElem=CkArrayIndex3D(ib,0, thisIndex.z);
05832         initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYUntrans(msg);
05833 #else
05834         initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
05835 #endif
05836          }
05837     CmiEnableUrgentSend(0);
05838         
05839         //send PmeUntransMsg with has_evir=0
05840         for(int isend=unL; isend<=unH; isend++) {
05841                 int ib = send_order[isend];
05842                 int nx = block1;
05843                 if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
05844                 PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
05845                 msg->sourceNode = thisIndex.y;
05846                 msg->ny = ny;
05847                 float *md = msg->qgrid;
05848                 for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
05849                         float *d = data + i*ny*nz*2;
05850                         for ( int j=0; j<ny; ++j, d += nz*2 ) {
05851                                 for ( int k=0; k<nz; ++k ) {
05852                                         *(md++) = d[2*k];
05853                                         *(md++) = d[2*k+1];
05854                                 }
05855                         }
05856                 }
05857                 SET_PRIORITY(msg,sequence,PME_UNTRANS_PRIORITY)
05858         CmiEnableUrgentSend(1);
05859 #if USE_NODE_PAR_RECEIVE
05860         msg->destElem=CkArrayIndex3D(ib,0, thisIndex.z);
05861 #if Y_PERSIST 
05862         CmiUsePersistentHandle(&untrans_handle[isend], 1);
05863 #endif
05864         initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYUntrans(msg);
05865 #if Y_PERSIST 
05866         CmiUsePersistentHandle(NULL, 0);
05867 #endif
05868 #else
05869 #if Y_PERSIST 
05870   //      CmiUsePersistentHandle(&untrans_handle[isend], 1);
05871 #endif
05872         initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
05873 #if Y_PERSIST 
05874    //     CmiUsePersistentHandle(NULL, 0);
05875 #endif
05876 #endif
05877         CmiEnableUrgentSend(0);
05878         }
05879 }

void PmeXPencil::send_untrans (  ) 

Definition at line 5881 of file ComputePme.C.

References PmeGrid::block1, CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeXPencil >::data, PmeUntransMsg::destElem, PmePencil< CBase_PmeXPencil >::evir, PmeEvirMsg::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, j, PmeGrid::K1, PmePencil< CBase_PmeXPencil >::needs_reply, PmeUntransMsg::ny, Node::Object(), PME_UNGRID_PRIORITY, PME_UNTRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PmeXPencilSendUntrans(), PRIORITY_SIZE, PmeUntransMsg::qgrid, recipEvirPe, PmePencil< CBase_PmeXPencil >::send_order, PmePencil< CBase_PmeXPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeUntransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

05881                               {
05882 
05883   { // send energy and virial
05884     int numGrids = 1;
05885     PmeEvirMsg *newmsg = new (numGrids, PRIORITY_SIZE) PmeEvirMsg;
05886     newmsg->evir[0] = evir;
05887     SET_PRIORITY(newmsg,sequence,PME_UNGRID_PRIORITY)
05888     CmiEnableUrgentSend(1);
05889     initdata.pmeProxy[recipEvirPe].recvRecipEvir(newmsg);
05890     CmiEnableUrgentSend(0);
05891   }
05892 
05893 #if USE_PERSISTENT
05894   if (untrans_handle == NULL) setup_persistent();
05895 #endif
05896 #if     CMK_SMP && USE_CKLOOP
05897   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05898   if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
05899      && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
05900                 int xBlocks = initdata.xBlocks;
05901                 int evirIdx = 0;
05902                 for ( int isend=0; isend<xBlocks; ++isend ) {
05903                         int ib = send_order[isend];
05904                         if (needs_reply[ib]) {
05905                                 evirIdx = isend;
05906                                 break;
05907                         }
05908                 }
05909 
05910                 //basically: 
05911                 //[0,evirIdx-1]->send PmeAckMsg
05912                 //evirIdx->send PmeUntransMsg with has_evir=1
05913                 //[evirIdx+1, xBlocks-1]->send PmeUntransMsg with has_evir=0
05914                 //send_subset_untrans(0, xBlocks-1, evirIdx);
05915 #if USE_NODE_PAR_RECEIVE
05916                 //CkLoop_Parallelize(PmeXPencilSendUntrans, evirIdx, (void *)this, CkMyNodeSize(), 0, xBlocks-1, 1); //has to sync
05917                 CkLoop_Parallelize(PmeXPencilSendUntrans, evirIdx, (void *)this, xBlocks, 0, xBlocks-1, 1); //has to sync
05918 #else
05919         //CkLoop_Parallelize(PmeXPencilSendUntrans, evirIdx, (void *)this, CkMyNodeSize(), 0, xBlocks-1, 0); //not sync
05920                 CkLoop_Parallelize(PmeXPencilSendUntrans, evirIdx, (void *)this, xBlocks, 0, xBlocks-1, 0); //not sync
05921 #endif        
05922                 return;
05923   }
05924 #endif
05925   int xBlocks = initdata.xBlocks;
05926   int block1 = initdata.grid.block1;
05927   int K1 = initdata.grid.K1;
05928   int send_evir = 1;
05929   for ( int isend=0; isend<xBlocks; ++isend ) {
05930     int ib = send_order[isend];
05931     if ( ! needs_reply[ib] ) {
05932       PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
05933       CmiEnableUrgentSend(1);
05934       SET_PRIORITY(msg,sequence,PME_UNTRANS_PRIORITY)
05935       initdata.yPencil(ib,0,thisIndex.z).recvAck(msg);
05936       CmiEnableUrgentSend(0);
05937       continue;
05938     }
05939     int nx = block1;
05940     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
05941     PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
05942     if ( send_evir ) {
05943       send_evir = 0;
05944     }
05945     msg->sourceNode = thisIndex.y;
05946     msg->ny = ny;
05947     float *md = msg->qgrid;
05948     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
05949      float *d = data + i*ny*nz*2;
05950      for ( int j=0; j<ny; ++j, d += nz*2 ) {
05951       for ( int k=0; k<nz; ++k ) {
05952         *(md++) = d[2*k];
05953         *(md++) = d[2*k+1];
05954       }
05955      }
05956     }
05957     SET_PRIORITY(msg,sequence,PME_UNTRANS_PRIORITY)
05958 
05959     CmiEnableUrgentSend(1);
05960 #if USE_NODE_PAR_RECEIVE
05961     msg->destElem=CkArrayIndex3D(ib,0, thisIndex.z);
05962 #if Y_PERSIST 
05963     CmiUsePersistentHandle(&untrans_handle[isend], 1);
05964 #endif
05965     initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYUntrans(msg);
05966 #if Y_PERSIST 
05967     CmiUsePersistentHandle(NULL, 0);
05968 #endif
05969 #else
05970 #if Y_PERSIST 
05971     CmiUsePersistentHandle(&untrans_handle[isend], 1);
05972 #endif
05973     initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
05974 #if Y_PERSIST 
05975     CmiUsePersistentHandle(NULL, 0);
05976 #endif
05977 #endif
05978     CmiEnableUrgentSend(0);
05979   }
05980 }


Member Data Documentation

fftw_plan PmeXPencil::backward_plan

Definition at line 4763 of file ComputePme.C.

Referenced by backward_fft(), and fft_init().

fftw_plan PmeXPencil::forward_plan

Definition at line 4763 of file ComputePme.C.

Referenced by fft_init(), and forward_fft().

PmeKSpace* PmeXPencil::myKSpace

Definition at line 4769 of file ComputePme.C.

Referenced by fft_init(), pme_kspace(), and PmeXPencil().

int PmeXPencil::ny

Definition at line 4766 of file ComputePme.C.

int PmeXPencil::nz

Definition at line 4766 of file ComputePme.C.

int PmeXPencil::recipEvirPe

Definition at line 4767 of file ComputePme.C.

Referenced by evir_init(), PmeXPencil(), and send_untrans().


The documentation for this class was generated from the following file:
Generated on Mon Sep 25 01:17:19 2017 for NAMD by  doxygen 1.4.7