NAMD
Public Member Functions | Public Attributes | List of all members
PmeXPencil Class Reference
Inheritance diagram for PmeXPencil:
PmePencil< CBase_PmeXPencil >

Public Member Functions

PmeXPencil_SDAG_CODE PmeXPencil ()
 
 PmeXPencil (CkMigrateMessage *)
 
 ~PmeXPencil ()
 
void fft_init ()
 
void recv_trans (const PmeTransMsg *)
 
void forward_fft ()
 
void pme_kspace ()
 
void backward_fft ()
 
void send_untrans ()
 
void send_subset_untrans (int fromIdx, int toIdx)
 
void node_process_trans (PmeTransMsg *)
 
void evir_init ()
 
- Public Member Functions inherited from PmePencil< CBase_PmeXPencil >
 PmePencil ()
 
 ~PmePencil ()
 
void base_init (PmePencilInitMsg *msg)
 
void order_init (int nBlocks)
 

Public Attributes

fftw_plan forward_plan
 
fftw_plan backward_plan
 
int ny
 
int nz
 
int recipEvirPe
 
PmeKSpace * myKSpace
 
- Public Attributes inherited from PmePencil< CBase_PmeXPencil >
PmePencilInitMsgData initdata
 
Lattice lattice
 
PmeReduction evir
 
int sequence
 
AtomicInt imsg
 
AtomicInt imsgb
 
int hasData
 
int offload
 
float * data
 
float * work
 
int * send_order
 
int * needs_reply
 

Additional Inherited Members

- Public Types inherited from PmePencil< CBase_PmeXPencil >
typedef int AtomicInt
 

Detailed Description

Definition at line 4788 of file ComputePme.C.

Constructor & Destructor Documentation

◆ PmeXPencil() [1/2]

PmeXPencil_SDAG_CODE PmeXPencil::PmeXPencil ( )
inline

Definition at line 4791 of file ComputePme.C.

References PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::imsgb, myKSpace, and recipEvirPe.

4791 { __sdag_init(); myKSpace = 0; setMigratable(false); imsg=imsgb=0; recipEvirPe = -999; }
PmeKSpace * myKSpace
Definition: ComputePme.C:4822
int recipEvirPe
Definition: ComputePme.C:4820

◆ PmeXPencil() [2/2]

PmeXPencil::PmeXPencil ( CkMigrateMessage *  )
inline

Definition at line 4792 of file ComputePme.C.

4792 { __sdag_init(); }

◆ ~PmeXPencil()

PmeXPencil::~PmeXPencil ( )
inline

Definition at line 4793 of file ComputePme.C.

4793  {
4794  #ifdef NAMD_FFTW
4795  #ifdef NAMD_FFTW_3
4796  delete [] forward_plans;
4797  delete [] backward_plans;
4798  #endif
4799  #endif
4800  }

Member Function Documentation

◆ backward_fft()

void PmeXPencil::backward_fft ( )

Definition at line 5802 of file ComputePme.C.

References backward_plan, CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeXPencil >::data, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, ny, nz, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeXPencil >::work, PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5802  {
5803 #ifdef NAMD_FFTW
5804 #ifdef MANUAL_DEBUG_FFTW3
5805  dumpMatrixFloat3("bw_x_b", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5806 #endif
5807 
5808 #ifdef NAMD_FFTW_3
5809 #if CMK_SMP && USE_CKLOOP
5810  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5811  if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
5812  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
5813  //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
5814  //transform the above loop
5815  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5816  return;
5817  }
5818 #endif
5819  fftwf_execute(backward_plan);
5820 #else
5821  fftw(backward_plan, ny*nz,
5822  ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
5823 #endif
5824 #ifdef MANUAL_DEBUG_FFTW3
5825  dumpMatrixFloat3("bw_x_a", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5826 #endif
5827 #endif
5828 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5266
fftw_plan backward_plan
Definition: ComputePme.C:4816
#define CKLOOP_CTRL_PME_BACKWARDFFT

◆ evir_init()

void PmeXPencil::evir_init ( )

Definition at line 4846 of file ComputePme.C.

References findRecipEvirPe(), PmePencil< CBase_PmeXPencil >::initdata, PmePencilInitMsgData::pmeProxy, and recipEvirPe.

4846  {
4848  initdata.pmeProxy[recipEvirPe].addRecipEvirClient();
4849 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:245
static int findRecipEvirPe()
Definition: ComputePme.C:269
int recipEvirPe
Definition: ComputePme.C:4820

◆ fft_init()

void PmeXPencil::fft_init ( )

Definition at line 5124 of file ComputePme.C.

References backward_plan, PmeGrid::block2, PmeGrid::block3, PmePencil< CBase_PmeXPencil >::data, PmeGrid::dim3, ComputePmeMgr::fftw_plan_lock, fftwf_malloc, forward_plan, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmeGrid::K2, myKSpace, NAMD_die(), ny, nz, PmePencil< CBase_PmeXPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeXPencil >::work, and PmePencilInitMsgData::xBlocks.

5124  {
5125  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
5126  Node *node = nd.ckLocalBranch();
5128 #if USE_NODE_PAR_RECEIVE
5129  ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerXPencil(thisIndex,this);
5130 #endif
5131 
5132  int K1 = initdata.grid.K1;
5133  int K2 = initdata.grid.K2;
5134  int dim3 = initdata.grid.dim3;
5135  int block2 = initdata.grid.block2;
5136  int block3 = initdata.grid.block3;
5137 
5138  ny = block2;
5139  if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
5140  nz = block3;
5141  if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
5142 
5143 #ifdef NAMD_FFTW
5145 
5146  data = (float *) fftwf_malloc( sizeof(float) * K1*ny*nz*2);
5147  work = new float[2*K1];
5148 
5150 
5151 #ifdef NAMD_FFTW_3
5152  /* need array of sizes for the how many */
5153  int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
5154  int sizeLines=ny*nz;
5155  int planLineSizes[1];
5156  planLineSizes[0]=K1;
5157  forward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
5158  (fftwf_complex *) data, NULL, sizeLines, 1,
5159  (fftwf_complex *) data, NULL, sizeLines, 1,
5160  FFTW_FORWARD,
5161  fftwFlags);
5162  backward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
5163  (fftwf_complex *) data, NULL, sizeLines, 1,
5164  (fftwf_complex *) data, NULL, sizeLines, 1,
5165  FFTW_BACKWARD,
5166  fftwFlags);
5167 
5168 #if CMK_SMP && USE_CKLOOP
5169  if(simParams->useCkLoop) {
5170  //How many FFT plans to be created? The grain-size issue!!.
5171  //Currently, I am choosing the min(nx, ny) to be coarse-grain
5172  numPlans = (ny<=nz?ny:nz);
5173  // limit attempted parallelism due to false sharing
5174  //if ( numPlans < CkMyNodeSize() ) numPlans = (ny>=nz?ny:nz);
5175  //if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
5176  if ( sizeLines/numPlans < 4 ) numPlans = 1;
5177  int howmany = sizeLines/numPlans;
5178  forward_plans = new fftwf_plan[numPlans];
5179  backward_plans = new fftwf_plan[numPlans];
5180  for(int i=0; i<numPlans; i++) {
5181  int curStride = i*howmany;
5182  forward_plans[i] = fftwf_plan_many_dft(1, planLineSizes, howmany,
5183  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5184  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5185  FFTW_FORWARD,
5186  fftwFlags);
5187 
5188  backward_plans[i] = fftwf_plan_many_dft(1, planLineSizes, howmany,
5189  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5190  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5191  FFTW_BACKWARD,
5192  fftwFlags);
5193  }
5194  }else
5195 #endif
5196  {
5197  forward_plans = NULL;
5198  backward_plans = NULL;
5199  }
5200 #else
5201  forward_plan = fftw_create_plan_specific(K1, FFTW_FORWARD,
5202  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
5203  | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
5204  ny*nz, (fftw_complex *) work, 1);
5205  backward_plan = fftw_create_plan_specific(K1, FFTW_BACKWARD,
5206  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
5207  | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
5208  ny*nz, (fftw_complex *) work, 1);
5209 #endif
5210  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
5211 #else
5212  NAMD_die("Sorry, FFTW must be compiled in to use PME.");
5213 #endif
5214 
5216  thisIndex.y*block2, thisIndex.y*block2 + ny,
5217  thisIndex.z*block3, thisIndex.z*block3 + nz);
5218 
5219 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:442
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
Definition: Node.h:78
void order_init(int nBlocks)
Definition: ComputePme.C:4612
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:246
int block2
Definition: PmeBase.h:24
PmeKSpace * myKSpace
Definition: ComputePme.C:4822
int block3
Definition: PmeBase.h:24
void NAMD_die(const char *err_msg)
Definition: common.C:147
#define simParams
Definition: Output.C:131
fftw_plan backward_plan
Definition: ComputePme.C:4816
fftw_plan forward_plan
Definition: ComputePme.C:4816
#define fftwf_malloc
Definition: ComputePme.C:13

◆ forward_fft()

void PmeXPencil::forward_fft ( )

Definition at line 5742 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeXPencil >::data, forward_plan, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, ny, nz, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeXPencil >::work, PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5742  {
5743 #ifdef NAMD_FFTW
5744 
5745 #ifdef MANUAL_DEBUG_FFTW3
5746  dumpMatrixFloat3("fw_x_b", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5747 #endif
5748 
5749 #ifdef NAMD_FFTW_3
5750 #if CMK_SMP && USE_CKLOOP
5751  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5752  if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
5753  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
5754  //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
5755  //transform the above loop
5756  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5757  return;
5758  }
5759 #endif
5760  fftwf_execute(forward_plan);
5761 #else
5762  fftw(forward_plan, ny*nz,
5763  ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
5764 #endif
5765 #ifdef MANUAL_DEBUG_FFTW3
5766  dumpMatrixFloat3("fw_x_a", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5767 #endif
5768 
5769 #endif
5770 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5266
#define CKLOOP_CTRL_PME_FORWARDFFT
Definition: SimParameters.h:97
fftw_plan forward_plan
Definition: ComputePme.C:4816

◆ node_process_trans()

void PmeXPencil::node_process_trans ( PmeTransMsg * msg)

Definition at line 5685 of file ComputePme.C.

References backward_fft(), forward_fft(), PmeTransMsg::hasData, PmePencil< CBase_PmeXPencil >::hasData, PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::initdata, PmePencil< CBase_PmeXPencil >::needs_reply, pme_kspace(), recv_trans(), send_untrans(), PmeTransMsg::sourceNode, and PmePencilInitMsgData::xBlocks.

Referenced by NodePmeMgr::recvXTrans().

5686 {
5687  if(msg->hasData) hasData=1;
5688  needs_reply[msg->sourceNode] = msg->hasData;
5689  recv_trans(msg);
5690  int limsg;
5691  CmiMemoryAtomicFetchAndInc(imsg,limsg);
5692  if(limsg+1 == initdata.xBlocks)
5693  {
5694  if(hasData){
5695  forward_fft();
5696  pme_kspace();
5697  backward_fft();
5698  }
5699  send_untrans();
5700  imsg=0;
5701  CmiMemoryWriteFence();
5702  }
5703 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
void recv_trans(const PmeTransMsg *)
Definition: ComputePme.C:5705
void pme_kspace()
Definition: ComputePme.C:5772
void forward_fft()
Definition: ComputePme.C:5742
void send_untrans()
Definition: ComputePme.C:5893
int sourceNode
Definition: ComputePme.C:159
void backward_fft()
Definition: ComputePme.C:5802

◆ pme_kspace()

void PmeXPencil::pme_kspace ( )

Definition at line 5772 of file ComputePme.C.

References CKLOOP_CTRL_PME_KSPACE, PmeKSpace::compute_energy(), PmePencil< CBase_PmeXPencil >::data, PmePencil< CBase_PmeXPencil >::evir, ComputeNonbondedUtil::ewaldcof, PmePencil< CBase_PmeXPencil >::initdata, PmePencil< CBase_PmeXPencil >::lattice, myKSpace, Node::Object(), PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5772  {
5773 
5774  evir = 0.;
5775 
5776 #ifdef FFTCHECK
5777  return;
5778 #endif
5779 
5781 
5782  int useCkLoop = 0;
5783 #if CMK_SMP && USE_CKLOOP
5784  if ( Node::Object()->simParameters->useCkLoop >= CKLOOP_CTRL_PME_KSPACE
5785  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks ) {
5786  useCkLoop = 1;
5787  }
5788 #endif
5789 
5790  // XXX will need to extend pencil decomposition to support LJ-PME
5791  int numGrids = 1;
5792  for ( int g=0; g<numGrids; ++g ) {
5793  evir[0] = myKSpace->compute_energy(data+0*g,
5794  lattice, ewaldcof, &(evir[1]), useCkLoop);
5795  }
5796 
5797 #if USE_NODE_PAR_RECEIVE
5798  CmiMemoryWriteFence();
5799 #endif
5800 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
PmeKSpace * myKSpace
Definition: ComputePme.C:4822
double compute_energy(float q_arr[], const Lattice &lattice, double ewald, double virial[], int useCkLoop)
Definition: PmeKSpace.C:321
#define CKLOOP_CTRL_PME_KSPACE
Definition: SimParameters.h:99
double BigReal
Definition: common.h:123

◆ recv_trans()

void PmeXPencil::recv_trans ( const PmeTransMsg * msg)

Definition at line 5705 of file ComputePme.C.

References PmeGrid::block1, PmePencil< CBase_PmeXPencil >::data, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmeTransMsg::lattice, PmePencil< CBase_PmeXPencil >::lattice, PmeTransMsg::nx, ny, nz, PmeTransMsg::qgrid, PmeTransMsg::sequence, PmePencil< CBase_PmeXPencil >::sequence, and PmeTransMsg::sourceNode.

Referenced by node_process_trans().

5705  {
5706  if ( imsg == 0 ) {
5707  lattice = msg->lattice;
5708  sequence = msg->sequence;
5709  }
5710  int block1 = initdata.grid.block1;
5711  int K1 = initdata.grid.K1;
5712  int ib = msg->sourceNode;
5713  int nx = msg->nx;
5714  if ( msg->hasData ) {
5715  const float *md = msg->qgrid;
5716  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5717  float *d = data + i*ny*nz*2;
5718  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5719  for ( int k=0; k<nz; ++k ) {
5720 #ifdef ZEROCHECK
5721  if ( (*md) == 0. ) CkPrintf("0 in YX at %d %d %d %d %d %d %d %d %d\n",
5722  ib, thisIndex.y, thisIndex.z, i, j, k, nx, ny, nz);
5723 #endif
5724  d[2*k] = *(md++);
5725  d[2*k+1] = *(md++);
5726  }
5727  }
5728  }
5729  } else {
5730  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5731  float *d = data + i*ny*nz*2;
5732  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5733  for ( int k=0; k<nz; ++k ) {
5734  d[2*k] = 0;
5735  d[2*k+1] = 0;
5736  }
5737  }
5738  }
5739  }
5740 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
float * qgrid
Definition: ComputePme.C:165
int sourceNode
Definition: ComputePme.C:159
Lattice lattice
Definition: ComputePme.C:162

◆ send_subset_untrans()

void PmeXPencil::send_subset_untrans ( int  fromIdx,
int  toIdx 
)

Definition at line 5835 of file ComputePme.C.

References PmeGrid::block1, PmePencil< CBase_PmeXPencil >::data, PmeUntransMsg::destElem, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmePencil< CBase_PmeXPencil >::needs_reply, PmeUntransMsg::ny, ny, nz, PME_UNTRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeUntransMsg::qgrid, PmePencil< CBase_PmeXPencil >::send_order, PmePencil< CBase_PmeXPencil >::sequence, SET_PRIORITY, PmeUntransMsg::sourceNode, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::ym, and PmePencilInitMsgData::yPencil.

Referenced by PmeXPencilSendUntrans().

5835  {
5836  int xBlocks = initdata.xBlocks;
5837  int block1 = initdata.grid.block1;
5838  int K1 = initdata.grid.K1;
5839 
5840  for(int isend=fromIdx; isend<=toIdx; isend++) {
5841  int ib = send_order[isend];
5842  if ( ! needs_reply[ib] ) {
5843  PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
5844  CmiEnableUrgentSend(1);
5846 #if USE_NODE_PAR_RECEIVE
5847  initdata.yPencil(ib,0,thisIndex.z).recvNodeAck(msg);
5848 #else
5849  initdata.yPencil(ib,0,thisIndex.z).recvAck(msg);
5850 #endif
5851  CmiEnableUrgentSend(0);
5852  continue;
5853  }
5854  int nx = block1;
5855  if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
5856  PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
5857  msg->sourceNode = thisIndex.y;
5858  msg->ny = ny;
5859  float *md = msg->qgrid;
5860  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5861  float *d = data + i*ny*nz*2;
5862  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5863  for ( int k=0; k<nz; ++k ) {
5864  *(md++) = d[2*k];
5865  *(md++) = d[2*k+1];
5866  }
5867  }
5868  }
5870  CmiEnableUrgentSend(1);
5871 #if USE_NODE_PAR_RECEIVE
5872  msg->destElem=CkArrayIndex3D(ib,0, thisIndex.z);
5873 #if Y_PERSIST
5874  CmiUsePersistentHandle(&untrans_handle[isend], 1);
5875 #endif
5876  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYUntrans(msg);
5877 #if Y_PERSIST
5878  CmiUsePersistentHandle(NULL, 0);
5879 #endif
5880 #else
5881 #if Y_PERSIST
5882  // CmiUsePersistentHandle(&untrans_handle[isend], 1);
5883 #endif
5884  initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
5885 #if Y_PERSIST
5886  // CmiUsePersistentHandle(NULL, 0);
5887 #endif
5888 #endif
5889  CmiEnableUrgentSend(0);
5890  }
5891 }
float * qgrid
Definition: ComputePme.C:182
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:243
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:246
#define PRIORITY_SIZE
Definition: Priorities.h:13
#define PME_UNTRANS_PRIORITY
Definition: Priorities.h:33
CProxy_PmePencilMap ym
Definition: ComputePme.C:248
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CkArrayIndex3D destElem
Definition: ComputePme.C:183

◆ send_untrans()

void PmeXPencil::send_untrans ( )

Definition at line 5893 of file ComputePme.C.

References PmeGrid::block1, CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeXPencil >::data, PmeUntransMsg::destElem, PmeEvirMsg::evir, PmePencil< CBase_PmeXPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmePencil< CBase_PmeXPencil >::needs_reply, PmeUntransMsg::ny, ny, nz, Node::Object(), PME_UNGRID_PRIORITY, PME_UNTRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PmeXPencilSendUntrans(), PRIORITY_SIZE, PmeUntransMsg::qgrid, recipEvirPe, PmePencil< CBase_PmeXPencil >::send_order, PmePencil< CBase_PmeXPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeUntransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5893  {
5894 
5895  { // send energy and virial
5896  int numGrids = 1;
5897  PmeEvirMsg *newmsg = new (numGrids, PRIORITY_SIZE) PmeEvirMsg;
5898  newmsg->evir[0] = evir;
5900  CmiEnableUrgentSend(1);
5901  initdata.pmeProxy[recipEvirPe].recvRecipEvir(newmsg);
5902  CmiEnableUrgentSend(0);
5903  }
5904 
5905 #if USE_PERSISTENT
5906  if (untrans_handle == NULL) setup_persistent();
5907 #endif
5908 #if CMK_SMP && USE_CKLOOP
5909  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5910  if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
5911  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
5912  int xBlocks = initdata.xBlocks;
5913 
5914 #if USE_NODE_PAR_RECEIVE
5915  //CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, xBlocks-1, 1); //has to sync
5916  CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, xBlocks, 0, xBlocks-1, 1); //has to sync
5917 #else
5918  //CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, xBlocks-1, 0); //not sync
5919  CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, xBlocks, 0, xBlocks-1, 0); //not sync
5920 #endif
5921  return;
5922  }
5923 #endif
5924  int xBlocks = initdata.xBlocks;
5925  int block1 = initdata.grid.block1;
5926  int K1 = initdata.grid.K1;
5927  for ( int isend=0; isend<xBlocks; ++isend ) {
5928  int ib = send_order[isend];
5929  if ( ! needs_reply[ib] ) {
5930  PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
5931  CmiEnableUrgentSend(1);
5933 #if USE_NODE_PAR_RECEIVE
5934  initdata.yPencil(ib,0,thisIndex.z).recvNodeAck(msg);
5935 #else
5936  initdata.yPencil(ib,0,thisIndex.z).recvAck(msg);
5937 #endif
5938  CmiEnableUrgentSend(0);
5939  continue;
5940  }
5941  int nx = block1;
5942  if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
5943  PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
5944  msg->sourceNode = thisIndex.y;
5945  msg->ny = ny;
5946  float *md = msg->qgrid;
5947  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5948  float *d = data + i*ny*nz*2;
5949  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5950  for ( int k=0; k<nz; ++k ) {
5951  *(md++) = d[2*k];
5952  *(md++) = d[2*k+1];
5953  }
5954  }
5955  }
5957 
5958  CmiEnableUrgentSend(1);
5959 #if USE_NODE_PAR_RECEIVE
5960  msg->destElem=CkArrayIndex3D(ib,0, thisIndex.z);
5961 #if Y_PERSIST
5962  CmiUsePersistentHandle(&untrans_handle[isend], 1);
5963 #endif
5964  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYUntrans(msg);
5965 #if Y_PERSIST
5966  CmiUsePersistentHandle(NULL, 0);
5967 #endif
5968 #else
5969 #if Y_PERSIST
5970  CmiUsePersistentHandle(&untrans_handle[isend], 1);
5971 #endif
5972  initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
5973 #if Y_PERSIST
5974  CmiUsePersistentHandle(NULL, 0);
5975 #endif
5976 #endif
5977  CmiEnableUrgentSend(0);
5978  }
5979 }
static Node * Object()
Definition: Node.h:86
float * qgrid
Definition: ComputePme.C:182
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
#define PME_UNGRID_PRIORITY
Definition: Priorities.h:74
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:245
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:243
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:246
PmeReduction * evir
Definition: ComputePme.C:195
#define CKLOOP_CTRL_PME_SENDUNTRANS
#define PRIORITY_SIZE
Definition: Priorities.h:13
static void PmeXPencilSendUntrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5830
int recipEvirPe
Definition: ComputePme.C:4820
#define PME_UNTRANS_PRIORITY
Definition: Priorities.h:33
CProxy_PmePencilMap ym
Definition: ComputePme.C:248
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CkArrayIndex3D destElem
Definition: ComputePme.C:183

Member Data Documentation

◆ backward_plan

fftw_plan PmeXPencil::backward_plan

Definition at line 4816 of file ComputePme.C.

Referenced by backward_fft(), and fft_init().

◆ forward_plan

fftw_plan PmeXPencil::forward_plan

Definition at line 4816 of file ComputePme.C.

Referenced by fft_init(), and forward_fft().

◆ myKSpace

PmeKSpace* PmeXPencil::myKSpace

Definition at line 4822 of file ComputePme.C.

Referenced by fft_init(), pme_kspace(), and PmeXPencil().

◆ ny

int PmeXPencil::ny

◆ nz

int PmeXPencil::nz

◆ recipEvirPe

int PmeXPencil::recipEvirPe

Definition at line 4820 of file ComputePme.C.

Referenced by evir_init(), PmeXPencil(), and send_untrans().


The documentation for this class was generated from the following file: