NAMD
Public Member Functions | Public Attributes | List of all members
PmeXPencil Class Reference
Inheritance diagram for PmeXPencil:
PmePencil< CBase_PmeXPencil >

Public Member Functions

PmeXPencil_SDAG_CODE PmeXPencil ()
 
 PmeXPencil (CkMigrateMessage *)
 
 ~PmeXPencil ()
 
void fft_init ()
 
void recv_trans (const PmeTransMsg *)
 
void forward_fft ()
 
void pme_kspace ()
 
void backward_fft ()
 
void send_untrans ()
 
void send_subset_untrans (int fromIdx, int toIdx)
 
void node_process_trans (PmeTransMsg *)
 
void evir_init ()
 
- Public Member Functions inherited from PmePencil< CBase_PmeXPencil >
 PmePencil ()
 
 ~PmePencil ()
 
void base_init (PmePencilInitMsg *msg)
 
void order_init (int nBlocks)
 

Public Attributes

fftw_plan forward_plan
 
fftw_plan backward_plan
 
int ny
 
int nz
 
int recipEvirPe
 
PmeKSpace * myKSpace
 
- Public Attributes inherited from PmePencil< CBase_PmeXPencil >
PmePencilInitMsgData initdata
 
Lattice lattice
 
PmeReduction evir
 
int sequence
 
AtomicInt imsg
 
AtomicInt imsgb
 
int hasData
 
int offload
 
float * data
 
float * work
 
int * send_order
 
int * needs_reply
 

Additional Inherited Members

- Public Types inherited from PmePencil< CBase_PmeXPencil >
typedef int AtomicInt
 

Detailed Description

Definition at line 4730 of file ComputePme.C.

Constructor & Destructor Documentation

◆ PmeXPencil() [1/2]

PmeXPencil_SDAG_CODE PmeXPencil::PmeXPencil ( )
inline

Definition at line 4733 of file ComputePme.C.

References PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::imsgb, myKSpace, and recipEvirPe.

4733 { __sdag_init(); myKSpace = 0; setMigratable(false); imsg=imsgb=0; recipEvirPe = -999; }
PmeKSpace * myKSpace
Definition: ComputePme.C:4764
int recipEvirPe
Definition: ComputePme.C:4762

◆ PmeXPencil() [2/2]

PmeXPencil::PmeXPencil ( CkMigrateMessage *  )
inline

Definition at line 4734 of file ComputePme.C.

4734 { __sdag_init(); }

◆ ~PmeXPencil()

PmeXPencil::~PmeXPencil ( )
inline

Definition at line 4735 of file ComputePme.C.

4735  {
4736  #ifdef NAMD_FFTW
4737  #ifdef NAMD_FFTW_3
4738  delete [] forward_plans;
4739  delete [] backward_plans;
4740  #endif
4741  #endif
4742  }

Member Function Documentation

◆ backward_fft()

void PmeXPencil::backward_fft ( )

Definition at line 5743 of file ComputePme.C.

References backward_plan, CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeXPencil >::data, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, ny, nz, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeXPencil >::work, PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5743  {
5744 #ifdef NAMD_FFTW
5745 #ifdef MANUAL_DEBUG_FFTW3
5746  dumpMatrixFloat3("bw_x_b", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5747 #endif
5748 
5749 #ifdef NAMD_FFTW_3
5750 #if CMK_SMP && USE_CKLOOP
5751  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5752  if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
5753  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
5754  //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
5755  //transform the above loop
5756  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5757  return;
5758  }
5759 #endif
5760  fftwf_execute(backward_plan);
5761 #else
5762  fftw(backward_plan, ny*nz,
5763  ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
5764 #endif
5765 #ifdef MANUAL_DEBUG_FFTW3
5766  dumpMatrixFloat3("bw_x_a", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5767 #endif
5768 #endif
5769 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5208
fftw_plan backward_plan
Definition: ComputePme.C:4758
#define CKLOOP_CTRL_PME_BACKWARDFFT
Definition: SimParameters.h:99

◆ evir_init()

void PmeXPencil::evir_init ( )

Definition at line 4788 of file ComputePme.C.

References findRecipEvirPe(), PmePencil< CBase_PmeXPencil >::initdata, PmePencilInitMsgData::pmeProxy, and recipEvirPe.

4788  {
4789  recipEvirPe = findRecipEvirPe();
4790  initdata.pmeProxy[recipEvirPe].addRecipEvirClient();
4791 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:243
static int findRecipEvirPe()
Definition: ComputePme.C:267
int recipEvirPe
Definition: ComputePme.C:4762

◆ fft_init()

void PmeXPencil::fft_init ( )

Definition at line 5066 of file ComputePme.C.

References backward_plan, PmeGrid::block2, PmeGrid::block3, PmePencil< CBase_PmeXPencil >::data, PmeGrid::dim3, ComputePmeMgr::fftw_plan_lock, fftwf_malloc, forward_plan, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmeGrid::K2, myKSpace, NAMD_die(), ny, nz, PmePencil< CBase_PmeXPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeXPencil >::work, and PmePencilInitMsgData::xBlocks.

5066  {
5067  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
5068  Node *node = nd.ckLocalBranch();
5070 #if USE_NODE_PAR_RECEIVE
5071  ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerXPencil(thisIndex,this);
5072 #endif
5073 
5074  int K1 = initdata.grid.K1;
5075  int K2 = initdata.grid.K2;
5076  int dim3 = initdata.grid.dim3;
5077  int block2 = initdata.grid.block2;
5078  int block3 = initdata.grid.block3;
5079 
5080  ny = block2;
5081  if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
5082  nz = block3;
5083  if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
5084 
5085 #ifdef NAMD_FFTW
5087 
5088  data = (float *) fftwf_malloc( sizeof(float) * K1*ny*nz*2);
5089  work = new float[2*K1];
5090 
5092 
5093 #ifdef NAMD_FFTW_3
5094  /* need array of sizes for the how many */
5095  int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
5096  int sizeLines=ny*nz;
5097  int planLineSizes[1];
5098  planLineSizes[0]=K1;
5099  forward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
5100  (fftwf_complex *) data, NULL, sizeLines, 1,
5101  (fftwf_complex *) data, NULL, sizeLines, 1,
5102  FFTW_FORWARD,
5103  fftwFlags);
5104  backward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
5105  (fftwf_complex *) data, NULL, sizeLines, 1,
5106  (fftwf_complex *) data, NULL, sizeLines, 1,
5107  FFTW_BACKWARD,
5108  fftwFlags);
5109 
5110 #if CMK_SMP && USE_CKLOOP
5111  if(simParams->useCkLoop) {
5112  //How many FFT plans to be created? The grain-size issue!!.
5113  //Currently, I am choosing the min(nx, ny) to be coarse-grain
5114  numPlans = (ny<=nz?ny:nz);
5115  // limit attempted parallelism due to false sharing
5116  //if ( numPlans < CkMyNodeSize() ) numPlans = (ny>=nz?ny:nz);
5117  //if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
5118  if ( sizeLines/numPlans < 4 ) numPlans = 1;
5119  int howmany = sizeLines/numPlans;
5120  forward_plans = new fftwf_plan[numPlans];
5121  backward_plans = new fftwf_plan[numPlans];
5122  for(int i=0; i<numPlans; i++) {
5123  int curStride = i*howmany;
5124  forward_plans[i] = fftwf_plan_many_dft(1, planLineSizes, howmany,
5125  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5126  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5127  FFTW_FORWARD,
5128  fftwFlags);
5129 
5130  backward_plans[i] = fftwf_plan_many_dft(1, planLineSizes, howmany,
5131  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5132  ((fftwf_complex *)data)+curStride, NULL, sizeLines, 1,
5133  FFTW_BACKWARD,
5134  fftwFlags);
5135  }
5136  }else
5137 #endif
5138  {
5139  forward_plans = NULL;
5140  backward_plans = NULL;
5141  }
5142 #else
5143  forward_plan = fftw_create_plan_specific(K1, FFTW_FORWARD,
5144  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
5145  | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
5146  ny*nz, (fftw_complex *) work, 1);
5147  backward_plan = fftw_create_plan_specific(K1, FFTW_BACKWARD,
5148  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
5149  | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
5150  ny*nz, (fftw_complex *) work, 1);
5151 #endif
5152  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
5153 #else
5154  NAMD_die("Sorry, FFTW must be compiled in to use PME.");
5155 #endif
5156 
5157  myKSpace = new PmeKSpace(initdata.grid,
5158  thisIndex.y*block2, thisIndex.y*block2 + ny,
5159  thisIndex.z*block3, thisIndex.z*block3 + nz);
5160 
5161 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:440
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
Node
Definition: Node.h:78
void order_init(int nBlocks)
Definition: ComputePme.C:4554
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
int block2
Definition: PmeBase.h:24
PmeKSpace * myKSpace
Definition: ComputePme.C:4764
int block3
Definition: PmeBase.h:24
void NAMD_die(const char *err_msg)
Definition: common.C:147
#define simParams
Definition: Output.C:129
fftw_plan backward_plan
Definition: ComputePme.C:4758
fftw_plan forward_plan
Definition: ComputePme.C:4758
#define fftwf_malloc
Definition: ComputePme.C:13

◆ forward_fft()

void PmeXPencil::forward_fft ( )

Definition at line 5684 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeXPencil >::data, forward_plan, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, ny, nz, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeXPencil >::work, PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5684  {
5685 #ifdef NAMD_FFTW
5686 
5687 #ifdef MANUAL_DEBUG_FFTW3
5688  dumpMatrixFloat3("fw_x_b", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5689 #endif
5690 
5691 #ifdef NAMD_FFTW_3
5692 #if CMK_SMP && USE_CKLOOP
5693  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5694  if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
5695  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
5696  //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
5697  //transform the above loop
5698  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5699  return;
5700  }
5701 #endif
5702  fftwf_execute(forward_plan);
5703 #else
5704  fftw(forward_plan, ny*nz,
5705  ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
5706 #endif
5707 #ifdef MANUAL_DEBUG_FFTW3
5708  dumpMatrixFloat3("fw_x_a", data, initdata.grid.K1, ny, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5709 #endif
5710 
5711 #endif
5712 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5208
#define CKLOOP_CTRL_PME_FORWARDFFT
Definition: SimParameters.h:96
fftw_plan forward_plan
Definition: ComputePme.C:4758

◆ node_process_trans()

void PmeXPencil::node_process_trans ( PmeTransMsg * msg )

Definition at line 5627 of file ComputePme.C.

References backward_fft(), forward_fft(), PmeTransMsg::hasData, PmePencil< CBase_PmeXPencil >::hasData, PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::initdata, PmePencil< CBase_PmeXPencil >::needs_reply, pme_kspace(), recv_trans(), send_untrans(), PmeTransMsg::sourceNode, and PmePencilInitMsgData::xBlocks.

Referenced by NodePmeMgr::recvXTrans().

5628 {
5629  if(msg->hasData) hasData=1;
5630  needs_reply[msg->sourceNode] = msg->hasData;
5631  recv_trans(msg);
5632  int limsg;
5633  CmiMemoryAtomicFetchAndInc(imsg,limsg);
5634  if(limsg+1 == initdata.xBlocks)
5635  {
5636  if(hasData){
5637  forward_fft();
5638  pme_kspace();
5639  backward_fft();
5640  }
5641  send_untrans();
5642  imsg=0;
5643  CmiMemoryWriteFence();
5644  }
5645 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
void recv_trans(const PmeTransMsg *)
Definition: ComputePme.C:5647
void pme_kspace()
Definition: ComputePme.C:5714
void forward_fft()
Definition: ComputePme.C:5684
void send_untrans()
Definition: ComputePme.C:5834
int sourceNode
Definition: ComputePme.C:157
void backward_fft()
Definition: ComputePme.C:5743

◆ pme_kspace()

void PmeXPencil::pme_kspace ( )

Definition at line 5714 of file ComputePme.C.

References CKLOOP_CTRL_PME_KSPACE, PmeKSpace::compute_energy(), PmePencil< CBase_PmeXPencil >::data, PmePencil< CBase_PmeXPencil >::evir, ComputeNonbondedUtil::ewaldcof, PmePencil< CBase_PmeXPencil >::initdata, PmePencil< CBase_PmeXPencil >::lattice, myKSpace, Node::Object(), PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5714  {
5715 
5716  evir = 0.;
5717 
5718 #ifdef FFTCHECK
5719  return;
5720 #endif
5721 
5723 
5724  int useCkLoop = 0;
5725 #if CMK_SMP && USE_CKLOOP
5726  if ( Node::Object()->simParameters->useCkLoop >= CKLOOP_CTRL_PME_KSPACE
5727  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks ) {
5728  useCkLoop = 1;
5729  }
5730 #endif
5731 
5732  int numGrids = 1;
5733  for ( int g=0; g<numGrids; ++g ) {
5734  evir[0] = myKSpace->compute_energy(data+0*g,
5735  lattice, ewaldcof, &(evir[1]), useCkLoop);
5736  }
5737 
5738 #if USE_NODE_PAR_RECEIVE
5739  CmiMemoryWriteFence();
5740 #endif
5741 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
PmeKSpace * myKSpace
Definition: ComputePme.C:4764
double compute_energy(float q_arr[], const Lattice &lattice, double ewald, double virial[], int useCkLoop)
Definition: PmeKSpace.C:321
#define CKLOOP_CTRL_PME_KSPACE
Definition: SimParameters.h:98
double BigReal
Definition: common.h:123

◆ recv_trans()

void PmeXPencil::recv_trans ( const PmeTransMsg * msg )

Definition at line 5647 of file ComputePme.C.

References PmeGrid::block1, PmePencil< CBase_PmeXPencil >::data, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeXPencil >::imsg, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmeTransMsg::lattice, PmePencil< CBase_PmeXPencil >::lattice, PmeTransMsg::nx, ny, nz, PmeTransMsg::qgrid, PmeTransMsg::sequence, PmePencil< CBase_PmeXPencil >::sequence, and PmeTransMsg::sourceNode.

Referenced by node_process_trans().

5647  {
5648  if ( imsg == 0 ) {
5649  lattice = msg->lattice;
5650  sequence = msg->sequence;
5651  }
5652  int block1 = initdata.grid.block1;
5653  int K1 = initdata.grid.K1;
5654  int ib = msg->sourceNode;
5655  int nx = msg->nx;
5656  if ( msg->hasData ) {
5657  const float *md = msg->qgrid;
5658  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5659  float *d = data + i*ny*nz*2;
5660  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5661  for ( int k=0; k<nz; ++k ) {
5662 #ifdef ZEROCHECK
5663  if ( (*md) == 0. ) CkPrintf("0 in YX at %d %d %d %d %d %d %d %d %d\n",
5664  ib, thisIndex.y, thisIndex.z, i, j, k, nx, ny, nz);
5665 #endif
5666  d[2*k] = *(md++);
5667  d[2*k+1] = *(md++);
5668  }
5669  }
5670  }
5671  } else {
5672  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5673  float *d = data + i*ny*nz*2;
5674  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5675  for ( int k=0; k<nz; ++k ) {
5676  d[2*k] = 0;
5677  d[2*k+1] = 0;
5678  }
5679  }
5680  }
5681  }
5682 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
float * qgrid
Definition: ComputePme.C:163
int sourceNode
Definition: ComputePme.C:157
Lattice lattice
Definition: ComputePme.C:160

◆ send_subset_untrans()

void PmeXPencil::send_subset_untrans ( int  fromIdx,
int  toIdx 
)

Definition at line 5776 of file ComputePme.C.

References PmeGrid::block1, PmePencil< CBase_PmeXPencil >::data, PmeUntransMsg::destElem, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmePencil< CBase_PmeXPencil >::needs_reply, PmeUntransMsg::ny, ny, nz, PME_UNTRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeUntransMsg::qgrid, PmePencil< CBase_PmeXPencil >::send_order, PmePencil< CBase_PmeXPencil >::sequence, SET_PRIORITY, PmeUntransMsg::sourceNode, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::ym, and PmePencilInitMsgData::yPencil.

Referenced by PmeXPencilSendUntrans().

5776  {
5777  int xBlocks = initdata.xBlocks;
5778  int block1 = initdata.grid.block1;
5779  int K1 = initdata.grid.K1;
5780 
5781  for(int isend=fromIdx; isend<=toIdx; isend++) {
5782  int ib = send_order[isend];
5783  if ( ! needs_reply[ib] ) {
5784  PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
5785  CmiEnableUrgentSend(1);
5787 #if USE_NODE_PAR_RECEIVE
5788  initdata.yPencil(ib,0,thisIndex.z).recvNodeAck(msg);
5789 #else
5790  initdata.yPencil(ib,0,thisIndex.z).recvAck(msg);
5791 #endif
5792  CmiEnableUrgentSend(0);
5793  continue;
5794  }
5795  int nx = block1;
5796  if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
5797  PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
5798  msg->sourceNode = thisIndex.y;
5799  msg->ny = ny;
5800  float *md = msg->qgrid;
5801  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5802  float *d = data + i*ny*nz*2;
5803  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5804  for ( int k=0; k<nz; ++k ) {
5805  *(md++) = d[2*k];
5806  *(md++) = d[2*k+1];
5807  }
5808  }
5809  }
5811  CmiEnableUrgentSend(1);
5812 #if USE_NODE_PAR_RECEIVE
5813  msg->destElem=CkArrayIndex3D(ib,0, thisIndex.z);
5814 #if Y_PERSIST
5815  CmiUsePersistentHandle(&untrans_handle[isend], 1);
5816 #endif
5817  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYUntrans(msg);
5818 #if Y_PERSIST
5819  CmiUsePersistentHandle(NULL, 0);
5820 #endif
5821 #else
5822 #if Y_PERSIST
5823  // CmiUsePersistentHandle(&untrans_handle[isend], 1);
5824 #endif
5825  initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
5826 #if Y_PERSIST
5827  // CmiUsePersistentHandle(NULL, 0);
5828 #endif
5829 #endif
5830  CmiEnableUrgentSend(0);
5831  }
5832 }
float * qgrid
Definition: ComputePme.C:180
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:241
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
#define PRIORITY_SIZE
Definition: Priorities.h:13
#define PME_UNTRANS_PRIORITY
Definition: Priorities.h:33
CProxy_PmePencilMap ym
Definition: ComputePme.C:246
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CkArrayIndex3D destElem
Definition: ComputePme.C:181

◆ send_untrans()

void PmeXPencil::send_untrans ( )

Definition at line 5834 of file ComputePme.C.

References PmeGrid::block1, CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeXPencil >::data, PmeUntransMsg::destElem, PmeEvirMsg::evir, PmePencil< CBase_PmeXPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeXPencil >::initdata, PmeGrid::K1, PmePencil< CBase_PmeXPencil >::needs_reply, PmeUntransMsg::ny, ny, nz, Node::Object(), PME_UNGRID_PRIORITY, PME_UNTRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PmeXPencilSendUntrans(), PRIORITY_SIZE, PmeUntransMsg::qgrid, recipEvirPe, PmePencil< CBase_PmeXPencil >::send_order, PmePencil< CBase_PmeXPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeUntransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5834  {
5835 
5836  { // send energy and virial
5837  int numGrids = 1;
5838  PmeEvirMsg *newmsg = new (numGrids, PRIORITY_SIZE) PmeEvirMsg;
5839  newmsg->evir[0] = evir;
5841  CmiEnableUrgentSend(1);
5842  initdata.pmeProxy[recipEvirPe].recvRecipEvir(newmsg);
5843  CmiEnableUrgentSend(0);
5844  }
5845 
5846 #if USE_PERSISTENT
5847  if (untrans_handle == NULL) setup_persistent();
5848 #endif
5849 #if CMK_SMP && USE_CKLOOP
5850  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5851  if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
5852  && CkNumPes() >= 2 * initdata.yBlocks * initdata.zBlocks) {
5853  int xBlocks = initdata.xBlocks;
5854 
5855 #if USE_NODE_PAR_RECEIVE
5856  //CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, xBlocks-1, 1); //has to sync
5857  CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, xBlocks, 0, xBlocks-1, 1); //has to sync
5858 #else
5859  //CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, xBlocks-1, 0); //not sync
5860  CkLoop_Parallelize(PmeXPencilSendUntrans, 1, (void *)this, xBlocks, 0, xBlocks-1, 0); //not sync
5861 #endif
5862  return;
5863  }
5864 #endif
5865  int xBlocks = initdata.xBlocks;
5866  int block1 = initdata.grid.block1;
5867  int K1 = initdata.grid.K1;
5868  for ( int isend=0; isend<xBlocks; ++isend ) {
5869  int ib = send_order[isend];
5870  if ( ! needs_reply[ib] ) {
5871  PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
5872  CmiEnableUrgentSend(1);
5874 #if USE_NODE_PAR_RECEIVE
5875  initdata.yPencil(ib,0,thisIndex.z).recvNodeAck(msg);
5876 #else
5877  initdata.yPencil(ib,0,thisIndex.z).recvAck(msg);
5878 #endif
5879  CmiEnableUrgentSend(0);
5880  continue;
5881  }
5882  int nx = block1;
5883  if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
5884  PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
5885  msg->sourceNode = thisIndex.y;
5886  msg->ny = ny;
5887  float *md = msg->qgrid;
5888  for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
5889  float *d = data + i*ny*nz*2;
5890  for ( int j=0; j<ny; ++j, d += nz*2 ) {
5891  for ( int k=0; k<nz; ++k ) {
5892  *(md++) = d[2*k];
5893  *(md++) = d[2*k+1];
5894  }
5895  }
5896  }
5898 
5899  CmiEnableUrgentSend(1);
5900 #if USE_NODE_PAR_RECEIVE
5901  msg->destElem=CkArrayIndex3D(ib,0, thisIndex.z);
5902 #if Y_PERSIST
5903  CmiUsePersistentHandle(&untrans_handle[isend], 1);
5904 #endif
5905  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYUntrans(msg);
5906 #if Y_PERSIST
5907  CmiUsePersistentHandle(NULL, 0);
5908 #endif
5909 #else
5910 #if Y_PERSIST
5911  CmiUsePersistentHandle(&untrans_handle[isend], 1);
5912 #endif
5913  initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
5914 #if Y_PERSIST
5915  CmiUsePersistentHandle(NULL, 0);
5916 #endif
5917 #endif
5918  CmiEnableUrgentSend(0);
5919  }
5920 }
static Node * Object()
Definition: Node.h:86
float * qgrid
Definition: ComputePme.C:180
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
#define PME_UNGRID_PRIORITY
Definition: Priorities.h:74
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:243
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:241
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
PmeReduction * evir
Definition: ComputePme.C:193
#define CKLOOP_CTRL_PME_SENDUNTRANS
#define PRIORITY_SIZE
Definition: Priorities.h:13
static void PmeXPencilSendUntrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5771
int recipEvirPe
Definition: ComputePme.C:4762
#define PME_UNTRANS_PRIORITY
Definition: Priorities.h:33
CProxy_PmePencilMap ym
Definition: ComputePme.C:246
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CkArrayIndex3D destElem
Definition: ComputePme.C:181

Member Data Documentation

◆ backward_plan

fftw_plan PmeXPencil::backward_plan

Definition at line 4758 of file ComputePme.C.

Referenced by backward_fft(), and fft_init().

◆ forward_plan

fftw_plan PmeXPencil::forward_plan

Definition at line 4758 of file ComputePme.C.

Referenced by fft_init(), and forward_fft().

◆ myKSpace

PmeKSpace* PmeXPencil::myKSpace

Definition at line 4764 of file ComputePme.C.

Referenced by fft_init(), pme_kspace(), and PmeXPencil().

◆ ny

int PmeXPencil::ny

◆ nz

int PmeXPencil::nz

◆ recipEvirPe

int PmeXPencil::recipEvirPe

Definition at line 4762 of file ComputePme.C.

Referenced by evir_init(), PmeXPencil(), and send_untrans().


The documentation for this class was generated from the following file: