NAMD
Public Member Functions | List of all members
PmeYPencil Class Reference
Inheritance diagram for PmeYPencil:
PmePencil< CBase_PmeYPencil >

Public Member Functions

PmeYPencil_SDAG_CODE PmeYPencil ()
 
 PmeYPencil (CkMigrateMessage *)
 
void fft_init ()
 
void recv_trans (const PmeTransMsg *)
 
void forward_fft ()
 
void forward_subset_fft (int fromIdx, int toIdx)
 
void send_trans ()
 
void send_subset_trans (int fromIdx, int toIdx)
 
void recv_untrans (const PmeUntransMsg *)
 
void node_process_trans (PmeTransMsg *)
 
void recvNodeAck (PmeAckMsg *)
 
void node_process_untrans (PmeUntransMsg *)
 
void backward_fft ()
 
void backward_subset_fft (int fromIdx, int toIdx)
 
void send_untrans ()
 
void send_subset_untrans (int fromIdx, int toIdx)
 
- Public Member Functions inherited from PmePencil< CBase_PmeYPencil >
 PmePencil ()
 
 ~PmePencil ()
 
void base_init (PmePencilInitMsg *msg)
 
void order_init (int nBlocks)
 

Additional Inherited Members

- Public Types inherited from PmePencil< CBase_PmeYPencil >
typedef int AtomicInt
 
- Public Attributes inherited from PmePencil< CBase_PmeYPencil >
PmePencilInitMsgData initdata
 
Lattice lattice
 
PmeReduction evir
 
int sequence
 
AtomicInt imsg
 
AtomicInt imsgb
 
int hasData
 
int offload
 
float * data
 
float * work
 
int * send_order
 
int * needs_reply
 

Detailed Description

Definition at line 4666 of file ComputePme.C.

Constructor & Destructor Documentation

◆ PmeYPencil() [1/2]

PmeYPencil_SDAG_CODE PmeYPencil::PmeYPencil ( )
inline

Definition at line 4669 of file ComputePme.C.

References PmePencil< CBase_PmeYPencil >::imsg, and PmePencil< CBase_PmeYPencil >::imsgb.

4669 { __sdag_init(); setMigratable(false); imsg=imsgb=0;}

◆ PmeYPencil() [2/2]

PmeYPencil::PmeYPencil ( CkMigrateMessage *  )
inline

Definition at line 4670 of file ComputePme.C.

4670 { __sdag_init(); }

Member Function Documentation

◆ backward_fft()

void PmeYPencil::backward_fft ( )

Definition at line 5963 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeYPencil >::data, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, Node::Object(), PmeYPencilBackwardFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeYPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_untrans().

5963  {
5964 #ifdef NAMD_FFTW
5965 #ifdef MANUAL_DEBUG_FFTW3
5966  dumpMatrixFloat3("bw_y_b", data, nx, initdata.grid.K2, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5967 #endif
5968 
5969 #ifdef NAMD_FFTW_3
5970 #if CMK_SMP && USE_CKLOOP
5971  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5972  if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
5973  && CkNumPes() >= 2 * initdata.xBlocks * initdata.zBlocks) {
5974  CkLoop_Parallelize(PmeYPencilBackwardFFT, 1, (void *)this, CkMyNodeSize(), 0, nx-1); //sync
5975  return;
5976  }
5977 #endif
5978  //the above is a transformation of the following loop using CkLoop
5979  for ( int i=0; i<nx; ++i ) {
5980 #if CMK_BLUEGENEL
5981  CmiNetworkProgress();
5982 #endif
5983  fftwf_execute_dft(backward_plan,
5984  ((fftwf_complex *) data) + i * nz * initdata.grid.K2,
5985  ((fftwf_complex *) data) + i * nz * initdata.grid.K2);
5986  }
5987 #else
5988  for ( int i=0; i<nx; ++i ) {
5989 #if CMK_BLUEGENEL
5990  CmiNetworkProgress();
5991 #endif
5992  fftw(backward_plan, nz,
5993  ((fftw_complex *) data) + i * nz * initdata.grid.K2,
5994  nz, 1, (fftw_complex *) work, 1, 0);
5995  }
5996 #endif
5997 
5998 #ifdef MANUAL_DEBUG_FFTW3
5999  dumpMatrixFloat3("bw_y_a", data, nx, initdata.grid.K2, nz, thisIndex.x, thisIndex.y, thisIndex.z);
6000 #endif
6001 
6002 #endif
6003 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
static void PmeYPencilBackwardFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5946
#define CKLOOP_CTRL_PME_BACKWARDFFT
Definition: SimParameters.h:99

◆ backward_subset_fft()

void PmeYPencil::backward_subset_fft ( int  fromIdx,
int  toIdx 
)

Definition at line 5951 of file ComputePme.C.

References PmePencil< CBase_PmeYPencil >::data, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, and PmeGrid::K2.

Referenced by PmeYPencilBackwardFFT().

5951  {
5952 #ifdef NAMD_FFTW
5953 #ifdef NAMD_FFTW_3
5954  for(int i=fromIdx; i<=toIdx; i++){
5955  fftwf_execute_dft(backward_plan,
5956  ((fftwf_complex *) data) + i * nz * initdata.grid.K2,
5957  ((fftwf_complex *) data) + i * nz * initdata.grid.K2);
5958  }
5959 #endif
5960 #endif
5961 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21

◆ fft_init()

void PmeYPencil::fft_init ( )

Definition at line 4896 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block3, PmePencil< CBase_PmeYPencil >::data, PmeGrid::dim2, PmeGrid::dim3, PmePencil< CBase_PmeYPencil >::evir, ComputePmeMgr::fftw_plan_lock, fftwf_malloc, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K1, PmeGrid::K2, NAMD_die(), PmePencil< CBase_PmeYPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeYPencil >::work, and PmePencilInitMsgData::yBlocks.

4896  {
4897  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
4898  Node *node = nd.ckLocalBranch();
4899  SimParameters *simParams = node->simParameters;
4900 
4901 #if USE_NODE_PAR_RECEIVE
4902  ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerYPencil(thisIndex,this);
4903 #endif
4904 
4905  int K1 = initdata.grid.K1;
4906  int K2 = initdata.grid.K2;
4907  int dim2 = initdata.grid.dim2;
4908  int dim3 = initdata.grid.dim3;
4909  int block1 = initdata.grid.block1;
4910  int block3 = initdata.grid.block3;
4911 
4912  nx = block1;
4913  if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
4914  nz = block3;
4915  if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
4916 
4917 #ifdef NAMD_FFTW
4918  CmiLock(ComputePmeMgr::fftw_plan_lock);
4919 
4920  data = (float *) fftwf_malloc( sizeof(float) * nx*dim2*nz*2);
4921  work = new float[2*K2];
4922 
4923  order_init(initdata.yBlocks);
4924 
4925 #ifdef NAMD_FFTW_3
4926  /* need array of sizes for the dimensions */
4927  /* ideally this should be implementable as a single multidimensional
4928  * plan, but that has proven tricky to implement, so we maintain the
4929  * loop of 1d plan executions. */
4930  int sizeLines=nz;
4931  int planLineSizes[1];
4932  planLineSizes[0]=K2;
4933  int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
4934  forward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
4935  (fftwf_complex *) data, NULL, sizeLines, 1,
4936  (fftwf_complex *) data, NULL, sizeLines, 1,
4937  FFTW_FORWARD,
4938  fftwFlags);
4939  backward_plan = fftwf_plan_many_dft(1, planLineSizes, sizeLines,
4940  (fftwf_complex *) data, NULL, sizeLines, 1,
4941  (fftwf_complex *) data, NULL, sizeLines, 1,
4942  FFTW_BACKWARD,
4943  fftwFlags);
4944  CkAssert(forward_plan != NULL);
4945  CkAssert(backward_plan != NULL);
4946 #else
4947  forward_plan = fftw_create_plan_specific(K2, FFTW_FORWARD,
4948  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4949  | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
4950  nz, (fftw_complex *) work, 1);
4951  backward_plan = fftw_create_plan_specific(K2, FFTW_BACKWARD,
4952  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4953  | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
4954  nz, (fftw_complex *) work, 1);
4955 #endif
4956  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
4957 #else
4958  NAMD_die("Sorry, FFTW must be compiled in to use PME.");
4959 #endif
4960 
4961 #if USE_NODE_PAR_RECEIVE
4962  evir = 0;
4963  CmiMemoryWriteFence();
4964 #endif
4965 }
int dim2
Definition: PmeBase.h:22
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:440
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
Node
Definition: Node.h:78
void order_init(int nBlocks)
Definition: ComputePme.C:4554
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
int block3
Definition: PmeBase.h:24
void NAMD_die(const char *err_msg)
Definition: common.C:147
#define simParams
Definition: Output.C:129
#define fftwf_malloc
Definition: ComputePme.C:13

◆ forward_fft()

void PmeYPencil::forward_fft ( )

Definition at line 5453 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeYPencil >::data, PmeGrid::dim2, PmePencil< CBase_PmeYPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, Node::Object(), PmeYPencilForwardFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeYPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5453  {
5454  evir = 0.;
5455 #ifdef NAMD_FFTW
5456 #ifdef MANUAL_DEBUG_FFTW3
5457  dumpMatrixFloat3("fw_y_b", data, nx, initdata.grid.K2, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5458 #endif
5459 
5460 #ifdef NAMD_FFTW_3
5461 #if CMK_SMP && USE_CKLOOP
5462  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5463  if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
5464  && CkNumPes() >= 2 * initdata.xBlocks * initdata.zBlocks) {
5465  CkLoop_Parallelize(PmeYPencilForwardFFT, 1, (void *)this, CkMyNodeSize(), 0, nx-1); //sync
5466  return;
5467  }
5468 #endif
5469  //the above is a transformation of the following loop using CkLoop
5470  for ( int i=0; i<nx; ++i ) {
5471  fftwf_execute_dft(forward_plan, ((fftwf_complex *) data) + i
5472  * nz * initdata.grid.K2,
5473  ((fftwf_complex *) data) + i * nz * initdata.grid.K2);
5474  }
5475 #else
5476  for ( int i=0; i<nx; ++i ) {
5477  fftw(forward_plan, nz,
5478  ((fftw_complex *) data) + i * nz * initdata.grid.K2,
5479  nz, 1, (fftw_complex *) work, 1, 0);
5480  }
5481 #endif
5482 #ifdef MANUAL_DEBUG_FFTW3
5483  dumpMatrixFloat3("fw_y_a", data, nx, initdata.grid.dim2, nz, thisIndex.x, thisIndex.y, thisIndex.z);
5484 #endif
5485 
5486 #endif
5487 }
static Node * Object()
Definition: Node.h:86
int dim2
Definition: PmeBase.h:22
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
#define CKLOOP_CTRL_PME_FORWARDFFT
Definition: SimParameters.h:96
static void PmeYPencilForwardFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5437

◆ forward_subset_fft()

void PmeYPencil::forward_subset_fft ( int  fromIdx,
int  toIdx 
)

Definition at line 5441 of file ComputePme.C.

References PmePencil< CBase_PmeYPencil >::data, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, and PmeGrid::K2.

Referenced by PmeYPencilForwardFFT().

5441  {
5442 #ifdef NAMD_FFTW
5443 #ifdef NAMD_FFTW_3
5444  for(int i=fromIdx; i<=toIdx; i++){
5445  fftwf_execute_dft(forward_plan, ((fftwf_complex *) data) + i
5446  * nz * initdata.grid.K2,
5447  ((fftwf_complex *) data) + i * nz * initdata.grid.K2);
5448  }
5449 #endif
5450 #endif
5451 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21

◆ node_process_trans()

void PmeYPencil::node_process_trans ( PmeTransMsg msg)

Definition at line 4967 of file ComputePme.C.

References forward_fft(), PmeTransMsg::hasData, PmePencil< CBase_PmeYPencil >::hasData, PmePencil< CBase_PmeYPencil >::imsg, PmePencil< CBase_PmeYPencil >::initdata, PmePencil< CBase_PmeYPencil >::needs_reply, recv_trans(), send_trans(), PmeTransMsg::sourceNode, and PmePencilInitMsgData::yBlocks.

Referenced by NodePmeMgr::recvYTrans().

4968 {
4969  if ( msg->hasData ) hasData = 1;
4970  needs_reply[msg->sourceNode] = msg->hasData;
4971  recv_trans(msg);
4972  int limsg;
4973  CmiMemoryAtomicFetchAndInc(imsg,limsg);
4974  if(limsg+1 == initdata.yBlocks)
4975  {
4976  if ( hasData ) {
4977  forward_fft();
4978  }
4979  send_trans();
4980  imsg=0;
4981  CmiMemoryWriteFence();
4982  }
4983 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
void recv_trans(const PmeTransMsg *)
Definition: ComputePme.C:5400
void send_trans()
Definition: ComputePme.C:5551
int sourceNode
Definition: ComputePme.C:157
void forward_fft()
Definition: ComputePme.C:5453

◆ node_process_untrans()

void PmeYPencil::node_process_untrans ( PmeUntransMsg msg)

Definition at line 4990 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeYPencil >::hasData, PmePencil< CBase_PmeYPencil >::imsgb, PmePencil< CBase_PmeYPencil >::initdata, NAMD_bug(), recv_untrans(), send_untrans(), and PmePencilInitMsgData::yBlocks.

Referenced by recvNodeAck(), and NodePmeMgr::recvYUntrans().

4991 {
4992  if ( msg ) {
4993  if ( ! hasData ) NAMD_bug("PmeYPencil::node_process_untrans non-null msg but not hasData");
4994  recv_untrans(msg);
4995  } else if ( hasData ) NAMD_bug("PmeYPencil::node_process_untrans hasData but null msg");
4996  int limsg;
4997  CmiMemoryAtomicFetchAndInc(imsgb,limsg);
4998  if(limsg+1 == initdata.yBlocks)
4999  {
5000  if ( hasData ) {
5001  backward_fft();
5002  }
5003  hasData=0;
5004  imsgb=0;
5005  CmiMemoryWriteFence();
5006  send_untrans();
5007  }
5008 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
void NAMD_bug(const char *err_msg)
Definition: common.C:195
void recv_untrans(const PmeUntransMsg *)
Definition: ComputePme.C:5922
void backward_fft()
Definition: ComputePme.C:5963
void send_untrans()
Definition: ComputePme.C:6069

◆ recv_trans()

void PmeYPencil::recv_trans ( const PmeTransMsg msg)

Definition at line 5400 of file ComputePme.C.

References PmeGrid::block2, PmePencil< CBase_PmeYPencil >::data, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeYPencil >::imsg, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, PmeTransMsg::lattice, PmePencil< CBase_PmeYPencil >::lattice, PmeTransMsg::nx, PmeTransMsg::qgrid, PmeTransMsg::sequence, PmePencil< CBase_PmeYPencil >::sequence, and PmeTransMsg::sourceNode.

Referenced by node_process_trans().

5400  {
5401  if ( imsg == 0 ) {
5402  lattice = msg->lattice;
5403  sequence = msg->sequence;
5404  }
5405  int block2 = initdata.grid.block2;
5406  int K2 = initdata.grid.K2;
5407  int jb = msg->sourceNode;
5408  int ny = msg->nx;
5409  if ( msg->hasData ) {
5410  const float *md = msg->qgrid;
5411  float *d = data;
5412  for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
5413  for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
5414  for ( int k=0; k<nz; ++k ) {
5415 #ifdef ZEROCHECK
5416  if ( (*md) == 0. ) CkPrintf("0 in ZY at %d %d %d %d %d %d %d %d %d\n",
5417  thisIndex.x, jb, thisIndex.z, i, j, k, nx, ny, nz);
5418 #endif
5419  d[2*(j*nz+k)] = *(md++);
5420  d[2*(j*nz+k)+1] = *(md++);
5421  }
5422  }
5423  }
5424  } else {
5425  float *d = data;
5426  for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
5427  for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
5428  for ( int k=0; k<nz; ++k ) {
5429  d[2*(j*nz+k)] = 0;
5430  d[2*(j*nz+k)+1] = 0;
5431  }
5432  }
5433  }
5434  }
5435 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21
float * qgrid
Definition: ComputePme.C:163
int block2
Definition: PmeBase.h:24
int sourceNode
Definition: ComputePme.C:157
Lattice lattice
Definition: ComputePme.C:160

◆ recv_untrans()

void PmeYPencil::recv_untrans ( const PmeUntransMsg msg)

Definition at line 5922 of file ComputePme.C.

References PmeGrid::block2, PmePencil< CBase_PmeYPencil >::data, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

5922  {
5923  int block2 = initdata.grid.block2;
5924  int K2 = initdata.grid.K2;
5925  int jb = msg->sourceNode;
5926  int ny = msg->ny;
5927  const float *md = msg->qgrid;
5928  float *d = data;
5929  for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
5930 #if CMK_BLUEGENEL
5931  CmiNetworkProgress();
5932 #endif
5933  for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
5934  for ( int k=0; k<nz; ++k ) {
5935 #ifdef ZEROCHECK
5936  if ( (*md) == 0. ) CkPrintf("0 in XY at %d %d %d %d %d %d %d %d %d\n",
5937  thisIndex.x, jb, thisIndex.z, i, j, k, nx, ny, nz);
5938 #endif
5939  d[2*(j*nz+k)] = *(md++);
5940  d[2*(j*nz+k)+1] = *(md++);
5941  }
5942  }
5943  }
5944 }
float * qgrid
Definition: ComputePme.C:180
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21
int block2
Definition: PmeBase.h:24

◆ recvNodeAck()

void PmeYPencil::recvNodeAck ( PmeAckMsg msg)

Definition at line 4985 of file ComputePme.C.

References node_process_untrans().

4985  {
4986  delete msg;
4987  node_process_untrans(0);
4988 }
void node_process_untrans(PmeUntransMsg *)
Definition: ComputePme.C:4990

◆ send_subset_trans()

void PmeYPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5494 of file ComputePme.C.

References PmeGrid::block2, PmePencil< CBase_PmeYPencil >::data, PmeTransMsg::destElem, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeYPencil >::hasData, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, PmeTransMsg::lattice, PmePencil< CBase_PmeYPencil >::lattice, PmeTransMsg::nx, PME_TRANS2_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeYPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeYPencil >::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::xm, PmePencilInitMsgData::xPencil, and PmePencilInitMsgData::yBlocks.

Referenced by PmeYPencilSendTrans().

5494  {
5495  int yBlocks = initdata.yBlocks;
5496  int block2 = initdata.grid.block2;
5497  int K2 = initdata.grid.K2;
5498  for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
5499  int jb = send_order[isend];
5500  int ny = block2;
5501  if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
5502  int hd = ( hasData ? 1 : 0 );
5503  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5504  msg->lattice = lattice;
5505  msg->sourceNode = thisIndex.x;
5506  msg->hasData = hasData;
5507  msg->nx = nx;
5508  if ( hasData ) {
5509  float *md = msg->qgrid;
5510  const float *d = data;
5511  for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
5512  for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
5513  for ( int k=0; k<nz; ++k ) {
5514  *(md++) = d[2*(j*nz+k)];
5515  *(md++) = d[2*(j*nz+k)+1];
5516  #ifdef ZEROCHECK
5517  if ( *(md-2) == 0. ) CkPrintf("send 0 in YX at %d %d %d %d %d %d %d %d %d\n",
5518  thisIndex.x, jb, thisIndex.z, i, j, k, nx, ny, nz);
5519  #endif
5520  }
5521  }
5522  }
5523  if ( md != msg->qgrid + nx*ny*nz*2 ) CkPrintf("error in YX at %d %d %d\n",
5524  thisIndex.x, jb, thisIndex.z);
5525  }
5526  msg->sequence = sequence;
5527  SET_PRIORITY(msg,sequence,PME_TRANS2_PRIORITY)
5528  CmiEnableUrgentSend(1);
5529 #if USE_NODE_PAR_RECEIVE
5530  msg->destElem=CkArrayIndex3D(0,jb,thisIndex.z);
5531 #if X_PERSIST
5532  CmiUsePersistentHandle(&trans_handle[isend], 1);
5533 #endif
5534  initdata.pmeNodeProxy[CmiNodeOf(initdata.xm.ckLocalBranch()->procNum(0,msg->destElem))].recvXTrans(msg);
5535 #if X_PERSIST
5536  CmiUsePersistentHandle(NULL, 0);
5537 #endif
5538 #else
5539 #if X_PERSIST
5540  CmiUsePersistentHandle(&trans_handle[isend], 1);
5541 #endif
5542  initdata.xPencil(0,jb,thisIndex.z).recvTrans(msg);
5543 #if X_PERSIST
5544  CmiUsePersistentHandle(NULL, 0);
5545 #endif
5546 #endif
5547  CmiEnableUrgentSend(0);
5548  }
5549 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
#define PME_TRANS2_PRIORITY
Definition: Priorities.h:32
int K2
Definition: PmeBase.h:21
CProxy_PmePencilMap xm
Definition: ComputePme.C:245
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
float * qgrid
Definition: ComputePme.C:163
int block2
Definition: PmeBase.h:24
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:157
CkArrayIndex3D destElem
Definition: ComputePme.C:164
Lattice lattice
Definition: ComputePme.C:160
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CProxy_PmeXPencil xPencil
Definition: ComputePme.C:240

◆ send_subset_untrans()

void PmeYPencil::send_subset_untrans ( int  fromIdx,
int  toIdx 
)

Definition at line 6010 of file ComputePme.C.

References PmeGrid::block2, PmePencil< CBase_PmeYPencil >::data, PmeUntransMsg::destElem, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, PmePencil< CBase_PmeYPencil >::needs_reply, PmeUntransMsg::ny, PME_UNTRANS2_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeUntransMsg::qgrid, PmePencil< CBase_PmeYPencil >::send_order, PmePencil< CBase_PmeYPencil >::sequence, SET_PRIORITY, PmeUntransMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::zm, and PmePencilInitMsgData::zPencil.

Referenced by PmeYPencilSendUntrans().

6010  {
6011  int yBlocks = initdata.yBlocks;
6012  int block2 = initdata.grid.block2;
6013  int K2 = initdata.grid.K2;
6014 
6015  for(int isend=fromIdx; isend<=toIdx; isend++) {
6016  int jb = send_order[isend];
6017  if ( ! needs_reply[jb] ) {
6018  PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
6019  CmiEnableUrgentSend(1);
6020  SET_PRIORITY(msg,sequence,PME_UNTRANS2_PRIORITY)
6021 #if USE_NODE_PAR_RECEIVE
6022  initdata.zPencil(thisIndex.x,jb,0).recvNodeAck(msg);
6023 #else
6024  initdata.zPencil(thisIndex.x,jb,0).recvAck(msg);
6025 #endif
6026  CmiEnableUrgentSend(0);
6027  continue;
6028  }
6029  int ny = block2;
6030  if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
6031  PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
6032  msg->sourceNode = thisIndex.z;
6033  msg->ny = nz;
6034  float *md = msg->qgrid;
6035  const float *d = data;
6036  for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
6037  for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
6038  for ( int k=0; k<nz; ++k ) {
6039  *(md++) = d[2*(j*nz+k)];
6040  *(md++) = d[2*(j*nz+k)+1];
6041  }
6042  }
6043  }
6044  SET_PRIORITY(msg,sequence,PME_UNTRANS2_PRIORITY)
6045  CmiEnableUrgentSend(1);
6046 #if USE_NODE_PAR_RECEIVE
6047  msg->destElem=CkArrayIndex3D( thisIndex.x, jb, 0);
6048  // CkPrintf("[%d] sending to %d %d %d recvZUntrans on node %d\n", CkMyPe(), thisIndex.x, jb, 0, CmiNodeOf(initdata.zm.ckLocalBranch()->procNum(0,msg->destElem)));
6049 #if Z_PERSIST
6050  CmiUsePersistentHandle(&untrans_handle[isend], 1);
6051 #endif
6052  initdata.pmeNodeProxy[CmiNodeOf(initdata.zm.ckLocalBranch()->procNum(0,msg->destElem))].recvZUntrans(msg);
6053 #if Z_PERSIST
6054  CmiUsePersistentHandle(NULL, 0);
6055 #endif
6056 #else
6057 #if Z_PERSIST
6058  CmiUsePersistentHandle(&untrans_handle[isend], 1);
6059 #endif
6060  initdata.zPencil(thisIndex.x,jb,0).recvUntrans(msg);
6061 #if Z_PERSIST
6062  CmiUsePersistentHandle(NULL, 0);
6063 #endif
6064 #endif
6065  CmiEnableUrgentSend(0);
6066  }
6067 }
float * qgrid
Definition: ComputePme.C:180
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21
CProxy_PmeZPencil zPencil
Definition: ComputePme.C:242
CProxy_PmePencilMap zm
Definition: ComputePme.C:247
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
int block2
Definition: PmeBase.h:24
#define PRIORITY_SIZE
Definition: Priorities.h:13
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CkArrayIndex3D destElem
Definition: ComputePme.C:181
#define PME_UNTRANS2_PRIORITY
Definition: Priorities.h:34

◆ send_trans()

void PmeYPencil::send_trans ( )

Definition at line 5551 of file ComputePme.C.

References PmeGrid::block2, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeYPencil >::data, PmeTransMsg::destElem, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeYPencil >::hasData, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, PmeTransMsg::lattice, PmePencil< CBase_PmeYPencil >::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS2_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeYPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeYPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeYPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::xm, PmePencilInitMsgData::xPencil, PmePencilInitMsgData::yBlocks, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_trans().

5551  {
5552 #if USE_PERSISTENT
5553  if (trans_handle == NULL) setup_persistent();
5554 #endif
5555 #if CMK_SMP && USE_CKLOOP
5556  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5557  if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
5558  && CkNumPes() >= 2 * initdata.xBlocks * initdata.zBlocks) {
5565  //send_subset_trans(0, initdata.yBlocks-1);
5566  CkLoop_Parallelize(PmeYPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.yBlocks-1, 1); //not sync
5567  return;
5568  }
5569 #endif
5570  int yBlocks = initdata.yBlocks;
5571  int block2 = initdata.grid.block2;
5572  int K2 = initdata.grid.K2;
5573  for ( int isend=0; isend<yBlocks; ++isend ) {
5574  int jb = send_order[isend];
5575  int ny = block2;
5576  if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
5577  int hd = ( hasData ? 1 : 0 );
5578  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5579  msg->lattice = lattice;
5580  msg->sourceNode = thisIndex.x;
5581  msg->hasData = hasData;
5582  msg->nx = nx;
5583  if ( hasData ) {
5584  float *md = msg->qgrid;
5585  const float *d = data;
5586  for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
5587  for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
5588  for ( int k=0; k<nz; ++k ) {
5589  *(md++) = d[2*(j*nz+k)];
5590  *(md++) = d[2*(j*nz+k)+1];
5591 #ifdef ZEROCHECK
5592  if ( *(md-2) == 0. ) CkPrintf("send 0 in YX at %d %d %d %d %d %d %d %d %d\n",
5593  thisIndex.x, jb, thisIndex.z, i, j, k, nx, ny, nz);
5594 #endif
5595  }
5596  }
5597  }
5598  if ( md != msg->qgrid + nx*ny*nz*2 ) CkPrintf("error in YX at %d %d %d\n",
5599  thisIndex.x, jb, thisIndex.z);
5600  }
5601  msg->sequence = sequence;
5602  SET_PRIORITY(msg,sequence,PME_TRANS2_PRIORITY)
5603  CmiEnableUrgentSend(1);
5604 #if USE_NODE_PAR_RECEIVE
5605  msg->destElem=CkArrayIndex3D(0,jb,thisIndex.z);
5606 #if X_PERSIST
5607  CmiUsePersistentHandle(&trans_handle[isend], 1);
5608 #endif
5609  initdata.pmeNodeProxy[CmiNodeOf(initdata.xm.ckLocalBranch()->procNum(0,msg->destElem))].recvXTrans(msg);
5610 #if X_PERSIST
5611  CmiUsePersistentHandle(NULL, 0);
5612 #endif
5613 #else
5614 #if X_PERSIST
5615  CmiUsePersistentHandle(&trans_handle[isend], 1);
5616 #endif
5617  initdata.xPencil(0,jb,thisIndex.z).recvTrans(msg);
5618 #if X_PERSIST
5619  CmiUsePersistentHandle(NULL, 0);
5620 #endif
5621 
5622 #endif
5623  CmiEnableUrgentSend(0);
5624  }
5625 }
static Node * Object()
Definition: Node.h:86
static void PmeYPencilSendTrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5489
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
#define PME_TRANS2_PRIORITY
Definition: Priorities.h:32
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
CProxy_PmePencilMap xm
Definition: ComputePme.C:245
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
float * qgrid
Definition: ComputePme.C:163
int block2
Definition: PmeBase.h:24
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:157
#define CKLOOP_CTRL_PME_SENDTRANS
Definition: SimParameters.h:97
CkArrayIndex3D destElem
Definition: ComputePme.C:164
Lattice lattice
Definition: ComputePme.C:160
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CProxy_PmeXPencil xPencil
Definition: ComputePme.C:240

◆ send_untrans()

void PmeYPencil::send_untrans ( )

Definition at line 6069 of file ComputePme.C.

References PmeGrid::block2, CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeYPencil >::data, PmeUntransMsg::destElem, PmePencil< CBase_PmeYPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeYPencil >::initdata, PmeGrid::K2, PmePencil< CBase_PmeYPencil >::needs_reply, PmeUntransMsg::ny, Node::Object(), PME_UNTRANS2_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeYPencilSendUntrans(), PRIORITY_SIZE, PmeUntransMsg::qgrid, PmePencil< CBase_PmeYPencil >::send_order, PmePencil< CBase_PmeYPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeUntransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::zBlocks, PmePencilInitMsgData::zm, and PmePencilInitMsgData::zPencil.

Referenced by node_process_untrans().

6069  {
6070 #if USE_PERSISTENT
6071  if (untrans_handle == NULL) setup_persistent();
6072 #endif
6073 #if CMK_SMP && USE_CKLOOP
6074  int useCkLoop = Node::Object()->simParameters->useCkLoop;
6075  if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
6076  && CkNumPes() >= 2 * initdata.xBlocks * initdata.zBlocks) {
6077  int yBlocks = initdata.yBlocks;
6078 
6079 #if USE_NODE_PAR_RECEIVE
6080  //CkLoop_Parallelize(PmeYPencilSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, yBlocks-1, 1); //sync
6081  CkLoop_Parallelize(PmeYPencilSendUntrans, 1, (void *)this, yBlocks, 0, yBlocks-1, 1);
6082 #else
6083  //CkLoop_Parallelize(PmeYPencilSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, yBlocks-1, 0); //not sync
6084  CkLoop_Parallelize(PmeYPencilSendUntrans, 1, (void *)this, yBlocks, 0, yBlocks-1, 0); //not sync
6085 #endif
6086  return;
6087  }
6088 #endif
6089  int yBlocks = initdata.yBlocks;
6090  int block2 = initdata.grid.block2;
6091  int K2 = initdata.grid.K2;
6092  for ( int isend=0; isend<yBlocks; ++isend ) {
6093  int jb = send_order[isend];
6094  if ( ! needs_reply[jb] ) {
6095  PmeAckMsg *msg = new (PRIORITY_SIZE) PmeAckMsg;
6096  CmiEnableUrgentSend(1);
6097  SET_PRIORITY(msg,sequence,PME_UNTRANS2_PRIORITY)
6098 #if USE_NODE_PAR_RECEIVE
6099  initdata.zPencil(thisIndex.x,jb,0).recvNodeAck(msg);
6100 #else
6101  initdata.zPencil(thisIndex.x,jb,0).recvAck(msg);
6102 #endif
6103  CmiEnableUrgentSend(0);
6104  continue;
6105  }
6106  int ny = block2;
6107  if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
6108  PmeUntransMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) PmeUntransMsg;
6109  msg->sourceNode = thisIndex.z;
6110  msg->ny = nz;
6111  float *md = msg->qgrid;
6112  const float *d = data;
6113  for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
6114  for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
6115  for ( int k=0; k<nz; ++k ) {
6116  *(md++) = d[2*(j*nz+k)];
6117  *(md++) = d[2*(j*nz+k)+1];
6118  }
6119  }
6120  }
6121  SET_PRIORITY(msg,sequence,PME_UNTRANS2_PRIORITY)
6122 
6123  CmiEnableUrgentSend(1);
6124 #if USE_NODE_PAR_RECEIVE
6125  msg->destElem=CkArrayIndex3D( thisIndex.x, jb, 0);
6126  // CkPrintf("[%d] sending to %d %d %d recvZUntrans on node %d\n", CkMyPe(), thisIndex.x, jb, 0, CmiNodeOf(initdata.zm.ckLocalBranch()->procNum(0,msg->destElem)));
6127 #if Z_PERSIST
6128  CmiUsePersistentHandle(&untrans_handle[isend], 1);
6129 #endif
6130  initdata.pmeNodeProxy[CmiNodeOf(initdata.zm.ckLocalBranch()->procNum(0,msg->destElem))].recvZUntrans(msg);
6131 #if Z_PERSIST
6132  CmiUsePersistentHandle(NULL, 0);
6133 #endif
6134 #else
6135 #if Z_PERSIST
6136  CmiUsePersistentHandle(&untrans_handle[isend], 1);
6137 #endif
6138  initdata.zPencil(thisIndex.x,jb,0).recvUntrans(msg);
6139 #if Z_PERSIST
6140  CmiUsePersistentHandle(NULL, 0);
6141 #endif
6142 #endif
6143  CmiEnableUrgentSend(0);
6144  }
6145 
6146 #if USE_NODE_PAR_RECEIVE
6147  evir = 0.;
6148  CmiMemoryWriteFence();
6149 #endif
6150 }
static Node * Object()
Definition: Node.h:86
float * qgrid
Definition: ComputePme.C:180
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int K2
Definition: PmeBase.h:21
CProxy_PmeZPencil zPencil
Definition: ComputePme.C:242
SimParameters * simParameters
Definition: Node.h:181
CProxy_PmePencilMap zm
Definition: ComputePme.C:247
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
int block2
Definition: PmeBase.h:24
#define CKLOOP_CTRL_PME_SENDUNTRANS
#define PRIORITY_SIZE
Definition: Priorities.h:13
static void PmeYPencilSendUntrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:6005
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
CkArrayIndex3D destElem
Definition: ComputePme.C:181
#define PME_UNTRANS2_PRIORITY
Definition: Priorities.h:34

The documentation for this class was generated from the following file: