NAMD
Public Member Functions | List of all members
PmeZPencil Class Reference
Inheritance diagram for PmeZPencil:
PmePencil< CBase_PmeZPencil >

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 
 PmeZPencil (CkMigrateMessage *)
 
 ~PmeZPencil ()
 
void fft_init ()
 
void recv_grid (const PmeGridMsg *)
 
void forward_fft ()
 
void send_trans ()
 
void send_subset_trans (int fromIdx, int toIdx)
 
void recv_untrans (const PmeUntransMsg *)
 
void recvNodeAck (PmeAckMsg *)
 
void node_process_untrans (PmeUntransMsg *)
 
void node_process_grid (PmeGridMsg *)
 
void backward_fft ()
 
void send_ungrid (PmeGridMsg *)
 
void send_all_ungrid ()
 
void send_subset_ungrid (int fromIdx, int toIdx)
 
- Public Member Functions inherited from PmePencil< CBase_PmeZPencil >
 PmePencil ()
 
 ~PmePencil ()
 
void base_init (PmePencilInitMsg *msg)
 
void order_init (int nBlocks)
 

Additional Inherited Members

- Public Types inherited from PmePencil< CBase_PmeZPencil >
typedef int AtomicInt
 
- Public Attributes inherited from PmePencil< CBase_PmeZPencil >
PmePencilInitMsgData initdata
 
Lattice lattice
 
PmeReduction evir
 
int sequence
 
AtomicInt imsg
 
AtomicInt imsgb
 
int hasData
 
int offload
 
float * data
 
float * work
 
int * send_order
 
int * needs_reply
 

Detailed Description

Definition at line 4648 of file ComputePme.C.

Constructor & Destructor Documentation

◆ PmeZPencil() [1/2]

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil ( )
inline

Definition at line 4651 of file ComputePme.C.

4651 { __sdag_init(); setMigratable(false); }

◆ PmeZPencil() [2/2]

PmeZPencil::PmeZPencil ( CkMigrateMessage *  )
inline

Definition at line 4652 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

4652 { __sdag_init(); setMigratable (false); imsg=imsgb=0;}

◆ ~PmeZPencil()

PmeZPencil::~PmeZPencil ( )
inline

Definition at line 4653 of file ComputePme.C.

4653  {
4654  #ifdef NAMD_FFTW
4655  #ifdef NAMD_FFTW_3
4656  delete [] forward_plans;
4657  delete [] backward_plans;
4658  #endif
4659  #endif
4660  }

Member Function Documentation

◆ backward_fft()

void PmeZPencil::backward_fft ( )

Definition at line 6239 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

6239  {
6240 #ifdef NAMD_FFTW
6241 #ifdef MANUAL_DEBUG_FFTW3
6242  dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6243 #endif
6244 #ifdef NAMD_FFTW_3
6245 #if CMK_SMP && USE_CKLOOP
6246  int useCkLoop = Node::Object()->simParameters->useCkLoop;
6247  if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
6248  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6249  //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
6250  //transform the above loop
6251  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
6252  return;
6253  }
6254 #endif
6255  fftwf_execute(backward_plan);
6256 #else
6257  rfftwnd_complex_to_real(backward_plan, nx*ny,
6258  (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
6259 #endif
6260 #ifdef MANUAL_DEBUG_FFTW3
6261  dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6262 #endif
6263 
6264 #endif
6265 
6266 #if CMK_BLUEGENEL
6267  CmiNetworkProgress();
6268 #endif
6269 
6270 #ifdef FFTCHECK
6271  int dim3 = initdata.grid.dim3;
6272  int K1 = initdata.grid.K1;
6273  int K2 = initdata.grid.K2;
6274  int K3 = initdata.grid.K3;
6275  float scale = 1. / (1. * K1 * K2 * K3);
6276  float maxerr = 0.;
6277  float maxstd = 0.;
6278  int mi, mj, mk; mi = mj = mk = -1;
6279  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
6280  const float *d = data;
6281  for ( int i=0; i<nx; ++i ) {
6282  for ( int j=0; j<ny; ++j, d += dim3 ) {
6283  for ( int k=0; k<K3; ++k ) {
6284  float std = 10. * (10. * (10. * std_base + i) + j) + k;
6285  float err = scale * d[k] - std;
6286  if ( fabsf(err) > fabsf(maxerr) ) {
6287  maxerr = err;
6288  maxstd = std;
6289  mi = i; mj = j; mk = k;
6290  }
6291  }
6292  }
6293  }
6294  CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
6295  thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
6296 #endif
6297 
6298 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5266
int K3
Definition: PmeBase.h:21
#define CKLOOP_CTRL_PME_BACKWARDFFT

◆ fft_init()

void PmeZPencil::fft_init ( )

Definition at line 4851 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, fftwf_malloc, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

4851  {
4852  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
4853  Node *node = nd.ckLocalBranch();
4855 
4856 #if USE_NODE_PAR_RECEIVE
4857  ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
4858 #endif
4859 
4860  int K1 = initdata.grid.K1;
4861  int K2 = initdata.grid.K2;
4862  int K3 = initdata.grid.K3;
4863  int dim3 = initdata.grid.dim3;
4864  int block1 = initdata.grid.block1;
4865  int block2 = initdata.grid.block2;
4866 
4867  nx = block1;
4868  if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
4869  ny = block2;
4870  if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
4871 
4872 #ifdef NAMD_FFTW
4874 
4875  data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
4876  work = new float[dim3];
4877 
4879 
4880 #ifdef NAMD_FFTW_3
4881  /* need array of sizes for the how many */
4882 
4883  int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
4884  int sizeLines=nx*ny;
4885  int planLineSizes[1];
4886  planLineSizes[0]=K3;
4887  int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
4888  int ndimHalf=ndim/2;
4889  forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
4890  (float *) data, NULL, 1,
4891  ndim,
4892  (fftwf_complex *) data, NULL, 1,
4893  ndimHalf,
4894  fftwFlags);
4895 
4896  backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
4897  (fftwf_complex *) data, NULL, 1,
4898  ndimHalf,
4899  (float *) data, NULL, 1,
4900  ndim,
4901  fftwFlags);
4902 #if CMK_SMP && USE_CKLOOP
4903  if(simParams->useCkLoop) {
4904  //How many FFT plans to be created? The grain-size issue!!.
4905  //Currently, I am choosing the min(nx, ny) to be coarse-grain
4906  numPlans = (nx<=ny?nx:ny);
4907  if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
4908  if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
4909  int howmany = sizeLines/numPlans;
4910  forward_plans = new fftwf_plan[numPlans];
4911  backward_plans = new fftwf_plan[numPlans];
4912  for(int i=0; i<numPlans; i++) {
4913  int dimStride = i*ndim*howmany;
4914  int dimHalfStride = i*ndimHalf*howmany;
4915  forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
4916  ((float *)data)+dimStride, NULL, 1,
4917  ndim,
4918  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4919  ndimHalf,
4920  fftwFlags);
4921 
4922  backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
4923  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4924  ndimHalf,
4925  ((float *)data)+dimStride, NULL, 1,
4926  ndim,
4927  fftwFlags);
4928  }
4929  }else
4930 #endif
4931  {
4932  forward_plans = NULL;
4933  backward_plans = NULL;
4934  }
4935 #else
4936  forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
4937  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4938  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4939  backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
4940  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4941  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4942 #endif
4943  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
4944 #else
4945  NAMD_die("Sorry, FFTW must be compiled in to use PME.");
4946 #endif
4947 
4948 #if USE_NODE_PAR_RECEIVE
4949  evir = 0.;
4950  memset(data, 0, sizeof(float) * nx*ny*dim3);
4951 #endif
4952 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:442
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
Definition: Node.h:78
void order_init(int nBlocks)
Definition: ComputePme.C:4612
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:246
int block2
Definition: PmeBase.h:24
void NAMD_die(const char *err_msg)
Definition: common.C:147
#define simParams
Definition: Output.C:131
int K3
Definition: PmeBase.h:21
#define fftwf_malloc
Definition: ComputePme.C:13

◆ forward_fft()

void PmeZPencil::forward_fft ( )

Definition at line 5275 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

5275  {
5276  evir = 0.;
5277 #ifdef FFTCHECK
5278  int dim3 = initdata.grid.dim3;
5279  int K3 = initdata.grid.K3;
5280  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
5281  float *d = data;
5282  for ( int i=0; i<nx; ++i ) {
5283  for ( int j=0; j<ny; ++j, d += dim3 ) {
5284  for ( int k=0; k<dim3; ++k ) {
5285  d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
5286  }
5287  }
5288  }
5289 #endif
5290 #ifdef NAMD_FFTW
5291 #ifdef MANUAL_DEBUG_FFTW3
5292  dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5293 #endif
5294 #ifdef NAMD_FFTW_3
5295 #if CMK_SMP && USE_CKLOOP
5296  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5297  if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
5298  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5299  //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
5300  //transform the above loop
5301  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5302  return;
5303  }
5304 #endif
5305  fftwf_execute(forward_plan);
5306 #else
5307  rfftwnd_real_to_complex(forward_plan, nx*ny,
5308  data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
5309 #endif
5310 #ifdef MANUAL_DEBUG_FFTW3
5311  dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5312 #endif
5313 
5314 #endif
5315 #ifdef ZEROCHECK
5316  int dim3 = initdata.grid.dim3;
5317  int K3 = initdata.grid.K3;
5318  float *d = data;
5319  for ( int i=0; i<nx; ++i ) {
5320  for ( int j=0; j<ny; ++j, d += dim3 ) {
5321  for ( int k=0; k<dim3; ++k ) {
5322  if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
5323  thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
5324  }
5325  }
5326  }
5327 #endif
5328 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
SimParameters * simParameters
Definition: Node.h:181
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5266
int K3
Definition: PmeBase.h:21
#define CKLOOP_CTRL_PME_FORWARDFFT
Definition: SimParameters.h:97

◆ node_process_grid()

void PmeZPencil::node_process_grid ( PmeGridMsg *  msg)

Definition at line 6380 of file ComputePme.C.

References ComputePmeMgr::fftw_plan_lock, forward_fft(), PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

6381 {
6382 #if USE_NODE_PAR_RECEIVE
6384  CmiMemoryReadFence();
6385 #endif
6386  recv_grid(msg);
6387  if(msg->hasData) hasData=msg->hasData;
6388  int limsg;
6389  CmiMemoryAtomicFetchAndInc(imsg,limsg);
6390  grid_msgs[limsg] = msg;
6391  // CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);
6392  if(limsg+1 == grid_msgs.size())
6393  {
6394 
6395  if (hasData)
6396  {
6397  forward_fft();
6398  }
6399  send_trans();
6400  imsg=0;
6401  CmiMemoryWriteFence();
6402  // CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6403  }
6404 #if USE_NODE_PAR_RECEIVE
6405  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6406  CmiMemoryWriteFence();
6407 #endif
6408 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:442
int size(void) const
Definition: ResizeArray.h:131
void forward_fft()
Definition: ComputePme.C:5275
void send_trans()
Definition: ComputePme.C:5388
void recv_grid(const PmeGridMsg *)
Definition: ComputePme.C:5224

◆ node_process_untrans()

void PmeZPencil::node_process_untrans ( PmeUntransMsg *  msg)

Definition at line 6415 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by recvNodeAck(), and NodePmeMgr::recvZUntrans().

6416 {
6417  if ( msg ) {
6418  if ( ! hasData ) NAMD_bug("PmeZPencil::node_process_untrans non-null msg but not hasData");
6419  recv_untrans(msg);
6420  } else if ( hasData ) NAMD_bug("PmeZPencil::node_process_untrans hasData but null msg");
6421 #if USE_NODE_PAR_RECEIVE
6422  CmiMemoryWriteFence();
6424 #endif
6425  int limsg;
6426  CmiMemoryAtomicFetchAndInc(imsgb,limsg);
6427  if(limsg+1 == initdata.zBlocks)
6428  {
6429 #if USE_NODE_PAR_RECEIVE
6430  CmiMemoryReadFence();
6431 #endif
6432  if(hasData) {
6433  backward_fft();
6434  }
6435  send_all_ungrid();
6436  hasData=0;
6437  imsgb=0;
6438  evir = 0;
6439  memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3);
6440  CmiMemoryWriteFence();
6441  // CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6442  }
6443 #if USE_NODE_PAR_RECEIVE
6444  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6445 #endif
6446 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:442
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
void backward_fft()
Definition: ComputePme.C:6239
void send_all_ungrid()
Definition: ComputePme.C:6307
void NAMD_bug(const char *err_msg)
Definition: common.C:195
void recv_untrans(const PmeUntransMsg *)
Definition: ComputePme.C:6211

◆ recv_grid()

void PmeZPencil::recv_grid ( const PmeGridMsg *  msg)

Definition at line 5224 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

5224  {
5225 
5226  int dim3 = initdata.grid.dim3;
5227  if ( imsg == 0 ) {
5228  lattice = msg->lattice;
5229  sequence = msg->sequence;
5230 #if ! USE_NODE_PAR_RECEIVE
5231  memset(data, 0, sizeof(float)*nx*ny*dim3);
5232 #endif
5233  }
5234 
5235  if ( ! msg->hasData ) return;
5236 
5237  int zlistlen = msg->zlistlen;
5238 #ifdef NAMD_KNL
5239  int * __restrict msg_zlist = msg->zlist;
5240  int * __restrict zlist = (int*)__builtin_assume_aligned(work_zlist.begin(),
5241  64);
5242  for ( int k=0; k<zlistlen; ++k ) {
5243  zlist[k] = msg_zlist[k];
5244  }
5245 #else
5246  int * __restrict zlist = msg->zlist;
5247 #endif
5248  char * __restrict fmsg = msg->fgrid;
5249  float * __restrict qmsg = msg->qgrid;
5250  float * __restrict d = data;
5251  int numGrids = 1; // pencil FFT doesn't support multiple grids
5252  for ( int g=0; g<numGrids; ++g ) {
5253  for ( int i=0; i<nx; ++i ) {
5254  for ( int j=0; j<ny; ++j, d += dim3 ) {
5255  if( *(fmsg++) ) {
5256  #pragma ivdep
5257  for ( int k=0; k<zlistlen; ++k ) {
5258  d[zlist[k]] += *(qmsg++);
5259  }
5260  }
5261  }
5262  }
5263  }
5264 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
int sequence
Definition: ComputePme.C:144
Lattice lattice
Definition: ComputePme.C:146
float * qgrid
Definition: ComputePme.C:152
int * zlist
Definition: ComputePme.C:150
iterator begin(void)
Definition: ResizeArray.h:36
int zlistlen
Definition: ComputePme.C:149
char * fgrid
Definition: ComputePme.C:151

◆ recv_untrans()

void PmeZPencil::recv_untrans ( const PmeUntransMsg *  msg)

Definition at line 6211 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

6211  {
6212 #if ! USE_NODE_PAR_RECEIVE
6213  if(imsg==0) evir=0.;
6214 #endif
6215 
6216  int block3 = initdata.grid.block3;
6217  int dim3 = initdata.grid.dim3;
6218  int kb = msg->sourceNode;
6219  int nz = msg->ny;
6220  const float *md = msg->qgrid;
6221  float *d = data;
6222  for ( int i=0; i<nx; ++i ) {
6223 #if CMK_BLUEGENEL
6224  CmiNetworkProgress();
6225 #endif
6226  for ( int j=0; j<ny; ++j, d += dim3 ) {
6227  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
6228 #ifdef ZEROCHECK
6229  if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
6230  thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
6231 #endif
6232  d[2*k] = *(md++);
6233  d[2*k+1] = *(md++);
6234  }
6235  }
6236  }
6237 }
float * qgrid
Definition: ComputePme.C:182
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
int block3
Definition: PmeBase.h:24

◆ recvNodeAck()

void PmeZPencil::recvNodeAck ( PmeAckMsg *  msg)

Definition at line 6410 of file ComputePme.C.

References node_process_untrans().

6410  {
6411  delete msg;
6413 }
void node_process_untrans(PmeUntransMsg *)
Definition: ComputePme.C:6415

◆ send_all_ungrid()

void PmeZPencil::send_all_ungrid ( )

Definition at line 6307 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

6307  {
6308 
6309 #if CMK_SMP && USE_CKLOOP
6310  int useCkLoop = Node::Object()->simParameters->useCkLoop;
6311  if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
6312  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6313  //????What's the best value for numChunks?????
6314  CkLoop_Parallelize(PmeZPencilSendUngrid, 1, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
6315  return;
6316  }
6317 #endif
6318  send_subset_ungrid(0, grid_msgs.size()-1);
6319 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int size(void) const
Definition: ResizeArray.h:131
SimParameters * simParameters
Definition: Node.h:181
static void PmeZPencilSendUngrid(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:6300
#define CKLOOP_CTRL_PME_SENDUNTRANS
void send_subset_ungrid(int fromIdx, int toIdx)
Definition: ComputePme.C:6321

◆ send_subset_trans()

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5336 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

5336  {
5337  int zBlocks = initdata.zBlocks;
5338  int block3 = initdata.grid.block3;
5339  int dim3 = initdata.grid.dim3;
5340  for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
5341  int kb = send_order[isend];
5342  int nz = block3;
5343  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
5344  int hd = ( hasData ? 1 : 0 );
5345  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5346  msg->lattice = lattice;
5347  msg->sourceNode = thisIndex.y;
5348  msg->hasData = hasData;
5349  msg->nx = ny;
5350  if ( hasData ) {
5351  float *md = msg->qgrid;
5352  const float *d = data;
5353  for ( int i=0; i<nx; ++i ) {
5354  for ( int j=0; j<ny; ++j, d += dim3 ) {
5355  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5356  *(md++) = d[2*k];
5357  *(md++) = d[2*k+1];
5358  }
5359  }
5360  }
5361  }
5362  msg->sequence = sequence;
5364 
5365  CmiEnableUrgentSend(1);
5366 #if USE_NODE_PAR_RECEIVE
5367  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5368 #if Y_PERSIST
5369  CmiUsePersistentHandle(&trans_handle[isend], 1);
5370 #endif
5371  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5372 #if Y_PERSIST
5373  CmiUsePersistentHandle(NULL, 0);
5374 #endif
5375 #else
5376 #if Y_PERSIST
5377  CmiUsePersistentHandle(&trans_handle[isend], 1);
5378 #endif
5379  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5380 #if Y_PERSIST
5381  CmiUsePersistentHandle(NULL, 0);
5382 #endif
5383 #endif
5384  CmiEnableUrgentSend(0);
5385  }
5386 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:243
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:246
float * qgrid
Definition: ComputePme.C:165
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:159
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:24
CkArrayIndex3D destElem
Definition: ComputePme.C:166
Lattice lattice
Definition: ComputePme.C:162
CProxy_PmePencilMap ym
Definition: ComputePme.C:248
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18

◆ send_subset_ungrid()

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx 
)

Definition at line 6321 of file ComputePme.C.

References send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

6321  {
6322  for (int limsg=fromIdx; limsg <=toIdx; ++limsg ) {
6323  PmeGridMsg *msg = grid_msgs[limsg];
6324  send_ungrid(msg);
6325  }
6326 }
void send_ungrid(PmeGridMsg *)
Definition: ComputePme.C:6328

◆ send_trans()

void PmeZPencil::send_trans ( )

Definition at line 5388 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

5388  {
5389 #if USE_PERSISTENT
5390  if (trans_handle == NULL) setup_persistent();
5391 #endif
5392 #if CMK_SMP && USE_CKLOOP
5393  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5394  if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
5395  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5402  //send_subset_trans(0, initdata.zBlocks-1);
5403  CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
5404  return;
5405  }
5406 #endif
5407  int zBlocks = initdata.zBlocks;
5408  int block3 = initdata.grid.block3;
5409  int dim3 = initdata.grid.dim3;
5410  for ( int isend=0; isend<zBlocks; ++isend ) {
5411  int kb = send_order[isend];
5412  int nz = block3;
5413  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
5414  int hd = ( hasData ? 1 : 0 );
5415  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5416  msg->lattice = lattice;
5417  msg->sourceNode = thisIndex.y;
5418  msg->hasData = hasData;
5419  msg->nx = ny;
5420  if ( hasData ) {
5421  float *md = msg->qgrid;
5422  const float *d = data;
5423  for ( int i=0; i<nx; ++i ) {
5424  for ( int j=0; j<ny; ++j, d += dim3 ) {
5425  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5426  *(md++) = d[2*k];
5427  *(md++) = d[2*k+1];
5428  }
5429  }
5430  }
5431  }
5432  msg->sequence = sequence;
5434 
5435  CmiEnableUrgentSend(1);
5436 #if USE_NODE_PAR_RECEIVE
5437  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5438 #if Y_PERSIST
5439  CmiUsePersistentHandle(&trans_handle[isend], 1);
5440 #endif
5441  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5442 #if Y_PERSIST
5443  CmiUsePersistentHandle(NULL, 0);
5444 #endif
5445 #else
5446 #if Y_PERSIST
5447  CmiUsePersistentHandle(&trans_handle[isend], 1);
5448 #endif
5449  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5450 #if Y_PERSIST
5451  CmiUsePersistentHandle(NULL, 0);
5452 #endif
5453 #endif
5454  CmiEnableUrgentSend(0);
5455  }
5456 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
int dim3
Definition: PmeBase.h:22
SimParameters * simParameters
Definition: Node.h:181
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:243
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:246
float * qgrid
Definition: ComputePme.C:165
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:159
#define CKLOOP_CTRL_PME_SENDTRANS
Definition: SimParameters.h:98
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:24
static void PmeZPencilSendTrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5331
CkArrayIndex3D destElem
Definition: ComputePme.C:166
Lattice lattice
Definition: ComputePme.C:162
CProxy_PmePencilMap ym
Definition: ComputePme.C:248
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18

◆ send_ungrid()

void PmeZPencil::send_ungrid ( PmeGridMsg *  msg)

Definition at line 6328 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

6328  {
6329 
6330 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
6331  const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
6332 #else
6333  const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
6334 #endif
6335 
6336  int pe = msg->sourceNode;
6337  if ( ! msg->hasData ) {
6338  delete msg;
6339  PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
6340  SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
6341  CmiEnableUrgentSend(1);
6342  initdata.pmeProxy[pe].recvAck(ackmsg);
6343  CmiEnableUrgentSend(0);
6344  return;
6345  }
6346  if ( ! hasData ) NAMD_bug("PmeZPencil::send_ungrid msg->hasData but not pencil->hasData");
6347  msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
6348  int dim3 = initdata.grid.dim3;
6349  int zlistlen = msg->zlistlen;
6350  int *zlist = msg->zlist;
6351  char *fmsg = msg->fgrid;
6352  float *qmsg = msg->qgrid;
6353  float *d = data;
6354  int numGrids = 1; // pencil FFT doesn't support multiple grids
6355  for ( int g=0; g<numGrids; ++g ) {
6356 #if CMK_BLUEGENEL
6357  CmiNetworkProgress();
6358 #endif
6359  for ( int i=0; i<nx; ++i ) {
6360  for ( int j=0; j<ny; ++j, d += dim3 ) {
6361  if( *(fmsg++) ) {
6362  for ( int k=0; k<zlistlen; ++k ) {
6363  *(qmsg++) = d[zlist[k]];
6364  }
6365  }
6366  }
6367  }
6368  }
6369  SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
6370  CmiEnableUrgentSend(1);
6371 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
6372  if ( offload ) {
6373  initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
6374  } else
6375 #endif
6376  initdata.pmeProxy[pe].recvUngrid(msg);
6377  CmiEnableUrgentSend(0);
6378 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4624
#define PME_UNGRID_PRIORITY
Definition: Priorities.h:74
int dim3
Definition: PmeBase.h:22
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:245
#define PME_OFFLOAD_UNGRID_PRIORITY
Definition: Priorities.h:42
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:246
int sourceNode
Definition: ComputePme.C:143
#define PRIORITY_SIZE
Definition: Priorities.h:13
void NAMD_bug(const char *err_msg)
Definition: common.C:195
float * qgrid
Definition: ComputePme.C:152
int * zlist
Definition: ComputePme.C:150
int zlistlen
Definition: ComputePme.C:149
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
char * fgrid
Definition: ComputePme.C:151

The documentation for this class was generated from the following file: