NAMD
Public Member Functions | List of all members
PmeZPencil Class Reference
Inheritance diagram for PmeZPencil:
PmePencil< CBase_PmeZPencil >

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 
 PmeZPencil (CkMigrateMessage *)
 
 ~PmeZPencil ()
 
void fft_init ()
 
void recv_grid (const PmeGridMsg *)
 
void forward_fft ()
 
void send_trans ()
 
void send_subset_trans (int fromIdx, int toIdx)
 
void recv_untrans (const PmeUntransMsg *)
 
void recvNodeAck (PmeAckMsg *)
 
void node_process_untrans (PmeUntransMsg *)
 
void node_process_grid (PmeGridMsg *)
 
void backward_fft ()
 
void send_ungrid (PmeGridMsg *)
 
void send_all_ungrid ()
 
void send_subset_ungrid (int fromIdx, int toIdx)
 
- Public Member Functions inherited from PmePencil< CBase_PmeZPencil >
 PmePencil ()
 
 ~PmePencil ()
 
void base_init (PmePencilInitMsg *msg)
 
void order_init (int nBlocks)
 

Additional Inherited Members

- Public Types inherited from PmePencil< CBase_PmeZPencil >
typedef int AtomicInt
 
- Public Attributes inherited from PmePencil< CBase_PmeZPencil >
PmePencilInitMsgData initdata
 
Lattice lattice
 
PmeReduction evir
 
int sequence
 
AtomicInt imsg
 
AtomicInt imsgb
 
int hasData
 
int offload
 
float * data
 
float * work
 
int * send_order
 
int * needs_reply
 

Detailed Description

Definition at line 4590 of file ComputePme.C.

Constructor & Destructor Documentation

◆ PmeZPencil() [1/2]

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil ( )
inline

Definition at line 4593 of file ComputePme.C.

// Default constructor body: calls __sdag_init() and marks the object non-migratable via setMigratable(false).
4593 { __sdag_init(); setMigratable(false); }

◆ PmeZPencil() [2/2]

PmeZPencil::PmeZPencil ( CkMigrateMessage *  )
inline

Definition at line 4594 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

// Migration-constructor body: same setup as the default constructor, and additionally zeroes the imsg and imsgb message counters.
4594 { __sdag_init(); setMigratable (false); imsg=imsgb=0;}

◆ ~PmeZPencil()

PmeZPencil::~PmeZPencil ( )
inline

Definition at line 4595 of file ComputePme.C.

// Destructor body: in FFTW 3 builds, frees the forward_plans / backward_plans arrays.
// NOTE(review): only the arrays are deleted here; no fftwf_destroy_plan call and no
// release of data/work is visible in this block - confirm whether that happens elsewhere.
4595  {
4596  #ifdef NAMD_FFTW
4597  #ifdef NAMD_FFTW_3
 // Both pointers are set to NULL by fft_init() when CkLoop is not used, so delete[] is a no-op then.
4598  delete [] forward_plans;
4599  delete [] backward_plans;
4600  #endif
4601  #endif
4602  }

Member Function Documentation

◆ backward_fft()

void PmeZPencil::backward_fft ( )

Definition at line 6180 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

// Body of PmeZPencil::backward_fft(): runs the complex-to-real transform over this
// pencil's nx*ny lines held in data, then (FFTCHECK builds only) compares the scaled
// result against the synthetic pattern that forward_fft() writes under FFTCHECK.
6180  {
6181 #ifdef NAMD_FFTW
6182 #ifdef MANUAL_DEBUG_FFTW3
6183  dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6184 #endif
6185 #ifdef NAMD_FFTW_3
6186 #if CMK_SMP && USE_CKLOOP
6187  int useCkLoop = Node::Object()->simParameters->useCkLoop;
 // CkLoop path: taken only when the useCkLoop level reaches the backward-FFT threshold
 // and there are at least two PEs per (x,y) pencil.
6188  if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
6189  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6190  //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
6191  //transform the above loop
6192  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
 // Early return: the post-FFT debug dump, CmiNetworkProgress() and FFTCHECK sections below are skipped on this path.
6193  return;
6194  }
6195 #endif
 // Single plan covering all nx*ny lines (built in fft_init()).
6196  fftwf_execute(backward_plan);
6197 #else
 // FFTW 2 path: nx*ny transforms, complex input stride dim3/2, using work as the output/scratch argument.
6198  rfftwnd_complex_to_real(backward_plan, nx*ny,
6199  (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
6200 #endif
6201 #ifdef MANUAL_DEBUG_FFTW3
6202  dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6203 #endif
6204 
6205 #endif
6206 
6207 #if CMK_BLUEGENEL
6208  CmiNetworkProgress();
6209 #endif
6210 
6211 #ifdef FFTCHECK
 // Self-test: find the element with the largest deviation from the expected pattern and print it.
6212  int dim3 = initdata.grid.dim3;
6213  int K1 = initdata.grid.K1;
6214  int K2 = initdata.grid.K2;
6215  int K3 = initdata.grid.K3;
 // The result is multiplied by 1/(K1*K2*K3) before comparison.
6216  float scale = 1. / (1. * K1 * K2 * K3);
6217  float maxerr = 0.;
6218  float maxstd = 0.;
6219  int mi, mj, mk; mi = mj = mk = -1;
6220  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
6221  const float *d = data;
6222  for ( int i=0; i<nx; ++i ) {
 // d advances by dim3 per (i,j) line; only the first K3 entries of each line are checked.
6223  for ( int j=0; j<ny; ++j, d += dim3 ) {
6224  for ( int k=0; k<K3; ++k ) {
 // Same formula forward_fft() uses to fill data under FFTCHECK.
6225  float std = 10. * (10. * (10. * std_base + i) + j) + k;
6226  float err = scale * d[k] - std;
6227  if ( fabsf(err) > fabsf(maxerr) ) {
6228  maxerr = err;
6229  maxstd = std;
6230  mi = i; mj = j; mk = k;
6231  }
6232  }
6233  }
6234  }
6235  CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
6236  thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
6237 #endif
6238 
6239 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5208
int K3
Definition: PmeBase.h:21
#define CKLOOP_CTRL_PME_BACKWARDFFT
Definition: SimParameters.h:99

◆ fft_init()

void PmeZPencil::fft_init ( )

Definition at line 4793 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, fftwf_malloc, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

4793  {
4794  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
4795  Node *node = nd.ckLocalBranch();
4797 
4798 #if USE_NODE_PAR_RECEIVE
4799  ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
4800 #endif
4801 
4802  int K1 = initdata.grid.K1;
4803  int K2 = initdata.grid.K2;
4804  int K3 = initdata.grid.K3;
4805  int dim3 = initdata.grid.dim3;
4806  int block1 = initdata.grid.block1;
4807  int block2 = initdata.grid.block2;
4808 
4809  nx = block1;
4810  if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
4811  ny = block2;
4812  if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
4813 
4814 #ifdef NAMD_FFTW
4816 
4817  data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
4818  work = new float[dim3];
4819 
4821 
4822 #ifdef NAMD_FFTW_3
4823  /* need array of sizes for the how many */
4824 
4825  int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
4826  int sizeLines=nx*ny;
4827  int planLineSizes[1];
4828  planLineSizes[0]=K3;
4829  int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
4830  int ndimHalf=ndim/2;
4831  forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
4832  (float *) data, NULL, 1,
4833  ndim,
4834  (fftwf_complex *) data, NULL, 1,
4835  ndimHalf,
4836  fftwFlags);
4837 
4838  backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
4839  (fftwf_complex *) data, NULL, 1,
4840  ndimHalf,
4841  (float *) data, NULL, 1,
4842  ndim,
4843  fftwFlags);
4844 #if CMK_SMP && USE_CKLOOP
4845  if(simParams->useCkLoop) {
4846  //How many FFT plans to be created? The grain-size issue!!.
4847  //Currently, I am choosing the min(nx, ny) to be coarse-grain
4848  numPlans = (nx<=ny?nx:ny);
4849  if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
4850  if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
4851  int howmany = sizeLines/numPlans;
4852  forward_plans = new fftwf_plan[numPlans];
4853  backward_plans = new fftwf_plan[numPlans];
4854  for(int i=0; i<numPlans; i++) {
4855  int dimStride = i*ndim*howmany;
4856  int dimHalfStride = i*ndimHalf*howmany;
4857  forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
4858  ((float *)data)+dimStride, NULL, 1,
4859  ndim,
4860  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4861  ndimHalf,
4862  fftwFlags);
4863 
4864  backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
4865  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4866  ndimHalf,
4867  ((float *)data)+dimStride, NULL, 1,
4868  ndim,
4869  fftwFlags);
4870  }
4871  }else
4872 #endif
4873  {
4874  forward_plans = NULL;
4875  backward_plans = NULL;
4876  }
4877 #else
4878  forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
4879  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4880  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4881  backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
4882  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4883  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4884 #endif
4885  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
4886 #else
4887  NAMD_die("Sorry, FFTW must be compiled in to use PME.");
4888 #endif
4889 
4890 #if USE_NODE_PAR_RECEIVE
4891  evir = 0.;
4892  memset(data, 0, sizeof(float) * nx*ny*dim3);
4893 #endif
4894 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:440
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
Definition: Node.h:78
void order_init(int nBlocks)
Definition: ComputePme.C:4554
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
int block1
Definition: PmeBase.h:24
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
int block2
Definition: PmeBase.h:24
void NAMD_die(const char *err_msg)
Definition: common.C:147
#define simParams
Definition: Output.C:129
int K3
Definition: PmeBase.h:21
#define fftwf_malloc
Definition: ComputePme.C:13

◆ forward_fft()

void PmeZPencil::forward_fft ( )

Definition at line 5217 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

5217  {
5218  evir = 0.;
5219 #ifdef FFTCHECK
5220  int dim3 = initdata.grid.dim3;
5221  int K3 = initdata.grid.K3;
5222  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
5223  float *d = data;
5224  for ( int i=0; i<nx; ++i ) {
5225  for ( int j=0; j<ny; ++j, d += dim3 ) {
5226  for ( int k=0; k<dim3; ++k ) {
5227  d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
5228  }
5229  }
5230  }
5231 #endif
5232 #ifdef NAMD_FFTW
5233 #ifdef MANUAL_DEBUG_FFTW3
5234  dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5235 #endif
5236 #ifdef NAMD_FFTW_3
5237 #if CMK_SMP && USE_CKLOOP
5238  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5239  if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
5240  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5241  //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
5242  //transform the above loop
5243  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5244  return;
5245  }
5246 #endif
5247  fftwf_execute(forward_plan);
5248 #else
5249  rfftwnd_real_to_complex(forward_plan, nx*ny,
5250  data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
5251 #endif
5252 #ifdef MANUAL_DEBUG_FFTW3
5253  dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5254 #endif
5255 
5256 #endif
5257 #ifdef ZEROCHECK
5258  int dim3 = initdata.grid.dim3;
5259  int K3 = initdata.grid.K3;
5260  float *d = data;
5261  for ( int i=0; i<nx; ++i ) {
5262  for ( int j=0; j<ny; ++j, d += dim3 ) {
5263  for ( int k=0; k<dim3; ++k ) {
5264  if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
5265  thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
5266  }
5267  }
5268  }
5269 #endif
5270 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
SimParameters * simParameters
Definition: Node.h:181
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5208
int K3
Definition: PmeBase.h:21
#define CKLOOP_CTRL_PME_FORWARDFFT
Definition: SimParameters.h:96

◆ node_process_grid()

void PmeZPencil::node_process_grid ( PmeGridMsg * msg )

Definition at line 6321 of file ComputePme.C.

References ComputePmeMgr::fftw_plan_lock, forward_fft(), PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

// Body of PmeZPencil::node_process_grid(): accumulates one incoming grid message into
// this pencil and, once all expected messages have arrived, runs the forward FFT
// (if any message carried data) and sends the transposed blocks on.
6322 {
6323 #if USE_NODE_PAR_RECEIVE
 // Take fftw_plan_lock for the whole handler; it is released by the CmiUnlock at the end.
6324  CmiLock(ComputePmeMgr::fftw_plan_lock);
6325  CmiMemoryReadFence();
6326 #endif
6327  recv_grid(msg);
 // hasData becomes set as soon as any contributing message carries data.
6328  if(msg->hasData) hasData=msg->hasData;
6329  int limsg;
 // presumably limsg receives the value of imsg before the increment - it is used as this message's slot index.
6330  CmiMemoryAtomicFetchAndInc(imsg,limsg);
 // Keep the message: send_ungrid() later refills and returns it (or deletes it if it had no data).
6331  grid_msgs[limsg] = msg;
6332  // CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);
 // Last expected message for this step.
6333  if(limsg+1 == grid_msgs.size())
6334  {
6335 
6336  if (hasData)
6337  {
6338  forward_fft();
6339  }
 // send_trans() is called even without data; it then sends empty (hasData == 0) messages.
6340  send_trans();
 // Reset the counter for the next step.
6341  imsg=0;
6342  CmiMemoryWriteFence();
6343  // CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6344  }
6345 #if USE_NODE_PAR_RECEIVE
6346  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6347  CmiMemoryWriteFence();
6348 #endif
6349 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:440
int size(void) const
Definition: ResizeArray.h:131
void forward_fft()
Definition: ComputePme.C:5217
void send_trans()
Definition: ComputePme.C:5330
void recv_grid(const PmeGridMsg *)
Definition: ComputePme.C:5166

◆ node_process_untrans()

void PmeZPencil::node_process_untrans ( PmeUntransMsg * msg )

Definition at line 6356 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by recvNodeAck(), and NodePmeMgr::recvZUntrans().

// Body of PmeZPencil::node_process_untrans(): consumes one returning (untransposed)
// block, or a null msg standing in for a data-less acknowledgement, and once zBlocks
// of them have been counted runs the backward FFT, returns the grid messages and
// resets the pencil state for the next step.
6357 {
 // A non-null msg is only legal when this pencil has data, and vice versa.
6358  if ( msg ) {
6359  if ( ! hasData ) NAMD_bug("PmeZPencil::node_process_untrans non-null msg but not hasData");
6360  recv_untrans(msg);
6361  } else if ( hasData ) NAMD_bug("PmeZPencil::node_process_untrans hasData but null msg");
6362 #if USE_NODE_PAR_RECEIVE
6363  CmiMemoryWriteFence();
 // Take fftw_plan_lock for the counting/finalisation below; released by the CmiUnlock at the end.
6364  CmiLock(ComputePmeMgr::fftw_plan_lock);
6365 #endif
6366  int limsg;
 // presumably limsg receives the value of imsgb before the increment.
6367  CmiMemoryAtomicFetchAndInc(imsgb,limsg);
 // Last of the zBlocks expected contributions.
6368  if(limsg+1 == initdata.zBlocks)
6369  {
6370 #if USE_NODE_PAR_RECEIVE
6371  CmiMemoryReadFence();
6372 #endif
6373  if(hasData) {
6374  backward_fft();
6375  }
6376  send_all_ungrid();
 // Reset per-step state and clear the grid for the next accumulation.
6377  hasData=0;
6378  imsgb=0;
6379  evir = 0;
6380  memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3);
6381  CmiMemoryWriteFence();
6382  // CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6383  }
6384 #if USE_NODE_PAR_RECEIVE
6385  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6386 #endif
6387 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:440
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
void backward_fft()
Definition: ComputePme.C:6180
void send_all_ungrid()
Definition: ComputePme.C:6248
void NAMD_bug(const char *err_msg)
Definition: common.C:195
void recv_untrans(const PmeUntransMsg *)
Definition: ComputePme.C:6152

◆ recv_grid()

void PmeZPencil::recv_grid ( const PmeGridMsg * msg )

Definition at line 5166 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

// Body of PmeZPencil::recv_grid(): adds the charge values carried by one grid message
// into this pencil's data array. Messages without data only contribute lattice/sequence.
5166  {
5167 
5168  int dim3 = initdata.grid.dim3;
 // First message of a step: record lattice and sequence from it.
5169  if ( imsg == 0 ) {
5170  lattice = msg->lattice;
5171  sequence = msg->sequence;
5172 #if ! USE_NODE_PAR_RECEIVE
 // Without node-parallel receive the grid is cleared here; otherwise it is cleared
 // in fft_init() and node_process_untrans().
5173  memset(data, 0, sizeof(float)*nx*ny*dim3);
5174 #endif
5175  }
5176 
5177  if ( ! msg->hasData ) return;
5178 
5179  int zlistlen = msg->zlistlen;
5180 #ifdef NAMD_KNL
 // KNL builds copy the z index list into work_zlist, which is declared 64-byte aligned to the compiler.
5181  int * __restrict msg_zlist = msg->zlist;
5182  int * __restrict zlist = (int*)__builtin_assume_aligned(work_zlist.begin(),
5183  64);
5184  for ( int k=0; k<zlistlen; ++k ) {
5185  zlist[k] = msg_zlist[k];
5186  }
5187 #else
5188  int * __restrict zlist = msg->zlist;
5189 #endif
5190  char * __restrict fmsg = msg->fgrid;
5191  float * __restrict qmsg = msg->qgrid;
5192  float * __restrict d = data;
5193  int numGrids = 1; // pencil FFT doesn't support multiple grids
5194  for ( int g=0; g<numGrids; ++g ) {
5195  for ( int i=0; i<nx; ++i ) {
 // d advances by dim3 per (i,j) line; fgrid holds one flag per line.
5196  for ( int j=0; j<ny; ++j, d += dim3 ) {
 // A non-zero flag means the message carries zlistlen values for this line.
5197  if( *(fmsg++) ) {
5198  #pragma ivdep
5199  for ( int k=0; k<zlistlen; ++k ) {
 // Accumulate (+=): values from several messages add up at the z positions listed in zlist.
5200  d[zlist[k]] += *(qmsg++);
5201  }
5202  }
5203  }
5204  }
5205  }
5206 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
int sequence
Definition: ComputePme.C:142
Lattice lattice
Definition: ComputePme.C:144
float * qgrid
Definition: ComputePme.C:150
int * zlist
Definition: ComputePme.C:148
iterator begin(void)
Definition: ResizeArray.h:36
int zlistlen
Definition: ComputePme.C:147
char * fgrid
Definition: ComputePme.C:149

◆ recv_untrans()

void PmeZPencil::recv_untrans ( const PmeUntransMsg * msg )

Definition at line 6152 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

// Body of PmeZPencil::recv_untrans(): copies one returning block of complex values
// from msg->qgrid into its z range of every (i,j) line of data.
6152  {
6153 #if ! USE_NODE_PAR_RECEIVE
6154  if(imsg==0) evir=0.;
6155 #endif
6156 
6157  int block3 = initdata.grid.block3;
6158  int dim3 = initdata.grid.dim3;
 // sourceNode is used as the z-block index kb of the sender.
6159  int kb = msg->sourceNode;
 // The message field named ny is read as the block's extent along z.
6160  int nz = msg->ny;
6161  const float *md = msg->qgrid;
6162  float *d = data;
6163  for ( int i=0; i<nx; ++i ) {
6164 #if CMK_BLUEGENEL
6165  CmiNetworkProgress();
6166 #endif
6167  for ( int j=0; j<ny; ++j, d += dim3 ) {
 // Complex index k runs over [kb*block3, kb*block3+nz).
6168  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
6169 #ifdef ZEROCHECK
6170  if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
6171  thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
6172 #endif
 // Two consecutive floats per complex element (stored at 2*k and 2*k+1).
6173  d[2*k] = *(md++);
6174  d[2*k+1] = *(md++);
6175  }
6176  }
6177  }
6178 }
float * qgrid
Definition: ComputePme.C:180
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
int block3
Definition: PmeBase.h:24

◆ recvNodeAck()

void PmeZPencil::recvNodeAck ( PmeAckMsg * msg )

Definition at line 6351 of file ComputePme.C.

References node_process_untrans().

// Body of PmeZPencil::recvNodeAck(): an acknowledgement carries no grid data, so the
// message is discarded and the arrival is counted through node_process_untrans().
6351  {
6352  delete msg;
 // A null message is the data-less case that node_process_untrans() accepts when hasData is not set.
6353  node_process_untrans(0);
6354 }
void node_process_untrans(PmeUntransMsg *)
Definition: ComputePme.C:6356

◆ send_all_ungrid()

void PmeZPencil::send_all_ungrid ( )

Definition at line 6248 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

// Body of PmeZPencil::send_all_ungrid(): returns every stored grid message to its
// sender, either in parallel through CkLoop or serially through send_subset_ungrid().
6248  {
6249 
6250 #if CMK_SMP && USE_CKLOOP
6251  int useCkLoop = Node::Object()->simParameters->useCkLoop;
 // CkLoop path: taken only when the useCkLoop level reaches the send-untrans threshold
 // and there are at least two PEs per (x,y) pencil.
6252  if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
6253  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6254  //????What's the best value for numChunks?????
 // One chunk per stored message, over the index range [0, grid_msgs.size()-1].
6255  CkLoop_Parallelize(PmeZPencilSendUngrid, 1, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
6256  return;
6257  }
6258 #endif
 // Serial fallback over the same inclusive index range.
6259  send_subset_ungrid(0, grid_msgs.size()-1);
6260 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int size(void) const
Definition: ResizeArray.h:131
SimParameters * simParameters
Definition: Node.h:181
static void PmeZPencilSendUngrid(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:6241
#define CKLOOP_CTRL_PME_SENDUNTRANS
void send_subset_ungrid(int fromIdx, int toIdx)
Definition: ComputePme.C:6262

◆ send_subset_trans()

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5278 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

// Body of PmeZPencil::send_subset_trans(): for send slots fromIdx..toIdx (inclusive),
// packs the corresponding z block of every (i,j) line into a PmeTransMsg and sends it
// to the Y pencil (or to the node manager in node-parallel receive mode).
5278  {
5279  int zBlocks = initdata.zBlocks;
5280  int block3 = initdata.grid.block3;
5281  int dim3 = initdata.grid.dim3;
5282  for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
 // send_order maps the send slot to the destination z-block index.
5283  int kb = send_order[isend];
 // The last z block may be shorter: there are dim3/2 complex values per line.
5284  int nz = block3;
5285  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
 // hd zeroes the payload size when this pencil has no data.
5286  int hd = ( hasData ? 1 : 0 );
5287  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5288  msg->lattice = lattice;
5289  msg->sourceNode = thisIndex.y;
5290  msg->hasData = hasData;
 // Note: the message field named nx is filled with this pencil's ny.
5291  msg->nx = ny;
5292  if ( hasData ) {
5293  float *md = msg->qgrid;
5294  const float *d = data;
5295  for ( int i=0; i<nx; ++i ) {
5296  for ( int j=0; j<ny; ++j, d += dim3 ) {
 // Copy complex elements k in [kb*block3, kb*block3+nz) as float pairs.
5297  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5298  *(md++) = d[2*k];
5299  *(md++) = d[2*k+1];
5300  }
5301  }
5302  }
5303  }
5304  msg->sequence = sequence;
 // The message was allocated with PRIORITY_SIZE priority bits; set them before sending.
5305  SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
5306 
5307  CmiEnableUrgentSend(1);
5308 #if USE_NODE_PAR_RECEIVE
5309  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5310 #if Y_PERSIST
5311  CmiUsePersistentHandle(&trans_handle[isend], 1);
5312 #endif
 // Deliver to the node that hosts the destination Y pencil, as reported by the ym map.
5313  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5314 #if Y_PERSIST
5315  CmiUsePersistentHandle(NULL, 0);
5316 #endif
5317 #else
5318 #if Y_PERSIST
5319  CmiUsePersistentHandle(&trans_handle[isend], 1);
5320 #endif
5321  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5322 #if Y_PERSIST
5323  CmiUsePersistentHandle(NULL, 0);
5324 #endif
5325 #endif
5326  CmiEnableUrgentSend(0);
5327  }
5328 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:241
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
float * qgrid
Definition: ComputePme.C:163
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:157
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:24
CkArrayIndex3D destElem
Definition: ComputePme.C:164
Lattice lattice
Definition: ComputePme.C:160
CProxy_PmePencilMap ym
Definition: ComputePme.C:246
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18

◆ send_subset_ungrid()

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx 
)

Definition at line 6262 of file ComputePme.C.

References send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

// Body of PmeZPencil::send_subset_ungrid(): calls send_ungrid() on the stored grid
// messages with indices fromIdx..toIdx (both inclusive).
6262  {
6263  for (int limsg=fromIdx; limsg <=toIdx; ++limsg ) {
6264  PmeGridMsg *msg = grid_msgs[limsg];
6265  send_ungrid(msg);
6266  }
6267 }
void send_ungrid(PmeGridMsg *)
Definition: ComputePme.C:6269

◆ send_trans()

void PmeZPencil::send_trans ( )

Definition at line 5330 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

5330  {
5331 #if USE_PERSISTENT
5332  if (trans_handle == NULL) setup_persistent();
5333 #endif
5334 #if CMK_SMP && USE_CKLOOP
5335  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5336  if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
5337  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5344  //send_subset_trans(0, initdata.zBlocks-1);
5345  CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
5346  return;
5347  }
5348 #endif
5349  int zBlocks = initdata.zBlocks;
5350  int block3 = initdata.grid.block3;
5351  int dim3 = initdata.grid.dim3;
5352  for ( int isend=0; isend<zBlocks; ++isend ) {
5353  int kb = send_order[isend];
5354  int nz = block3;
5355  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
5356  int hd = ( hasData ? 1 : 0 );
5357  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5358  msg->lattice = lattice;
5359  msg->sourceNode = thisIndex.y;
5360  msg->hasData = hasData;
5361  msg->nx = ny;
5362  if ( hasData ) {
5363  float *md = msg->qgrid;
5364  const float *d = data;
5365  for ( int i=0; i<nx; ++i ) {
5366  for ( int j=0; j<ny; ++j, d += dim3 ) {
5367  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5368  *(md++) = d[2*k];
5369  *(md++) = d[2*k+1];
5370  }
5371  }
5372  }
5373  }
5374  msg->sequence = sequence;
5376 
5377  CmiEnableUrgentSend(1);
5378 #if USE_NODE_PAR_RECEIVE
5379  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5380 #if Y_PERSIST
5381  CmiUsePersistentHandle(&trans_handle[isend], 1);
5382 #endif
5383  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5384 #if Y_PERSIST
5385  CmiUsePersistentHandle(NULL, 0);
5386 #endif
5387 #else
5388 #if Y_PERSIST
5389  CmiUsePersistentHandle(&trans_handle[isend], 1);
5390 #endif
5391  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5392 #if Y_PERSIST
5393  CmiUsePersistentHandle(NULL, 0);
5394 #endif
5395 #endif
5396  CmiEnableUrgentSend(0);
5397  }
5398 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
int dim3
Definition: PmeBase.h:22
SimParameters * simParameters
Definition: Node.h:181
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:241
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
float * qgrid
Definition: ComputePme.C:163
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:157
#define CKLOOP_CTRL_PME_SENDTRANS
Definition: SimParameters.h:97
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:24
static void PmeZPencilSendTrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5273
CkArrayIndex3D destElem
Definition: ComputePme.C:164
Lattice lattice
Definition: ComputePme.C:160
CProxy_PmePencilMap ym
Definition: ComputePme.C:246
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18

◆ send_ungrid()

void PmeZPencil::send_ungrid ( PmeGridMsg * msg )

Definition at line 6269 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

// Body of PmeZPencil::send_ungrid(): sends one stored grid message back to the PE it
// came from. A message without data is deleted and replaced by a PmeAckMsg; otherwise
// its qgrid is refilled from this pencil's data and the same message is returned.
6269  {
6270 
6271 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
6272  const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
6273 #else
6274  const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
6275 #endif
6276 
 // Read the destination before msg is deleted or its sourceNode is overwritten below.
6277  int pe = msg->sourceNode;
6278  if ( ! msg->hasData ) {
6279  delete msg;
6280  PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
6281  SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
6282  CmiEnableUrgentSend(1);
6283  initdata.pmeProxy[pe].recvAck(ackmsg);
6284  CmiEnableUrgentSend(0);
6285  return;
6286  }
6287  if ( ! hasData ) NAMD_bug("PmeZPencil::send_ungrid msg->hasData but not pencil->hasData");
 // sourceNode now identifies this pencil as x * yBlocks + y.
6288  msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
6289  int dim3 = initdata.grid.dim3;
6290  int zlistlen = msg->zlistlen;
6291  int *zlist = msg->zlist;
6292  char *fmsg = msg->fgrid;
6293  float *qmsg = msg->qgrid;
6294  float *d = data;
6295  int numGrids = 1; // pencil FFT doesn't support multiple grids
6296  for ( int g=0; g<numGrids; ++g ) {
6297 #if CMK_BLUEGENEL
6298  CmiNetworkProgress();
6299 #endif
6300  for ( int i=0; i<nx; ++i ) {
 // d advances by dim3 per (i,j) line; fgrid holds one flag per line.
6301  for ( int j=0; j<ny; ++j, d += dim3 ) {
 // Same traversal as recv_grid(), but reading from data into the message.
6302  if( *(fmsg++) ) {
6303  for ( int k=0; k<zlistlen; ++k ) {
6304  *(qmsg++) = d[zlist[k]];
6305  }
6306  }
6307  }
6308  }
6309  }
6310  SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
6311  CmiEnableUrgentSend(1);
6312 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
 // With offload set, the message goes to the node manager of pe's node instead of the PE's own manager.
6313  if ( offload ) {
6314  initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
6315  } else
6316 #endif
6317  initdata.pmeProxy[pe].recvUngrid(msg);
6318  CmiEnableUrgentSend(0);
6319 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4566
#define PME_UNGRID_PRIORITY
Definition: Priorities.h:74
int dim3
Definition: PmeBase.h:22
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:243
#define PME_OFFLOAD_UNGRID_PRIORITY
Definition: Priorities.h:42
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:244
int sourceNode
Definition: ComputePme.C:141
#define PRIORITY_SIZE
Definition: Priorities.h:13
void NAMD_bug(const char *err_msg)
Definition: common.C:195
float * qgrid
Definition: ComputePme.C:150
int * zlist
Definition: ComputePme.C:148
int zlistlen
Definition: ComputePme.C:147
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
char * fgrid
Definition: ComputePme.C:149

The documentation for this class was generated from the following file: