NAMD
Public Member Functions | List of all members
PmeZPencil Class Reference
Inheritance diagram for PmeZPencil:
PmePencil< CBase_PmeZPencil >

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 
 PmeZPencil (CkMigrateMessage *)
 
 ~PmeZPencil ()
 
void fft_init ()
 
void recv_grid (const PmeGridMsg *)
 
void forward_fft ()
 
void send_trans ()
 
void send_subset_trans (int fromIdx, int toIdx)
 
void recv_untrans (const PmeUntransMsg *)
 
void recvNodeAck (PmeAckMsg *)
 
void node_process_untrans (PmeUntransMsg *)
 
void node_process_grid (PmeGridMsg *)
 
void backward_fft ()
 
void send_ungrid (PmeGridMsg *)
 
void send_all_ungrid ()
 
void send_subset_ungrid (int fromIdx, int toIdx)
 
- Public Member Functions inherited from PmePencil< CBase_PmeZPencil >
 PmePencil ()
 
 ~PmePencil ()
 
void base_init (PmePencilInitMsg *msg)
 
void order_init (int nBlocks)
 

Additional Inherited Members

- Public Types inherited from PmePencil< CBase_PmeZPencil >
typedef int AtomicInt
 
- Public Attributes inherited from PmePencil< CBase_PmeZPencil >
PmePencilInitMsgData initdata
 
Lattice lattice
 
PmeReduction evir
 
int sequence
 
AtomicInt imsg
 
AtomicInt imsgb
 
int hasData
 
int offload
 
float * data
 
float * work
 
int * send_order
 
int * needs_reply
 

Detailed Description

Definition at line 4565 of file ComputePme.C.

Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil ( )
inline

Definition at line 4568 of file ComputePme.C.

4568 { __sdag_init(); setMigratable(false); }
PmeZPencil::PmeZPencil ( CkMigrateMessage *  )
inline

Definition at line 4569 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

4569 { __sdag_init(); setMigratable (false); imsg=imsgb=0;}
PmeZPencil::~PmeZPencil ( )
inline

Definition at line 4570 of file ComputePme.C.

4570  {
4571  #ifdef NAMD_FFTW
4572  #ifdef NAMD_FFTW_3
4573  delete [] forward_plans;
4574  delete [] backward_plans;
4575  #endif
4576  #endif
4577  }

Member Function Documentation

void PmeZPencil::backward_fft ( )

Definition at line 6155 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

6155  {
6156 #ifdef NAMD_FFTW
6157 #ifdef MANUAL_DEBUG_FFTW3
6158  dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6159 #endif
6160 #ifdef NAMD_FFTW_3
6161 #if CMK_SMP && USE_CKLOOP
6162  int useCkLoop = Node::Object()->simParameters->useCkLoop;
6163  if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
6164  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6165  //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
6166  //transform the above loop
6167  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
6168  return;
6169  }
6170 #endif
6171  fftwf_execute(backward_plan);
6172 #else
6173  rfftwnd_complex_to_real(backward_plan, nx*ny,
6174  (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
6175 #endif
6176 #ifdef MANUAL_DEBUG_FFTW3
6177  dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6178 #endif
6179 
6180 #endif
6181 
6182 #if CMK_BLUEGENEL
6183  CmiNetworkProgress();
6184 #endif
6185 
6186 #ifdef FFTCHECK
6187  int dim3 = initdata.grid.dim3;
6188  int K1 = initdata.grid.K1;
6189  int K2 = initdata.grid.K2;
6190  int K3 = initdata.grid.K3;
6191  float scale = 1. / (1. * K1 * K2 * K3);
6192  float maxerr = 0.;
6193  float maxstd = 0.;
6194  int mi, mj, mk; mi = mj = mk = -1;
6195  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
6196  const float *d = data;
6197  for ( int i=0; i<nx; ++i ) {
6198  for ( int j=0; j<ny; ++j, d += dim3 ) {
6199  for ( int k=0; k<K3; ++k ) {
6200  float std = 10. * (10. * (10. * std_base + i) + j) + k;
6201  float err = scale * d[k] - std;
6202  if ( fabsf(err) > fabsf(maxerr) ) {
6203  maxerr = err;
6204  maxstd = std;
6205  mi = i; mj = j; mk = k;
6206  }
6207  }
6208  }
6209  }
6210  CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
6211  thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
6212 #endif
6213 
6214 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
int K2
Definition: PmeBase.h:18
SimParameters * simParameters
Definition: Node.h:178
int K1
Definition: PmeBase.h:18
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5183
int K3
Definition: PmeBase.h:18
#define CKLOOP_CTRL_PME_BACKWARDFFT
Definition: SimParameters.h:97
void PmeZPencil::fft_init ( )

Definition at line 4768 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, SimParameters::FFTWEstimate, fftwf_malloc, SimParameters::FFTWPatient, PmePencilInitMsgData::grid, if(), PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

4768  {
4769  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
4770  Node *node = nd.ckLocalBranch();
4771  SimParameters *simParams = node->simParameters;
4772 
4773 #if USE_NODE_PAR_RECEIVE
4774  ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
4775 #endif
4776 
4777  int K1 = initdata.grid.K1;
4778  int K2 = initdata.grid.K2;
4779  int K3 = initdata.grid.K3;
4780  int dim3 = initdata.grid.dim3;
4781  int block1 = initdata.grid.block1;
4782  int block2 = initdata.grid.block2;
4783 
4784  nx = block1;
4785  if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
4786  ny = block2;
4787  if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
4788 
4789 #ifdef NAMD_FFTW
4790  CmiLock(ComputePmeMgr::fftw_plan_lock);
4791 
4792  data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
4793  work = new float[dim3];
4794 
4795  order_init(initdata.zBlocks);
4796 
4797 #ifdef NAMD_FFTW_3
4798  /* need array of sizes for the how many */
4799 
4800  int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
4801  int sizeLines=nx*ny;
4802  int planLineSizes[1];
4803  planLineSizes[0]=K3;
4804  int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
4805  int ndimHalf=ndim/2;
4806  forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
4807  (float *) data, NULL, 1,
4808  ndim,
4809  (fftwf_complex *) data, NULL, 1,
4810  ndimHalf,
4811  fftwFlags);
4812 
4813  backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
4814  (fftwf_complex *) data, NULL, 1,
4815  ndimHalf,
4816  (float *) data, NULL, 1,
4817  ndim,
4818  fftwFlags);
4819 #if CMK_SMP && USE_CKLOOP
4820  if(simParams->useCkLoop) {
4821  //How many FFT plans to be created? The grain-size issue!!.
4822  //Currently, I am choosing the min(nx, ny) to be coarse-grain
4823  numPlans = (nx<=ny?nx:ny);
4824  if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
4825  if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
4826  int howmany = sizeLines/numPlans;
4827  forward_plans = new fftwf_plan[numPlans];
4828  backward_plans = new fftwf_plan[numPlans];
4829  for(int i=0; i<numPlans; i++) {
4830  int dimStride = i*ndim*howmany;
4831  int dimHalfStride = i*ndimHalf*howmany;
4832  forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
4833  ((float *)data)+dimStride, NULL, 1,
4834  ndim,
4835  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4836  ndimHalf,
4837  fftwFlags);
4838 
4839  backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
4840  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4841  ndimHalf,
4842  ((float *)data)+dimStride, NULL, 1,
4843  ndim,
4844  fftwFlags);
4845  }
4846  }else
4847 #endif
4848  {
4849  forward_plans = NULL;
4850  backward_plans = NULL;
4851  }
4852 #else
4853  forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
4854  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4855  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4856  backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
4857  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4858  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4859 #endif
4860  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
4861 #else
4862  NAMD_die("Sorry, FFTW must be compiled in to use PME.");
4863 #endif
4864 
4865 #if USE_NODE_PAR_RECEIVE
4866  evir = 0.;
4867  memset(data, 0, sizeof(float) * nx*ny*dim3);
4868 #endif
4869 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:420
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
Definition: Node.h:78
void order_init(int nBlocks)
Definition: ComputePme.C:4529
int K2
Definition: PmeBase.h:18
SimParameters * simParameters
Definition: Node.h:178
int K1
Definition: PmeBase.h:18
int block1
Definition: PmeBase.h:21
if(ComputeNonbondedUtil::goMethod==2)
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:224
int block2
Definition: PmeBase.h:21
void NAMD_die(const char *err_msg)
Definition: common.C:85
#define simParams
Definition: Output.C:127
int K3
Definition: PmeBase.h:18
#define fftwf_malloc
Definition: ComputePme.C:13
void PmeZPencil::forward_fft ( )

Definition at line 5192 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

5192  {
5193  evir = 0.;
5194 #ifdef FFTCHECK
5195  int dim3 = initdata.grid.dim3;
5196  int K3 = initdata.grid.K3;
5197  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
5198  float *d = data;
5199  for ( int i=0; i<nx; ++i ) {
5200  for ( int j=0; j<ny; ++j, d += dim3 ) {
5201  for ( int k=0; k<dim3; ++k ) {
5202  d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
5203  }
5204  }
5205  }
5206 #endif
5207 #ifdef NAMD_FFTW
5208 #ifdef MANUAL_DEBUG_FFTW3
5209  dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5210 #endif
5211 #ifdef NAMD_FFTW_3
5212 #if CMK_SMP && USE_CKLOOP
5213  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5214  if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
5215  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5216  //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
5217  //transform the above loop
5218  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5219  return;
5220  }
5221 #endif
5222  fftwf_execute(forward_plan);
5223 #else
5224  rfftwnd_real_to_complex(forward_plan, nx*ny,
5225  data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
5226 #endif
5227 #ifdef MANUAL_DEBUG_FFTW3
5228  dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5229 #endif
5230 
5231 #endif
5232 #ifdef ZEROCHECK
5233  int dim3 = initdata.grid.dim3;
5234  int K3 = initdata.grid.K3;
5235  float *d = data;
5236  for ( int i=0; i<nx; ++i ) {
5237  for ( int j=0; j<ny; ++j, d += dim3 ) {
5238  for ( int k=0; k<dim3; ++k ) {
5239  if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
5240  thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
5241  }
5242  }
5243  }
5244 #endif
5245 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
SimParameters * simParameters
Definition: Node.h:178
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5183
int K3
Definition: PmeBase.h:18
#define CKLOOP_CTRL_PME_FORWARDFFT
Definition: SimParameters.h:94
void PmeZPencil::node_process_grid ( PmeGridMsg * msg)

Definition at line 6296 of file ComputePme.C.

References ComputePmeMgr::fftw_plan_lock, forward_fft(), PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< T >::size().

Referenced by NodePmeMgr::recvZGrid().

6297 {
6298 #if USE_NODE_PAR_RECEIVE
6299  CmiLock(ComputePmeMgr::fftw_plan_lock);
6300  CmiMemoryReadFence();
6301 #endif
6302  recv_grid(msg);
6303  if(msg->hasData) hasData=msg->hasData;
6304  int limsg;
6305  CmiMemoryAtomicFetchAndInc(imsg,limsg);
6306  grid_msgs[limsg] = msg;
6307  // CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);
6308  if(limsg+1 == grid_msgs.size())
6309  {
6310 
6311  if (hasData)
6312  {
6313  forward_fft();
6314  }
6315  send_trans();
6316  imsg=0;
6317  CmiMemoryWriteFence();
6318  // CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6319  }
6320 #if USE_NODE_PAR_RECEIVE
6321  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6322  CmiMemoryWriteFence();
6323 #endif
6324 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:420
void forward_fft()
Definition: ComputePme.C:5192
int size(void) const
Definition: ResizeArray.h:127
void send_trans()
Definition: ComputePme.C:5305
void recv_grid(const PmeGridMsg *)
Definition: ComputePme.C:5141
void PmeZPencil::node_process_untrans ( PmeUntransMsg * msg)

Definition at line 6331 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by recvNodeAck(), and NodePmeMgr::recvZUntrans().

6332 {
6333  if ( msg ) {
6334  if ( ! hasData ) NAMD_bug("PmeZPencil::node_process_untrans non-null msg but not hasData");
6335  recv_untrans(msg);
6336  } else if ( hasData ) NAMD_bug("PmeZPencil::node_process_untrans hasData but null msg");
6337 #if USE_NODE_PAR_RECEIVE
6338  CmiMemoryWriteFence();
6339  CmiLock(ComputePmeMgr::fftw_plan_lock);
6340 #endif
6341  int limsg;
6342  CmiMemoryAtomicFetchAndInc(imsgb,limsg);
6343  if(limsg+1 == initdata.zBlocks)
6344  {
6345 #if USE_NODE_PAR_RECEIVE
6346  CmiMemoryReadFence();
6347 #endif
6348  if(hasData) {
6349  backward_fft();
6350  }
6351  send_all_ungrid();
6352  hasData=0;
6353  imsgb=0;
6354  evir = 0;
6355  memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3);
6356  CmiMemoryWriteFence();
6357  // CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6358  }
6359 #if USE_NODE_PAR_RECEIVE
6360  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6361 #endif
6362 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:420
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
void backward_fft()
Definition: ComputePme.C:6155
void send_all_ungrid()
Definition: ComputePme.C:6223
void NAMD_bug(const char *err_msg)
Definition: common.C:129
void recv_untrans(const PmeUntransMsg *)
Definition: ComputePme.C:6127
void PmeZPencil::recv_grid ( const PmeGridMsg * msg)

Definition at line 5141 of file ComputePme.C.

References ResizeArray< T >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

5141  {
5142 
5143  int dim3 = initdata.grid.dim3;
5144  if ( imsg == 0 ) {
5145  lattice = msg->lattice;
5146  sequence = msg->sequence;
5147 #if ! USE_NODE_PAR_RECEIVE
5148  memset(data, 0, sizeof(float)*nx*ny*dim3);
5149 #endif
5150  }
5151 
5152  if ( ! msg->hasData ) return;
5153 
5154  int zlistlen = msg->zlistlen;
5155 #ifdef NAMD_KNL
5156  int * __restrict msg_zlist = msg->zlist;
5157  int * __restrict zlist = work_zlist.begin();
5158  __assume_aligned(zlist,64);
5159  for ( int k=0; k<zlistlen; ++k ) {
5160  zlist[k] = msg_zlist[k];
5161  }
5162 #else
5163  int * __restrict zlist = msg->zlist;
5164 #endif
5165  char * __restrict fmsg = msg->fgrid;
5166  float * __restrict qmsg = msg->qgrid;
5167  float * __restrict d = data;
5168  int numGrids = 1; // pencil FFT doesn't support multiple grids
5169  for ( int g=0; g<numGrids; ++g ) {
5170  for ( int i=0; i<nx; ++i ) {
5171  for ( int j=0; j<ny; ++j, d += dim3 ) {
5172  if( *(fmsg++) ) {
5173  #pragma ivdep
5174  for ( int k=0; k<zlistlen; ++k ) {
5175  d[zlist[k]] += *(qmsg++);
5176  }
5177  }
5178  }
5179  }
5180  }
5181 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
int sequence
Definition: ComputePme.C:122
Lattice lattice
Definition: ComputePme.C:124
float * qgrid
Definition: ComputePme.C:130
int * zlist
Definition: ComputePme.C:128
int zlistlen
Definition: ComputePme.C:127
char * fgrid
Definition: ComputePme.C:129
iterator begin(void)
Definition: ResizeArray.h:36
void PmeZPencil::recv_untrans ( const PmeUntransMsg * msg)

Definition at line 6127 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

6127  {
6128 #if ! USE_NODE_PAR_RECEIVE
6129  if(imsg==0) evir=0.;
6130 #endif
6131 
6132  int block3 = initdata.grid.block3;
6133  int dim3 = initdata.grid.dim3;
6134  int kb = msg->sourceNode;
6135  int nz = msg->ny;
6136  const float *md = msg->qgrid;
6137  float *d = data;
6138  for ( int i=0; i<nx; ++i ) {
6139 #if CMK_BLUEGENEL
6140  CmiNetworkProgress();
6141 #endif
6142  for ( int j=0; j<ny; ++j, d += dim3 ) {
6143  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
6144 #ifdef ZEROCHECK
6145  if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
6146  thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
6147 #endif
6148  d[2*k] = *(md++);
6149  d[2*k+1] = *(md++);
6150  }
6151  }
6152  }
6153 }
float * qgrid
Definition: ComputePme.C:160
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
int block3
Definition: PmeBase.h:21
void PmeZPencil::recvNodeAck ( PmeAckMsg * msg)

Definition at line 6326 of file ComputePme.C.

References node_process_untrans().

6326  {
6327  delete msg;
6328  node_process_untrans(0);
6329 }
void node_process_untrans(PmeUntransMsg *)
Definition: ComputePme.C:6331
void PmeZPencil::send_all_ungrid ( )

Definition at line 6223 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< T >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

6223  {
6224 
6225 #if CMK_SMP && USE_CKLOOP
6226  int useCkLoop = Node::Object()->simParameters->useCkLoop;
6227  if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
6228  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6229  //????What's the best value for numChunks?????
6230  CkLoop_Parallelize(PmeZPencilSendUngrid, 1, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
6231  return;
6232  }
6233 #endif
6234  send_subset_ungrid(0, grid_msgs.size()-1);
6235 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
SimParameters * simParameters
Definition: Node.h:178
static void PmeZPencilSendUngrid(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:6216
#define CKLOOP_CTRL_PME_SENDUNTRANS
Definition: SimParameters.h:98
void send_subset_ungrid(int fromIdx, int toIdx)
Definition: ComputePme.C:6237
int size(void) const
Definition: ResizeArray.h:127
void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5253 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

5253  {
5254  int zBlocks = initdata.zBlocks;
5255  int block3 = initdata.grid.block3;
5256  int dim3 = initdata.grid.dim3;
5257  for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
5258  int kb = send_order[isend];
5259  int nz = block3;
5260  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
5261  int hd = ( hasData ? 1 : 0 );
5262  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5263  msg->lattice = lattice;
5264  msg->sourceNode = thisIndex.y;
5265  msg->hasData = hasData;
5266  msg->nx = ny;
5267  if ( hasData ) {
5268  float *md = msg->qgrid;
5269  const float *d = data;
5270  for ( int i=0; i<nx; ++i ) {
5271  for ( int j=0; j<ny; ++j, d += dim3 ) {
5272  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5273  *(md++) = d[2*k];
5274  *(md++) = d[2*k+1];
5275  }
5276  }
5277  }
5278  }
5279  msg->sequence = sequence;
5280  SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
5281 
5282  CmiEnableUrgentSend(1);
5283 #if USE_NODE_PAR_RECEIVE
5284  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5285 #if Y_PERSIST
5286  CmiUsePersistentHandle(&trans_handle[isend], 1);
5287 #endif
5288  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5289 #if Y_PERSIST
5290  CmiUsePersistentHandle(NULL, 0);
5291 #endif
5292 #else
5293 #if Y_PERSIST
5294  CmiUsePersistentHandle(&trans_handle[isend], 1);
5295 #endif
5296  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5297 #if Y_PERSIST
5298  CmiUsePersistentHandle(NULL, 0);
5299 #endif
5300 #endif
5301  CmiEnableUrgentSend(0);
5302  }
5303 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:221
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:224
float * qgrid
Definition: ComputePme.C:143
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:137
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:21
CkArrayIndex3D destElem
Definition: ComputePme.C:144
Lattice lattice
Definition: ComputePme.C:140
CProxy_PmePencilMap ym
Definition: ComputePme.C:226
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx 
)

Definition at line 6237 of file ComputePme.C.

References send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

6237  {
6238  for (int limsg=fromIdx; limsg <=toIdx; ++limsg ) {
6239  PmeGridMsg *msg = grid_msgs[limsg];
6240  send_ungrid(msg);
6241  }
6242 }
void send_ungrid(PmeGridMsg *)
Definition: ComputePme.C:6244
void PmeZPencil::send_trans ( )

Definition at line 5305 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

5305  {
5306 #if USE_PERSISTENT
5307  if (trans_handle == NULL) setup_persistent();
5308 #endif
5309 #if CMK_SMP && USE_CKLOOP
5310  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5311  if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
5312  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5319  //send_subset_trans(0, initdata.zBlocks-1);
5320  CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
5321  return;
5322  }
5323 #endif
5324  int zBlocks = initdata.zBlocks;
5325  int block3 = initdata.grid.block3;
5326  int dim3 = initdata.grid.dim3;
5327  for ( int isend=0; isend<zBlocks; ++isend ) {
5328  int kb = send_order[isend];
5329  int nz = block3;
5330  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
5331  int hd = ( hasData ? 1 : 0 );
5332  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5333  msg->lattice = lattice;
5334  msg->sourceNode = thisIndex.y;
5335  msg->hasData = hasData;
5336  msg->nx = ny;
5337  if ( hasData ) {
5338  float *md = msg->qgrid;
5339  const float *d = data;
5340  for ( int i=0; i<nx; ++i ) {
5341  for ( int j=0; j<ny; ++j, d += dim3 ) {
5342  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5343  *(md++) = d[2*k];
5344  *(md++) = d[2*k+1];
5345  }
5346  }
5347  }
5348  }
5349  msg->sequence = sequence;
5350  SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
5351 
5352  CmiEnableUrgentSend(1);
5353 #if USE_NODE_PAR_RECEIVE
5354  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5355 #if Y_PERSIST
5356  CmiUsePersistentHandle(&trans_handle[isend], 1);
5357 #endif
5358  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5359 #if Y_PERSIST
5360  CmiUsePersistentHandle(NULL, 0);
5361 #endif
5362 #else
5363 #if Y_PERSIST
5364  CmiUsePersistentHandle(&trans_handle[isend], 1);
5365 #endif
5366  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5367 #if Y_PERSIST
5368  CmiUsePersistentHandle(NULL, 0);
5369 #endif
5370 #endif
5371  CmiEnableUrgentSend(0);
5372  }
5373 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
int dim3
Definition: PmeBase.h:19
SimParameters * simParameters
Definition: Node.h:178
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:221
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:224
float * qgrid
Definition: ComputePme.C:143
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:137
#define CKLOOP_CTRL_PME_SENDTRANS
Definition: SimParameters.h:95
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:21
static void PmeZPencilSendTrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5248
CkArrayIndex3D destElem
Definition: ComputePme.C:144
Lattice lattice
Definition: ComputePme.C:140
CProxy_PmePencilMap ym
Definition: ComputePme.C:226
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
void PmeZPencil::send_ungrid ( PmeGridMsg * msg)

Definition at line 6244 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

6244  {
6245 
6246 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
6247  const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
6248 #else
6249  const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
6250 #endif
6251 
6252  int pe = msg->sourceNode;
6253  if ( ! msg->hasData ) {
6254  delete msg;
6255  PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
6256  SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
6257  CmiEnableUrgentSend(1);
6258  initdata.pmeProxy[pe].recvAck(ackmsg);
6259  CmiEnableUrgentSend(0);
6260  return;
6261  }
6262  if ( ! hasData ) NAMD_bug("PmeZPencil::send_ungrid msg->hasData but not pencil->hasData");
6263  msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
6264  int dim3 = initdata.grid.dim3;
6265  int zlistlen = msg->zlistlen;
6266  int *zlist = msg->zlist;
6267  char *fmsg = msg->fgrid;
6268  float *qmsg = msg->qgrid;
6269  float *d = data;
6270  int numGrids = 1; // pencil FFT doesn't support multiple grids
6271  for ( int g=0; g<numGrids; ++g ) {
6272 #if CMK_BLUEGENEL
6273  CmiNetworkProgress();
6274 #endif
6275  for ( int i=0; i<nx; ++i ) {
6276  for ( int j=0; j<ny; ++j, d += dim3 ) {
6277  if( *(fmsg++) ) {
6278  for ( int k=0; k<zlistlen; ++k ) {
6279  *(qmsg++) = d[zlist[k]];
6280  }
6281  }
6282  }
6283  }
6284  }
6285  SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
6286  CmiEnableUrgentSend(1);
6287 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
6288  if ( offload ) {
6289  initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
6290  } else
6291 #endif
6292  initdata.pmeProxy[pe].recvUngrid(msg);
6293  CmiEnableUrgentSend(0);
6294 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4541
#define PME_UNGRID_PRIORITY
Definition: Priorities.h:74
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:223
if(ComputeNonbondedUtil::goMethod==2)
#define PME_OFFLOAD_UNGRID_PRIORITY
Definition: Priorities.h:42
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:224
int sourceNode
Definition: ComputePme.C:121
#define PRIORITY_SIZE
Definition: Priorities.h:13
void NAMD_bug(const char *err_msg)
Definition: common.C:129
gridSize y
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
gridSize x
void send_ungrid(PmeGridMsg *)
Definition: ComputePme.C:6244
void recvAck(DataMessage *dmsg)
Definition: DataExchanger.C:99
for(int i=0;i< n1;++i)

The documentation for this class was generated from the following file: