PmeZPencil Class Reference

Inheritance diagram for PmeZPencil:

PmePencil< CBase_PmeZPencil > CBase_PmeZPencil List of all members.

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 PmeZPencil (CkMigrateMessage *)
 ~PmeZPencil ()
void fft_init ()
void recv_grid (const PmeGridMsg *)
void forward_fft ()
void send_trans ()
void send_subset_trans (int fromIdx, int toIdx)
void recv_untrans (const PmeUntransMsg *)
void node_process_untrans (PmeUntransMsg *)
void node_process_grid (PmeGridMsg *)
void backward_fft ()
void send_ungrid (PmeGridMsg *)
void send_all_ungrid ()
void send_subset_ungrid (int fromIdx, int toIdx, int specialIdx)

Detailed Description

Definition at line 4568 of file ComputePme.C.


Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil (  )  [inline]

Definition at line 4571 of file ComputePme.C.

04571 { __sdag_init(); setMigratable(false); }

PmeZPencil::PmeZPencil ( CkMigrateMessage *   )  [inline]

Definition at line 4572 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

04572 { __sdag_init();  setMigratable (false); imsg=imsgb=0;}

PmeZPencil::~PmeZPencil (  )  [inline]

Definition at line 4573 of file ComputePme.C.

04573                       {
04574         #ifdef NAMD_FFTW
04575         #ifdef NAMD_FFTW_3
04576                 delete [] forward_plans;
04577                 delete [] backward_plans;
04578         #endif
04579         #endif
04580         }


Member Function Documentation

void PmeZPencil::backward_fft (  ) 

Definition at line 6274 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06274                               {
06275 #ifdef NAMD_FFTW
06276 #ifdef MANUAL_DEBUG_FFTW3
06277   dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06278 #endif
06279 #ifdef NAMD_FFTW_3
06280 #if     CMK_SMP && USE_CKLOOP
06281   int useCkLoop = Node::Object()->simParameters->useCkLoop;
06282   if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
06283      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06284           //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
06285           //transform the above loop
06286           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
06287           return;
06288   }
06289 #endif
06290   fftwf_execute(backward_plan);
06291 #else
06292   rfftwnd_complex_to_real(backward_plan, nx*ny,
06293             (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
06294 #endif
06295 #ifdef MANUAL_DEBUG_FFTW3
06296   dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06297 #endif
06298 
06299 #endif
06300   
06301 #if CMK_BLUEGENEL
06302   CmiNetworkProgress();
06303 #endif
06304 
06305 #ifdef FFTCHECK
06306   int dim3 = initdata.grid.dim3;
06307   int K1 = initdata.grid.K1;
06308   int K2 = initdata.grid.K2;
06309   int K3 = initdata.grid.K3;
06310   float scale = 1. / (1. * K1 * K2 * K3);
06311   float maxerr = 0.;
06312   float maxstd = 0.;
06313   int mi, mj, mk;  mi = mj = mk = -1;
06314   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
06315   const float *d = data;
06316   for ( int i=0; i<nx; ++i ) {
06317    for ( int j=0; j<ny; ++j, d += dim3 ) {
06318     for ( int k=0; k<K3; ++k ) {
06319       float std = 10. * (10. * (10. * std_base + i) + j) + k;
06320       float err = scale * d[k] - std;
06321       if ( fabsf(err) > fabsf(maxerr) ) {
06322         maxerr = err;
06323         maxstd = std;
06324         mi = i;  mj = j;  mk = k;
06325       }
06326     }
06327    }
06328   }
06329   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
06330                 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
06331 #endif
06332 
06333 }

void PmeZPencil::fft_init (  ) 

Definition at line 4767 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, fftwf_malloc, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

04767                           {
04768   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
04769   Node *node = nd.ckLocalBranch();
04770   SimParameters *simParams = node->simParameters;
04771 
04772 #if USE_NODE_PAR_RECEIVE
04773   ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
04774 #endif
04775 
04776   int K1 = initdata.grid.K1;
04777   int K2 = initdata.grid.K2;
04778   int K3 = initdata.grid.K3;
04779   int dim3 = initdata.grid.dim3;
04780   int block1 = initdata.grid.block1;
04781   int block2 = initdata.grid.block2;
04782 
04783   nx = block1;
04784   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
04785   ny = block2;
04786   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
04787 
04788 #ifdef NAMD_FFTW
04789   CmiLock(ComputePmeMgr::fftw_plan_lock);
04790 
04791   data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
04792   work = new float[dim3];
04793 
04794   order_init(initdata.zBlocks);
04795 
04796 #ifdef NAMD_FFTW_3
04797   /* need array of sizes for the how many */
04798 
04799   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
04800   int sizeLines=nx*ny;
04801   int planLineSizes[1];
04802   planLineSizes[0]=K3;
04803   int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
04804   int ndimHalf=ndim/2;
04805   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
04806                                          (float *) data, NULL, 1, 
04807                                          ndim,
04808                                          (fftwf_complex *) data, NULL, 1,
04809                                          ndimHalf,
04810                                          fftwFlags);
04811 
04812   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
04813                                           (fftwf_complex *) data, NULL, 1, 
04814                                           ndimHalf,
04815                                           (float *) data, NULL, 1, 
04816                                           ndim,
04817                                           fftwFlags);
04818 #if     CMK_SMP && USE_CKLOOP
04819   if(simParams->useCkLoop) {
04820           //How many FFT plans to be created? The grain-size issue!!.
04821           //Currently, I am choosing the min(nx, ny) to be coarse-grain
04822           numPlans = (nx<=ny?nx:ny);
04823           if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
04824           if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
04825           int howmany = sizeLines/numPlans;
04826           forward_plans = new fftwf_plan[numPlans];
04827           backward_plans = new fftwf_plan[numPlans];
04828           for(int i=0; i<numPlans; i++) {
04829                   int dimStride = i*ndim*howmany;
04830                   int dimHalfStride = i*ndimHalf*howmany;
04831                   forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
04832                                                                                                          ((float *)data)+dimStride, NULL, 1,
04833                                                                                                          ndim,
04834                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04835                                                                                                          ndimHalf,
04836                                                                                                          fftwFlags);
04837 
04838                   backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
04839                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04840                                                                                                          ndimHalf,
04841                                                                                                          ((float *)data)+dimStride, NULL, 1,
04842                                                                                                          ndim,
04843                                                                                                          fftwFlags);
04844           }
04845   }else 
04846 #endif 
04847   {
04848           forward_plans = NULL;
04849           backward_plans = NULL;
04850   }
04851 #else
04852   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
04853         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04854         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04855   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
04856         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04857         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04858 #endif
04859   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
04860 #else
04861   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
04862 #endif
04863 
04864 #if USE_NODE_PAR_RECEIVE
04865     evir = 0.;
04866     memset(data, 0, sizeof(float) * nx*ny*dim3);
04867 #endif
04868 }

void PmeZPencil::forward_fft (  ) 

Definition at line 5184 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

05184                              {
05185   evir = 0.;
05186 #ifdef FFTCHECK
05187   int dim3 = initdata.grid.dim3;
05188   int K3 = initdata.grid.K3;
05189   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
05190   float *d = data;
05191   for ( int i=0; i<nx; ++i ) {
05192    for ( int j=0; j<ny; ++j, d += dim3 ) {
05193     for ( int k=0; k<dim3; ++k ) {
05194       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
05195     }
05196    }
05197   }
05198 #endif
05199 #ifdef NAMD_FFTW
05200 #ifdef MANUAL_DEBUG_FFTW3
05201   dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05202 #endif
05203 #ifdef NAMD_FFTW_3
05204 #if     CMK_SMP && USE_CKLOOP
05205   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05206   if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
05207      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05208           //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
05209           //transform the above loop
05210           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05211           return;
05212   }
05213 #endif
05214   fftwf_execute(forward_plan);
05215 #else
05216   rfftwnd_real_to_complex(forward_plan, nx*ny,
05217         data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
05218 #endif
05219 #ifdef MANUAL_DEBUG_FFTW3
05220   dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05221 #endif
05222 
05223 #endif
05224 #ifdef ZEROCHECK
05225   int dim3 = initdata.grid.dim3;
05226   int K3 = initdata.grid.K3;
05227   float *d = data;
05228   for ( int i=0; i<nx; ++i ) {
05229    for ( int j=0; j<ny; ++j, d += dim3 ) {
05230     for ( int k=0; k<dim3; ++k ) {
05231       if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
05232         thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
05233     }
05234    }
05235   }
05236 #endif
05237 }

void PmeZPencil::node_process_grid ( PmeGridMsg  ) 

Definition at line 6445 of file ComputePme.C.

References forward_fft(), PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

06446 {
06447 #if USE_NODE_PAR_RECEIVE
06448   CmiLock(ComputePmeMgr::fftw_plan_lock);
06449   CmiMemoryReadFence();
06450 #endif
06451   recv_grid(msg);
06452   if(msg->hasData) hasData=msg->hasData;
06453   int limsg;
06454   CmiMemoryAtomicFetchAndInc(imsg,limsg);
06455   grid_msgs[limsg] = msg;
06456   //  CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);      
06457   if(limsg+1 == grid_msgs.size())
06458     {
06459 
06460       if (hasData)
06461         {
06462           forward_fft();
06463         }
06464       send_trans();
06465       imsg=0;
06466       CmiMemoryWriteFence();
06467       //      CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06468     }
06469 #if USE_NODE_PAR_RECEIVE
06470   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06471   CmiMemoryWriteFence();
06472 #endif
06473 }

void PmeZPencil::node_process_untrans ( PmeUntransMsg  ) 

Definition at line 6475 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by NodePmeMgr::recvZUntrans().

06476 {
06477   recv_untrans(msg);
06478 #if USE_NODE_PAR_RECEIVE
06479   CmiMemoryWriteFence();
06480   CmiLock(ComputePmeMgr::fftw_plan_lock);
06481 #endif    
06482   int limsg;
06483   CmiMemoryAtomicFetchAndInc(imsgb,limsg);
06484   if(limsg+1 == initdata.zBlocks)
06485     {
06486 #if USE_NODE_PAR_RECEIVE
06487       CmiMemoryReadFence();
06488 #endif    
06489       if(hasData) // maybe this should be an assert
06490         {
06491           backward_fft();
06492         }
06493         
06494         send_all_ungrid();
06495     /*  int send_evir = 1;
06496       // TODO: this part should use Chao's output parallelization
06497       for ( limsg=0; limsg < grid_msgs.size(); ++limsg ) {
06498         PmeGridMsg *omsg = grid_msgs[limsg];
06499         if ( omsg->hasData ) {
06500           if ( send_evir ) {
06501             omsg->evir[0] = evir;
06502             send_evir = 0;
06503           } else {
06504             omsg->evir[0] = 0.;
06505           }
06506         }
06507         send_ungrid(omsg);
06508       } */
06509       imsgb=0;
06510       evir = 0;
06511       memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3); 
06512       CmiMemoryWriteFence();
06513       //      CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06514     }
06515 #if USE_NODE_PAR_RECEIVE
06516   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06517 #endif
06518 }

void PmeZPencil::recv_grid ( const PmeGridMsg  ) 

Definition at line 5133 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

05133                                                 {
05134 
05135   int dim3 = initdata.grid.dim3;
05136   if ( imsg == 0 ) {
05137     lattice = msg->lattice;
05138     sequence = msg->sequence;
05139 #if ! USE_NODE_PAR_RECEIVE
05140     memset(data, 0, sizeof(float)*nx*ny*dim3);
05141 #endif
05142   }
05143 
05144   if ( ! msg->hasData ) return;
05145 
05146   int zlistlen = msg->zlistlen;
05147 #ifdef NAMD_KNL
05148   int * __restrict msg_zlist = msg->zlist;
05149   int * __restrict zlist = work_zlist.begin();
05150   __assume_aligned(zlist,64);
05151   for ( int k=0; k<zlistlen; ++k ) {
05152     zlist[k] = msg_zlist[k];
05153   }
05154 #else
05155   int * __restrict zlist = msg->zlist;
05156 #endif
05157   char * __restrict fmsg = msg->fgrid;
05158   float * __restrict qmsg = msg->qgrid;
05159   float * __restrict d = data;
05160   int numGrids = 1;  // pencil FFT doesn't support multiple grids
05161   for ( int g=0; g<numGrids; ++g ) {
05162     for ( int i=0; i<nx; ++i ) {
05163      for ( int j=0; j<ny; ++j, d += dim3 ) {
05164       if( *(fmsg++) ) {
05165         #pragma ivdep
05166         for ( int k=0; k<zlistlen; ++k ) {
05167           d[zlist[k]] += *(qmsg++);
05168         }
05169       }
05170      }
05171     }
05172   }
05173 }

void PmeZPencil::recv_untrans ( const PmeUntransMsg  ) 

Definition at line 6246 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

06246                                                       {
06247 #if ! USE_NODE_PAR_RECEIVE
06248     if(imsg==0) evir=0.;
06249 #endif
06250 
06251   int block3 = initdata.grid.block3;
06252   int dim3 = initdata.grid.dim3;
06253   int kb = msg->sourceNode;
06254   int nz = msg->ny;
06255   const float *md = msg->qgrid;
06256   float *d = data;
06257   for ( int i=0; i<nx; ++i ) {
06258 #if CMK_BLUEGENEL
06259     CmiNetworkProgress();
06260 #endif   
06261     for ( int j=0; j<ny; ++j, d += dim3 ) {
06262       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
06263 #ifdef ZEROCHECK
06264         if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
06265                                     thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
06266 #endif
06267         d[2*k] = *(md++);
06268         d[2*k+1] = *(md++);
06269       }
06270     }
06271   }
06272 }

void PmeZPencil::send_all_ungrid (  ) 

Definition at line 6343 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06343                                  {
06344 /* 
06345 //Original code: the transformation is to first extract the msg 
06346 //idx that will has evir value set. -Chao Mei  
06347         int send_evir = 1;
06348         for (int imsg=0; imsg < grid_msgs.size(); ++imsg ) {
06349                 PmeGridMsg *msg = grid_msgs[imsg];
06350                 if ( msg->hasData ) {
06351                         if ( send_evir ) {
06352                                 msg->evir[0] = evir;
06353                                 send_evir = 0;
06354                         } else {
06355                                 msg->evir[0] = 0.;
06356                         }
06357                 }
06358                 send_ungrid(msg);
06359         }
06360 */
06361         int evirIdx = 0;
06362         for(int imsg=0; imsg<grid_msgs.size(); imsg++) {
06363                 if(grid_msgs[imsg]->hasData) {
06364                         evirIdx = imsg;
06365                         break;
06366                 }
06367         }
06368 
06369 #if     CMK_SMP && USE_CKLOOP
06370         int useCkLoop = Node::Object()->simParameters->useCkLoop;
06371         if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
06372            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06373                 //????What's the best value for numChunks?????
06374 #if USE_NODE_PAR_RECEIVE        
06375                 //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 1); //has to sync
06376                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
06377 #else
06378         //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 0); //not sync
06379                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 0); //not sync
06380 #endif        
06381                 return;
06382         }
06383 #endif
06384         send_subset_ungrid(0, grid_msgs.size()-1, evirIdx);
06385 }

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5245 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

05245                                                         {
05246         int zBlocks = initdata.zBlocks;
05247         int block3 = initdata.grid.block3;
05248         int dim3 = initdata.grid.dim3;
05249         for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
05250           int kb = send_order[isend];
05251           int nz = block3;
05252           if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05253           int hd = ( hasData ? 1 : 0 );
05254           PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05255           msg->lattice = lattice;
05256           msg->sourceNode = thisIndex.y;
05257           msg->hasData = hasData;
05258           msg->nx = ny;
05259          if ( hasData ) {
05260           float *md = msg->qgrid;
05261           const float *d = data;
05262           for ( int i=0; i<nx; ++i ) {
05263            for ( int j=0; j<ny; ++j, d += dim3 ) {
05264                 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05265                   *(md++) = d[2*k];
05266                   *(md++) = d[2*k+1];
05267                 }
05268            }
05269           }
05270          }
05271           msg->sequence = sequence;
05272           SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05273 
05274     CmiEnableUrgentSend(1);
05275 #if USE_NODE_PAR_RECEIVE
05276       msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05277 #if Y_PERSIST 
05278       CmiUsePersistentHandle(&trans_handle[isend], 1);
05279 #endif
05280       initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05281 #if Y_PERSIST 
05282       CmiUsePersistentHandle(NULL, 0);
05283 #endif    
05284 #else
05285 #if Y_PERSIST 
05286       CmiUsePersistentHandle(&trans_handle[isend], 1);
05287 #endif
05288       initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05289 #if Y_PERSIST 
05290       CmiUsePersistentHandle(NULL, 0);
05291 #endif    
05292 #endif
05293     CmiEnableUrgentSend(0);
05294     }
05295 }

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx,
int  specialIdx 
)

Definition at line 6387 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

06387                                                                          {
06388         for (int imsg=fromIdx; imsg <=toIdx; ++imsg ) {
06389                 PmeGridMsg *msg = grid_msgs[imsg];
06390                 send_ungrid(msg);
06391         }
06392 }

void PmeZPencil::send_trans (  ) 

Definition at line 5297 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

05297                             {
05298 #if USE_PERSISTENT
05299     if (trans_handle == NULL) setup_persistent();
05300 #endif
05301 #if     CMK_SMP && USE_CKLOOP
05302         int useCkLoop = Node::Object()->simParameters->useCkLoop;
05303         if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
05304            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05311                 //send_subset_trans(0, initdata.zBlocks-1);
05312                 CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
05313                 return;
05314         }
05315 #endif
05316   int zBlocks = initdata.zBlocks;
05317   int block3 = initdata.grid.block3;
05318   int dim3 = initdata.grid.dim3;
05319   for ( int isend=0; isend<zBlocks; ++isend ) {
05320     int kb = send_order[isend];
05321     int nz = block3;
05322     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05323     int hd = ( hasData ? 1 : 0 );
05324     PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05325     msg->lattice = lattice;
05326     msg->sourceNode = thisIndex.y;
05327     msg->hasData = hasData;
05328     msg->nx = ny;
05329    if ( hasData ) {
05330     float *md = msg->qgrid;
05331     const float *d = data;
05332     for ( int i=0; i<nx; ++i ) {
05333      for ( int j=0; j<ny; ++j, d += dim3 ) {
05334       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05335         *(md++) = d[2*k];
05336         *(md++) = d[2*k+1];
05337       }
05338      }
05339     }
05340    }
05341     msg->sequence = sequence;
05342     SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05343 
05344     CmiEnableUrgentSend(1);
05345 #if USE_NODE_PAR_RECEIVE
05346     msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05347 #if Y_PERSIST 
05348     CmiUsePersistentHandle(&trans_handle[isend], 1);
05349 #endif
05350     initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05351 #if Y_PERSIST 
05352     CmiUsePersistentHandle(NULL, 0);
05353 #endif    
05354 #else
05355 #if Y_PERSIST 
05356     CmiUsePersistentHandle(&trans_handle[isend], 1);
05357 #endif
05358     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05359 #if Y_PERSIST 
05360     CmiUsePersistentHandle(NULL, 0);
05361 #endif    
05362 #endif
05363     CmiEnableUrgentSend(0);
05364   }
05365 }

void PmeZPencil::send_ungrid ( PmeGridMsg  ) 

Definition at line 6394 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

06394                                             {
06395 
06396 #ifdef NAMD_CUDA
06397   const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
06398 #else
06399   const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
06400 #endif
06401 
06402   int pe = msg->sourceNode;
06403   if ( ! msg->hasData ) {
06404     delete msg;
06405     PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
06406     SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
06407     CmiEnableUrgentSend(1);
06408     initdata.pmeProxy[pe].recvAck(ackmsg);
06409     CmiEnableUrgentSend(0);
06410     return;
06411   }
06412   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
06413   int dim3 = initdata.grid.dim3;
06414   int zlistlen = msg->zlistlen;
06415   int *zlist = msg->zlist;
06416   char *fmsg = msg->fgrid;
06417   float *qmsg = msg->qgrid;
06418   float *d = data;
06419   int numGrids = 1;  // pencil FFT doesn't support multiple grids
06420   for ( int g=0; g<numGrids; ++g ) {
06421 #if CMK_BLUEGENEL
06422     CmiNetworkProgress();
06423 #endif    
06424     for ( int i=0; i<nx; ++i ) {
06425       for ( int j=0; j<ny; ++j, d += dim3 ) {
06426         if( *(fmsg++) ) {
06427           for ( int k=0; k<zlistlen; ++k ) {
06428             *(qmsg++) = d[zlist[k]];
06429           }
06430         }
06431       }
06432     }
06433   }
06434   SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
06435     CmiEnableUrgentSend(1);
06436 #ifdef NAMD_CUDA
06437     if ( offload ) {
06438       initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
06439     } else
06440 #endif
06441   initdata.pmeProxy[pe].recvUngrid(msg);
06442     CmiEnableUrgentSend(0);
06443 }


The documentation for this class was generated from the following file:
Generated on Fri Jun 22 01:17:21 2018 for NAMD by  doxygen 1.4.7