PmeZPencil Class Reference

Inheritance diagram for PmeZPencil:

Base classes: PmePencil< CBase_PmeZPencil >, CBase_PmeZPencil.

List of all members.

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 PmeZPencil (CkMigrateMessage *)
 ~PmeZPencil ()
void fft_init ()
void recv_grid (const PmeGridMsg *)
void forward_fft ()
void send_trans ()
void send_subset_trans (int fromIdx, int toIdx)
void recv_untrans (const PmeUntransMsg *)
void node_process_untrans (PmeUntransMsg *)
void node_process_grid (PmeGridMsg *)
void backward_fft ()
void send_ungrid (PmeGridMsg *)
void send_all_ungrid ()
void send_subset_ungrid (int fromIdx, int toIdx, int specialIdx)

Detailed Description

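Z-direction pencil of NAMD's pencil-decomposed PME FFT. Per the listings below, it accumulates incoming charge-grid messages into its local data array (recv_grid), runs the real-to-complex FFT of length K3 along z (forward_fft), and sends the transformed blocks to the Y pencils (send_trans). On the return path it unpacks the data coming back (recv_untrans), runs the complex-to-real FFT (backward_fft), and returns grid values to the original senders (send_ungrid).

Definition at line 4606 of file ComputePme.C.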


Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil (  )  [inline]

Definition at line 4609 of file ComputePme.C.

04609 { __sdag_init(); setMigratable(false); }

PmeZPencil::PmeZPencil ( CkMigrateMessage *   )  [inline]

Definition at line 4610 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

04610 { __sdag_init();  setMigratable (false); imsg=imsgb=0;}

PmeZPencil::~PmeZPencil (  )  [inline]

Definition at line 4611 of file ComputePme.C.

04611                       {
04612         #ifdef NAMD_FFTW
04613         #ifdef NAMD_FFTW_3
04614                 delete [] forward_plans;
04615                 delete [] backward_plans;
04616         #endif
04617         #endif
04618         }


Member Function Documentation

void PmeZPencil::backward_fft (  ) 

Definition at line 6312 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06312                               {
06313 #ifdef NAMD_FFTW
06314 #ifdef MANUAL_DEBUG_FFTW3
06315   dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06316 #endif
06317 #ifdef NAMD_FFTW_3
06318 #if     CMK_SMP && USE_CKLOOP
06319   int useCkLoop = Node::Object()->simParameters->useCkLoop;
06320   if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
06321      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06322           //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
06323           //transform the above loop
06324           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
06325           return;
06326   }
06327 #endif
06328   fftwf_execute(backward_plan);
06329 #else
06330   rfftwnd_complex_to_real(backward_plan, nx*ny,
06331             (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
06332 #endif
06333 #ifdef MANUAL_DEBUG_FFTW3
06334   dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06335 #endif
06336 
06337 #endif
06338   
06339 #if CMK_BLUEGENEL
06340   CmiNetworkProgress();
06341 #endif
06342 
06343 #ifdef FFTCHECK
06344   int dim3 = initdata.grid.dim3;
06345   int K1 = initdata.grid.K1;
06346   int K2 = initdata.grid.K2;
06347   int K3 = initdata.grid.K3;
06348   float scale = 1. / (1. * K1 * K2 * K3);
06349   float maxerr = 0.;
06350   float maxstd = 0.;
06351   int mi, mj, mk;  mi = mj = mk = -1;
06352   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
06353   const float *d = data;
06354   for ( int i=0; i<nx; ++i ) {
06355    for ( int j=0; j<ny; ++j, d += dim3 ) {
06356     for ( int k=0; k<K3; ++k ) {
06357       float std = 10. * (10. * (10. * std_base + i) + j) + k;
06358       float err = scale * d[k] - std;
06359       if ( fabsf(err) > fabsf(maxerr) ) {
06360         maxerr = err;
06361         maxstd = std;
06362         mi = i;  mj = j;  mk = k;
06363       }
06364     }
06365    }
06366   }
06367   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
06368                 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
06369 #endif
06370 
06371 }

void PmeZPencil::fft_init (  ) 

Definition at line 4805 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, fftwf_malloc, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

04805                           {
04806   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
04807   Node *node = nd.ckLocalBranch();
04808   SimParameters *simParams = node->simParameters;
04809 
04810 #if USE_NODE_PAR_RECEIVE
04811   ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
04812 #endif
04813 
04814   int K1 = initdata.grid.K1;
04815   int K2 = initdata.grid.K2;
04816   int K3 = initdata.grid.K3;
04817   int dim3 = initdata.grid.dim3;
04818   int block1 = initdata.grid.block1;
04819   int block2 = initdata.grid.block2;
04820 
04821   nx = block1;
04822   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
04823   ny = block2;
04824   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
04825 
04826 #ifdef NAMD_FFTW
04827   CmiLock(ComputePmeMgr::fftw_plan_lock);
04828 
04829   data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
04830   work = new float[dim3];
04831 
04832   order_init(initdata.zBlocks);
04833 
04834 #ifdef NAMD_FFTW_3
04835   /* need array of sizes for the how many */
04836 
04837   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
04838   int sizeLines=nx*ny;
04839   int planLineSizes[1];
04840   planLineSizes[0]=K3;
04841   int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
04842   int ndimHalf=ndim/2;
04843   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
04844                                          (float *) data, NULL, 1, 
04845                                          ndim,
04846                                          (fftwf_complex *) data, NULL, 1,
04847                                          ndimHalf,
04848                                          fftwFlags);
04849 
04850   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
04851                                           (fftwf_complex *) data, NULL, 1, 
04852                                           ndimHalf,
04853                                           (float *) data, NULL, 1, 
04854                                           ndim,
04855                                           fftwFlags);
04856 #if     CMK_SMP && USE_CKLOOP
04857   if(simParams->useCkLoop) {
04858           //How many FFT plans to be created? The grain-size issue!!.
04859           //Currently, I am choosing the min(nx, ny) to be coarse-grain
04860           numPlans = (nx<=ny?nx:ny);
04861           if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
04862           if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
04863           int howmany = sizeLines/numPlans;
04864           forward_plans = new fftwf_plan[numPlans];
04865           backward_plans = new fftwf_plan[numPlans];
04866           for(int i=0; i<numPlans; i++) {
04867                   int dimStride = i*ndim*howmany;
04868                   int dimHalfStride = i*ndimHalf*howmany;
04869                   forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
04870                                                                                                          ((float *)data)+dimStride, NULL, 1,
04871                                                                                                          ndim,
04872                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04873                                                                                                          ndimHalf,
04874                                                                                                          fftwFlags);
04875 
04876                   backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
04877                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04878                                                                                                          ndimHalf,
04879                                                                                                          ((float *)data)+dimStride, NULL, 1,
04880                                                                                                          ndim,
04881                                                                                                          fftwFlags);
04882           }
04883   }else 
04884 #endif 
04885   {
04886           forward_plans = NULL;
04887           backward_plans = NULL;
04888   }
04889 #else
04890   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
04891         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04892         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04893   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
04894         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04895         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04896 #endif
04897   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
04898 #else
04899   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
04900 #endif
04901 
04902 #if USE_NODE_PAR_RECEIVE
04903     evir = 0.;
04904     memset(data, 0, sizeof(float) * nx*ny*dim3);
04905 #endif
04906 }

void PmeZPencil::forward_fft (  ) 

Definition at line 5222 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

05222                              {
05223   evir = 0.;
05224 #ifdef FFTCHECK
05225   int dim3 = initdata.grid.dim3;
05226   int K3 = initdata.grid.K3;
05227   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
05228   float *d = data;
05229   for ( int i=0; i<nx; ++i ) {
05230    for ( int j=0; j<ny; ++j, d += dim3 ) {
05231     for ( int k=0; k<dim3; ++k ) {
05232       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
05233     }
05234    }
05235   }
05236 #endif
05237 #ifdef NAMD_FFTW
05238 #ifdef MANUAL_DEBUG_FFTW3
05239   dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05240 #endif
05241 #ifdef NAMD_FFTW_3
05242 #if     CMK_SMP && USE_CKLOOP
05243   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05244   if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
05245      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05246           //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
05247           //transform the above loop
05248           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05249           return;
05250   }
05251 #endif
05252   fftwf_execute(forward_plan);
05253 #else
05254   rfftwnd_real_to_complex(forward_plan, nx*ny,
05255         data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
05256 #endif
05257 #ifdef MANUAL_DEBUG_FFTW3
05258   dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05259 #endif
05260 
05261 #endif
05262 #ifdef ZEROCHECK
05263   int dim3 = initdata.grid.dim3;
05264   int K3 = initdata.grid.K3;
05265   float *d = data;
05266   for ( int i=0; i<nx; ++i ) {
05267    for ( int j=0; j<ny; ++j, d += dim3 ) {
05268     for ( int k=0; k<dim3; ++k ) {
05269       if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
05270         thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
05271     }
05272    }
05273   }
05274 #endif
05275 }

void PmeZPencil::node_process_grid ( PmeGridMsg *  msg ) 

Definition at line 6483 of file ComputePme.C.

References forward_fft(), PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

06484 {
06485 #if USE_NODE_PAR_RECEIVE
06486   CmiLock(ComputePmeMgr::fftw_plan_lock);
06487   CmiMemoryReadFence();
06488 #endif
06489   recv_grid(msg);
06490   if(msg->hasData) hasData=msg->hasData;
06491   int limsg;
06492   CmiMemoryAtomicFetchAndInc(imsg,limsg);
06493   grid_msgs[limsg] = msg;
06494   //  CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);      
06495   if(limsg+1 == grid_msgs.size())
06496     {
06497 
06498       if (hasData)
06499         {
06500           forward_fft();
06501         }
06502       send_trans();
06503       imsg=0;
06504       CmiMemoryWriteFence();
06505       //      CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06506     }
06507 #if USE_NODE_PAR_RECEIVE
06508   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06509   CmiMemoryWriteFence();
06510 #endif
06511 }

void PmeZPencil::node_process_untrans ( PmeUntransMsg *  msg ) 

Definition at line 6513 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by NodePmeMgr::recvZUntrans().

06514 {
06515   recv_untrans(msg);
06516 #if USE_NODE_PAR_RECEIVE
06517   CmiMemoryWriteFence();
06518   CmiLock(ComputePmeMgr::fftw_plan_lock);
06519 #endif    
06520   int limsg;
06521   CmiMemoryAtomicFetchAndInc(imsgb,limsg);
06522   if(limsg+1 == initdata.zBlocks)
06523     {
06524 #if USE_NODE_PAR_RECEIVE
06525       CmiMemoryReadFence();
06526 #endif    
06527       if(hasData) // maybe this should be an assert
06528         {
06529           backward_fft();
06530         }
06531         
06532         send_all_ungrid();
06533     /*  int send_evir = 1;
06534       // TODO: this part should use Chao's output parallelization
06535       for ( limsg=0; limsg < grid_msgs.size(); ++limsg ) {
06536         PmeGridMsg *omsg = grid_msgs[limsg];
06537         if ( omsg->hasData ) {
06538           if ( send_evir ) {
06539             omsg->evir[0] = evir;
06540             send_evir = 0;
06541           } else {
06542             omsg->evir[0] = 0.;
06543           }
06544         }
06545         send_ungrid(omsg);
06546       } */
06547       imsgb=0;
06548       evir = 0;
06549       memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3); 
06550       CmiMemoryWriteFence();
06551       //      CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06552     }
06553 #if USE_NODE_PAR_RECEIVE
06554   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06555 #endif
06556 }

void PmeZPencil::recv_grid ( const PmeGridMsg *  msg ) 

Definition at line 5171 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

05171                                                 {
05172 
05173   int dim3 = initdata.grid.dim3;
05174   if ( imsg == 0 ) {
05175     lattice = msg->lattice;
05176     sequence = msg->sequence;
05177 #if ! USE_NODE_PAR_RECEIVE
05178     memset(data, 0, sizeof(float)*nx*ny*dim3);
05179 #endif
05180   }
05181 
05182   if ( ! msg->hasData ) return;
05183 
05184   int zlistlen = msg->zlistlen;
05185 #ifdef NAMD_KNL
05186   int * __restrict msg_zlist = msg->zlist;
05187   int * __restrict zlist = work_zlist.begin();
05188   __assume_aligned(zlist,64);
05189   for ( int k=0; k<zlistlen; ++k ) {
05190     zlist[k] = msg_zlist[k];
05191   }
05192 #else
05193   int * __restrict zlist = msg->zlist;
05194 #endif
05195   char * __restrict fmsg = msg->fgrid;
05196   float * __restrict qmsg = msg->qgrid;
05197   float * __restrict d = data;
05198   int numGrids = 1;  // pencil FFT doesn't support multiple grids
05199   for ( int g=0; g<numGrids; ++g ) {
05200     for ( int i=0; i<nx; ++i ) {
05201      for ( int j=0; j<ny; ++j, d += dim3 ) {
05202       if( *(fmsg++) ) {
05203         #pragma ivdep
05204         for ( int k=0; k<zlistlen; ++k ) {
05205           d[zlist[k]] += *(qmsg++);
05206         }
05207       }
05208      }
05209     }
05210   }
05211 }

void PmeZPencil::recv_untrans ( const PmeUntransMsg *  msg ) 

Definition at line 6284 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

06284                                                       {
06285 #if ! USE_NODE_PAR_RECEIVE
06286     if(imsg==0) evir=0.;
06287 #endif
06288 
06289   int block3 = initdata.grid.block3;
06290   int dim3 = initdata.grid.dim3;
06291   int kb = msg->sourceNode;
06292   int nz = msg->ny;
06293   const float *md = msg->qgrid;
06294   float *d = data;
06295   for ( int i=0; i<nx; ++i ) {
06296 #if CMK_BLUEGENEL
06297     CmiNetworkProgress();
06298 #endif   
06299     for ( int j=0; j<ny; ++j, d += dim3 ) {
06300       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
06301 #ifdef ZEROCHECK
06302         if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
06303                                     thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
06304 #endif
06305         d[2*k] = *(md++);
06306         d[2*k+1] = *(md++);
06307       }
06308     }
06309   }
06310 }

void PmeZPencil::send_all_ungrid (  ) 

Definition at line 6381 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06381                                  {
06382 /* 
06383 //Original code: the transformation is to first extract the msg 
06384 //idx that will has evir value set. -Chao Mei  
06385         int send_evir = 1;
06386         for (int imsg=0; imsg < grid_msgs.size(); ++imsg ) {
06387                 PmeGridMsg *msg = grid_msgs[imsg];
06388                 if ( msg->hasData ) {
06389                         if ( send_evir ) {
06390                                 msg->evir[0] = evir;
06391                                 send_evir = 0;
06392                         } else {
06393                                 msg->evir[0] = 0.;
06394                         }
06395                 }
06396                 send_ungrid(msg);
06397         }
06398 */
06399         int evirIdx = 0;
06400         for(int imsg=0; imsg<grid_msgs.size(); imsg++) {
06401                 if(grid_msgs[imsg]->hasData) {
06402                         evirIdx = imsg;
06403                         break;
06404                 }
06405         }
06406 
06407 #if     CMK_SMP && USE_CKLOOP
06408         int useCkLoop = Node::Object()->simParameters->useCkLoop;
06409         if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
06410            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06411                 //????What's the best value for numChunks?????
06412 #if USE_NODE_PAR_RECEIVE        
06413                 //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 1); //has to sync
06414                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
06415 #else
06416         //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 0); //not sync
06417                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 0); //not sync
06418 #endif        
06419                 return;
06420         }
06421 #endif
06422         send_subset_ungrid(0, grid_msgs.size()-1, evirIdx);
06423 }

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5283 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

05283                                                         {
05284         int zBlocks = initdata.zBlocks;
05285         int block3 = initdata.grid.block3;
05286         int dim3 = initdata.grid.dim3;
05287         for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
05288           int kb = send_order[isend];
05289           int nz = block3;
05290           if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05291           int hd = ( hasData ? 1 : 0 );
05292           PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05293           msg->lattice = lattice;
05294           msg->sourceNode = thisIndex.y;
05295           msg->hasData = hasData;
05296           msg->nx = ny;
05297          if ( hasData ) {
05298           float *md = msg->qgrid;
05299           const float *d = data;
05300           for ( int i=0; i<nx; ++i ) {
05301            for ( int j=0; j<ny; ++j, d += dim3 ) {
05302                 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05303                   *(md++) = d[2*k];
05304                   *(md++) = d[2*k+1];
05305                 }
05306            }
05307           }
05308          }
05309           msg->sequence = sequence;
05310           SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05311 
05312     CmiEnableUrgentSend(1);
05313 #if USE_NODE_PAR_RECEIVE
05314       msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05315 #if Y_PERSIST 
05316       CmiUsePersistentHandle(&trans_handle[isend], 1);
05317 #endif
05318       initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05319 #if Y_PERSIST 
05320       CmiUsePersistentHandle(NULL, 0);
05321 #endif    
05322 #else
05323 #if Y_PERSIST 
05324       CmiUsePersistentHandle(&trans_handle[isend], 1);
05325 #endif
05326       initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05327 #if Y_PERSIST 
05328       CmiUsePersistentHandle(NULL, 0);
05329 #endif    
05330 #endif
05331     CmiEnableUrgentSend(0);
05332     }
05333 }

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx,
int  specialIdx 
)

Definition at line 6425 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

06425                                                                          {
06426         for (int imsg=fromIdx; imsg <=toIdx; ++imsg ) {
06427                 PmeGridMsg *msg = grid_msgs[imsg];
06428                 send_ungrid(msg);
06429         }
06430 }

void PmeZPencil::send_trans (  ) 

Definition at line 5335 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

05335                             {
05336 #if USE_PERSISTENT
05337     if (trans_handle == NULL) setup_persistent();
05338 #endif
05339 #if     CMK_SMP && USE_CKLOOP
05340         int useCkLoop = Node::Object()->simParameters->useCkLoop;
05341         if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
05342            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05349                 //send_subset_trans(0, initdata.zBlocks-1);
05350                 CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
05351                 return;
05352         }
05353 #endif
05354   int zBlocks = initdata.zBlocks;
05355   int block3 = initdata.grid.block3;
05356   int dim3 = initdata.grid.dim3;
05357   for ( int isend=0; isend<zBlocks; ++isend ) {
05358     int kb = send_order[isend];
05359     int nz = block3;
05360     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05361     int hd = ( hasData ? 1 : 0 );
05362     PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05363     msg->lattice = lattice;
05364     msg->sourceNode = thisIndex.y;
05365     msg->hasData = hasData;
05366     msg->nx = ny;
05367    if ( hasData ) {
05368     float *md = msg->qgrid;
05369     const float *d = data;
05370     for ( int i=0; i<nx; ++i ) {
05371      for ( int j=0; j<ny; ++j, d += dim3 ) {
05372       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05373         *(md++) = d[2*k];
05374         *(md++) = d[2*k+1];
05375       }
05376      }
05377     }
05378    }
05379     msg->sequence = sequence;
05380     SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05381 
05382     CmiEnableUrgentSend(1);
05383 #if USE_NODE_PAR_RECEIVE
05384     msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05385 #if Y_PERSIST 
05386     CmiUsePersistentHandle(&trans_handle[isend], 1);
05387 #endif
05388     initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05389 #if Y_PERSIST 
05390     CmiUsePersistentHandle(NULL, 0);
05391 #endif    
05392 #else
05393 #if Y_PERSIST 
05394     CmiUsePersistentHandle(&trans_handle[isend], 1);
05395 #endif
05396     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05397 #if Y_PERSIST 
05398     CmiUsePersistentHandle(NULL, 0);
05399 #endif    
05400 #endif
05401     CmiEnableUrgentSend(0);
05402   }
05403 }

void PmeZPencil::send_ungrid ( PmeGridMsg *  msg ) 

Definition at line 6432 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

06432                                             {
06433 
06434 #ifdef NAMD_CUDA
06435   const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
06436 #else
06437   const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
06438 #endif
06439 
06440   int pe = msg->sourceNode;
06441   if ( ! msg->hasData ) {
06442     delete msg;
06443     PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
06444     SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
06445     CmiEnableUrgentSend(1);
06446     initdata.pmeProxy[pe].recvAck(ackmsg);
06447     CmiEnableUrgentSend(0);
06448     return;
06449   }
06450   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
06451   int dim3 = initdata.grid.dim3;
06452   int zlistlen = msg->zlistlen;
06453   int *zlist = msg->zlist;
06454   char *fmsg = msg->fgrid;
06455   float *qmsg = msg->qgrid;
06456   float *d = data;
06457   int numGrids = 1;  // pencil FFT doesn't support multiple grids
06458   for ( int g=0; g<numGrids; ++g ) {
06459 #if CMK_BLUEGENEL
06460     CmiNetworkProgress();
06461 #endif    
06462     for ( int i=0; i<nx; ++i ) {
06463       for ( int j=0; j<ny; ++j, d += dim3 ) {
06464         if( *(fmsg++) ) {
06465           for ( int k=0; k<zlistlen; ++k ) {
06466             *(qmsg++) = d[zlist[k]];
06467           }
06468         }
06469       }
06470     }
06471   }
06472   SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
06473     CmiEnableUrgentSend(1);
06474 #ifdef NAMD_CUDA
06475     if ( offload ) {
06476       initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
06477     } else
06478 #endif
06479   initdata.pmeProxy[pe].recvUngrid(msg);
06480     CmiEnableUrgentSend(0);
06481 }


The documentation for this class was generated from the following file:

ComputePme.C
Generated on Thu Nov 23 01:17:20 2017 for NAMD by  doxygen 1.4.7