PmeZPencil Class Reference

Inheritance diagram for PmeZPencil:

Base classes shown in the diagram: PmePencil< CBase_PmeZPencil >, CBase_PmeZPencil.

List of all members.

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 PmeZPencil (CkMigrateMessage *)
 ~PmeZPencil ()
void fft_init ()
void recv_grid (const PmeGridMsg *)
void forward_fft ()
void send_trans ()
void send_subset_trans (int fromIdx, int toIdx)
void recv_untrans (const PmeUntransMsg *)
void node_process_untrans (PmeUntransMsg *)
void node_process_grid (PmeGridMsg *)
void backward_fft ()
void send_ungrid (PmeGridMsg *)
void send_all_ungrid ()
void send_subset_ungrid (int fromIdx, int toIdx, int specialIdx)

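Detailed Description

PmeZPencil is the z-direction pencil of the pencil-decomposed PME FFT. Each element holds an nx x ny block of grid lines, each stored with length dim3 (see fft_init()). In the forward direction it accumulates incoming charge-grid messages (recv_grid()), performs real-to-complex FFTs of length K3 along each line (forward_fft()), and sends the result in z-blocks to the Y pencils (send_trans()). In the backward direction it receives the z-blocks back (recv_untrans()), performs the complex-to-real FFTs (backward_fft()), and returns the grid values to the nodes that sent them (send_ungrid()).

Definition at line 4599 of file ComputePme.C.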


Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil (  )  [inline]

Definition at line 4602 of file ComputePme.C.

04602 { __sdag_init(); setMigratable(false); }

PmeZPencil::PmeZPencil ( CkMigrateMessage *   )  [inline]

Definition at line 4603 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

04603 { __sdag_init();  setMigratable (false); imsg=imsgb=0;}

PmeZPencil::~PmeZPencil (  )  [inline]

Definition at line 4604 of file ComputePme.C.

04604                       {
04605         #ifdef NAMD_FFTW
04606         #ifdef NAMD_FFTW_3
04607                 delete [] forward_plans;
04608                 delete [] backward_plans;
04609         #endif
04610         #endif
04611         }


Member Function Documentation

void PmeZPencil::backward_fft (  ) 

Definition at line 6305 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06305                               {
06306 #ifdef NAMD_FFTW
06307 #ifdef MANUAL_DEBUG_FFTW3
06308   dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06309 #endif
06310 #ifdef NAMD_FFTW_3
06311 #if     CMK_SMP && USE_CKLOOP
06312   int useCkLoop = Node::Object()->simParameters->useCkLoop;
06313   if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
06314      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06315           //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
06316           //transform the above loop
06317           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
06318           return;
06319   }
06320 #endif
06321   fftwf_execute(backward_plan);
06322 #else
06323   rfftwnd_complex_to_real(backward_plan, nx*ny,
06324             (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
06325 #endif
06326 #ifdef MANUAL_DEBUG_FFTW3
06327   dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06328 #endif
06329 
06330 #endif
06331   
06332 #if CMK_BLUEGENEL
06333   CmiNetworkProgress();
06334 #endif
06335 
06336 #ifdef FFTCHECK
06337   int dim3 = initdata.grid.dim3;
06338   int K1 = initdata.grid.K1;
06339   int K2 = initdata.grid.K2;
06340   int K3 = initdata.grid.K3;
06341   float scale = 1. / (1. * K1 * K2 * K3);
06342   float maxerr = 0.;
06343   float maxstd = 0.;
06344   int mi, mj, mk;  mi = mj = mk = -1;
06345   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
06346   const float *d = data;
06347   for ( int i=0; i<nx; ++i ) {
06348    for ( int j=0; j<ny; ++j, d += dim3 ) {
06349     for ( int k=0; k<K3; ++k ) {
06350       float std = 10. * (10. * (10. * std_base + i) + j) + k;
06351       float err = scale * d[k] - std;
06352       if ( fabsf(err) > fabsf(maxerr) ) {
06353         maxerr = err;
06354         maxstd = std;
06355         mi = i;  mj = j;  mk = k;
06356       }
06357     }
06358    }
06359   }
06360   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
06361                 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
06362 #endif
06363 
06364 }

void PmeZPencil::fft_init (  ) 

Definition at line 4798 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, fftwf_malloc, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

04798                           {
04799   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
04800   Node *node = nd.ckLocalBranch();
04801   SimParameters *simParams = node->simParameters;
04802 
04803 #if USE_NODE_PAR_RECEIVE
04804   ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
04805 #endif
04806 
04807   int K1 = initdata.grid.K1;
04808   int K2 = initdata.grid.K2;
04809   int K3 = initdata.grid.K3;
04810   int dim3 = initdata.grid.dim3;
04811   int block1 = initdata.grid.block1;
04812   int block2 = initdata.grid.block2;
04813 
04814   nx = block1;
04815   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
04816   ny = block2;
04817   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
04818 
04819 #ifdef NAMD_FFTW
04820   CmiLock(ComputePmeMgr::fftw_plan_lock);
04821 
04822   data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
04823   work = new float[dim3];
04824 
04825   order_init(initdata.zBlocks);
04826 
04827 #ifdef NAMD_FFTW_3
04828   /* need array of sizes for the how many */
04829 
04830   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
04831   int sizeLines=nx*ny;
04832   int planLineSizes[1];
04833   planLineSizes[0]=K3;
04834   int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
04835   int ndimHalf=ndim/2;
04836   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
04837                                          (float *) data, NULL, 1, 
04838                                          ndim,
04839                                          (fftwf_complex *) data, NULL, 1,
04840                                          ndimHalf,
04841                                          fftwFlags);
04842 
04843   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
04844                                           (fftwf_complex *) data, NULL, 1, 
04845                                           ndimHalf,
04846                                           (float *) data, NULL, 1, 
04847                                           ndim,
04848                                           fftwFlags);
04849 #if     CMK_SMP && USE_CKLOOP
04850   if(simParams->useCkLoop) {
04851           //How many FFT plans to be created? The grain-size issue!!.
04852           //Currently, I am choosing the min(nx, ny) to be coarse-grain
04853           numPlans = (nx<=ny?nx:ny);
04854           if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
04855           if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
04856           int howmany = sizeLines/numPlans;
04857           forward_plans = new fftwf_plan[numPlans];
04858           backward_plans = new fftwf_plan[numPlans];
04859           for(int i=0; i<numPlans; i++) {
04860                   int dimStride = i*ndim*howmany;
04861                   int dimHalfStride = i*ndimHalf*howmany;
04862                   forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
04863                                                                                                          ((float *)data)+dimStride, NULL, 1,
04864                                                                                                          ndim,
04865                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04866                                                                                                          ndimHalf,
04867                                                                                                          fftwFlags);
04868 
04869                   backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
04870                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04871                                                                                                          ndimHalf,
04872                                                                                                          ((float *)data)+dimStride, NULL, 1,
04873                                                                                                          ndim,
04874                                                                                                          fftwFlags);
04875           }
04876   }else 
04877 #endif 
04878   {
04879           forward_plans = NULL;
04880           backward_plans = NULL;
04881   }
04882 #else
04883   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
04884         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04885         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04886   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
04887         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04888         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04889 #endif
04890   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
04891 #else
04892   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
04893 #endif
04894 
04895 #if USE_NODE_PAR_RECEIVE
04896     evir = 0.;
04897     memset(data, 0, sizeof(float) * nx*ny*dim3);
04898 #endif
04899 }

void PmeZPencil::forward_fft (  ) 

Definition at line 5215 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

05215                              {
05216   evir = 0.;
05217 #ifdef FFTCHECK
05218   int dim3 = initdata.grid.dim3;
05219   int K3 = initdata.grid.K3;
05220   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
05221   float *d = data;
05222   for ( int i=0; i<nx; ++i ) {
05223    for ( int j=0; j<ny; ++j, d += dim3 ) {
05224     for ( int k=0; k<dim3; ++k ) {
05225       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
05226     }
05227    }
05228   }
05229 #endif
05230 #ifdef NAMD_FFTW
05231 #ifdef MANUAL_DEBUG_FFTW3
05232   dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05233 #endif
05234 #ifdef NAMD_FFTW_3
05235 #if     CMK_SMP && USE_CKLOOP
05236   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05237   if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
05238      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05239           //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
05240           //transform the above loop
05241           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05242           return;
05243   }
05244 #endif
05245   fftwf_execute(forward_plan);
05246 #else
05247   rfftwnd_real_to_complex(forward_plan, nx*ny,
05248         data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
05249 #endif
05250 #ifdef MANUAL_DEBUG_FFTW3
05251   dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05252 #endif
05253 
05254 #endif
05255 #ifdef ZEROCHECK
05256   int dim3 = initdata.grid.dim3;
05257   int K3 = initdata.grid.K3;
05258   float *d = data;
05259   for ( int i=0; i<nx; ++i ) {
05260    for ( int j=0; j<ny; ++j, d += dim3 ) {
05261     for ( int k=0; k<dim3; ++k ) {
05262       if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
05263         thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
05264     }
05265    }
05266   }
05267 #endif
05268 }

void PmeZPencil::node_process_grid ( PmeGridMsg *  msg  ) 

Definition at line 6476 of file ComputePme.C.

References forward_fft(), PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

06477 {
06478 #if USE_NODE_PAR_RECEIVE
06479   CmiLock(ComputePmeMgr::fftw_plan_lock);
06480   CmiMemoryReadFence();
06481 #endif
06482   recv_grid(msg);
06483   if(msg->hasData) hasData=msg->hasData;
06484   int limsg;
06485   CmiMemoryAtomicFetchAndInc(imsg,limsg);
06486   grid_msgs[limsg] = msg;
06487   //  CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);      
06488   if(limsg+1 == grid_msgs.size())
06489     {
06490 
06491       if (hasData)
06492         {
06493           forward_fft();
06494         }
06495       send_trans();
06496       imsg=0;
06497       CmiMemoryWriteFence();
06498       //      CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06499     }
06500 #if USE_NODE_PAR_RECEIVE
06501   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06502   CmiMemoryWriteFence();
06503 #endif
06504 }

void PmeZPencil::node_process_untrans ( PmeUntransMsg *  msg  ) 

Definition at line 6506 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by NodePmeMgr::recvZUntrans().

06507 {
06508   recv_untrans(msg);
06509 #if USE_NODE_PAR_RECEIVE
06510   CmiMemoryWriteFence();
06511   CmiLock(ComputePmeMgr::fftw_plan_lock);
06512 #endif    
06513   int limsg;
06514   CmiMemoryAtomicFetchAndInc(imsgb,limsg);
06515   if(limsg+1 == initdata.zBlocks)
06516     {
06517 #if USE_NODE_PAR_RECEIVE
06518       CmiMemoryReadFence();
06519 #endif    
06520       if(hasData) // maybe this should be an assert
06521         {
06522           backward_fft();
06523         }
06524         
06525         send_all_ungrid();
06526     /*  int send_evir = 1;
06527       // TODO: this part should use Chao's output parallelization
06528       for ( limsg=0; limsg < grid_msgs.size(); ++limsg ) {
06529         PmeGridMsg *omsg = grid_msgs[limsg];
06530         if ( omsg->hasData ) {
06531           if ( send_evir ) {
06532             omsg->evir[0] = evir;
06533             send_evir = 0;
06534           } else {
06535             omsg->evir[0] = 0.;
06536           }
06537         }
06538         send_ungrid(omsg);
06539       } */
06540       imsgb=0;
06541       evir = 0;
06542       memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3); 
06543       CmiMemoryWriteFence();
06544       //      CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06545     }
06546 #if USE_NODE_PAR_RECEIVE
06547   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06548 #endif
06549 }

void PmeZPencil::recv_grid ( const PmeGridMsg *  msg  ) 

Definition at line 5164 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

05164                                                 {
05165 
05166   int dim3 = initdata.grid.dim3;
05167   if ( imsg == 0 ) {
05168     lattice = msg->lattice;
05169     sequence = msg->sequence;
05170 #if ! USE_NODE_PAR_RECEIVE
05171     memset(data, 0, sizeof(float)*nx*ny*dim3);
05172 #endif
05173   }
05174 
05175   if ( ! msg->hasData ) return;
05176 
05177   int zlistlen = msg->zlistlen;
05178 #ifdef NAMD_KNL
05179   int * __restrict msg_zlist = msg->zlist;
05180   int * __restrict zlist = work_zlist.begin();
05181   __assume_aligned(zlist,64);
05182   for ( int k=0; k<zlistlen; ++k ) {
05183     zlist[k] = msg_zlist[k];
05184   }
05185 #else
05186   int * __restrict zlist = msg->zlist;
05187 #endif
05188   char * __restrict fmsg = msg->fgrid;
05189   float * __restrict qmsg = msg->qgrid;
05190   float * __restrict d = data;
05191   int numGrids = 1;  // pencil FFT doesn't support multiple grids
05192   for ( int g=0; g<numGrids; ++g ) {
05193     for ( int i=0; i<nx; ++i ) {
05194      for ( int j=0; j<ny; ++j, d += dim3 ) {
05195       if( *(fmsg++) ) {
05196         #pragma ivdep
05197         for ( int k=0; k<zlistlen; ++k ) {
05198           d[zlist[k]] += *(qmsg++);
05199         }
05200       }
05201      }
05202     }
05203   }
05204 }

void PmeZPencil::recv_untrans ( const PmeUntransMsg *  msg  ) 

Definition at line 6277 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

06277                                                       {
06278 #if ! USE_NODE_PAR_RECEIVE
06279     if(imsg==0) evir=0.;
06280 #endif
06281 
06282   int block3 = initdata.grid.block3;
06283   int dim3 = initdata.grid.dim3;
06284   int kb = msg->sourceNode;
06285   int nz = msg->ny;
06286   const float *md = msg->qgrid;
06287   float *d = data;
06288   for ( int i=0; i<nx; ++i ) {
06289 #if CMK_BLUEGENEL
06290     CmiNetworkProgress();
06291 #endif   
06292     for ( int j=0; j<ny; ++j, d += dim3 ) {
06293       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
06294 #ifdef ZEROCHECK
06295         if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
06296                                     thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
06297 #endif
06298         d[2*k] = *(md++);
06299         d[2*k+1] = *(md++);
06300       }
06301     }
06302   }
06303 }

void PmeZPencil::send_all_ungrid (  ) 

Definition at line 6374 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06374                                  {
06375 /* 
06376 //Original code: the transformation is to first extract the msg 
06377 //idx that will has evir value set. -Chao Mei  
06378         int send_evir = 1;
06379         for (int imsg=0; imsg < grid_msgs.size(); ++imsg ) {
06380                 PmeGridMsg *msg = grid_msgs[imsg];
06381                 if ( msg->hasData ) {
06382                         if ( send_evir ) {
06383                                 msg->evir[0] = evir;
06384                                 send_evir = 0;
06385                         } else {
06386                                 msg->evir[0] = 0.;
06387                         }
06388                 }
06389                 send_ungrid(msg);
06390         }
06391 */
06392         int evirIdx = 0;
06393         for(int imsg=0; imsg<grid_msgs.size(); imsg++) {
06394                 if(grid_msgs[imsg]->hasData) {
06395                         evirIdx = imsg;
06396                         break;
06397                 }
06398         }
06399 
06400 #if     CMK_SMP && USE_CKLOOP
06401         int useCkLoop = Node::Object()->simParameters->useCkLoop;
06402         if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
06403            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06404                 //????What's the best value for numChunks?????
06405 #if USE_NODE_PAR_RECEIVE        
06406                 //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 1); //has to sync
06407                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
06408 #else
06409         //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 0); //not sync
06410                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 0); //not sync
06411 #endif        
06412                 return;
06413         }
06414 #endif
06415         send_subset_ungrid(0, grid_msgs.size()-1, evirIdx);
06416 }

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5276 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

05276                                                         {
05277         int zBlocks = initdata.zBlocks;
05278         int block3 = initdata.grid.block3;
05279         int dim3 = initdata.grid.dim3;
05280         for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
05281           int kb = send_order[isend];
05282           int nz = block3;
05283           if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05284           int hd = ( hasData ? 1 : 0 );
05285           PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05286           msg->lattice = lattice;
05287           msg->sourceNode = thisIndex.y;
05288           msg->hasData = hasData;
05289           msg->nx = ny;
05290          if ( hasData ) {
05291           float *md = msg->qgrid;
05292           const float *d = data;
05293           for ( int i=0; i<nx; ++i ) {
05294            for ( int j=0; j<ny; ++j, d += dim3 ) {
05295                 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05296                   *(md++) = d[2*k];
05297                   *(md++) = d[2*k+1];
05298                 }
05299            }
05300           }
05301          }
05302           msg->sequence = sequence;
05303           SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05304 
05305     CmiEnableUrgentSend(1);
05306 #if USE_NODE_PAR_RECEIVE
05307       msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05308 #if Y_PERSIST 
05309       CmiUsePersistentHandle(&trans_handle[isend], 1);
05310 #endif
05311       initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05312 #if Y_PERSIST 
05313       CmiUsePersistentHandle(NULL, 0);
05314 #endif    
05315 #else
05316 #if Y_PERSIST 
05317       CmiUsePersistentHandle(&trans_handle[isend], 1);
05318 #endif
05319       initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05320 #if Y_PERSIST 
05321       CmiUsePersistentHandle(NULL, 0);
05322 #endif    
05323 #endif
05324     CmiEnableUrgentSend(0);
05325     }
05326 }

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx,
int  specialIdx 
)

Definition at line 6418 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

06418                                                                          {
06419         for (int imsg=fromIdx; imsg <=toIdx; ++imsg ) {
06420                 PmeGridMsg *msg = grid_msgs[imsg];
06421                 send_ungrid(msg);
06422         }
06423 }

void PmeZPencil::send_trans (  ) 

Definition at line 5328 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

05328                             {
05329 #if USE_PERSISTENT
05330     if (trans_handle == NULL) setup_persistent();
05331 #endif
05332 #if     CMK_SMP && USE_CKLOOP
05333         int useCkLoop = Node::Object()->simParameters->useCkLoop;
05334         if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
05335            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05342                 //send_subset_trans(0, initdata.zBlocks-1);
05343                 CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
05344                 return;
05345         }
05346 #endif
05347   int zBlocks = initdata.zBlocks;
05348   int block3 = initdata.grid.block3;
05349   int dim3 = initdata.grid.dim3;
05350   for ( int isend=0; isend<zBlocks; ++isend ) {
05351     int kb = send_order[isend];
05352     int nz = block3;
05353     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05354     int hd = ( hasData ? 1 : 0 );
05355     PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05356     msg->lattice = lattice;
05357     msg->sourceNode = thisIndex.y;
05358     msg->hasData = hasData;
05359     msg->nx = ny;
05360    if ( hasData ) {
05361     float *md = msg->qgrid;
05362     const float *d = data;
05363     for ( int i=0; i<nx; ++i ) {
05364      for ( int j=0; j<ny; ++j, d += dim3 ) {
05365       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05366         *(md++) = d[2*k];
05367         *(md++) = d[2*k+1];
05368       }
05369      }
05370     }
05371    }
05372     msg->sequence = sequence;
05373     SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05374 
05375     CmiEnableUrgentSend(1);
05376 #if USE_NODE_PAR_RECEIVE
05377     msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05378 #if Y_PERSIST 
05379     CmiUsePersistentHandle(&trans_handle[isend], 1);
05380 #endif
05381     initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05382 #if Y_PERSIST 
05383     CmiUsePersistentHandle(NULL, 0);
05384 #endif    
05385 #else
05386 #if Y_PERSIST 
05387     CmiUsePersistentHandle(&trans_handle[isend], 1);
05388 #endif
05389     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05390 #if Y_PERSIST 
05391     CmiUsePersistentHandle(NULL, 0);
05392 #endif    
05393 #endif
05394     CmiEnableUrgentSend(0);
05395   }
05396 }

void PmeZPencil::send_ungrid ( PmeGridMsg *  msg  ) 

Definition at line 6425 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

06425                                             {
06426 
06427 #ifdef NAMD_CUDA
06428   const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
06429 #else
06430   const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
06431 #endif
06432 
06433   int pe = msg->sourceNode;
06434   if ( ! msg->hasData ) {
06435     delete msg;
06436     PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
06437     SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
06438     CmiEnableUrgentSend(1);
06439     initdata.pmeProxy[pe].recvAck(ackmsg);
06440     CmiEnableUrgentSend(0);
06441     return;
06442   }
06443   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
06444   int dim3 = initdata.grid.dim3;
06445   int zlistlen = msg->zlistlen;
06446   int *zlist = msg->zlist;
06447   char *fmsg = msg->fgrid;
06448   float *qmsg = msg->qgrid;
06449   float *d = data;
06450   int numGrids = 1;  // pencil FFT doesn't support multiple grids
06451   for ( int g=0; g<numGrids; ++g ) {
06452 #if CMK_BLUEGENEL
06453     CmiNetworkProgress();
06454 #endif    
06455     for ( int i=0; i<nx; ++i ) {
06456       for ( int j=0; j<ny; ++j, d += dim3 ) {
06457         if( *(fmsg++) ) {
06458           for ( int k=0; k<zlistlen; ++k ) {
06459             *(qmsg++) = d[zlist[k]];
06460           }
06461         }
06462       }
06463     }
06464   }
06465   SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
06466     CmiEnableUrgentSend(1);
06467 #ifdef NAMD_CUDA
06468     if ( offload ) {
06469       initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
06470     } else
06471 #endif
06472   initdata.pmeProxy[pe].recvUngrid(msg);
06473     CmiEnableUrgentSend(0);
06474 }


The documentation for this class was generated from the following file:

ComputePme.C

Generated on Sat Sep 23 01:17:21 2017 for NAMD by doxygen 1.4.7