PmeZPencil Class Reference

Inheritance diagram for PmeZPencil:

PmePencil< CBase_PmeZPencil > CBase_PmeZPencil List of all members.

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 PmeZPencil (CkMigrateMessage *)
 ~PmeZPencil ()
void fft_init ()
void recv_grid (const PmeGridMsg *)
void forward_fft ()
void send_trans ()
void send_subset_trans (int fromIdx, int toIdx)
void recv_untrans (const PmeUntransMsg *)
void node_process_untrans (PmeUntransMsg *)
void node_process_grid (PmeGridMsg *)
void backward_fft ()
void send_ungrid (PmeGridMsg *)
void send_all_ungrid ()
void send_subset_ungrid (int fromIdx, int toIdx, int specialIdx)

Detailed Description

Definition at line 4567 of file ComputePme.C.


Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil (  )  [inline]

Definition at line 4570 of file ComputePme.C.

04570 { __sdag_init(); setMigratable(false); }

PmeZPencil::PmeZPencil ( CkMigrateMessage *   )  [inline]

Definition at line 4571 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

04571 { __sdag_init();  setMigratable (false); imsg=imsgb=0;}

PmeZPencil::~PmeZPencil (  )  [inline]

Definition at line 4572 of file ComputePme.C.

04572                       {
04573         #ifdef NAMD_FFTW
04574         #ifdef NAMD_FFTW_3
04575                 delete [] forward_plans;
04576                 delete [] backward_plans;
04577         #endif
04578         #endif
04579         }


Member Function Documentation

void PmeZPencil::backward_fft (  ) 

Definition at line 6273 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06273                               {
06274 #ifdef NAMD_FFTW
06275 #ifdef MANUAL_DEBUG_FFTW3
06276   dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06277 #endif
06278 #ifdef NAMD_FFTW_3
06279 #if     CMK_SMP && USE_CKLOOP
06280   int useCkLoop = Node::Object()->simParameters->useCkLoop;
06281   if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
06282      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06283           //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
06284           //transform the above loop
06285           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
06286           return;
06287   }
06288 #endif
06289   fftwf_execute(backward_plan);
06290 #else
06291   rfftwnd_complex_to_real(backward_plan, nx*ny,
06292             (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
06293 #endif
06294 #ifdef MANUAL_DEBUG_FFTW3
06295   dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06296 #endif
06297 
06298 #endif
06299   
06300 #if CMK_BLUEGENEL
06301   CmiNetworkProgress();
06302 #endif
06303 
06304 #ifdef FFTCHECK
06305   int dim3 = initdata.grid.dim3;
06306   int K1 = initdata.grid.K1;
06307   int K2 = initdata.grid.K2;
06308   int K3 = initdata.grid.K3;
06309   float scale = 1. / (1. * K1 * K2 * K3);
06310   float maxerr = 0.;
06311   float maxstd = 0.;
06312   int mi, mj, mk;  mi = mj = mk = -1;
06313   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
06314   const float *d = data;
06315   for ( int i=0; i<nx; ++i ) {
06316    for ( int j=0; j<ny; ++j, d += dim3 ) {
06317     for ( int k=0; k<K3; ++k ) {
06318       float std = 10. * (10. * (10. * std_base + i) + j) + k;
06319       float err = scale * d[k] - std;
06320       if ( fabsf(err) > fabsf(maxerr) ) {
06321         maxerr = err;
06322         maxstd = std;
06323         mi = i;  mj = j;  mk = k;
06324       }
06325     }
06326    }
06327   }
06328   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
06329                 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
06330 #endif
06331 
06332 }

void PmeZPencil::fft_init (  ) 

Definition at line 4766 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, fftwf_malloc, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

04766                           {
04767   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
04768   Node *node = nd.ckLocalBranch();
04769   SimParameters *simParams = node->simParameters;
04770 
04771 #if USE_NODE_PAR_RECEIVE
04772   ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
04773 #endif
04774 
04775   int K1 = initdata.grid.K1;
04776   int K2 = initdata.grid.K2;
04777   int K3 = initdata.grid.K3;
04778   int dim3 = initdata.grid.dim3;
04779   int block1 = initdata.grid.block1;
04780   int block2 = initdata.grid.block2;
04781 
04782   nx = block1;
04783   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
04784   ny = block2;
04785   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
04786 
04787 #ifdef NAMD_FFTW
04788   CmiLock(ComputePmeMgr::fftw_plan_lock);
04789 
04790   data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
04791   work = new float[dim3];
04792 
04793   order_init(initdata.zBlocks);
04794 
04795 #ifdef NAMD_FFTW_3
04796   /* need array of sizes for the how many */
04797 
04798   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
04799   int sizeLines=nx*ny;
04800   int planLineSizes[1];
04801   planLineSizes[0]=K3;
04802   int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
04803   int ndimHalf=ndim/2;
04804   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
04805                                          (float *) data, NULL, 1, 
04806                                          ndim,
04807                                          (fftwf_complex *) data, NULL, 1,
04808                                          ndimHalf,
04809                                          fftwFlags);
04810 
04811   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
04812                                           (fftwf_complex *) data, NULL, 1, 
04813                                           ndimHalf,
04814                                           (float *) data, NULL, 1, 
04815                                           ndim,
04816                                           fftwFlags);
04817 #if     CMK_SMP && USE_CKLOOP
04818   if(simParams->useCkLoop) {
04819           //How many FFT plans to be created? The grain-size issue!!.
04820           //Currently, I am choosing the min(nx, ny) to be coarse-grain
04821           numPlans = (nx<=ny?nx:ny);
04822           if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
04823           if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
04824           int howmany = sizeLines/numPlans;
04825           forward_plans = new fftwf_plan[numPlans];
04826           backward_plans = new fftwf_plan[numPlans];
04827           for(int i=0; i<numPlans; i++) {
04828                   int dimStride = i*ndim*howmany;
04829                   int dimHalfStride = i*ndimHalf*howmany;
04830                   forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
04831                                                                                                          ((float *)data)+dimStride, NULL, 1,
04832                                                                                                          ndim,
04833                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04834                                                                                                          ndimHalf,
04835                                                                                                          fftwFlags);
04836 
04837                   backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
04838                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04839                                                                                                          ndimHalf,
04840                                                                                                          ((float *)data)+dimStride, NULL, 1,
04841                                                                                                          ndim,
04842                                                                                                          fftwFlags);
04843           }
04844   }else 
04845 #endif 
04846   {
04847           forward_plans = NULL;
04848           backward_plans = NULL;
04849   }
04850 #else
04851   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
04852         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04853         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04854   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
04855         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04856         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04857 #endif
04858   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
04859 #else
04860   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
04861 #endif
04862 
04863 #if USE_NODE_PAR_RECEIVE
04864     evir = 0.;
04865     memset(data, 0, sizeof(float) * nx*ny*dim3);
04866 #endif
04867 }

void PmeZPencil::forward_fft (  ) 

Definition at line 5183 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

05183                              {
05184   evir = 0.;
05185 #ifdef FFTCHECK
05186   int dim3 = initdata.grid.dim3;
05187   int K3 = initdata.grid.K3;
05188   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
05189   float *d = data;
05190   for ( int i=0; i<nx; ++i ) {
05191    for ( int j=0; j<ny; ++j, d += dim3 ) {
05192     for ( int k=0; k<dim3; ++k ) {
05193       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
05194     }
05195    }
05196   }
05197 #endif
05198 #ifdef NAMD_FFTW
05199 #ifdef MANUAL_DEBUG_FFTW3
05200   dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05201 #endif
05202 #ifdef NAMD_FFTW_3
05203 #if     CMK_SMP && USE_CKLOOP
05204   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05205   if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
05206      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05207           //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
05208           //transform the above loop
05209           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05210           return;
05211   }
05212 #endif
05213   fftwf_execute(forward_plan);
05214 #else
05215   rfftwnd_real_to_complex(forward_plan, nx*ny,
05216         data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
05217 #endif
05218 #ifdef MANUAL_DEBUG_FFTW3
05219   dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05220 #endif
05221 
05222 #endif
05223 #ifdef ZEROCHECK
05224   int dim3 = initdata.grid.dim3;
05225   int K3 = initdata.grid.K3;
05226   float *d = data;
05227   for ( int i=0; i<nx; ++i ) {
05228    for ( int j=0; j<ny; ++j, d += dim3 ) {
05229     for ( int k=0; k<dim3; ++k ) {
05230       if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
05231         thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
05232     }
05233    }
05234   }
05235 #endif
05236 }

void PmeZPencil::node_process_grid ( PmeGridMsg  ) 

Definition at line 6444 of file ComputePme.C.

References forward_fft(), PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

06445 {
06446 #if USE_NODE_PAR_RECEIVE
06447   CmiLock(ComputePmeMgr::fftw_plan_lock);
06448   CmiMemoryReadFence();
06449 #endif
06450   recv_grid(msg);
06451   if(msg->hasData) hasData=msg->hasData;
06452   int limsg;
06453   CmiMemoryAtomicFetchAndInc(imsg,limsg);
06454   grid_msgs[limsg] = msg;
06455   //  CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);      
06456   if(limsg+1 == grid_msgs.size())
06457     {
06458 
06459       if (hasData)
06460         {
06461           forward_fft();
06462         }
06463       send_trans();
06464       imsg=0;
06465       CmiMemoryWriteFence();
06466       //      CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06467     }
06468 #if USE_NODE_PAR_RECEIVE
06469   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06470   CmiMemoryWriteFence();
06471 #endif
06472 }

void PmeZPencil::node_process_untrans ( PmeUntransMsg  ) 

Definition at line 6474 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by NodePmeMgr::recvZUntrans().

06475 {
06476   recv_untrans(msg);
06477 #if USE_NODE_PAR_RECEIVE
06478   CmiMemoryWriteFence();
06479   CmiLock(ComputePmeMgr::fftw_plan_lock);
06480 #endif    
06481   int limsg;
06482   CmiMemoryAtomicFetchAndInc(imsgb,limsg);
06483   if(limsg+1 == initdata.zBlocks)
06484     {
06485 #if USE_NODE_PAR_RECEIVE
06486       CmiMemoryReadFence();
06487 #endif    
06488       if(hasData) // maybe this should be an assert
06489         {
06490           backward_fft();
06491         }
06492         
06493         send_all_ungrid();
06494     /*  int send_evir = 1;
06495       // TODO: this part should use Chao's output parallelization
06496       for ( limsg=0; limsg < grid_msgs.size(); ++limsg ) {
06497         PmeGridMsg *omsg = grid_msgs[limsg];
06498         if ( omsg->hasData ) {
06499           if ( send_evir ) {
06500             omsg->evir[0] = evir;
06501             send_evir = 0;
06502           } else {
06503             omsg->evir[0] = 0.;
06504           }
06505         }
06506         send_ungrid(omsg);
06507       } */
06508       imsgb=0;
06509       evir = 0;
06510       memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3); 
06511       CmiMemoryWriteFence();
06512       //      CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06513     }
06514 #if USE_NODE_PAR_RECEIVE
06515   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06516 #endif
06517 }

void PmeZPencil::recv_grid ( const PmeGridMsg  ) 

Definition at line 5132 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

05132                                                 {
05133 
05134   int dim3 = initdata.grid.dim3;
05135   if ( imsg == 0 ) {
05136     lattice = msg->lattice;
05137     sequence = msg->sequence;
05138 #if ! USE_NODE_PAR_RECEIVE
05139     memset(data, 0, sizeof(float)*nx*ny*dim3);
05140 #endif
05141   }
05142 
05143   if ( ! msg->hasData ) return;
05144 
05145   int zlistlen = msg->zlistlen;
05146 #ifdef NAMD_KNL
05147   int * __restrict msg_zlist = msg->zlist;
05148   int * __restrict zlist = work_zlist.begin();
05149   __assume_aligned(zlist,64);
05150   for ( int k=0; k<zlistlen; ++k ) {
05151     zlist[k] = msg_zlist[k];
05152   }
05153 #else
05154   int * __restrict zlist = msg->zlist;
05155 #endif
05156   char * __restrict fmsg = msg->fgrid;
05157   float * __restrict qmsg = msg->qgrid;
05158   float * __restrict d = data;
05159   int numGrids = 1;  // pencil FFT doesn't support multiple grids
05160   for ( int g=0; g<numGrids; ++g ) {
05161     for ( int i=0; i<nx; ++i ) {
05162      for ( int j=0; j<ny; ++j, d += dim3 ) {
05163       if( *(fmsg++) ) {
05164         #pragma ivdep
05165         for ( int k=0; k<zlistlen; ++k ) {
05166           d[zlist[k]] += *(qmsg++);
05167         }
05168       }
05169      }
05170     }
05171   }
05172 }

void PmeZPencil::recv_untrans ( const PmeUntransMsg  ) 

Definition at line 6245 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

06245                                                       {
06246 #if ! USE_NODE_PAR_RECEIVE
06247     if(imsg==0) evir=0.;
06248 #endif
06249 
06250   int block3 = initdata.grid.block3;
06251   int dim3 = initdata.grid.dim3;
06252   int kb = msg->sourceNode;
06253   int nz = msg->ny;
06254   const float *md = msg->qgrid;
06255   float *d = data;
06256   for ( int i=0; i<nx; ++i ) {
06257 #if CMK_BLUEGENEL
06258     CmiNetworkProgress();
06259 #endif   
06260     for ( int j=0; j<ny; ++j, d += dim3 ) {
06261       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
06262 #ifdef ZEROCHECK
06263         if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
06264                                     thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
06265 #endif
06266         d[2*k] = *(md++);
06267         d[2*k+1] = *(md++);
06268       }
06269     }
06270   }
06271 }

void PmeZPencil::send_all_ungrid (  ) 

Definition at line 6342 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06342                                  {
06343 /* 
06344 //Original code: the transformation is to first extract the msg 
06345 //idx that will has evir value set. -Chao Mei  
06346         int send_evir = 1;
06347         for (int imsg=0; imsg < grid_msgs.size(); ++imsg ) {
06348                 PmeGridMsg *msg = grid_msgs[imsg];
06349                 if ( msg->hasData ) {
06350                         if ( send_evir ) {
06351                                 msg->evir[0] = evir;
06352                                 send_evir = 0;
06353                         } else {
06354                                 msg->evir[0] = 0.;
06355                         }
06356                 }
06357                 send_ungrid(msg);
06358         }
06359 */
06360         int evirIdx = 0;
06361         for(int imsg=0; imsg<grid_msgs.size(); imsg++) {
06362                 if(grid_msgs[imsg]->hasData) {
06363                         evirIdx = imsg;
06364                         break;
06365                 }
06366         }
06367 
06368 #if     CMK_SMP && USE_CKLOOP
06369         int useCkLoop = Node::Object()->simParameters->useCkLoop;
06370         if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
06371            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06372                 //????What's the best value for numChunks?????
06373 #if USE_NODE_PAR_RECEIVE        
06374                 //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 1); //has to sync
06375                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
06376 #else
06377         //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 0); //not sync
06378                 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 0); //not sync
06379 #endif        
06380                 return;
06381         }
06382 #endif
06383         send_subset_ungrid(0, grid_msgs.size()-1, evirIdx);
06384 }

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5244 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

05244                                                         {
05245         int zBlocks = initdata.zBlocks;
05246         int block3 = initdata.grid.block3;
05247         int dim3 = initdata.grid.dim3;
05248         for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
05249           int kb = send_order[isend];
05250           int nz = block3;
05251           if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05252           int hd = ( hasData ? 1 : 0 );
05253           PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05254           msg->lattice = lattice;
05255           msg->sourceNode = thisIndex.y;
05256           msg->hasData = hasData;
05257           msg->nx = ny;
05258          if ( hasData ) {
05259           float *md = msg->qgrid;
05260           const float *d = data;
05261           for ( int i=0; i<nx; ++i ) {
05262            for ( int j=0; j<ny; ++j, d += dim3 ) {
05263                 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05264                   *(md++) = d[2*k];
05265                   *(md++) = d[2*k+1];
05266                 }
05267            }
05268           }
05269          }
05270           msg->sequence = sequence;
05271           SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05272 
05273     CmiEnableUrgentSend(1);
05274 #if USE_NODE_PAR_RECEIVE
05275       msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05276 #if Y_PERSIST 
05277       CmiUsePersistentHandle(&trans_handle[isend], 1);
05278 #endif
05279       initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05280 #if Y_PERSIST 
05281       CmiUsePersistentHandle(NULL, 0);
05282 #endif    
05283 #else
05284 #if Y_PERSIST 
05285       CmiUsePersistentHandle(&trans_handle[isend], 1);
05286 #endif
05287       initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05288 #if Y_PERSIST 
05289       CmiUsePersistentHandle(NULL, 0);
05290 #endif    
05291 #endif
05292     CmiEnableUrgentSend(0);
05293     }
05294 }

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx,
int  specialIdx 
)

Definition at line 6386 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

06386                                                                          {
06387         for (int imsg=fromIdx; imsg <=toIdx; ++imsg ) {
06388                 PmeGridMsg *msg = grid_msgs[imsg];
06389                 send_ungrid(msg);
06390         }
06391 }

void PmeZPencil::send_trans (  ) 

Definition at line 5296 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

05296                             {
05297 #if USE_PERSISTENT
05298     if (trans_handle == NULL) setup_persistent();
05299 #endif
05300 #if     CMK_SMP && USE_CKLOOP
05301         int useCkLoop = Node::Object()->simParameters->useCkLoop;
05302         if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
05303            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05310                 //send_subset_trans(0, initdata.zBlocks-1);
05311                 CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
05312                 return;
05313         }
05314 #endif
05315   int zBlocks = initdata.zBlocks;
05316   int block3 = initdata.grid.block3;
05317   int dim3 = initdata.grid.dim3;
05318   for ( int isend=0; isend<zBlocks; ++isend ) {
05319     int kb = send_order[isend];
05320     int nz = block3;
05321     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05322     int hd = ( hasData ? 1 : 0 );
05323     PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05324     msg->lattice = lattice;
05325     msg->sourceNode = thisIndex.y;
05326     msg->hasData = hasData;
05327     msg->nx = ny;
05328    if ( hasData ) {
05329     float *md = msg->qgrid;
05330     const float *d = data;
05331     for ( int i=0; i<nx; ++i ) {
05332      for ( int j=0; j<ny; ++j, d += dim3 ) {
05333       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05334         *(md++) = d[2*k];
05335         *(md++) = d[2*k+1];
05336       }
05337      }
05338     }
05339    }
05340     msg->sequence = sequence;
05341     SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05342 
05343     CmiEnableUrgentSend(1);
05344 #if USE_NODE_PAR_RECEIVE
05345     msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05346 #if Y_PERSIST 
05347     CmiUsePersistentHandle(&trans_handle[isend], 1);
05348 #endif
05349     initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05350 #if Y_PERSIST 
05351     CmiUsePersistentHandle(NULL, 0);
05352 #endif    
05353 #else
05354 #if Y_PERSIST 
05355     CmiUsePersistentHandle(&trans_handle[isend], 1);
05356 #endif
05357     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05358 #if Y_PERSIST 
05359     CmiUsePersistentHandle(NULL, 0);
05360 #endif    
05361 #endif
05362     CmiEnableUrgentSend(0);
05363   }
05364 }

void PmeZPencil::send_ungrid ( PmeGridMsg  ) 

Definition at line 6393 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

06393                                             {
06394 
06395 #ifdef NAMD_CUDA
06396   const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
06397 #else
06398   const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
06399 #endif
06400 
06401   int pe = msg->sourceNode;
06402   if ( ! msg->hasData ) {
06403     delete msg;
06404     PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
06405     SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
06406     CmiEnableUrgentSend(1);
06407     initdata.pmeProxy[pe].recvAck(ackmsg);
06408     CmiEnableUrgentSend(0);
06409     return;
06410   }
06411   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
06412   int dim3 = initdata.grid.dim3;
06413   int zlistlen = msg->zlistlen;
06414   int *zlist = msg->zlist;
06415   char *fmsg = msg->fgrid;
06416   float *qmsg = msg->qgrid;
06417   float *d = data;
06418   int numGrids = 1;  // pencil FFT doesn't support multiple grids
06419   for ( int g=0; g<numGrids; ++g ) {
06420 #if CMK_BLUEGENEL
06421     CmiNetworkProgress();
06422 #endif    
06423     for ( int i=0; i<nx; ++i ) {
06424       for ( int j=0; j<ny; ++j, d += dim3 ) {
06425         if( *(fmsg++) ) {
06426           for ( int k=0; k<zlistlen; ++k ) {
06427             *(qmsg++) = d[zlist[k]];
06428           }
06429         }
06430       }
06431     }
06432   }
06433   SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
06434     CmiEnableUrgentSend(1);
06435 #ifdef NAMD_CUDA
06436     if ( offload ) {
06437       initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
06438     } else
06439 #endif
06440   initdata.pmeProxy[pe].recvUngrid(msg);
06441     CmiEnableUrgentSend(0);
06442 }


The documentation for this class was generated from the following file:
Generated on Sun Feb 25 01:17:22 2018 for NAMD by  doxygen 1.4.7