Inheritance diagram for PmeZPencil:

Public Member Functions | |
| PmeZPencil_SDAG_CODE | PmeZPencil () |
| PmeZPencil (CkMigrateMessage *) | |
| ~PmeZPencil () | |
| void | fft_init () |
| void | recv_grid (const PmeGridMsg *) |
| void | forward_fft () |
| void | send_trans () |
| void | send_subset_trans (int fromIdx, int toIdx) |
| void | recv_untrans (const PmeUntransMsg *) |
| void | node_process_untrans (PmeUntransMsg *) |
| void | node_process_grid (PmeGridMsg *) |
| void | backward_fft () |
| void | send_ungrid (PmeGridMsg *) |
| void | send_all_ungrid () |
| void | send_subset_ungrid (int fromIdx, int toIdx, int specialIdx) |
|
|
Definition at line 3432 of file ComputePme.C. 03432 { __sdag_init(); setMigratable(false); }
|
|
|
Definition at line 3433 of file ComputePme.C.
|
|
|
Definition at line 3434 of file ComputePme.C. 03434 {
03435 #ifdef NAMD_FFTW
03436 #ifdef NAMD_FFTW_3
03437 delete [] forward_plans;
03438 delete [] backward_plans;
03439 #endif
03440 #endif
03441 }
|
|
|
Definition at line 5108 of file ComputePme.C. References PmeGrid::dim3, PmePencilInitMsgData::grid, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, and SimParameters::useCkLoop. Referenced by node_process_untrans(). 05108 {
05109 #ifdef NAMD_FFTW
05110 #ifdef MANUAL_DEBUG_FFTW3
05111 dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05112 #endif
05113 #ifdef NAMD_FFTW_3
05114 #if USE_CKLOOP
05115 int useCkLoop = Node::Object()->simParameters->useCkLoop;
05116 if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT) {
05117 //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
05118 //transform the above loop
05119 CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05120 return;
05121 }
05122 #endif
05123 fftwf_execute(backward_plan);
05124 #else
05125 rfftwnd_complex_to_real(backward_plan, nx*ny,
05126 (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
05127 #endif
05128 #ifdef MANUAL_DEBUG_FFTW3
05129 dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05130 #endif
05131
05132 #endif
05133
05134 #if CMK_BLUEGENEL
05135 CmiNetworkProgress();
05136 #endif
05137
05138 #ifdef FFTCHECK
05139 int dim3 = initdata.grid.dim3;
05140 int K1 = initdata.grid.K1;
05141 int K2 = initdata.grid.K2;
05142 int K3 = initdata.grid.K3;
05143 float scale = 1. / (1. * K1 * K2 * K3);
05144 float maxerr = 0.;
05145 float maxstd = 0.;
05146 int mi, mj, mk; mi = mj = mk = -1;
05147 float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
05148 const float *d = data;
05149 for ( int i=0; i<nx; ++i ) {
05150 for ( int j=0; j<ny; ++j, d += dim3 ) {
05151 for ( int k=0; k<K3; ++k ) {
05152 float std = 10. * (10. * (10. * std_base + i) + j) + k;
05153 float err = scale * d[k] - std;
05154 if ( fabsf(err) > fabsf(maxerr) ) {
05155 maxerr = err;
05156 maxstd = std;
05157 mi = i; mj = j; mk = k;
05158 }
05159 }
05160 }
05161 }
05162 CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
05163 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
05164 #endif
05165
05166 }
|
|
|
Definition at line 3629 of file ComputePme.C. References PmeGrid::block1, PmeGrid::block2, PmeGrid::dim3, SimParameters::FFTWEstimate, fftwf_malloc, SimParameters::FFTWPatient, PmePencilInitMsgData::grid, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, simParams, SimParameters::useCkLoop, and PmePencilInitMsgData::zBlocks. 03629 {
03630 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
03631 Node *node = nd.ckLocalBranch();
03632 SimParameters *simParams = node->simParameters;
03633
03634 #if USE_NODE_PAR_RECEIVE
03635 ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
03636 #endif
03637
03638 int K1 = initdata.grid.K1;
03639 int K2 = initdata.grid.K2;
03640 int K3 = initdata.grid.K3;
03641 int dim3 = initdata.grid.dim3;
03642 int block1 = initdata.grid.block1;
03643 int block2 = initdata.grid.block2;
03644
03645 nx = block1;
03646 if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
03647 ny = block2;
03648 if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
03649
03650 #ifdef NAMD_FFTW
03651 CmiLock(ComputePmeMgr::fftw_plan_lock);
03652
03653 data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
03654 work = new float[dim3];
03655
03656 order_init(initdata.zBlocks);
03657
03658 #ifdef NAMD_FFTW_3
03659 /* need array of sizes for the how many */
03660
03661 int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
03662 int sizeLines=nx*ny;
03663 int planLineSizes[1];
03664 planLineSizes[0]=K3;
03665 int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
03666 int ndimHalf=ndim/2;
03667 forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
03668 (float *) data, NULL, 1,
03669 ndim,
03670 (fftwf_complex *) data, NULL, 1,
03671 ndimHalf,
03672 fftwFlags);
03673
03674 backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
03675 (fftwf_complex *) data, NULL, 1,
03676 ndimHalf,
03677 (float *) data, NULL, 1,
03678 ndim,
03679 fftwFlags);
03680 #if USE_CKLOOP
03681 if(simParams->useCkLoop) {
03682 //How many FFT plans to be created? The grain-size issue!!.
03683 //Currently, I am choosing the min(nx, ny) to be coarse-grain
03684 numPlans = (nx<=ny?nx:ny);
03685 int howmany = sizeLines/numPlans;
03686 forward_plans = new fftwf_plan[numPlans];
03687 backward_plans = new fftwf_plan[numPlans];
03688 for(int i=0; i<numPlans; i++) {
03689 int dimStride = i*ndim*howmany;
03690 int dimHalfStride = i*ndimHalf*howmany;
03691 forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
03692 ((float *)data)+dimStride, NULL, 1,
03693 ndim,
03694 ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
03695 ndimHalf,
03696 fftwFlags);
03697
03698 backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
03699 ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
03700 ndimHalf,
03701 ((float *)data)+dimStride, NULL, 1,
03702 ndim,
03703 fftwFlags);
03704 }
03705 }else
03706 #endif
03707 {
03708 forward_plans = NULL;
03709 backward_plans = NULL;
03710 }
03711 #else
03712 forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
03713 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
03714 | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
03715 backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
03716 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
03717 | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
03718 #endif
03719 CmiUnlock(ComputePmeMgr::fftw_plan_lock);
03720 #else
03721 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
03722 #endif
03723
03724 #if USE_NODE_PAR_RECEIVE
03725 evir = 0.;
03726 memset(data, 0, sizeof(float) * nx*ny*dim3);
03727 #endif
03728 }
|
|
|
Definition at line 4030 of file ComputePme.C. References PmeGrid::dim3, PmePencilInitMsgData::grid, j, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, and SimParameters::useCkLoop. Referenced by node_process_grid(). 04030 {
04031 evir = 0.;
04032 #ifdef FFTCHECK
04033 int dim3 = initdata.grid.dim3;
04034 int K3 = initdata.grid.K3;
04035 float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
04036 float *d = data;
04037 for ( int i=0; i<nx; ++i ) {
04038 for ( int j=0; j<ny; ++j, d += dim3 ) {
04039 for ( int k=0; k<dim3; ++k ) {
04040 d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
04041 }
04042 }
04043 }
04044 #endif
04045 #ifdef NAMD_FFTW
04046 #ifdef MANUAL_DEBUG_FFTW3
04047 dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
04048 #endif
04049 #ifdef NAMD_FFTW_3
04050 #if USE_CKLOOP
04051 int useCkLoop = Node::Object()->simParameters->useCkLoop;
04052 if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT) {
04053 //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
04054 //transform the above loop
04055 CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
04056 return;
04057 }
04058 #endif
04059 fftwf_execute(forward_plan);
04060 #else
04061 rfftwnd_real_to_complex(forward_plan, nx*ny,
04062 data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
04063 #endif
04064 #ifdef MANUAL_DEBUG_FFTW3
04065 dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
04066 #endif
04067
04068 #endif
04069 #ifdef ZEROCHECK
04070 int dim3 = initdata.grid.dim3;
04071 int K3 = initdata.grid.K3;
04072 float *d = data;
04073 for ( int i=0; i<nx; ++i ) {
04074 for ( int j=0; j<ny; ++j, d += dim3 ) {
04075 for ( int k=0; k<dim3; ++k ) {
04076 if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
04077 thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
04078 }
04079 }
04080 }
04081 #endif
04082 }
|
|
|
Definition at line 5272 of file ComputePme.C. References forward_fft(), PmeGridMsg::hasData, recv_grid(), send_trans(), and ResizeArray< Elem >::size(). Referenced by NodePmeMgr::recvZGrid(). 05273 {
05274 #if USE_NODE_PAR_RECEIVE
05275 CmiLock(ComputePmeMgr::fftw_plan_lock);
05276 CmiMemoryReadFence();
05277 #endif
05278 recv_grid(msg);
05279 if(msg->hasData) hasData=msg->hasData;
05280 int limsg;
05281 CmiMemoryAtomicFetchAndInc(imsg,limsg);
05282 grid_msgs[limsg] = msg;
05283 // CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);
05284 if(limsg+1 == grid_msgs.size())
05285 {
05286
05287 if (hasData)
05288 {
05289 forward_fft();
05290 }
05291 send_trans();
05292 imsg=0;
05293 CmiMemoryWriteFence();
05294 // CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
05295 }
05296 #if USE_NODE_PAR_RECEIVE
05297 CmiUnlock(ComputePmeMgr::fftw_plan_lock);
05298 CmiMemoryWriteFence();
05299 #endif
05300 }
|
|
|
Definition at line 5302 of file ComputePme.C. References backward_fft(), PmeGrid::dim3, PmePencilInitMsgData::grid, recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks. Referenced by NodePmeMgr::recvZUntrans(). 05303 {
05304 #if USE_NODE_PAR_RECEIVE
05305 CmiLock(ComputePmeMgr::fftw_plan_lock);
05306 CmiMemoryReadFence();
05307 #endif
05308 recv_untrans(msg);
05309 int limsg;
05310 CmiMemoryAtomicFetchAndInc(imsgb,limsg);
05311 if(limsg+1 == initdata.zBlocks)
05312 {
05313 if(hasData) // maybe this should be an assert
05314 {
05315 backward_fft();
05316 }
05317
05318 send_all_ungrid();
05319 /* int send_evir = 1;
05320 // TODO: this part should use Chao's output parallelization
05321 for ( limsg=0; limsg < grid_msgs.size(); ++limsg ) {
05322 PmeGridMsg *omsg = grid_msgs[limsg];
05323 if ( omsg->hasData ) {
05324 if ( send_evir ) {
05325 omsg->evir[0] = evir;
05326 send_evir = 0;
05327 } else {
05328 omsg->evir[0] = 0.;
05329 }
05330 }
05331 send_ungrid(omsg);
05332 } */
05333 imsgb=0;
05334 evir = 0;
05335 memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3);
05336 CmiMemoryWriteFence();
05337 // CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
05338 }
05339 #if USE_NODE_PAR_RECEIVE
05340 CmiUnlock(ComputePmeMgr::fftw_plan_lock);
05341 CmiMemoryWriteFence();
05342 #endif
05343 }
|
|
|
Definition at line 3989 of file ComputePme.C. References PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, j, PmeGridMsg::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. Referenced by node_process_grid(). 03989 {
03990
03991 int dim3 = initdata.grid.dim3;
03992 if ( imsg == 0 ) {
03993 lattice = msg->lattice;
03994 sequence = msg->sequence;
03995 #if ! USE_NODE_PAR_RECEIVE
03996 memset(data, 0, sizeof(float)*nx*ny*dim3);
03997 #endif
03998 }
03999
04000 if ( ! msg->hasData ) return;
04001
04002 int zlistlen = msg->zlistlen;
04003 int *zlist = msg->zlist;
04004 char *fmsg = msg->fgrid;
04005 float *qmsg = msg->qgrid;
04006 float *d = data;
04007 int numGrids = 1; // pencil FFT doesn't support multiple grids
04008 for ( int g=0; g<numGrids; ++g ) {
04009 for ( int i=0; i<nx; ++i ) {
04010 for ( int j=0; j<ny; ++j, d += dim3 ) {
04011 if( *(fmsg++) ) {
04012 for ( int k=0; k<zlistlen; ++k ) {
04013 d[zlist[k]] += *(qmsg++);
04014 }
04015 }
04016 }
04017 }
04018 }
04019 }
|
|
|
Definition at line 5079 of file ComputePme.C. References PmeGrid::block3, PmeGrid::dim3, PmeUntransMsg::evir, PmePencilInitMsgData::grid, PmeUntransMsg::has_evir, j, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode. Referenced by node_process_untrans(). 05079 {
05080 #if ! USE_NODE_PAR_RECEIVE
05081 if(imsg==0) evir=0.;
05082 #endif
05083
05084 if ( msg->has_evir ) evir += msg->evir[0];
05085 int block3 = initdata.grid.block3;
05086 int dim3 = initdata.grid.dim3;
05087 int kb = msg->sourceNode;
05088 int nz = msg->ny;
05089 const float *md = msg->qgrid;
05090 float *d = data;
05091 for ( int i=0; i<nx; ++i ) {
05092 #if CMK_BLUEGENEL
05093 CmiNetworkProgress();
05094 #endif
05095 for ( int j=0; j<ny; ++j, d += dim3 ) {
05096 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05097 #ifdef ZEROCHECK
05098 if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
05099 thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
05100 #endif
05101 d[2*k] = *(md++);
05102 d[2*k+1] = *(md++);
05103 }
05104 }
05105 }
05106 }
|
|
|
Definition at line 5176 of file ComputePme.C. References Bool, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), and SimParameters::useCkLoop. Referenced by node_process_untrans(). 05176 {
05177 /*
05178 //Original code: the transformation is to first extract the msg
05179 //idx that will has evir value set. -Chao Mei
05180 int send_evir = 1;
05181 for (int imsg=0; imsg < grid_msgs.size(); ++imsg ) {
05182 PmeGridMsg *msg = grid_msgs[imsg];
05183 if ( msg->hasData ) {
05184 if ( send_evir ) {
05185 msg->evir[0] = evir;
05186 send_evir = 0;
05187 } else {
05188 msg->evir[0] = 0.;
05189 }
05190 }
05191 send_ungrid(msg);
05192 }
05193 */
05194 int evirIdx = 0;
05195 for(int imsg=0; imsg<grid_msgs.size(); imsg++) {
05196 if(grid_msgs[imsg]->hasData) {
05197 evirIdx = imsg;
05198 break;
05199 }
05200 }
05201
05202 #if USE_CKLOOP
05203 Bool useCkLoop = Node::Object()->simParameters->useCkLoop;
05204 if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS) {
05205 //????What's the best value for numChunks?????
05206 #if USE_NODE_PAR_RECEIVE
05207 //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 1); //has to sync
05208 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
05209 #else
05210 //CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, CkMyNodeSize(), 0, grid_msgs.size()-1, 0); //not sync
05211 CkLoop_Parallelize(PmeZPencilSendUngrid, evirIdx, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 0); //not sync
05212 #endif
05213 return;
05214 }
05215 #endif
05216 send_subset_ungrid(0, grid_msgs.size()-1, evirIdx);
05217 }
|
|
||||||||||||
|
Definition at line 4090 of file ComputePme.C. References PmeGrid::block3, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, j, PmeTransMsg::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeTransMsg::qgrid, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks. Referenced by PmeZPencilSendTrans(). 04090 {
04091 int zBlocks = initdata.zBlocks;
04092 int block3 = initdata.grid.block3;
04093 int dim3 = initdata.grid.dim3;
04094 for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
04095 int kb = send_order[isend];
04096 int nz = block3;
04097 if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
04098 int hd = ( hasData ? 1 : 0 );
04099 PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
04100 msg->lattice = lattice;
04101 msg->sourceNode = thisIndex.y;
04102 msg->hasData = hasData;
04103 msg->nx = ny;
04104 if ( hasData ) {
04105 float *md = msg->qgrid;
04106 const float *d = data;
04107 for ( int i=0; i<nx; ++i ) {
04108 for ( int j=0; j<ny; ++j, d += dim3 ) {
04109 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
04110 *(md++) = d[2*k];
04111 *(md++) = d[2*k+1];
04112 }
04113 }
04114 }
04115 }
04116 msg->sequence = sequence;
04117 SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
04118
04119 CmiEnableUrgentSend(1);
04120 #if USE_NODE_PAR_RECEIVE
04121 msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
04122 #if Y_PERSIST
04123 CmiUsePersistentHandle(&trans_handle[isend], 1);
04124 #endif
04125 initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
04126 #if Y_PERSIST
04127 CmiUsePersistentHandle(NULL, 0);
04128 #endif
04129 #else
04130 #if Y_PERSIST
04131 CmiUsePersistentHandle(&trans_handle[isend], 1);
04132 #endif
04133 initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
04134 #if Y_PERSIST
04135 CmiUsePersistentHandle(NULL, 0);
04136 #endif
04137 #endif
04138 CmiEnableUrgentSend(0);
04139 }
04140 }
|
|
||||||||||||||||
|
Definition at line 5219 of file ComputePme.C. References PmeGridMsg::evir, PmeGridMsg::hasData, and send_ungrid(). Referenced by PmeZPencilSendUngrid(), and send_all_ungrid(). 05219 {
05220 for (int imsg=fromIdx; imsg <=toIdx; ++imsg ) {
05221 PmeGridMsg *msg = grid_msgs[imsg];
05222 if ( msg->hasData) {
05223 if (imsg == specialIdx) {
05224 msg->evir[0] = evir;
05225 } else {
05226 msg->evir[0] = 0.;
05227 }
05228 }
05229 send_ungrid(msg);
05230 }
05231 }
|
|
|
Definition at line 4142 of file ComputePme.C. References PmeGrid::block3, Bool, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, j, PmeTransMsg::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PmeTransMsg::qgrid, PmeTransMsg::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks. Referenced by node_process_grid(). 04142 {
04143 #if USE_PERSISTENT
04144 if (trans_handle == NULL) setup_persistent();
04145 #endif
04146 #if USE_CKLOOP
04147 Bool useCkLoop = Node::Object()->simParameters->useCkLoop;
04148 if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS) {
04155 //send_subset_trans(0, initdata.zBlocks-1);
04156 CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
04157 return;
04158 }
04159 #endif
04160 int zBlocks = initdata.zBlocks;
04161 int block3 = initdata.grid.block3;
04162 int dim3 = initdata.grid.dim3;
04163 for ( int isend=0; isend<zBlocks; ++isend ) {
04164 int kb = send_order[isend];
04165 int nz = block3;
04166 if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
04167 int hd = ( hasData ? 1 : 0 );
04168 PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
04169 msg->lattice = lattice;
04170 msg->sourceNode = thisIndex.y;
04171 msg->hasData = hasData;
04172 msg->nx = ny;
04173 if ( hasData ) {
04174 float *md = msg->qgrid;
04175 const float *d = data;
04176 for ( int i=0; i<nx; ++i ) {
04177 for ( int j=0; j<ny; ++j, d += dim3 ) {
04178 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
04179 *(md++) = d[2*k];
04180 *(md++) = d[2*k+1];
04181 }
04182 }
04183 }
04184 }
04185 msg->sequence = sequence;
04186 SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
04187
04188 CmiEnableUrgentSend(1);
04189 #if USE_NODE_PAR_RECEIVE
04190 msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
04191 #if Y_PERSIST
04192 CmiUsePersistentHandle(&trans_handle[isend], 1);
04193 #endif
04194 initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
04195 #if Y_PERSIST
04196 CmiUsePersistentHandle(NULL, 0);
04197 #endif
04198 #else
04199 #if Y_PERSIST
04200 CmiUsePersistentHandle(&trans_handle[isend], 1);
04201 #endif
04202 initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
04203 #if Y_PERSIST
04204 CmiUsePersistentHandle(NULL, 0);
04205 #endif
04206 #endif
04207 CmiEnableUrgentSend(0);
04208 }
04209 }
|
|
|
Definition at line 5233 of file ComputePme.C. References PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, j, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeProxy, PmeGridMsg::qgrid, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. Referenced by send_subset_ungrid(). 05233 {
05234 int pe = msg->sourceNode;
05235 if ( ! msg->hasData ) {
05236 delete msg;
05237 PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
05238 SET_PRIORITY(ackmsg,sequence,PME_UNGRID_PRIORITY)
05239 CmiEnableUrgentSend(1);
05240 initdata.pmeProxy[pe].recvAck(ackmsg);
05241 CmiEnableUrgentSend(0);
05242 return;
05243 }
05244 msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
05245 int dim3 = initdata.grid.dim3;
05246 int zlistlen = msg->zlistlen;
05247 int *zlist = msg->zlist;
05248 char *fmsg = msg->fgrid;
05249 float *qmsg = msg->qgrid;
05250 float *d = data;
05251 int numGrids = 1; // pencil FFT doesn't support multiple grids
05252 for ( int g=0; g<numGrids; ++g ) {
05253 #if CMK_BLUEGENEL
05254 CmiNetworkProgress();
05255 #endif
05256 for ( int i=0; i<nx; ++i ) {
05257 for ( int j=0; j<ny; ++j, d += dim3 ) {
05258 if( *(fmsg++) ) {
05259 for ( int k=0; k<zlistlen; ++k ) {
05260 *(qmsg++) = d[zlist[k]];
05261 }
05262 }
05263 }
05264 }
05265 }
05266 SET_PRIORITY(msg,sequence,PME_UNGRID_PRIORITY)
05267 CmiEnableUrgentSend(1);
05268 initdata.pmeProxy[pe].recvUngrid(msg);
05269 CmiEnableUrgentSend(0);
05270 }
|
1.3.9.1