Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

fftlib.C

Go to the documentation of this file.
00001 
00002 #include "fftlib.h"
00003 
00004 #include <assert.h>
00005 
00006 void call_ck_cb (void *arg) {
00007   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00008   cw->cb.send (cw->msg);
00009 }
00010 
00011 void call_ck_cb_recv_grid (void *arg) {
00012   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00013   OptPmeDummyMsg *msg = (OptPmeDummyMsg *)cw->msg;
00014   OptPmeZPencil *zp = (OptPmeZPencil *)cw->array;
00015   zp->many_to_manyRecvGrid(msg);
00016 }
00017 
00018 void call_ck_cb_recv_trans_y (void *arg) {
00019   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00020   OptPmeDummyMsg *msg = (OptPmeDummyMsg *)cw->msg;
00021   OptPmeYPencil *yp = (OptPmeYPencil *)cw->array;
00022   yp->many_to_manyRecvTrans(msg);
00023 }
00024 
00025 void call_ck_cb_recv_trans_x (void *arg) {
00026   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00027   OptPmeDummyMsg *msg = (OptPmeDummyMsg *)cw->msg;
00028   OptPmeXPencil *xp = (OptPmeXPencil *)cw->array;
00029   xp->many_to_manyRecvTrans(msg);
00030 }
00031 
00032 void call_ck_cb_recv_untrans_y (void *arg) {
00033   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00034   OptPmeDummyMsg *msg = (OptPmeDummyMsg *)cw->msg;
00035   OptPmeYPencil *yp = (OptPmeYPencil *)cw->array;
00036   yp->many_to_manyRecvUntrans(msg);
00037 }
00038 
00039 void call_ck_cb_recv_untrans_z (void *arg) {
00040   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00041   OptPmeDummyMsg *msg = (OptPmeDummyMsg *)cw->msg;
00042   OptPmeZPencil *zp = (OptPmeZPencil *)cw->array;
00043   zp->many_to_manyRecvUntrans(msg);
00044 }
00045 
00046 void OptPmeZPencil::fft_init() {
00047   
00048   //printf ("Initialize zpencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00049 
00050   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00051   Node *node = nd.ckLocalBranch();
00052   SimParameters *simParams = node->simParameters;
00053 
00054   int K1 = initdata.grid.K1;
00055   int K2 = initdata.grid.K2;
00056   int K3 = initdata.grid.K3;
00057   int dim3 = initdata.grid.dim3;
00058   int block1 = initdata.grid.block1;
00059   int block2 = initdata.grid.block2;
00060 
00061   nx = block1;
00062   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
00063   ny = block2;
00064   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
00065 
00066   data = new float[nx*ny*dim3];
00067   many_to_many_data = new float[nx*ny*dim3];
00068   many_to_many_nb = new int [initdata.zBlocks];
00069   work = new float[dim3];
00070 
00071   memset(data, 0, sizeof(float) * nx*ny*dim3);
00072   memset(many_to_many_data, 0, sizeof(float) * nx*ny*dim3);
00073 
00074   order_init(initdata.zBlocks);
00075 
00076 #ifdef NAMD_FFTW
00077   CmiLock(fftw_plan_lock);
00078 #ifdef NAMD_FFTW_3
00079   /* need array of sizes for the how many */
00080   int numLines=nx*ny;
00081   int planLineSizes[1];
00082   planLineSizes[0]=K3;
00083   CkAbort("what are we doing in here?");
00084   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, numLines,
00085                                      (float *) data, NULL, 1, 
00086                                          initdata.grid.dim3,
00087                                          (fftwf_complex *) data, NULL, 1, 0,
00088                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00089                                      : FFTW_MEASURE ));
00090   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, numLines,
00091                                      (fftwf_complex *) data, NULL, 1, 
00092                                          initdata.grid.dim3/2,
00093                                      (float *) data, NULL, 1, 0,
00094                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00095                                      : FFTW_MEASURE ));
00096 #else
00097 
00098   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
00099         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00100         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
00101   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
00102         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00103         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
00104 #endif
00105   CmiUnlock(fftw_plan_lock);
00106 #else
00107   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00108 #endif
00109 
00110   handle = NULL; 
00111 #if CHARM_VERSION > 60000
00112   handle = CmiDirect_manytomany_allocate_handle();
00113 #endif
00114 }
00115 
00116 void OptPmeYPencil::fft_init() {
00117   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00118   Node *node = nd.ckLocalBranch();
00119   SimParameters *simParams = node->simParameters;
00120 
00121   //  printf ("Initialize ypencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00122 
00123   int K1 = initdata.grid.K1;
00124   int K2 = initdata.grid.K2;
00125   int dim2 = initdata.grid.dim2;
00126   int dim3 = initdata.grid.dim3;
00127   int block1 = initdata.grid.block1;
00128   int block3 = initdata.grid.block3;
00129 
00130   nx = block1;
00131   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
00132   nz = block3;
00133   if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
00134 
00135   data = new float[nx*dim2*nz*2];
00136   many_to_many_data = new float[nx*dim2*nz*2];
00137   many_to_many_nb = new int [initdata.yBlocks];
00138   work = new float[2*K2];
00139 
00140   memset(many_to_many_data, 0, sizeof(float) * nx*dim2*nz*2);
00141 
00142   order_init(initdata.yBlocks);
00143 
00144 #ifdef NAMD_FFTW
00145   CmiLock(fftw_plan_lock);
00146 #ifdef NAMD_FFTW_3
00147   /* need array of sizes for the dimensions */
00148   int numLines=nz;
00149   int planLineSizes[2];
00150   planLineSizes[0]=initdata.grid.K2;
00151   planLineSizes[1]=nz;
00152   forward_plan = fftwf_plan_many_dft(2, planLineSizes, numLines, 
00153                                      (fftwf_complex *) data, NULL, nz, 1,
00154                                      (fftwf_complex *) data, NULL, 1, 0,
00155                                      FFTW_FORWARD, 
00156                                      ( simParams->FFTWEstimate 
00157                                        ? FFTW_ESTIMATE : FFTW_MEASURE ));
00158   backward_plan = fftwf_plan_many_dft(2, planLineSizes, numLines, 
00159                                      (fftwf_complex *) data, NULL, nz, 1,
00160                                      (fftwf_complex *) data, NULL, 1, 0,
00161                                      FFTW_FORWARD, 
00162                                      ( simParams->FFTWEstimate 
00163                                        ? FFTW_ESTIMATE : FFTW_MEASURE ));
00164 #else
00165 
00166   forward_plan = fftw_create_plan_specific(K2, FFTW_FORWARD,
00167         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00168         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00169         nz, (fftw_complex *) work, 1);
00170   backward_plan = fftw_create_plan_specific(K2, FFTW_BACKWARD,
00171         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00172         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00173         nz, (fftw_complex *) work, 1);
00174 #endif
00175   CmiUnlock(fftw_plan_lock);
00176 #else
00177   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00178 #endif
00179 
00180   handle = NULL; 
00181 #if CHARM_VERSION > 60000
00182   handle = CmiDirect_manytomany_allocate_handle();
00183 #endif
00184   initialize_manytomany();
00185 }
00186 
00187 
00188 void OptPmeXPencil::fft_init() {
00189   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00190   Node *node = nd.ckLocalBranch();
00191   SimParameters *simParams = node->simParameters;
00192 
00193   //  printf ("Initialize xpencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00194 
00195   lattice = simParams->lattice;
00196 
00197   int K1 = initdata.grid.K1;
00198   int K2 = initdata.grid.K2;
00199   int dim3 = initdata.grid.dim3;
00200   int block2 = initdata.grid.block2;
00201   int block3 = initdata.grid.block3;
00202 
00203   ny = block2;
00204   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
00205   nz = block3;
00206   if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
00207 
00208   data = new float[K1*block2*block3*2];
00209   many_to_many_data = new float[K1*block2*block3*2];
00210   many_to_many_nb = new int [initdata.xBlocks];
00211   work = new float[2*K1];
00212 
00213   memset(many_to_many_data, 0, sizeof(float) * K1*block2*block3*2);
00214 
00215   order_init(initdata.xBlocks);
00216 
00217 #ifdef NAMD_FFTW
00218   CmiLock(fftw_plan_lock);
00219 #ifdef NAMD_FFTW_3
00220   /* need array of sizes for the how many */
00221   int numLines=ny*nz;
00222   int planLineSizes[1];
00223   planLineSizes[0]=K1;
00224   forward_plan = fftwf_plan_many_dft(1, planLineSizes, numLines,
00225                                      (fftwf_complex *) data, NULL, K1, 1,
00226                                      (fftwf_complex *) data, NULL, 1, 0,
00227                                    FFTW_FORWARD,
00228                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00229                                      : FFTW_MEASURE ));
00230   backward_plan = fftwf_plan_many_dft(1, planLineSizes, numLines,
00231                                      (fftwf_complex *) data, NULL, K1, 1,
00232                                      (fftwf_complex *) data, NULL, 1, 0,
00233                                    FFTW_BACKWARD,
00234                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00235                                      : FFTW_MEASURE ));
00236 #else
00237 
00238   forward_plan = fftw_create_plan_specific(K1, FFTW_FORWARD,
00239         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00240         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00241         ny*nz, (fftw_complex *) work, 1);
00242   backward_plan = fftw_create_plan_specific(K1, FFTW_BACKWARD,
00243         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00244         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00245         ny*nz, (fftw_complex *) work, 1);
00246 
00247   CmiUnlock(fftw_plan_lock);
00248 #endif
00249 #else
00250   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00251 #endif
00252 
00253   myKSpace = new PmeKSpace(initdata.grid,
00254                 thisIndex.y*block2, thisIndex.y*block2 + ny,
00255                 thisIndex.z*block3, thisIndex.z*block3 + nz);
00256 
00257   handle = NULL; 
00258 #if CHARM_VERSION > 60000
00259   handle = CmiDirect_manytomany_allocate_handle();
00260 #endif
00261   initialize_manytomany();
00262 
00263   constant_pressure = initdata.constant_pressure;
00264 
00265   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00266   if (simParams->accelMDOn) {
00267     amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00268   } else {
00269     amd_reduction = NULL;
00270   }  
00271 }
00272 
00273 //#define FFTCHECK   // run a grid of integers through the fft
00274 // #define ZEROCHECK  // check for suspicious zeros in fft
00275 
00276 void OptPmeZPencil::recv_grid(const OptPmeGridMsg *msg) {
00277 
00278   int dim3 = initdata.grid.dim3;
00279   if ( imsg == 0 ) {
00280     memset(data, 0, sizeof(float) * nx*ny*dim3);
00281   }
00282 
00283   int xstart = msg->xstart - thisIndex.x * initdata.grid.block1;
00284   int ystart = msg->ystart - thisIndex.y * initdata.grid.block2;
00285   assert (xstart >= 0);
00286   assert (ystart >= 0);
00287   int xlen   = msg->xlen;
00288   int ylen   = msg->ylen;    
00289   assert (xstart + xlen <= nx);
00290   assert (ystart + ylen <= ny);
00291   
00292   float *qmsg = msg->qgrid;
00293   float *d = data;
00294   int zlen = msg->zlen;
00295   int zstart = msg->zstart;
00296   int zmax = zstart + zlen - 1;
00297   int k = 0;
00298 
00299   int K3 = initdata.grid.K3; 
00300   int K3_1 = initdata.grid.K3 - 1; 
00301   while (zstart < 0) {
00302     zstart += K3;
00303     zmax += K3;
00304   }
00305 
00306   for ( int i=xstart; i<xstart+xlen; ++i ) {
00307     for ( int j=ystart; j<ystart+ylen; ++j) {
00308       float *d = data + (i * ny + j) * dim3; 
00309       for ( k=zstart; k<=zmax; ++k ) {
00310         int kz = k;
00311         //if (kz >= K3) kz -= K3;
00312         kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00313         //assert (kz >= 0);
00314         //assert (kz <  K3);
00315         d[kz] += *(qmsg++);
00316       }
00317     }
00318   }
00319 
00320   if (_iter == MANY_TO_MANY_SETUP) {
00321     if (imsg == 0)
00322       m2m_recv_grid = new PatchGridElem [grid_msgs.size()];    
00323     
00324     m2m_recv_grid[imsg].xstart = xstart;
00325     m2m_recv_grid[imsg].xlen   = xlen;
00326     m2m_recv_grid[imsg].ystart = ystart;
00327     m2m_recv_grid[imsg].ylen   = ylen;
00328     m2m_recv_grid[imsg].zstart = zstart;
00329     m2m_recv_grid[imsg].zlen   = zlen;
00330     m2m_recv_grid[imsg].patchID = msg->patchID;
00331     
00332     if ( imsg == grid_msgs.size() - 1) 
00333         initialize_manytomany ();
00334   }
00335 }
00336 
00337 
00338 void OptPmeZPencil::forward_fft() {
00339 #ifdef FFTCHECK
00340   int dim3 = initdata.grid.dim3;
00341   int K3 = initdata.grid.K3;
00342   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
00343   float *d = data;
00344   for ( int i=0; i<nx; ++i ) {
00345    for ( int j=0; j<ny; ++j, d += dim3 ) {
00346     for ( int k=0; k<dim3; ++k ) {
00347       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
00348     }
00349    }
00350   }
00351 #endif
00352 #ifdef NAMD_FFTW
00353 #ifdef NAMD_FFTW_3
00354   fftwf_execute(forward_plan);
00355 #else
00356   rfftwnd_real_to_complex(forward_plan, nx*ny,
00357                           data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
00358 #endif
00359 #endif
00360 }
00361 
00362 void OptPmeZPencil::send_trans() {
00363   int zBlocks = initdata.zBlocks;
00364   int block3 = initdata.grid.block3;
00365   int dim3 = initdata.grid.dim3;
00366 
00367   //int offset = 0;
00368   for ( int isend=0; isend<zBlocks; ++isend ) {
00369     int kb = send_order[isend];
00370     int nz = block3;
00371     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00372     //assert (nz > 0);
00373     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00374     msg->sourceNode = thisIndex.y;
00375     msg->nx = ny;
00376     float *md = msg->qgrid;
00377     const float *d = data;
00378     for ( int i=0; i<nx; ++i ) {
00379       for ( int j=0; j<ny; ++j, d += dim3 ) {
00380         for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00381           *(md++) = d[2*k];
00382           *(md++) = d[2*k+1];
00383         }
00384       }
00385     }          
00386     //    printf ("%d, %d: Zpencil Sending trans to %d, %d\n", thisIndex.x, thisIndex.y, thisIndex.x, kb);
00387     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
00388   }
00389 }
00390 
00391 
00392 void OptPmeYPencil::recv_trans(const OptPmeFFTMsg *msg) {
00393 
00394   int block2 = initdata.grid.block2;
00395   int K2 = initdata.grid.K2;
00396   int jb = msg->sourceNode;
00397   int ny = msg->nx;
00398   const float *md = msg->qgrid;
00399   float *d = data;
00400   for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00401     for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00402       for ( int k=0; k<nz; ++k ) {
00403         d[2*(j*nz+k)] = *(md++);
00404         d[2*(j*nz+k)+1] = *(md++);
00405       }
00406     }
00407   }
00408 } 
00409 
00410 void OptPmeYPencil::forward_fft() {
00411 #ifdef NAMD_FFTW
00412 #ifdef NAMD_FFTW_3
00413   fftwf_execute(forward_plan);
00414 #else
00415   for ( int i=0; i<nx; ++i ) {
00416     fftw(forward_plan, nz,
00417          ((fftw_complex *) data) + i * nz * initdata.grid.K2,
00418          nz, 1, (fftw_complex *) work, 1, 0);
00419   }
00420 #endif
00421 #endif
00422 }
00423 
00424 void OptPmeYPencil::send_trans() {
00425   int yBlocks = initdata.yBlocks;
00426   int block2 = initdata.grid.block2;
00427   int K2 = initdata.grid.K2;
00428   
00429   for ( int isend=0; isend<yBlocks; ++isend ) {
00430     int jb = send_order[isend];
00431     int ny = block2;
00432     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00433     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00434     msg->sourceNode = thisIndex.x;
00435     msg->nx = nx;
00436     float *md = msg->qgrid;
00437     const float *d = data;
00438     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00439       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00440         for ( int k=0; k<nz; ++k ) {
00441           *(md++) = d[2*(j*nz+k)];
00442           *(md++) = d[2*(j*nz+k)+1];
00443         }
00444       }
00445     }
00446     if ( md != msg->qgrid + nx*ny*nz*2 ) CkPrintf("error in YX at %d %d %d\n",
00447                                                   thisIndex.x, jb, thisIndex.z);
00448     
00449     //printf ("%d, %d: Ypencil Sending trans to %d, %d\n", thisIndex.z, thisIndex.x, jb, thisIndex.z);
00450     initdata.xPencil(0,jb,thisIndex.z).recvTrans(msg);
00451   }
00452 }
00453 
00454 
00455 
00456 void OptPmeXPencil::recv_trans(const OptPmeFFTMsg *msg) {
00457 
00458   int block1 = initdata.grid.block1;
00459   int K1 = initdata.grid.K1;
00460   int ib = msg->sourceNode;
00461   int nx = msg->nx;
00462   const float *md = msg->qgrid;
00463   for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00464     float *d = data + i*ny*nz*2;
00465     for ( int j=0; j<ny; ++j, d += nz*2 ) {
00466       for ( int k=0; k<nz; ++k ) {
00467         d[2*k] = *(md++);
00468         d[2*k+1] = *(md++);
00469       }
00470     }
00471   }
00472 }
00473 
00474 
00475 
00476 void OptPmeXPencil::forward_fft() {
00477 #ifdef NAMD_FFTW
00478 #ifdef NAMD_FFTW_3
00479   fftwf_execute(forward_plan);
00480 #else
00481 
00482   fftw(forward_plan, ny*nz,
00483        ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
00484 #endif
00485 #endif
00486 }
00487 
00488 void OptPmeXPencil::pme_kspace() {
00489   evir = 0.;  //set evir to 0
00490 
00491 #ifdef FFTCHECK
00492   return;
00493 #endif
00494 
00495   BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00496   evir[0] = myKSpace->compute_energy(data,
00497                                      lattice, ewaldcof, &(evir[1]));
00498 
00499   //contribute (7*sizeof(double), evir.begin(), CkReduction::sum_double, initdata.cb_energy);
00500 }
00501 
00502 void OptPmeXPencil::submit_evir() {
00503   double * cdata = (double *) evir.begin();
00504   reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += cdata[0];
00505   reduction->item(REDUCTION_VIRIAL_SLOW_XX) += cdata[1];
00506   reduction->item(REDUCTION_VIRIAL_SLOW_XY) += cdata[2];
00507   reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += cdata[3];
00508   reduction->item(REDUCTION_VIRIAL_SLOW_YX) += cdata[2];
00509   reduction->item(REDUCTION_VIRIAL_SLOW_YY) += cdata[4];
00510   reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += cdata[5];
00511   reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += cdata[3];
00512   reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += cdata[5];
00513   reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += cdata[6];   
00514 
00515   if (amd_reduction) {
00516     amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += cdata[0];
00517     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XX) += cdata[1];
00518     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XY) += cdata[2];
00519     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += cdata[3];
00520     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YX) += cdata[2];
00521     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YY) += cdata[4];
00522     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += cdata[5];
00523     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += cdata[3];
00524     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += cdata[5];
00525     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += cdata[6];
00526   }
00527 
00528   SimParameters *simParams = Node::Object()->simParameters;
00529   int fef = simParams->fullElectFrequency;
00530   for (int i = 0; i < fef; i++) {
00531     reduction->submit();
00532     if (amd_reduction) amd_reduction->submit();
00533   }
00534 }
00535 
00536 void OptPmeXPencil::backward_fft() {
00537 #ifdef NAMD_FFTW
00538 #ifdef NAMD_FFTW_3
00539   fftwf_execute(backward_plan);
00540 #else
00541 
00542   fftw(backward_plan, ny*nz,
00543        ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
00544 #endif
00545 #endif
00546 }
00547 
00548 void OptPmeXPencil::send_untrans() {
00549   int xBlocks = initdata.xBlocks;
00550   int block1 = initdata.grid.block1;
00551   int K1 = initdata.grid.K1;
00552 
00553   for ( int isend=0; isend<xBlocks; ++isend ) {
00554     int ib = send_order[isend];
00555     int nx = block1;
00556     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
00557     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;    
00558     msg->sourceNode = thisIndex.y;
00559     msg->nx = ny;
00560     float *md = msg->qgrid;
00561     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00562       float *d = data + i*ny*nz*2;
00563       for ( int j=0; j<ny; ++j, d += nz*2 ) {
00564         for ( int k=0; k<nz; ++k ) {
00565           *(md++) = d[2*k];
00566           *(md++) = d[2*k+1];
00567         }
00568       }
00569     }
00570     
00571     //printf ("%d, %d: Xpencil Sending untrans to %d, %d\n", thisIndex.y, thisIndex.z, ib, thisIndex.z);
00572     initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
00573   }
00574 }
00575 
00576 
00577 void OptPmeYPencil::recv_untrans(const OptPmeFFTMsg *msg) {
00578 
00579   int block2 = initdata.grid.block2;
00580   int K2 = initdata.grid.K2;
00581   int jb = msg->sourceNode;
00582   int ny = msg->nx;
00583   const float *md = msg->qgrid;
00584   float *d = data;
00585   for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00586     for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00587       for ( int k=0; k<nz; ++k ) {
00588         d[2*(j*nz+k)] = *(md++);
00589         d[2*(j*nz+k)+1] = *(md++);
00590       }
00591     }
00592   }
00593 } 
00594 
00595 
00596 
00597 void OptPmeYPencil::backward_fft() {
00598 #ifdef NAMD_FFTW
00599 #ifdef NAMD_FFTW_3
00600   fftwf_execute(backward_plan);
00601 #else
00602   for ( int i=0; i<nx; ++i ) {
00603 #if CMK_BLUEGENEL
00604     CmiNetworkProgress();
00605 #endif
00606 
00607     fftw(backward_plan, nz,
00608          ((fftw_complex *) data) + i * nz * initdata.grid.K2,
00609          nz, 1, (fftw_complex *) work, 1, 0);
00610   }
00611 #endif
00612 #endif
00613 }
00614 
00615 void OptPmeYPencil::send_untrans() {
00616   //printf ("%d, %d: In ypencil send_untrans called once \n", thisIndex.x, thisIndex.z);
00617 
00618   int yBlocks = initdata.yBlocks;
00619   int block2 = initdata.grid.block2;
00620   int K2 = initdata.grid.K2;
00621 
00622   for ( int isend=0; isend<yBlocks; ++isend ) {
00623     int jb = send_order[isend];
00624     //if ( ! needs_reply[jb] ) continue;
00625     int ny = block2;
00626     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00627     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00628     msg->sourceNode = thisIndex.z;
00629     msg->nx = nz;
00630     float *md = msg->qgrid;
00631     const float *d = data;
00632     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00633      for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00634       for ( int k=0; k<nz; ++k ) {
00635         *(md++) = d[2*(j*nz+k)];
00636         *(md++) = d[2*(j*nz+k)+1];
00637       }
00638      }
00639     }
00640 
00641     //printf ("%d, %d: Sending untrans to %d, %d\n", thisIndex.z, thisIndex.x, thisIndex.x, jb);
00642     initdata.zPencil(thisIndex.x,jb,0).recvUntrans(msg);
00643   }
00644 }
00645 
00646 void OptPmeZPencil::recv_untrans(const OptPmeFFTMsg *msg) {
00647 
00648   //printf ("%d, %d In recv untrans\n", thisIndex.x, thisIndex.y);
00649 
00650   int block3 = initdata.grid.block3;
00651   int dim3 = initdata.grid.dim3;
00652   int kb = msg->sourceNode;
00653   int nz = msg->nx;
00654   const float *md = msg->qgrid;
00655   float *d = data;
00656   for ( int i=0; i<nx; ++i ) {
00657     for ( int j=0; j<ny; ++j, d += dim3 ) {
00658       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00659         d[2*k] = *(md++);
00660         d[2*k+1] = *(md++);
00661       }
00662     }
00663   }
00664 }
00665 
00666 
00667 void OptPmeZPencil::backward_fft() {
00668 #ifdef NAMD_FFTW
00669 #ifdef NAMD_FFTW_3
00670   fftwf_execute(backward_plan);
00671 #else
00672   rfftwnd_complex_to_real(backward_plan, nx*ny,
00673                           (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
00674 #endif
00675 #endif
00676   
00677 #if CMK_BLUEGENEL
00678   CmiNetworkProgress();
00679 #endif
00680 
00681 #ifdef FFTCHECK
00682   int dim3 = initdata.grid.dim3;
00683   int K1 = initdata.grid.K1;
00684   int K2 = initdata.grid.K2;
00685   int K3 = initdata.grid.K3;
00686   float scale = 1. / (1. * K1 * K2 * K3);
00687   float maxerr = 0.;
00688   float maxstd = 0.;
00689   int mi, mj, mk;  mi = mj = mk = -1;
00690   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
00691   const float *d = data;
00692   for ( int i=0; i<nx; ++i ) {
00693    for ( int j=0; j<ny; ++j, d += dim3 ) {
00694     for ( int k=0; k<K3; ++k ) {
00695       float std = 10. * (10. * (10. * std_base + i) + j) + k;
00696       float err = scale * d[k] - std;
00697       if ( fabsf(err) > fabsf(maxerr) ) {
00698         maxerr = err;
00699         maxstd = std;
00700         mi = i;  mj = j;  mk = k;
00701       }
00702     }
00703    }
00704   }
00705   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
00706            thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
00707 #endif
00708 }
00709 
00710 void OptPmeZPencil::send_ungrid(OptPmeGridMsg *msg) {
00711   int pe = msg->sourceNode;
00712   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
00713 
00714   int dim3 = initdata.grid.dim3;
00715   int xstart = msg->xstart - thisIndex.x * initdata.grid.block1;
00716   int ystart = msg->ystart - thisIndex.y * initdata.grid.block2;
00717   assert (xstart >= 0);
00718   assert (ystart >= 0);
00719   int xlen   = msg->xlen;
00720   int ylen   = msg->ylen;
00721   assert (xstart + xlen <= nx);
00722   assert (ystart + ylen <= ny);
00723 
00724   float *qmsg = msg->qgrid;
00725   float *d = data;
00726   int zstart = msg->zstart;
00727   int zlen = msg->zlen;
00728   int zmax = zstart + zlen - 1;
00729   
00730   int K3_1 = initdata.grid.K3 - 1; 
00731   int K3 = initdata.grid.K3;
00732   if (zstart < 0) {
00733     zstart += K3;
00734     zmax += K3;
00735   }
00736   
00737   int k = 0;
00738   for ( int i=xstart; i<xstart+xlen; ++i ) {
00739     for ( int j=ystart; j<ystart+ylen; ++j) {
00740       float *d = data + (i * ny + j) * dim3;
00741       for ( k=zstart; k<=zmax; ++k ) {
00742         int kz = k;
00743         //if (kz >= K3) kz -= K3;
00744         kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00745         *(qmsg++) = d[kz];
00746       }
00747     }
00748   }
00749   
00750   initdata.pmeProxy[pe].recvUngrid(msg);
00751 }
00752 
00753 
00757 
00758 void OptPmeZPencil::many_to_many_recv_grid () {
00759   int dim3 = initdata.grid.dim3;  
00760   memset(data, 0, sizeof(float) * nx*ny*dim3);
00761   int K3 = initdata.grid.K3;
00762   int K3_1 = initdata.grid.K3 - 1;
00763   int k = 0;
00764   for (int idx = 0; idx < grid_msgs.size(); idx++) {
00765     int xstart = m2m_recv_grid[idx].xstart; 
00766     int xlen   = m2m_recv_grid[idx].xlen; 
00767     int ystart = m2m_recv_grid[idx].ystart; 
00768     int ylen   = m2m_recv_grid[idx].ylen; 
00769     int zstart = m2m_recv_grid[idx].zstart; 
00770     int zlen   = m2m_recv_grid[idx].zlen;   
00771     
00772     float *qmsg = m2m_recv_grid[idx].data;
00773     for ( int i=xstart; i<xstart+xlen; ++i ) {
00774       for ( int j=ystart; j<ystart+ylen; ++j) {
00775         float *d = data + (i * ny + j) * dim3;  
00776 
00777 #pragma disjoint (*qmsg, *d)
00778 #pragma unroll (4)
00779         for ( k=zstart; k<zstart+zlen; ++k ) {
00780           int kz = k;
00781           //if (kz >= K3) kz -= K3;
00782           kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00783           //assert (kz >= 0);
00784           //assert (kz <  K3);
00785           d[kz] += *(qmsg++);
00786         }
00787       }
00788     } 
00789   }
00790 }
00791 
00792 void OptPmeZPencil::many_to_many_send_trans() {
00793   int zBlocks = initdata.zBlocks;
00794   int block3 = initdata.grid.block3;
00795   int dim3 = initdata.grid.dim3;
00796 
00797   float *md = many_to_many_data;
00798   if (single_pencil) {
00799     const float *d = data;
00800     for ( int kb=0; kb<zBlocks; ++kb ) {
00801       *(md++) = d[2*kb];
00802       *(md++) = d[2*kb+1];
00803     }
00804   }
00805   else {
00806     int nz = block3;
00807     for ( int kb=0; kb<zBlocks; ++kb ) {
00808       nz = block3;
00809       if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00810       
00811       const float *d = data;
00812       for ( int i=0; i<nx; ++i ) {
00813         for ( int j=0; j<ny; ++j, d += dim3 ) {
00814           for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00815             *(md++) = d[2*k];
00816             *(md++) = d[2*k+1];
00817           }
00818         }
00819       }
00820     }
00821   }
00822 
00823 #if CHARM_VERSION > 60000
00824   CmiDirect_manytomany_start (handle, PHASE_YF);
00825 #endif
00826 }
00827 
00828 void OptPmeYPencil::many_to_many_recv_trans () {  
00829   int block2 = initdata.grid.block2;
00830   int K2 = initdata.grid.K2;
00831   
00832   const float *md = many_to_many_data;
00833   if (single_pencil) {
00834     float *d = data;
00835     for (int jb = 0; jb < initdata.yBlocks; jb++ ) {
00836       d[2*jb]    = *(md++);
00837       d[2*jb +1] = *(md++);
00838     }
00839   }
00840   else {
00841     for (int jb = 0; jb < initdata.yBlocks; jb++ ) {
00842       int ny = many_to_many_nb[jb];  
00843       float *d = data;
00844       for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00845         for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00846           for ( int k=0; k<nz; ++k ) {
00847             d[2*(j*nz+k)] = *(md++);
00848             d[2*(j*nz+k)+1] = *(md++);
00849           }
00850         }
00851       }
00852     }
00853   }
00854 }
00855 
00856 void OptPmeYPencil::many_to_many_send(int phase) {
00857   int yBlocks = initdata.yBlocks;
00858   int block2 = initdata.grid.block2;
00859   int K2 = initdata.grid.K2;
00860   
00861   float *md = many_to_many_data;
00862   int ny = block2;
00863   if (single_pencil) {
00864     const float *d = data;
00865     for ( int jb=0; jb<yBlocks; ++jb ) {
00866       *(md++) = d[2*jb];
00867       *(md++) = d[2*jb+1];
00868     }
00869   }
00870   else {
00871     for ( int jb=0; jb<yBlocks; ++jb ) {
00872       ny = block2;
00873       if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00874       
00875       const float *d = data;
00876       for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00877         for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00878           for ( int k=0; k<nz; ++k ) {
00879             *(md++) = d[2*(j*nz+k)];
00880             *(md++) = d[2*(j*nz+k)+1];
00881           }
00882         }
00883       }
00884     }
00885   }
00886 
00887 #if CHARM_VERSION > 60000
00888   CmiDirect_manytomany_start (handle, phase);  
00889 #endif
00890 }
00891 
00892 void OptPmeXPencil::many_to_many_recv_trans () {
00893   
00894   int block1 = initdata.grid.block1;
00895   int K1 = initdata.grid.K1;
00896   
00897   const float *md = many_to_many_data;
00898   if (single_pencil) {
00899     for (int ib =0; ib < initdata.xBlocks; ib++ ) {
00900       data[2*ib]   = *(md++);
00901       data[2*ib+1] = *(md++);
00902     }
00903   }
00904   else {
00905     for (int ib =0; ib < initdata.xBlocks; ib++ ) {
00906       int nx = many_to_many_nb[ib];    
00907       for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00908         float *d = data + i*ny*nz*2;
00909         for ( int j=0; j<ny; ++j, d += nz*2 ) {
00910           for ( int k=0; k<nz; ++k ) {
00911             d[2*k] = *(md++);
00912             d[2*k+1] = *(md++);
00913           }
00914         }
00915       }
00916     }
00917   }
00918 }
00919 
00920 void OptPmeXPencil::many_to_many_send_untrans() {
00921   int xBlocks = initdata.xBlocks;
00922   int block1 = initdata.grid.block1;
00923   int K1 = initdata.grid.K1;
00924   
00925   int     nx = block1;
00926   float * md = many_to_many_data;
00927   if (single_pencil) {
00928     float *d = data;
00929     for ( int ib=0; ib<xBlocks; ++ib ) {
00930       *(md++) = d[2*ib];
00931       *(md++) = d[2*ib+1];
00932     }
00933   }
00934   else {
00935     for ( int ib=0; ib<xBlocks; ++ib ) {
00936       nx = block1;
00937       if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
00938       
00939       for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00940         float *d = data + i*ny*nz*2;
00941         for ( int j=0; j<ny; ++j, d += nz*2 ) {
00942           for ( int k=0; k<nz; ++k ) {
00943             *(md++) = d[2*k];
00944             *(md++) = d[2*k+1];
00945           }
00946         }
00947       }
00948     }
00949   }
00950 #if CHARM_VERSION > 60000
00951   CmiDirect_manytomany_start (handle, PHASE_YB);
00952 #endif
00953 }
00954 
00955 void OptPmeYPencil::many_to_many_recv_untrans () {  
00956   int block2 = initdata.grid.block2;
00957   int K2 = initdata.grid.K2;
00958   
00959   const float *md = many_to_many_data;
00960   if (single_pencil) {
00961     float *d = data;
00962     for (int jb = 0; jb < initdata.yBlocks; jb++ ) {        
00963       d[2*jb]   = *(md++);
00964       d[2*jb+1] = *(md++);
00965     }
00966   }
00967   else {
00968     for (int jb = 0; jb < initdata.yBlocks; jb++ ) {
00969       int ny = many_to_many_nb[jb];  
00970       float *d = data;
00971       for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00972         for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00973           for ( int k=0; k<nz; ++k ) {
00974             d[2*(j*nz+k)] = *(md++);
00975             d[2*(j*nz+k)+1] = *(md++);
00976           }
00977         }
00978       }
00979     }
00980   }
00981 }
00982 
00983 void OptPmeZPencil::many_to_many_recv_untrans() {  
00984   //printf ("%d, %d In recv untrans\n", thisIndex.x, thisIndex.y);                                                                             
00985   int block3 = initdata.grid.block3;
00986   int dim3 = initdata.grid.dim3;
00987   
00988   const float *md = many_to_many_data;
00989   if (single_pencil) {
00990     float *d = data;
00991     for (int kb = 0; kb < initdata.zBlocks; kb++ ) {
00992       d[2*kb]   = *(md++);
00993       d[2*kb+1] = *(md++);
00994     }
00995   }
00996   else {
00997     for (int kb = 0; kb < initdata.zBlocks; kb++ ) {
00998       int nz = many_to_many_nb[kb];
00999       float *d = data;
01000       for ( int i=0; i<nx; ++i ) {
01001         for ( int j=0; j<ny; ++j, d += dim3 ) {
01002           for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
01003             d[2*k] = *(md++);
01004             d[2*k+1] = *(md++);
01005           }
01006         }
01007       }
01008     }
01009   }
01010 }
01011 
01012 void OptPmeZPencil::many_to_many_send_ungrid () {
01013   int dim3 = initdata.grid.dim3;
01014   int K3 = initdata.grid.K3;
01015   int K3_1 = initdata.grid.K3 - 1;
01016   int k = 0;
01017   for (int idx = 0; idx < grid_msgs.size(); idx++) {
01018     int xstart = m2m_recv_grid[idx].xstart; 
01019     int xlen   = m2m_recv_grid[idx].xlen; 
01020     int ystart = m2m_recv_grid[idx].ystart; 
01021     int ylen   = m2m_recv_grid[idx].ylen; 
01022     int zstart = m2m_recv_grid[idx].zstart; 
01023     int zlen   = m2m_recv_grid[idx].zlen;   
01024     
01025     float *qmsg = m2m_recv_grid[idx].data;
01026     for ( int i=xstart; i<xstart+xlen; ++i ) {
01027       for ( int j=ystart; j<ystart+ylen; ++j) {
01028         float *d = data + (i * ny + j) * dim3;  
01029 
01030 #pragma disjoint (*d, *qmsg)
01031 #pragma unroll (4)
01032         for ( k=zstart; k<zstart+zlen; ++k ) {
01033           int kz = k;
01034           //if (kz >= K3) kz -= K3;
01035           kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
01036           //assert (kz >= 0);
01037           //assert (kz <  K3);
01038           *(qmsg++) = d[kz];
01039         }
01040       }
01041     } 
01042   }
01043   
01044 #if CHARM_VERSION > 60000
01045   CmiDirect_manytomany_start (handle, PHASE_UG);
01046 #endif
01047 }
01048 
01049 
01050 
01051 void  OptPmeZPencil::initialize_manytomany () {  
01052 #if CHARM_VERSION > 60000
01053   int idx = 0;
01054   int totalcount = 0;
01055   for (idx = 0; idx < grid_msgs.size(); idx ++) 
01056     totalcount += m2m_recv_grid[idx].xlen * m2m_recv_grid[idx].ylen * m2m_recv_grid[idx].zlen;
01057   many_to_many_gr_data = new float [totalcount];
01058   
01059   CkArrayIndex3D aidx (thisIndex.x, thisIndex.y, thisIndex.z);
01060   cbw_recvgrid.cb = CkCallback (CkIndex_OptPmeZPencil::many_to_manyRecvGrid(NULL), aidx, thisProxy.ckGetArrayID());
01061   cbw_recvgrid.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
01062   cbw_recvgrid.array = this;
01063   PatchMap *patchMap = PatchMap::Object();
01064   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_GR, 
01065                                             call_ck_cb_recv_grid, 
01066                                             &cbw_recvgrid, 
01067                                             (char *)many_to_many_gr_data, 
01068                                             patchMap->numPatches(), -1);
01069   
01070   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_UG, NULL, NULL, (char *)many_to_many_gr_data, 
01071                                             grid_msgs.size(), thisIndex.x *initdata.yBlocks + thisIndex.y);
01072   
01073   int offset = 0;
01074   for (idx = 0; idx < grid_msgs.size(); idx ++) {
01075     m2m_recv_grid[idx].data = (float *) ((char *)many_to_many_gr_data + offset);
01076     int fcount = m2m_recv_grid[idx].xlen * m2m_recv_grid[idx].ylen * m2m_recv_grid[idx].zlen;
01077     CmiDirect_manytomany_initialize_recv (handle, PHASE_GR, m2m_recv_grid[idx].patchID, 
01078                                           offset, fcount *sizeof(float), patchMap->node(m2m_recv_grid[idx].patchID));
01079     CmiDirect_manytomany_initialize_send (handle, PHASE_UG, idx, offset, fcount *sizeof(float), 
01080                                           patchMap->node(m2m_recv_grid[idx].patchID));
01081     offset += fcount * sizeof(float);
01082   }        
01083 
01084   int zBlocks = initdata.zBlocks;
01085   int block3 = initdata.grid.block3;
01086   int dim3 = initdata.grid.dim3;
01087 
01088   //Initialize send trans
01089   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_YF, NULL, NULL, (char *)many_to_many_data, zBlocks, thisIndex.y);
01090 
01091   //Initialize recv untrans
01092   cbw_recvuntrans.cb = CkCallback(CkIndex_OptPmeZPencil::many_to_manyRecvUntrans(NULL), aidx, thisProxy.ckGetArrayID());
01093   cbw_recvuntrans.msg    = new (PRIORITY_SIZE) OptPmeDummyMsg;
01094   cbw_recvuntrans.array  = this;
01095   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_ZB, 
01096                                             call_ck_cb_recv_untrans_z, 
01097                                             &cbw_recvuntrans, 
01098                                             (char *)many_to_many_data, 
01099                                             initdata.zBlocks, -1);
01100   single_pencil = false;
01101   if (nx == 1 && ny == 1 && zBlocks <= dim3/2 && block3==1)
01102     single_pencil = true;
01103   
01104   for ( int kb=0; kb<zBlocks; ++kb ) {
01105     int nz = block3;
01106     if ( (kb+1)*block3 > dim3/2 ) {
01107       single_pencil = false;
01108       nz = dim3/2 - kb*block3;
01109     }
01110     
01111     //Initialize send trans
01112     CkArrayIndex3D index (thisIndex.x,0,kb);
01113     CProxy_OptPmePencilMapY yproxy (global_map_y);
01114     int pe = yproxy.ckLocalBranch()->procNum(0, index);
01115     CmiDirect_manytomany_initialize_send (handle, PHASE_YF, kb, kb*2*nx*ny*block3*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
01116     
01117     //Initialize recv untrans
01118     CmiDirect_manytomany_initialize_recv (handle, PHASE_ZB, kb, kb*nx*ny*block3*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
01119     many_to_many_nb [kb] = nz;
01120   }
01121 
01122 #endif
01123 }
01124 
01125 void  OptPmeYPencil::initialize_manytomany () {
01126 #if CHARM_VERSION > 60000
01127   int yBlocks = initdata.yBlocks;
01128   int block2 = initdata.grid.block2;
01129   int K2 = initdata.grid.K2;
01130 
01131   //send trans
01132   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_XF, NULL, NULL, (char *)many_to_many_data, yBlocks, thisIndex.x);
01133   //send untrans
01134   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_ZB, NULL, NULL, (char *)many_to_many_data, yBlocks, thisIndex.z);
01135 
01136   //recv trans
01137   CkArrayIndex3D idx (thisIndex.x, thisIndex.y, thisIndex.z);
01138   cbw_recvtrans.cb = CkCallback (CkIndex_OptPmeYPencil::many_to_manyRecvTrans(NULL), idx, thisProxy.ckGetArrayID());
01139   cbw_recvtrans.msg    = new (PRIORITY_SIZE) OptPmeDummyMsg;
01140   cbw_recvtrans.array  = this;
01141   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_YF, 
01142                                             call_ck_cb_recv_trans_y, 
01143                                             &cbw_recvtrans, 
01144                                             (char *)many_to_many_data, 
01145                                             initdata.yBlocks, -1);
01146 
01147   //recv untrans
01148   cbw_recvuntrans.cb = CkCallback(CkIndex_OptPmeYPencil::many_to_manyRecvUntrans(NULL), idx, thisProxy.ckGetArrayID());
01149   cbw_recvuntrans.msg    = new (PRIORITY_SIZE) OptPmeDummyMsg;
01150   cbw_recvuntrans.array  = this;
01151   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_YB, 
01152                                             call_ck_cb_recv_untrans_y, 
01153                                             &cbw_recvuntrans, 
01154                                             (char *)many_to_many_data, 
01155                                             initdata.yBlocks, -1);
01156 
01157   single_pencil = false;
01158   if (nz == 1 && nx == 1 && yBlocks <= K2 && block2==1)
01159     single_pencil = true;
01160   
01161   for ( int jb=0; jb<yBlocks; ++jb ) {
01162     int ny = block2;
01163     if ( (jb+1)*block2 > K2 ) {
01164       single_pencil = false;
01165       ny = K2 - jb*block2;
01166     }
01167 
01168     //send untrans
01169     CkArrayIndex3D index (thisIndex.x,jb,0);
01170     CProxy_OptPmePencilMapZ zproxy (global_map_z);
01171     int pe = zproxy.ckLocalBranch()->procNum(0, index);
01172     CmiDirect_manytomany_initialize_send (handle, PHASE_ZB, jb, 2*nx*block2*nz*jb*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
01173 
01174     //recv trans
01175     CmiDirect_manytomany_initialize_recv (handle, PHASE_YF, jb, jb*nx*block2*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
01176     
01177     //send trans
01178     index = CkArrayIndex3D (0,jb,thisIndex.z);
01179     CProxy_OptPmePencilMapX xproxy (global_map_x);
01180     pe = xproxy.ckLocalBranch()->procNum(0, index);
01181     CmiDirect_manytomany_initialize_send (handle, PHASE_XF, jb, 2*nx*block2*nz*jb*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
01182     
01183     //Recv untrans
01184     CmiDirect_manytomany_initialize_recv (handle, PHASE_YB, jb, jb*nx*block2*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
01185 
01186     many_to_many_nb [jb] = ny;
01187   }
01188 #endif
01189 }
01190 
01191 void  OptPmeXPencil::initialize_manytomany () {  
01192 #if CHARM_VERSION > 60000
01193   int xBlocks = initdata.xBlocks;
01194   int block1 = initdata.grid.block1;
01195   int K1 = initdata.grid.K1;
01196   
01197   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_YB, NULL, NULL, (char *)many_to_many_data, xBlocks, thisIndex.y);
01198   CkArrayIndex3D idx (thisIndex.x, thisIndex.y, thisIndex.z);
01199   cbw_recvtrans.cb = CkCallback(CkIndex_OptPmeXPencil::many_to_manyRecvTrans(NULL), idx, thisProxy.ckGetArrayID());
01200   cbw_recvtrans.msg    = new (PRIORITY_SIZE) OptPmeDummyMsg;
01201   cbw_recvtrans.array  = this;
01202   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_XF, 
01203                                             call_ck_cb_recv_trans_x, 
01204                                             &cbw_recvtrans,  
01205                                             (char *)many_to_many_data, 
01206                                             initdata.xBlocks, -1);
01207 
01208   single_pencil = false;
01209   if (ny == 1 && nz == 1 && xBlocks <= K1 && block1==1)
01210     single_pencil = true;
01211 
01212   for ( int ib=0; ib<xBlocks; ++ib ) {
01213     int nx = block1;
01214     if ( (ib+1)*block1 > K1 ) {
01215       single_pencil = false;
01216       nx = K1 - ib*block1;
01217     }
01218     
01219     CkArrayIndex3D index (ib,0,thisIndex.z);
01220     CProxy_OptPmePencilMapY yproxy (global_map_y);
01221     int pe = yproxy.ckLocalBranch()->procNum(0, index);
01222     CmiDirect_manytomany_initialize_send (handle, PHASE_YB, ib, 2*block1*ny*nz*ib*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
01223     
01224     CmiDirect_manytomany_initialize_recv (handle, PHASE_XF, ib, ib*block1*ny*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
01225     many_to_many_nb [ib] = nx;
01226   }
01227 #endif
01228 }
01229 
01230 
01234 
01235 
01236 #include "PmeFFTLib.def.h"

Generated on Wed Jun 19 04:08:17 2013 for NAMD by  doxygen 1.3.9.1