Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

fftlib.C

Go to the documentation of this file.
00001 
00002 #include "fftlib.h"
00003 
00004 #include <assert.h>
00005 
00006 void call_ck_cb (void *arg) {
00007   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00008   cw->cb.send (cw->msg);
00009 }
00010 
00011 void OptPmeZPencil::fft_init() {
00012   
00013   //printf ("Initialize zpencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00014 
00015   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00016   Node *node = nd.ckLocalBranch();
00017   SimParameters *simParams = node->simParameters;
00018 
00019   int K1 = initdata.grid.K1;
00020   int K2 = initdata.grid.K2;
00021   int K3 = initdata.grid.K3;
00022   int dim3 = initdata.grid.dim3;
00023   int block1 = initdata.grid.block1;
00024   int block2 = initdata.grid.block2;
00025 
00026   nx = block1;
00027   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
00028   ny = block2;
00029   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
00030 
00031   data = new float[nx*ny*dim3];
00032   many_to_many_data = new float[nx*ny*dim3];
00033   many_to_many_nb = new int [initdata.zBlocks];
00034   work = new float[dim3];
00035 
00036   memset(data, 0, sizeof(float) * nx*ny*dim3);
00037   memset(many_to_many_data, 0, sizeof(float) * nx*ny*dim3);
00038 
00039   order_init(initdata.zBlocks);
00040 
00041 #ifdef NAMD_FFTW
00042   CmiLock(fftw_plan_lock);
00043 #ifdef NAMD_FFTW_3
00044   /* need array of sizes for the how many */
00045   int numLines=nx*ny;
00046   int planLineSizes[1];
00047   planLineSizes[0]=K3;
00048   CkAbort("what are we doing in here?");
00049   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, numLines,
00050                                      (float *) data, NULL, 1, 
00051                                          initdata.grid.dim3,
00052                                          (fftwf_complex *) data, NULL, 1, 0,
00053                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00054                                      : FFTW_MEASURE ));
00055   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, numLines,
00056                                      (fftwf_complex *) data, NULL, 1, 
00057                                          initdata.grid.dim3/2,
00058                                      (float *) data, NULL, 1, 0,
00059                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00060                                      : FFTW_MEASURE ));
00061 #else
00062 
00063   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
00064         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00065         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
00066   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
00067         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00068         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
00069 #endif
00070   CmiUnlock(fftw_plan_lock);
00071 #else
00072   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00073 #endif
00074 
00075   handle = NULL; 
00076 #if CHARM_VERSION > 60000
00077   handle = CmiDirect_manytomany_allocate_handle();
00078 #endif
00079 }
00080 
00081 void OptPmeYPencil::fft_init() {
00082   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00083   Node *node = nd.ckLocalBranch();
00084   SimParameters *simParams = node->simParameters;
00085 
00086   //  printf ("Initialize ypencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00087 
00088   int K1 = initdata.grid.K1;
00089   int K2 = initdata.grid.K2;
00090   int dim2 = initdata.grid.dim2;
00091   int dim3 = initdata.grid.dim3;
00092   int block1 = initdata.grid.block1;
00093   int block3 = initdata.grid.block3;
00094 
00095   nx = block1;
00096   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
00097   nz = block3;
00098   if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
00099 
00100   data = new float[nx*dim2*nz*2];
00101   many_to_many_data = new float[nx*dim2*nz*2];
00102   many_to_many_nb = new int [initdata.yBlocks];
00103   work = new float[2*K2];
00104 
00105   memset(many_to_many_data, 0, sizeof(float) * nx*dim2*nz*2);
00106 
00107   order_init(initdata.yBlocks);
00108 
00109 #ifdef NAMD_FFTW
00110   CmiLock(fftw_plan_lock);
00111 #ifdef NAMD_FFTW_3
00112   /* need array of sizes for the dimensions */
00113   int numLines=nz;
00114   int planLineSizes[2];
00115   planLineSizes[0]=initdata.grid.K2;
00116   planLineSizes[1]=nz;
00117   forward_plan = fftwf_plan_many_dft(2, planLineSizes, numLines, 
00118                                      (fftwf_complex *) data, NULL, nz, 1,
00119                                      (fftwf_complex *) data, NULL, 1, 0,
00120                                      FFTW_FORWARD, 
00121                                      ( simParams->FFTWEstimate 
00122                                        ? FFTW_ESTIMATE : FFTW_MEASURE ));
00123   backward_plan = fftwf_plan_many_dft(2, planLineSizes, numLines, 
00124                                      (fftwf_complex *) data, NULL, nz, 1,
00125                                      (fftwf_complex *) data, NULL, 1, 0,
00126                                      FFTW_FORWARD, 
00127                                      ( simParams->FFTWEstimate 
00128                                        ? FFTW_ESTIMATE : FFTW_MEASURE ));
00129 #else
00130 
00131   forward_plan = fftw_create_plan_specific(K2, FFTW_FORWARD,
00132         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00133         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00134         nz, (fftw_complex *) work, 1);
00135   backward_plan = fftw_create_plan_specific(K2, FFTW_BACKWARD,
00136         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00137         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00138         nz, (fftw_complex *) work, 1);
00139 #endif
00140   CmiUnlock(fftw_plan_lock);
00141 #else
00142   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00143 #endif
00144 
00145   handle = NULL; 
00146 #if CHARM_VERSION > 60000
00147   handle = CmiDirect_manytomany_allocate_handle();
00148 #endif
00149   initialize_manytomany();
00150 }
00151 
00152 
00153 void OptPmeXPencil::fft_init() {
00154   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00155   Node *node = nd.ckLocalBranch();
00156   SimParameters *simParams = node->simParameters;
00157 
00158   //  printf ("Initialize xpencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00159 
00160   lattice = simParams->lattice;
00161 
00162   int K1 = initdata.grid.K1;
00163   int K2 = initdata.grid.K2;
00164   int dim3 = initdata.grid.dim3;
00165   int block2 = initdata.grid.block2;
00166   int block3 = initdata.grid.block3;
00167 
00168   ny = block2;
00169   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
00170   nz = block3;
00171   if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
00172 
00173   data = new float[K1*block2*block3*2];
00174   many_to_many_data = new float[K1*block2*block3*2];
00175   many_to_many_nb = new int [initdata.xBlocks];
00176   work = new float[2*K1];
00177 
00178   memset(many_to_many_data, 0, sizeof(float) * K1*block2*block3*2);
00179 
00180   order_init(initdata.xBlocks);
00181 
00182 #ifdef NAMD_FFTW
00183   CmiLock(fftw_plan_lock);
00184 #ifdef NAMD_FFTW_3
00185   /* need array of sizes for the how many */
00186   int numLines=ny*nz;
00187   int planLineSizes[1];
00188   planLineSizes[0]=K1;
00189   forward_plan = fftwf_plan_many_dft(1, planLineSizes, numLines,
00190                                      (fftwf_complex *) data, NULL, K1, 1,
00191                                      (fftwf_complex *) data, NULL, 1, 0,
00192                                    FFTW_FORWARD,
00193                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00194                                      : FFTW_MEASURE ));
00195   backward_plan = fftwf_plan_many_dft(1, planLineSizes, numLines,
00196                                      (fftwf_complex *) data, NULL, K1, 1,
00197                                      (fftwf_complex *) data, NULL, 1, 0,
00198                                    FFTW_BACKWARD,
00199                                    ( simParams->FFTWEstimate ? FFTW_ESTIMATE 
00200                                      : FFTW_MEASURE ));
00201 #else
00202 
00203   forward_plan = fftw_create_plan_specific(K1, FFTW_FORWARD,
00204         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00205         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00206         ny*nz, (fftw_complex *) work, 1);
00207   backward_plan = fftw_create_plan_specific(K1, FFTW_BACKWARD,
00208         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00209         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00210         ny*nz, (fftw_complex *) work, 1);
00211 
00212   CmiUnlock(fftw_plan_lock);
00213 #endif
00214 #else
00215   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00216 #endif
00217 
00218   myKSpace = new PmeKSpace(initdata.grid,
00219                 thisIndex.y*block2, thisIndex.y*block2 + ny,
00220                 thisIndex.z*block3, thisIndex.z*block3 + nz);
00221 
00222   handle = NULL; 
00223 #if CHARM_VERSION > 60000
00224   handle = CmiDirect_manytomany_allocate_handle();
00225 #endif
00226   initialize_manytomany();
00227 
00228   constant_pressure = initdata.constant_pressure;
00229 }
00230 
00231 //#define FFTCHECK   // run a grid of integers through the fft
00232 // #define ZEROCHECK  // check for suspicious zeros in fft
00233 
00234 void OptPmeZPencil::recv_grid(const OptPmeGridMsg *msg) {
00235 
00236   int dim3 = initdata.grid.dim3;
00237   if ( imsg == 0 ) {
00238     memset(data, 0, sizeof(float) * nx*ny*dim3);
00239   }
00240 
00241   int xstart = msg->xstart - thisIndex.x * initdata.grid.block1;
00242   int ystart = msg->ystart - thisIndex.y * initdata.grid.block2;
00243   assert (xstart >= 0);
00244   assert (ystart >= 0);
00245   int xlen   = msg->xlen;
00246   int ylen   = msg->ylen;    
00247   assert (xstart + xlen <= nx);
00248   assert (ystart + ylen <= ny);
00249   
00250   float *qmsg = msg->qgrid;
00251   float *d = data;
00252   int zlen = msg->zlen;
00253   int zstart = msg->zstart;
00254   int zmax = zstart + zlen - 1;
00255   int k = 0;
00256 
00257   int K3 = initdata.grid.K3; 
00258   int K3_1 = initdata.grid.K3 - 1; 
00259   while (zstart < 0) {
00260     zstart += K3;
00261     zmax += K3;
00262   }
00263 
00264   for ( int i=xstart; i<xstart+xlen; ++i ) {
00265     for ( int j=ystart; j<ystart+ylen; ++j) {
00266       float *d = data + (i * ny + j) * dim3; 
00267       for ( k=zstart; k<=zmax; ++k ) {
00268         int kz = k;
00269         //if (kz >= K3) kz -= K3;
00270         kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00271         //assert (kz >= 0);
00272         //assert (kz <  K3);
00273         d[kz] += *(qmsg++);
00274       }
00275     }
00276   }
00277 
00278   if (_iter == MANY_TO_MANY_SETUP) {
00279     if (imsg == 0)
00280       m2m_recv_grid = new PatchGridElem [grid_msgs.size()];    
00281     
00282     m2m_recv_grid[imsg].xstart = xstart;
00283     m2m_recv_grid[imsg].xlen   = xlen;
00284     m2m_recv_grid[imsg].ystart = ystart;
00285     m2m_recv_grid[imsg].ylen   = ylen;
00286     m2m_recv_grid[imsg].zstart = zstart;
00287     m2m_recv_grid[imsg].zlen   = zlen;
00288     m2m_recv_grid[imsg].patchID = msg->patchID;
00289     
00290     if ( imsg == grid_msgs.size() - 1) 
00291         initialize_manytomany ();
00292   }
00293 }
00294 
00295 
00296 void OptPmeZPencil::forward_fft() {
00297 #ifdef FFTCHECK
00298   int dim3 = initdata.grid.dim3;
00299   int K3 = initdata.grid.K3;
00300   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
00301   float *d = data;
00302   for ( int i=0; i<nx; ++i ) {
00303    for ( int j=0; j<ny; ++j, d += dim3 ) {
00304     for ( int k=0; k<dim3; ++k ) {
00305       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
00306     }
00307    }
00308   }
00309 #endif
00310 #ifdef NAMD_FFTW
00311 #ifdef NAMD_FFTW_3
00312   fftwf_execute(forward_plan);
00313 #else
00314   rfftwnd_real_to_complex(forward_plan, nx*ny,
00315                           data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
00316 #endif
00317 #endif
00318 }
00319 
00320 void OptPmeZPencil::send_trans() {
00321   int zBlocks = initdata.zBlocks;
00322   int block3 = initdata.grid.block3;
00323   int dim3 = initdata.grid.dim3;
00324 
00325   //int offset = 0;
00326   for ( int isend=0; isend<zBlocks; ++isend ) {
00327     int kb = send_order[isend];
00328     int nz = block3;
00329     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00330     //assert (nz > 0);
00331     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00332     msg->sourceNode = thisIndex.y;
00333     msg->nx = ny;
00334     float *md = msg->qgrid;
00335     const float *d = data;
00336     for ( int i=0; i<nx; ++i ) {
00337       for ( int j=0; j<ny; ++j, d += dim3 ) {
00338         for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00339           *(md++) = d[2*k];
00340           *(md++) = d[2*k+1];
00341         }
00342       }
00343     }
00344     
00345     //    printf ("%d, %d: Zpencil Sending trans to %d, %d\n", thisIndex.x, thisIndex.y, thisIndex.x, kb);
00346     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
00347   }
00348 }
00349 
00350 
00351 void OptPmeYPencil::recv_trans(const OptPmeFFTMsg *msg) {
00352 
00353   int block2 = initdata.grid.block2;
00354   int K2 = initdata.grid.K2;
00355   int jb = msg->sourceNode;
00356   int ny = msg->nx;
00357   const float *md = msg->qgrid;
00358   float *d = data;
00359   for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00360     for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00361       for ( int k=0; k<nz; ++k ) {
00362         d[2*(j*nz+k)] = *(md++);
00363         d[2*(j*nz+k)+1] = *(md++);
00364       }
00365     }
00366   }
00367 } 
00368 
00369 void OptPmeYPencil::forward_fft() {
00370 #ifdef NAMD_FFTW
00371 #ifdef NAMD_FFTW_3
00372   fftwf_execute(forward_plan);
00373 #else
00374   for ( int i=0; i<nx; ++i ) {
00375     fftw(forward_plan, nz,
00376          ((fftw_complex *) data) + i * nz * initdata.grid.K2,
00377          nz, 1, (fftw_complex *) work, 1, 0);
00378   }
00379 #endif
00380 #endif
00381 }
00382 
00383 void OptPmeYPencil::send_trans() {
00384   int yBlocks = initdata.yBlocks;
00385   int block2 = initdata.grid.block2;
00386   int K2 = initdata.grid.K2;
00387   
00388   for ( int isend=0; isend<yBlocks; ++isend ) {
00389     int jb = send_order[isend];
00390     int ny = block2;
00391     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00392     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00393     msg->sourceNode = thisIndex.x;
00394     msg->nx = nx;
00395     float *md = msg->qgrid;
00396     const float *d = data;
00397     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00398       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00399         for ( int k=0; k<nz; ++k ) {
00400           *(md++) = d[2*(j*nz+k)];
00401           *(md++) = d[2*(j*nz+k)+1];
00402         }
00403       }
00404     }
00405     if ( md != msg->qgrid + nx*ny*nz*2 ) CkPrintf("error in YX at %d %d %d\n",
00406                                                   thisIndex.x, jb, thisIndex.z);
00407     
00408     //printf ("%d, %d: Ypencil Sending trans to %d, %d\n", thisIndex.z, thisIndex.x, jb, thisIndex.z);
00409     initdata.xPencil(0,jb,thisIndex.z).recvTrans(msg);
00410   }
00411 }
00412 
00413 
00414 
00415 void OptPmeXPencil::recv_trans(const OptPmeFFTMsg *msg) {
00416 
00417   int block1 = initdata.grid.block1;
00418   int K1 = initdata.grid.K1;
00419   int ib = msg->sourceNode;
00420   int nx = msg->nx;
00421   const float *md = msg->qgrid;
00422   for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00423     float *d = data + i*ny*nz*2;
00424     for ( int j=0; j<ny; ++j, d += nz*2 ) {
00425       for ( int k=0; k<nz; ++k ) {
00426         d[2*k] = *(md++);
00427         d[2*k+1] = *(md++);
00428       }
00429     }
00430   }
00431 }
00432 
00433 
00434 
00435 void OptPmeXPencil::forward_fft() {
00436 #ifdef NAMD_FFTW
00437 #ifdef NAMD_FFTW_3
00438   fftwf_execute(forward_plan);
00439 #else
00440 
00441   fftw(forward_plan, ny*nz,
00442        ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
00443 #endif
00444 #endif
00445 }
00446 
00447 void OptPmeXPencil::pme_kspace() {
00448 
00449   evir = 0.;
00450 
00451 #ifdef FFTCHECK
00452   return;
00453 #endif
00454 
00455   BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00456   evir[0] = myKSpace->compute_energy(data,
00457                                      lattice, ewaldcof, &(evir[1]));
00458 
00459   contribute (7*sizeof(double), evir.begin(), CkReduction::sum_double, initdata.cb_energy);
00460   
00461 }
00462 
00463 void OptPmeXPencil::backward_fft() {
00464 #ifdef NAMD_FFTW
00465 #ifdef NAMD_FFTW_3
00466   fftwf_execute(backward_plan);
00467 #else
00468 
00469   fftw(backward_plan, ny*nz,
00470        ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
00471 #endif
00472 #endif
00473 }
00474 
00475 void OptPmeXPencil::send_untrans() {
00476   int xBlocks = initdata.xBlocks;
00477   int block1 = initdata.grid.block1;
00478   int K1 = initdata.grid.K1;
00479 
00480   for ( int isend=0; isend<xBlocks; ++isend ) {
00481     int ib = send_order[isend];
00482     int nx = block1;
00483     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
00484     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;    
00485     msg->sourceNode = thisIndex.y;
00486     msg->nx = ny;
00487     float *md = msg->qgrid;
00488     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00489       float *d = data + i*ny*nz*2;
00490       for ( int j=0; j<ny; ++j, d += nz*2 ) {
00491         for ( int k=0; k<nz; ++k ) {
00492           *(md++) = d[2*k];
00493           *(md++) = d[2*k+1];
00494         }
00495       }
00496     }
00497     
00498     //printf ("%d, %d: Xpencil Sending untrans to %d, %d\n", thisIndex.y, thisIndex.z, ib, thisIndex.z);
00499     initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
00500   }
00501 }
00502 
00503 
00504 void OptPmeYPencil::recv_untrans(const OptPmeFFTMsg *msg) {
00505 
00506   int block2 = initdata.grid.block2;
00507   int K2 = initdata.grid.K2;
00508   int jb = msg->sourceNode;
00509   int ny = msg->nx;
00510   const float *md = msg->qgrid;
00511   float *d = data;
00512   for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00513     for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00514       for ( int k=0; k<nz; ++k ) {
00515         d[2*(j*nz+k)] = *(md++);
00516         d[2*(j*nz+k)+1] = *(md++);
00517       }
00518     }
00519   }
00520 } 
00521 
00522 
00523 
00524 void OptPmeYPencil::backward_fft() {
00525 #ifdef NAMD_FFTW
00526 #ifdef NAMD_FFTW_3
00527   fftwf_execute(backward_plan);
00528 #else
00529   for ( int i=0; i<nx; ++i ) {
00530 #if CMK_BLUEGENEL
00531     CmiNetworkProgress();
00532 #endif
00533 
00534     fftw(backward_plan, nz,
00535          ((fftw_complex *) data) + i * nz * initdata.grid.K2,
00536          nz, 1, (fftw_complex *) work, 1, 0);
00537   }
00538 #endif
00539 #endif
00540 }
00541 
00542 void OptPmeYPencil::send_untrans() {
00543   //printf ("%d, %d: In ypencil send_untrans called once \n", thisIndex.x, thisIndex.z);
00544 
00545   int yBlocks = initdata.yBlocks;
00546   int block2 = initdata.grid.block2;
00547   int K2 = initdata.grid.K2;
00548 
00549   for ( int isend=0; isend<yBlocks; ++isend ) {
00550     int jb = send_order[isend];
00551     //if ( ! needs_reply[jb] ) continue;
00552     int ny = block2;
00553     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00554     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00555     msg->sourceNode = thisIndex.z;
00556     msg->nx = nz;
00557     float *md = msg->qgrid;
00558     const float *d = data;
00559     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00560      for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00561       for ( int k=0; k<nz; ++k ) {
00562         *(md++) = d[2*(j*nz+k)];
00563         *(md++) = d[2*(j*nz+k)+1];
00564       }
00565      }
00566     }
00567 
00568     //printf ("%d, %d: Sending untrans to %d, %d\n", thisIndex.z, thisIndex.x, thisIndex.x, jb);
00569     initdata.zPencil(thisIndex.x,jb,0).recvUntrans(msg);
00570   }
00571 }
00572 
00573 void OptPmeZPencil::recv_untrans(const OptPmeFFTMsg *msg) {
00574 
00575   //printf ("%d, %d In recv untrans\n", thisIndex.x, thisIndex.y);
00576 
00577   int block3 = initdata.grid.block3;
00578   int dim3 = initdata.grid.dim3;
00579   int kb = msg->sourceNode;
00580   int nz = msg->nx;
00581   const float *md = msg->qgrid;
00582   float *d = data;
00583   for ( int i=0; i<nx; ++i ) {
00584     for ( int j=0; j<ny; ++j, d += dim3 ) {
00585       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00586         d[2*k] = *(md++);
00587         d[2*k+1] = *(md++);
00588       }
00589     }
00590   }
00591 }
00592 
00593 
00594 void OptPmeZPencil::backward_fft() {
00595 #ifdef NAMD_FFTW
00596 #ifdef NAMD_FFTW_3
00597   fftwf_execute(backward_plan);
00598 #else
00599   rfftwnd_complex_to_real(backward_plan, nx*ny,
00600                           (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
00601 #endif
00602 #endif
00603   
00604 #if CMK_BLUEGENEL
00605   CmiNetworkProgress();
00606 #endif
00607 
00608 #ifdef FFTCHECK
00609   int dim3 = initdata.grid.dim3;
00610   int K1 = initdata.grid.K1;
00611   int K2 = initdata.grid.K2;
00612   int K3 = initdata.grid.K3;
00613   float scale = 1. / (1. * K1 * K2 * K3);
00614   float maxerr = 0.;
00615   float maxstd = 0.;
00616   int mi, mj, mk;  mi = mj = mk = -1;
00617   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
00618   const float *d = data;
00619   for ( int i=0; i<nx; ++i ) {
00620    for ( int j=0; j<ny; ++j, d += dim3 ) {
00621     for ( int k=0; k<K3; ++k ) {
00622       float std = 10. * (10. * (10. * std_base + i) + j) + k;
00623       float err = scale * d[k] - std;
00624       if ( fabsf(err) > fabsf(maxerr) ) {
00625         maxerr = err;
00626         maxstd = std;
00627         mi = i;  mj = j;  mk = k;
00628       }
00629     }
00630    }
00631   }
00632   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
00633            thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
00634 #endif
00635 }
00636 
00637 void OptPmeZPencil::send_ungrid(OptPmeGridMsg *msg) {
00638   int pe = msg->sourceNode;
00639   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
00640 
00641   int dim3 = initdata.grid.dim3;
00642   int xstart = msg->xstart - thisIndex.x * initdata.grid.block1;
00643   int ystart = msg->ystart - thisIndex.y * initdata.grid.block2;
00644   assert (xstart >= 0);
00645   assert (ystart >= 0);
00646   int xlen   = msg->xlen;
00647   int ylen   = msg->ylen;
00648   assert (xstart + xlen <= nx);
00649   assert (ystart + ylen <= ny);
00650 
00651   float *qmsg = msg->qgrid;
00652   float *d = data;
00653   int zstart = msg->zstart;
00654   int zlen = msg->zlen;
00655   int zmax = zstart + zlen - 1;
00656   
00657   int K3_1 = initdata.grid.K3 - 1; 
00658   int K3 = initdata.grid.K3;
00659   if (zstart < 0) {
00660     zstart += K3;
00661     zmax += K3;
00662   }
00663   
00664   int k = 0;
00665   for ( int i=xstart; i<xstart+xlen; ++i ) {
00666     for ( int j=ystart; j<ystart+ylen; ++j) {
00667       float *d = data + (i * ny + j) * dim3;
00668       for ( k=zstart; k<=zmax; ++k ) {
00669         int kz = k;
00670         //if (kz >= K3) kz -= K3;
00671         kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00672         *(qmsg++) = d[kz];
00673       }
00674     }
00675   }
00676   
00677   initdata.pmeProxy[pe].recvUngrid(msg);
00678 }
00679 
00680 
00684 
00685 void OptPmeZPencil::many_to_many_recv_grid () {
00686   int dim3 = initdata.grid.dim3;  
00687   memset(data, 0, sizeof(float) * nx*ny*dim3);
00688   int K3 = initdata.grid.K3;
00689   int K3_1 = initdata.grid.K3 - 1;
00690   int k = 0;
00691   for (int idx = 0; idx < grid_msgs.size(); idx++) {
00692     int xstart = m2m_recv_grid[idx].xstart; 
00693     int xlen   = m2m_recv_grid[idx].xlen; 
00694     int ystart = m2m_recv_grid[idx].ystart; 
00695     int ylen   = m2m_recv_grid[idx].ylen; 
00696     int zstart = m2m_recv_grid[idx].zstart; 
00697     int zlen   = m2m_recv_grid[idx].zlen;   
00698     
00699     float *qmsg = m2m_recv_grid[idx].data;
00700     for ( int i=xstart; i<xstart+xlen; ++i ) {
00701       for ( int j=ystart; j<ystart+ylen; ++j) {
00702         float *d = data + (i * ny + j) * dim3;  
00703 
00704 #pragma disjoint (*qmsg, *d)
00705 #pragma unroll (4)
00706         for ( k=zstart; k<zstart+zlen; ++k ) {
00707           int kz = k;
00708           //if (kz >= K3) kz -= K3;
00709           kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00710           //assert (kz >= 0);
00711           //assert (kz <  K3);
00712           d[kz] += *(qmsg++);
00713         }
00714       }
00715     } 
00716   }
00717 }
00718 
00719 void OptPmeZPencil::many_to_many_send_trans() {
00720   int zBlocks = initdata.zBlocks;
00721   int block3 = initdata.grid.block3;
00722   int dim3 = initdata.grid.dim3;
00723 
00724   float *md = many_to_many_data;
00725   int nz = block3;
00726   for ( int kb=0; kb<zBlocks; ++kb ) {
00727     nz = block3;
00728     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00729     
00730     const float *d = data;
00731     for ( int i=0; i<nx; ++i ) {
00732       for ( int j=0; j<ny; ++j, d += dim3 ) {
00733         for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00734           *(md++) = d[2*k];
00735           *(md++) = d[2*k+1];
00736         }
00737       }
00738     }
00739   }
00740   
00741 #if CHARM_VERSION > 60000
00742   CmiDirect_manytomany_start (handle, PHASE_YF);
00743 #endif
00744 }
00745 
00746 void OptPmeYPencil::many_to_many_recv_trans () {  
00747   int block2 = initdata.grid.block2;
00748   int K2 = initdata.grid.K2;
00749   
00750   const float *md = many_to_many_data;
00751   for (int jb = 0; jb < initdata.yBlocks; jb++ ) {
00752     int ny = many_to_many_nb[jb];  
00753     float *d = data;
00754     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00755       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00756         for ( int k=0; k<nz; ++k ) {
00757           d[2*(j*nz+k)] = *(md++);
00758           d[2*(j*nz+k)+1] = *(md++);
00759         }
00760       }
00761     }
00762   }
00763 }
00764 
00765 void OptPmeYPencil::many_to_many_send(int phase) {
00766   int yBlocks = initdata.yBlocks;
00767   int block2 = initdata.grid.block2;
00768   int K2 = initdata.grid.K2;
00769   
00770   float *md = many_to_many_data;
00771   int ny = block2;
00772   for ( int jb=0; jb<yBlocks; ++jb ) {
00773     ny = block2;
00774     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00775     
00776     const float *d = data;
00777     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00778       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00779         for ( int k=0; k<nz; ++k ) {
00780           *(md++) = d[2*(j*nz+k)];
00781           *(md++) = d[2*(j*nz+k)+1];
00782         }
00783       }
00784     }
00785   }
00786 
00787 #if CHARM_VERSION > 60000
00788   CmiDirect_manytomany_start (handle, phase);  
00789 #endif
00790 }
00791 
00792 void OptPmeXPencil::many_to_many_recv_trans () {
00793   
00794   int block1 = initdata.grid.block1;
00795   int K1 = initdata.grid.K1;
00796   
00797   const float *md = many_to_many_data;
00798   for (int ib =0; ib < initdata.xBlocks; ib++ ) {
00799     int nx = many_to_many_nb[ib];    
00800     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00801       float *d = data + i*ny*nz*2;
00802       for ( int j=0; j<ny; ++j, d += nz*2 ) {
00803         for ( int k=0; k<nz; ++k ) {
00804           d[2*k] = *(md++);
00805           d[2*k+1] = *(md++);
00806         }
00807       }
00808     }
00809   }
00810 }
00811 
00812 void OptPmeXPencil::many_to_many_send_untrans() {
00813   int xBlocks = initdata.xBlocks;
00814   int block1 = initdata.grid.block1;
00815   int K1 = initdata.grid.K1;
00816 
00817   int     nx = block1;
00818   float * md = many_to_many_data;
00819   for ( int ib=0; ib<xBlocks; ++ib ) {
00820     nx = block1;
00821     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
00822     
00823     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00824       float *d = data + i*ny*nz*2;
00825       for ( int j=0; j<ny; ++j, d += nz*2 ) {
00826         for ( int k=0; k<nz; ++k ) {
00827           *(md++) = d[2*k];
00828           *(md++) = d[2*k+1];
00829         }
00830       }
00831     }
00832   }
00833 #if CHARM_VERSION > 60000
00834   CmiDirect_manytomany_start (handle, PHASE_YB);
00835 #endif
00836 }
00837 
00838 void OptPmeYPencil::many_to_many_recv_untrans () {  
00839   int block2 = initdata.grid.block2;
00840   int K2 = initdata.grid.K2;
00841   
00842   const float *md = many_to_many_data;
00843   for (int jb = 0; jb < initdata.yBlocks; jb++ ) {
00844     int ny = many_to_many_nb[jb];  
00845     float *d = data;
00846     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00847       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00848         for ( int k=0; k<nz; ++k ) {
00849           d[2*(j*nz+k)] = *(md++);
00850           d[2*(j*nz+k)+1] = *(md++);
00851         }
00852       }
00853     }
00854   }
00855 }
00856 
00857 void OptPmeZPencil::many_to_many_recv_untrans() {  
00858   //printf ("%d, %d In recv untrans\n", thisIndex.x, thisIndex.y);                                                                             
00859   int block3 = initdata.grid.block3;
00860   int dim3 = initdata.grid.dim3;
00861   
00862   const float *md = many_to_many_data;
00863   for (int kb = 0; kb < initdata.zBlocks; kb++ ) {
00864     int nz = many_to_many_nb[kb];
00865     float *d = data;
00866     for ( int i=0; i<nx; ++i ) {
00867       for ( int j=0; j<ny; ++j, d += dim3 ) {
00868         for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00869           d[2*k] = *(md++);
00870           d[2*k+1] = *(md++);
00871         }
00872       }
00873     }
00874   }
00875 }
00876 
00877 void OptPmeZPencil::many_to_many_send_ungrid () {
00878   int dim3 = initdata.grid.dim3;
00879   int K3 = initdata.grid.K3;
00880   int K3_1 = initdata.grid.K3 - 1;
00881   int k = 0;
00882   for (int idx = 0; idx < grid_msgs.size(); idx++) {
00883     int xstart = m2m_recv_grid[idx].xstart; 
00884     int xlen   = m2m_recv_grid[idx].xlen; 
00885     int ystart = m2m_recv_grid[idx].ystart; 
00886     int ylen   = m2m_recv_grid[idx].ylen; 
00887     int zstart = m2m_recv_grid[idx].zstart; 
00888     int zlen   = m2m_recv_grid[idx].zlen;   
00889     
00890     float *qmsg = m2m_recv_grid[idx].data;
00891     for ( int i=xstart; i<xstart+xlen; ++i ) {
00892       for ( int j=ystart; j<ystart+ylen; ++j) {
00893         float *d = data + (i * ny + j) * dim3;  
00894 
00895 #pragma disjoint (*d, *qmsg)
00896 #pragma unroll (4)
00897         for ( k=zstart; k<zstart+zlen; ++k ) {
00898           int kz = k;
00899           //if (kz >= K3) kz -= K3;
00900           kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00901           //assert (kz >= 0);
00902           //assert (kz <  K3);
00903           *(qmsg++) = d[kz];
00904         }
00905       }
00906     } 
00907   }
00908   
00909 #if CHARM_VERSION > 60000
00910   CmiDirect_manytomany_start (handle, PHASE_UG);
00911 #endif
00912 }
00913 
00914 
00915 
00916 void  OptPmeZPencil::initialize_manytomany () {  
00917 #if CHARM_VERSION > 60000
00918   int idx = 0;
00919   int totalcount = 0;
00920   for (idx = 0; idx < grid_msgs.size(); idx ++) 
00921     totalcount += m2m_recv_grid[idx].xlen * m2m_recv_grid[idx].ylen * m2m_recv_grid[idx].zlen;
00922   many_to_many_gr_data = new float [totalcount];
00923   
00924   CkArrayIndex3D aidx (thisIndex.x, thisIndex.y, thisIndex.z);
00925   cbw_recvgrid.cb = CkCallback (CkIndex_OptPmeZPencil::many_to_manyRecvGrid(NULL), aidx, thisProxy.ckGetArrayID());
00926   cbw_recvgrid.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00927   PatchMap *patchMap = PatchMap::Object();
00928   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_GR, 
00929                                             call_ck_cb, &cbw_recvgrid, 
00930                                             (char *)many_to_many_gr_data, patchMap->numPatches(), -1);
00931   
00932   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_UG, NULL, NULL, (char *)many_to_many_gr_data, 
00933                                             grid_msgs.size(), thisIndex.x *initdata.yBlocks + thisIndex.y);
00934   
00935   int offset = 0;
00936   for (idx = 0; idx < grid_msgs.size(); idx ++) {
00937     m2m_recv_grid[idx].data = (float *) ((char *)many_to_many_gr_data + offset);
00938     int fcount = m2m_recv_grid[idx].xlen * m2m_recv_grid[idx].ylen * m2m_recv_grid[idx].zlen;
00939     CmiDirect_manytomany_initialize_recv (handle, PHASE_GR, m2m_recv_grid[idx].patchID, 
00940                                           offset, fcount *sizeof(float), patchMap->node(m2m_recv_grid[idx].patchID));
00941     CmiDirect_manytomany_initialize_send (handle, PHASE_UG, idx, offset, fcount *sizeof(float), 
00942                                           patchMap->node(m2m_recv_grid[idx].patchID));
00943     offset += fcount * sizeof(float);
00944   }        
00945 
00946   int zBlocks = initdata.zBlocks;
00947   int block3 = initdata.grid.block3;
00948   int dim3 = initdata.grid.dim3;
00949 
00950   //Initialize send trans
00951   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_YF, NULL, NULL, (char *)many_to_many_data, zBlocks, thisIndex.y);
00952 
00953   //Initialize recv untrans
00954   cbw_recvuntrans.cb = CkCallback(CkIndex_OptPmeZPencil::many_to_manyRecvUntrans(NULL), aidx, thisProxy.ckGetArrayID());
00955   cbw_recvuntrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00956   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_ZB, call_ck_cb, &cbw_recvuntrans, 
00957                                             (char *)many_to_many_data, initdata.zBlocks, -1);
00958 
00959   for ( int kb=0; kb<zBlocks; ++kb ) {
00960     int nz = block3;
00961     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00962     
00963     //Initialize send trans
00964     CkArrayIndex3D index (thisIndex.x,0,kb);
00965     CProxy_OptPmePencilMapY yproxy (global_map_y);
00966     int pe = yproxy.ckLocalBranch()->procNum(0, index);
00967     CmiDirect_manytomany_initialize_send (handle, PHASE_YF, kb, kb*2*nx*ny*block3*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
00968     
00969     //Initialize recv untrans
00970     CmiDirect_manytomany_initialize_recv (handle, PHASE_ZB, kb, kb*nx*ny*block3*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
00971     many_to_many_nb [kb] = nz;
00972   }
00973 #endif
00974 }
00975 
00976 void  OptPmeYPencil::initialize_manytomany () {
00977 #if CHARM_VERSION > 60000
00978   int yBlocks = initdata.yBlocks;
00979   int block2 = initdata.grid.block2;
00980   int K2 = initdata.grid.K2;
00981 
00982   //send trans
00983   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_XF, NULL, NULL, (char *)many_to_many_data, yBlocks, thisIndex.x);
00984   //send untrans
00985   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_ZB, NULL, NULL, (char *)many_to_many_data, yBlocks, thisIndex.z);
00986 
00987   //recv trans
00988   CkArrayIndex3D idx (thisIndex.x, thisIndex.y, thisIndex.z);
00989   cbw_recvtrans.cb = CkCallback (CkIndex_OptPmeYPencil::many_to_manyRecvTrans(NULL), idx, thisProxy.ckGetArrayID());
00990   cbw_recvtrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00991   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_YF, call_ck_cb, &cbw_recvtrans, 
00992                                             (char *)many_to_many_data, initdata.yBlocks, -1);
00993 
00994   //recv untrans
00995   cbw_recvuntrans.cb = CkCallback(CkIndex_OptPmeYPencil::many_to_manyRecvUntrans(NULL), idx, thisProxy.ckGetArrayID());
00996   cbw_recvuntrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00997   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_YB, call_ck_cb, 
00998                                             &cbw_recvuntrans, (char *)many_to_many_data, initdata.yBlocks, -1);
00999   
01000   for ( int jb=0; jb<yBlocks; ++jb ) {
01001     int ny = block2;
01002     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
01003 
01004     //send untrans
01005     CkArrayIndex3D index (thisIndex.x,jb,0);
01006     CProxy_OptPmePencilMapZ zproxy (global_map_z);
01007     int pe = zproxy.ckLocalBranch()->procNum(0, index);
01008     CmiDirect_manytomany_initialize_send (handle, PHASE_ZB, jb, 2*nx*block2*nz*jb*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
01009 
01010     //recv trans
01011     CmiDirect_manytomany_initialize_recv (handle, PHASE_YF, jb, jb*nx*block2*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
01012     
01013     //send trans
01014     index = CkArrayIndex3D (0,jb,thisIndex.z);
01015     CProxy_OptPmePencilMapX xproxy (global_map_x);
01016     pe = xproxy.ckLocalBranch()->procNum(0, index);
01017     CmiDirect_manytomany_initialize_send (handle, PHASE_XF, jb, 2*nx*block2*nz*jb*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
01018     
01019     //Recv untrans
01020     CmiDirect_manytomany_initialize_recv (handle, PHASE_YB, jb, jb*nx*block2*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
01021 
01022     many_to_many_nb [jb] = ny;
01023   }
01024 #endif
01025 }
01026 
01027 void  OptPmeXPencil::initialize_manytomany () {  
01028 #if CHARM_VERSION > 60000
01029   int xBlocks = initdata.xBlocks;
01030   int block1 = initdata.grid.block1;
01031   int K1 = initdata.grid.K1;
01032   
01033   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_YB, NULL, NULL, (char *)many_to_many_data, xBlocks, thisIndex.y);
01034   CkArrayIndex3D idx (thisIndex.x, thisIndex.y, thisIndex.z);
01035   cbw_recvtrans.cb = CkCallback(CkIndex_OptPmeXPencil::many_to_manyRecvTrans(NULL), idx, thisProxy.ckGetArrayID());
01036   cbw_recvtrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
01037   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_XF, call_ck_cb, 
01038                                             &cbw_recvtrans,  (char *)many_to_many_data, 
01039                                             initdata.xBlocks, -1);
01040   
01041   for ( int ib=0; ib<xBlocks; ++ib ) {
01042     int nx = block1;
01043     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
01044     
01045     CkArrayIndex3D index (ib,0,thisIndex.z);
01046     CProxy_OptPmePencilMapY yproxy (global_map_y);
01047     int pe = yproxy.ckLocalBranch()->procNum(0, index);
01048     CmiDirect_manytomany_initialize_send (handle, PHASE_YB, ib, 2*block1*ny*nz*ib*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
01049     
01050     CmiDirect_manytomany_initialize_recv (handle, PHASE_XF, ib, ib*block1*ny*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
01051     many_to_many_nb [ib] = nx;
01052   }
01053 #endif
01054 }
01055 
01056 
01060 
01061 
01062 #include "PmeFFTLib.def.h"

Generated on Fri May 25 04:07:15 2012 for NAMD by  doxygen 1.3.9.1