Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

fftlib.C

Go to the documentation of this file.
00001 
00002 #include "fftlib.h"
00003 
00004 #include <assert.h>
00005 
00006 void call_ck_cb (void *arg) {
00007   CkCallbackWrapper *cw = (CkCallbackWrapper *)arg;
00008   cw->cb.send (cw->msg);
00009 }
00010 
00011 void OptPmeZPencil::fft_init() {
00012   
00013   //printf ("Initialize zpencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00014 
00015   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00016   Node *node = nd.ckLocalBranch();
00017   SimParameters *simParams = node->simParameters;
00018 
00019   int K1 = initdata.grid.K1;
00020   int K2 = initdata.grid.K2;
00021   int K3 = initdata.grid.K3;
00022   int dim3 = initdata.grid.dim3;
00023   int block1 = initdata.grid.block1;
00024   int block2 = initdata.grid.block2;
00025 
00026   nx = block1;
00027   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
00028   ny = block2;
00029   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
00030 
00031   data = new float[nx*ny*dim3];
00032   many_to_many_data = new float[nx*ny*dim3];
00033   many_to_many_nb = new int [initdata.zBlocks];
00034   work = new float[dim3];
00035 
00036   memset(data, 0, sizeof(float) * nx*ny*dim3);
00037   memset(many_to_many_data, 0, sizeof(float) * nx*ny*dim3);
00038 
00039   order_init(initdata.zBlocks);
00040 
00041 #ifdef NAMD_FFTW
00042   CmiLock(fftw_plan_lock);
00043 
00044   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
00045         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00046         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
00047   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
00048         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00049         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
00050 
00051   CmiUnlock(fftw_plan_lock);
00052 #else
00053   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00054 #endif
00055 
00056   handle = NULL; 
00057 #if CHARM_VERSION > 60000
00058   handle = CmiDirect_manytomany_allocate_handle();
00059 #endif
00060 }
00061 
00062 void OptPmeYPencil::fft_init() {
00063   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00064   Node *node = nd.ckLocalBranch();
00065   SimParameters *simParams = node->simParameters;
00066 
00067   //  printf ("Initialize ypencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00068 
00069   int K1 = initdata.grid.K1;
00070   int K2 = initdata.grid.K2;
00071   int dim2 = initdata.grid.dim2;
00072   int dim3 = initdata.grid.dim3;
00073   int block1 = initdata.grid.block1;
00074   int block3 = initdata.grid.block3;
00075 
00076   nx = block1;
00077   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
00078   nz = block3;
00079   if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
00080 
00081   data = new float[nx*dim2*nz*2];
00082   many_to_many_data = new float[nx*dim2*nz*2];
00083   many_to_many_nb = new int [initdata.yBlocks];
00084   work = new float[2*K2];
00085 
00086   memset(many_to_many_data, 0, sizeof(float) * nx*dim2*nz*2);
00087 
00088   order_init(initdata.yBlocks);
00089 
00090 #ifdef NAMD_FFTW
00091   CmiLock(fftw_plan_lock);
00092 
00093   forward_plan = fftw_create_plan_specific(K2, FFTW_FORWARD,
00094         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00095         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00096         nz, (fftw_complex *) work, 1);
00097   backward_plan = fftw_create_plan_specific(K2, FFTW_BACKWARD,
00098         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00099         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00100         nz, (fftw_complex *) work, 1);
00101 
00102   CmiUnlock(fftw_plan_lock);
00103 #else
00104   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00105 #endif
00106 
00107   handle = NULL; 
00108 #if CHARM_VERSION > 60000
00109   handle = CmiDirect_manytomany_allocate_handle();
00110 #endif
00111   initialize_manytomany();
00112 }
00113 
00114 
00115 void OptPmeXPencil::fft_init() {
00116   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
00117   Node *node = nd.ckLocalBranch();
00118   SimParameters *simParams = node->simParameters;
00119 
00120   //  printf ("Initialize xpencil [%d,%d], on pd %d\n", thisIndex.x, thisIndex.y, CkMyPe());
00121 
00122   lattice = simParams->lattice;
00123 
00124   int K1 = initdata.grid.K1;
00125   int K2 = initdata.grid.K2;
00126   int dim3 = initdata.grid.dim3;
00127   int block2 = initdata.grid.block2;
00128   int block3 = initdata.grid.block3;
00129 
00130   ny = block2;
00131   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
00132   nz = block3;
00133   if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
00134 
00135   data = new float[K1*block2*block3*2];
00136   many_to_many_data = new float[K1*block2*block3*2];
00137   many_to_many_nb = new int [initdata.xBlocks];
00138   work = new float[2*K1];
00139 
00140   memset(many_to_many_data, 0, sizeof(float) * K1*block2*block3*2);
00141 
00142   order_init(initdata.xBlocks);
00143 
00144 #ifdef NAMD_FFTW
00145   CmiLock(fftw_plan_lock);
00146 
00147   forward_plan = fftw_create_plan_specific(K1, FFTW_FORWARD,
00148         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00149         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00150         ny*nz, (fftw_complex *) work, 1);
00151   backward_plan = fftw_create_plan_specific(K1, FFTW_BACKWARD,
00152         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00153         | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
00154         ny*nz, (fftw_complex *) work, 1);
00155 
00156   CmiUnlock(fftw_plan_lock);
00157 #else
00158   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00159 #endif
00160 
00161   myKSpace = new PmeKSpace(initdata.grid,
00162                 thisIndex.y*block2, thisIndex.y*block2 + ny,
00163                 thisIndex.z*block3, thisIndex.z*block3 + nz);
00164 
00165   handle = NULL; 
00166 #if CHARM_VERSION > 60000
00167   handle = CmiDirect_manytomany_allocate_handle();
00168 #endif
00169   initialize_manytomany();
00170 
00171   constant_pressure = initdata.constant_pressure;
00172 }
00173 
00174 //#define FFTCHECK   // run a grid of integers through the fft
00175 // #define ZEROCHECK  // check for suspicious zeros in fft
00176 
00177 void OptPmeZPencil::recv_grid(const OptPmeGridMsg *msg) {
00178 
00179   int dim3 = initdata.grid.dim3;
00180   if ( imsg == 0 ) {
00181     memset(data, 0, sizeof(float) * nx*ny*dim3);
00182   }
00183 
00184   int xstart = msg->xstart - thisIndex.x * initdata.grid.block1;
00185   int ystart = msg->ystart - thisIndex.y * initdata.grid.block2;
00186   assert (xstart >= 0);
00187   assert (ystart >= 0);
00188   int xlen   = msg->xlen;
00189   int ylen   = msg->ylen;    
00190   assert (xstart + xlen <= nx);
00191   assert (ystart + ylen <= ny);
00192   
00193   float *qmsg = msg->qgrid;
00194   float *d = data;
00195   int zlen = msg->zlen;
00196   int zstart = msg->zstart;
00197   int zmax = zstart + zlen - 1;
00198   int k = 0;
00199 
00200   int K3 = initdata.grid.K3; 
00201   int K3_1 = initdata.grid.K3 - 1; 
00202   while (zstart < 0) {
00203     zstart += K3;
00204     zmax += K3;
00205   }
00206 
00207   for ( int i=xstart; i<xstart+xlen; ++i ) {
00208     for ( int j=ystart; j<ystart+ylen; ++j) {
00209       float *d = data + (i * ny + j) * dim3; 
00210       for ( k=zstart; k<=zmax; ++k ) {
00211         int kz = k;
00212         //if (kz >= K3) kz -= K3;
00213         kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00214         //assert (kz >= 0);
00215         //assert (kz <  K3);
00216         d[kz] += *(qmsg++);
00217       }
00218     }
00219   }
00220 
00221   if (_iter == MANY_TO_MANY_SETUP) {
00222     if (imsg == 0)
00223       m2m_recv_grid = new PatchGridElem [grid_msgs.size()];    
00224     
00225     m2m_recv_grid[imsg].xstart = xstart;
00226     m2m_recv_grid[imsg].xlen   = xlen;
00227     m2m_recv_grid[imsg].ystart = ystart;
00228     m2m_recv_grid[imsg].ylen   = ylen;
00229     m2m_recv_grid[imsg].zstart = zstart;
00230     m2m_recv_grid[imsg].zlen   = zlen;
00231     m2m_recv_grid[imsg].patchID = msg->patchID;
00232     
00233     if ( imsg == grid_msgs.size() - 1) 
00234         initialize_manytomany ();
00235   }
00236 }
00237 
00238 
00239 void OptPmeZPencil::forward_fft() {
00240 #ifdef FFTCHECK
00241   int dim3 = initdata.grid.dim3;
00242   int K3 = initdata.grid.K3;
00243   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
00244   float *d = data;
00245   for ( int i=0; i<nx; ++i ) {
00246    for ( int j=0; j<ny; ++j, d += dim3 ) {
00247     for ( int k=0; k<dim3; ++k ) {
00248       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
00249     }
00250    }
00251   }
00252 #endif
00253 #ifdef NAMD_FFTW
00254   rfftwnd_real_to_complex(forward_plan, nx*ny,
00255                           data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
00256 #endif
00257 }
00258 
00259 void OptPmeZPencil::send_trans() {
00260   int zBlocks = initdata.zBlocks;
00261   int block3 = initdata.grid.block3;
00262   int dim3 = initdata.grid.dim3;
00263 
00264   //int offset = 0;
00265   for ( int isend=0; isend<zBlocks; ++isend ) {
00266     int kb = send_order[isend];
00267     int nz = block3;
00268     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00269     //assert (nz > 0);
00270     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00271     msg->sourceNode = thisIndex.y;
00272     msg->nx = ny;
00273     float *md = msg->qgrid;
00274     const float *d = data;
00275     for ( int i=0; i<nx; ++i ) {
00276       for ( int j=0; j<ny; ++j, d += dim3 ) {
00277         for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00278           *(md++) = d[2*k];
00279           *(md++) = d[2*k+1];
00280         }
00281       }
00282     }
00283     
00284     //    printf ("%d, %d: Zpencil Sending trans to %d, %d\n", thisIndex.x, thisIndex.y, thisIndex.x, kb);
00285     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
00286   }
00287 }
00288 
00289 
00290 void OptPmeYPencil::recv_trans(const OptPmeFFTMsg *msg) {
00291 
00292   int block2 = initdata.grid.block2;
00293   int K2 = initdata.grid.K2;
00294   int jb = msg->sourceNode;
00295   int ny = msg->nx;
00296   const float *md = msg->qgrid;
00297   float *d = data;
00298   for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00299     for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00300       for ( int k=0; k<nz; ++k ) {
00301         d[2*(j*nz+k)] = *(md++);
00302         d[2*(j*nz+k)+1] = *(md++);
00303       }
00304     }
00305   }
00306 } 
00307 
00308 void OptPmeYPencil::forward_fft() {
00309 #ifdef NAMD_FFTW
00310   for ( int i=0; i<nx; ++i ) {
00311     fftw(forward_plan, nz,
00312          ((fftw_complex *) data) + i * nz * initdata.grid.K2,
00313          nz, 1, (fftw_complex *) work, 1, 0);
00314   }
00315 #endif
00316 }
00317 
00318 void OptPmeYPencil::send_trans() {
00319   int yBlocks = initdata.yBlocks;
00320   int block2 = initdata.grid.block2;
00321   int K2 = initdata.grid.K2;
00322   
00323   for ( int isend=0; isend<yBlocks; ++isend ) {
00324     int jb = send_order[isend];
00325     int ny = block2;
00326     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00327     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00328     msg->sourceNode = thisIndex.x;
00329     msg->nx = nx;
00330     float *md = msg->qgrid;
00331     const float *d = data;
00332     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00333       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00334         for ( int k=0; k<nz; ++k ) {
00335           *(md++) = d[2*(j*nz+k)];
00336           *(md++) = d[2*(j*nz+k)+1];
00337         }
00338       }
00339     }
00340     if ( md != msg->qgrid + nx*ny*nz*2 ) CkPrintf("error in YX at %d %d %d\n",
00341                                                   thisIndex.x, jb, thisIndex.z);
00342     
00343     //printf ("%d, %d: Ypencil Sending trans to %d, %d\n", thisIndex.z, thisIndex.x, jb, thisIndex.z);
00344     initdata.xPencil(0,jb,thisIndex.z).recvTrans(msg);
00345   }
00346 }
00347 
00348 
00349 
00350 void OptPmeXPencil::recv_trans(const OptPmeFFTMsg *msg) {
00351 
00352   int block1 = initdata.grid.block1;
00353   int K1 = initdata.grid.K1;
00354   int ib = msg->sourceNode;
00355   int nx = msg->nx;
00356   const float *md = msg->qgrid;
00357   for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00358     float *d = data + i*ny*nz*2;
00359     for ( int j=0; j<ny; ++j, d += nz*2 ) {
00360       for ( int k=0; k<nz; ++k ) {
00361         d[2*k] = *(md++);
00362         d[2*k+1] = *(md++);
00363       }
00364     }
00365   }
00366 }
00367 
00368 
00369 
00370 void OptPmeXPencil::forward_fft() {
00371 #ifdef NAMD_FFTW
00372   fftw(forward_plan, ny*nz,
00373        ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
00374 #endif
00375 }
00376 
00377 void OptPmeXPencil::pme_kspace() {
00378 
00379   evir = 0.;
00380 
00381 #ifdef FFTCHECK
00382   return;
00383 #endif
00384 
00385   BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00386   evir[0] = myKSpace->compute_energy(data,
00387                                      lattice, ewaldcof, &(evir[1]));
00388 
00389   contribute (7*sizeof(double), evir.begin(), CkReduction::sum_double, initdata.cb_energy);
00390   
00391 }
00392 
00393 void OptPmeXPencil::backward_fft() {
00394 #ifdef NAMD_FFTW
00395   fftw(backward_plan, ny*nz,
00396        ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
00397 #endif
00398 }
00399 
00400 void OptPmeXPencil::send_untrans() {
00401   int xBlocks = initdata.xBlocks;
00402   int block1 = initdata.grid.block1;
00403   int K1 = initdata.grid.K1;
00404 
00405   for ( int isend=0; isend<xBlocks; ++isend ) {
00406     int ib = send_order[isend];
00407     int nx = block1;
00408     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
00409     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;    
00410     msg->sourceNode = thisIndex.y;
00411     msg->nx = ny;
00412     float *md = msg->qgrid;
00413     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00414       float *d = data + i*ny*nz*2;
00415       for ( int j=0; j<ny; ++j, d += nz*2 ) {
00416         for ( int k=0; k<nz; ++k ) {
00417           *(md++) = d[2*k];
00418           *(md++) = d[2*k+1];
00419         }
00420       }
00421     }
00422     
00423     //printf ("%d, %d: Xpencil Sending untrans to %d, %d\n", thisIndex.y, thisIndex.z, ib, thisIndex.z);
00424     initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
00425   }
00426 }
00427 
00428 
00429 void OptPmeYPencil::recv_untrans(const OptPmeFFTMsg *msg) {
00430 
00431   int block2 = initdata.grid.block2;
00432   int K2 = initdata.grid.K2;
00433   int jb = msg->sourceNode;
00434   int ny = msg->nx;
00435   const float *md = msg->qgrid;
00436   float *d = data;
00437   for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00438     for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00439       for ( int k=0; k<nz; ++k ) {
00440         d[2*(j*nz+k)] = *(md++);
00441         d[2*(j*nz+k)+1] = *(md++);
00442       }
00443     }
00444   }
00445 } 
00446 
00447 
00448 
00449 void OptPmeYPencil::backward_fft() {
00450 #ifdef NAMD_FFTW
00451   for ( int i=0; i<nx; ++i ) {
00452 #if CMK_BLUEGENEL
00453     CmiNetworkProgress();
00454 #endif
00455 
00456     fftw(backward_plan, nz,
00457          ((fftw_complex *) data) + i * nz * initdata.grid.K2,
00458          nz, 1, (fftw_complex *) work, 1, 0);
00459   }
00460 #endif
00461 }
00462 
00463 void OptPmeYPencil::send_untrans() {
00464   //printf ("%d, %d: In ypencil send_untrans called once \n", thisIndex.x, thisIndex.z);
00465 
00466   int yBlocks = initdata.yBlocks;
00467   int block2 = initdata.grid.block2;
00468   int K2 = initdata.grid.K2;
00469 
00470   for ( int isend=0; isend<yBlocks; ++isend ) {
00471     int jb = send_order[isend];
00472     //if ( ! needs_reply[jb] ) continue;
00473     int ny = block2;
00474     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00475     OptPmeFFTMsg *msg = new (nx*ny*nz*2,PRIORITY_SIZE) OptPmeFFTMsg;
00476     msg->sourceNode = thisIndex.z;
00477     msg->nx = nz;
00478     float *md = msg->qgrid;
00479     const float *d = data;
00480     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00481      for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00482       for ( int k=0; k<nz; ++k ) {
00483         *(md++) = d[2*(j*nz+k)];
00484         *(md++) = d[2*(j*nz+k)+1];
00485       }
00486      }
00487     }
00488 
00489     //printf ("%d, %d: Sending untrans to %d, %d\n", thisIndex.z, thisIndex.x, thisIndex.x, jb);
00490     initdata.zPencil(thisIndex.x,jb,0).recvUntrans(msg);
00491   }
00492 }
00493 
00494 void OptPmeZPencil::recv_untrans(const OptPmeFFTMsg *msg) {
00495 
00496   //printf ("%d, %d In recv untrans\n", thisIndex.x, thisIndex.y);
00497 
00498   int block3 = initdata.grid.block3;
00499   int dim3 = initdata.grid.dim3;
00500   int kb = msg->sourceNode;
00501   int nz = msg->nx;
00502   const float *md = msg->qgrid;
00503   float *d = data;
00504   for ( int i=0; i<nx; ++i ) {
00505     for ( int j=0; j<ny; ++j, d += dim3 ) {
00506       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00507         d[2*k] = *(md++);
00508         d[2*k+1] = *(md++);
00509       }
00510     }
00511   }
00512 }
00513 
00514 
00515 void OptPmeZPencil::backward_fft() {
00516 #ifdef NAMD_FFTW
00517   rfftwnd_complex_to_real(backward_plan, nx*ny,
00518                           (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
00519 #endif
00520   
00521 #if CMK_BLUEGENEL
00522   CmiNetworkProgress();
00523 #endif
00524 
00525 #ifdef FFTCHECK
00526   int dim3 = initdata.grid.dim3;
00527   int K1 = initdata.grid.K1;
00528   int K2 = initdata.grid.K2;
00529   int K3 = initdata.grid.K3;
00530   float scale = 1. / (1. * K1 * K2 * K3);
00531   float maxerr = 0.;
00532   float maxstd = 0.;
00533   int mi, mj, mk;  mi = mj = mk = -1;
00534   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
00535   const float *d = data;
00536   for ( int i=0; i<nx; ++i ) {
00537    for ( int j=0; j<ny; ++j, d += dim3 ) {
00538     for ( int k=0; k<K3; ++k ) {
00539       float std = 10. * (10. * (10. * std_base + i) + j) + k;
00540       float err = scale * d[k] - std;
00541       if ( fabsf(err) > fabsf(maxerr) ) {
00542         maxerr = err;
00543         maxstd = std;
00544         mi = i;  mj = j;  mk = k;
00545       }
00546     }
00547    }
00548   }
00549   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
00550            thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
00551 #endif
00552 }
00553 
00554 void OptPmeZPencil::send_ungrid(OptPmeGridMsg *msg) {
00555   int pe = msg->sourceNode;
00556   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
00557 
00558   int dim3 = initdata.grid.dim3;
00559   int xstart = msg->xstart - thisIndex.x * initdata.grid.block1;
00560   int ystart = msg->ystart - thisIndex.y * initdata.grid.block2;
00561   assert (xstart >= 0);
00562   assert (ystart >= 0);
00563   int xlen   = msg->xlen;
00564   int ylen   = msg->ylen;
00565   assert (xstart + xlen <= nx);
00566   assert (ystart + ylen <= ny);
00567 
00568   float *qmsg = msg->qgrid;
00569   float *d = data;
00570   int zstart = msg->zstart;
00571   int zlen = msg->zlen;
00572   int zmax = zstart + zlen - 1;
00573   
00574   int K3_1 = initdata.grid.K3 - 1; 
00575   int K3 = initdata.grid.K3;
00576   if (zstart < 0) {
00577     zstart += K3;
00578     zmax += K3;
00579   }
00580   
00581   int k = 0;
00582   for ( int i=xstart; i<xstart+xlen; ++i ) {
00583     for ( int j=ystart; j<ystart+ylen; ++j) {
00584       float *d = data + (i * ny + j) * dim3;
00585       for ( k=zstart; k<=zmax; ++k ) {
00586         int kz = k;
00587         //if (kz >= K3) kz -= K3;
00588         kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00589         *(qmsg++) = d[kz];
00590       }
00591     }
00592   }
00593   
00594   initdata.pmeProxy[pe].recvUngrid(msg);
00595 }
00596 
00597 
00601 
00602 void OptPmeZPencil::many_to_many_recv_grid () {
00603   int dim3 = initdata.grid.dim3;  
00604   memset(data, 0, sizeof(float) * nx*ny*dim3);
00605   int K3 = initdata.grid.K3;
00606   int K3_1 = initdata.grid.K3 - 1;
00607   int k = 0;
00608   for (int idx = 0; idx < grid_msgs.size(); idx++) {
00609     int xstart = m2m_recv_grid[idx].xstart; 
00610     int xlen   = m2m_recv_grid[idx].xlen; 
00611     int ystart = m2m_recv_grid[idx].ystart; 
00612     int ylen   = m2m_recv_grid[idx].ylen; 
00613     int zstart = m2m_recv_grid[idx].zstart; 
00614     int zlen   = m2m_recv_grid[idx].zlen;   
00615     
00616     float *qmsg = m2m_recv_grid[idx].data;
00617     for ( int i=xstart; i<xstart+xlen; ++i ) {
00618       for ( int j=ystart; j<ystart+ylen; ++j) {
00619         float *d = data + (i * ny + j) * dim3;  
00620 
00621 #pragma disjoint (*qmsg, *d)
00622 #pragma unroll (4)
00623         for ( k=zstart; k<zstart+zlen; ++k ) {
00624           int kz = k;
00625           //if (kz >= K3) kz -= K3;
00626           kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00627           //assert (kz >= 0);
00628           //assert (kz <  K3);
00629           d[kz] += *(qmsg++);
00630         }
00631       }
00632     } 
00633   }
00634 }
00635 
00636 void OptPmeZPencil::many_to_many_send_trans() {
00637   int zBlocks = initdata.zBlocks;
00638   int block3 = initdata.grid.block3;
00639   int dim3 = initdata.grid.dim3;
00640 
00641   float *md = many_to_many_data;
00642   int nz = block3;
00643   for ( int kb=0; kb<zBlocks; ++kb ) {
00644     nz = block3;
00645     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00646     
00647     const float *d = data;
00648     for ( int i=0; i<nx; ++i ) {
00649       for ( int j=0; j<ny; ++j, d += dim3 ) {
00650         for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00651           *(md++) = d[2*k];
00652           *(md++) = d[2*k+1];
00653         }
00654       }
00655     }
00656   }
00657   
00658 #if CHARM_VERSION > 60000
00659   CmiDirect_manytomany_start (handle, PHASE_YF);
00660 #endif
00661 }
00662 
00663 void OptPmeYPencil::many_to_many_recv_trans () {  
00664   int block2 = initdata.grid.block2;
00665   int K2 = initdata.grid.K2;
00666   
00667   const float *md = many_to_many_data;
00668   for (int jb = 0; jb < initdata.yBlocks; jb++ ) {
00669     int ny = many_to_many_nb[jb];  
00670     float *d = data;
00671     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00672       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00673         for ( int k=0; k<nz; ++k ) {
00674           d[2*(j*nz+k)] = *(md++);
00675           d[2*(j*nz+k)+1] = *(md++);
00676         }
00677       }
00678     }
00679   }
00680 }
00681 
00682 void OptPmeYPencil::many_to_many_send(int phase) {
00683   int yBlocks = initdata.yBlocks;
00684   int block2 = initdata.grid.block2;
00685   int K2 = initdata.grid.K2;
00686   
00687   float *md = many_to_many_data;
00688   int ny = block2;
00689   for ( int jb=0; jb<yBlocks; ++jb ) {
00690     ny = block2;
00691     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00692     
00693     const float *d = data;
00694     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00695       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00696         for ( int k=0; k<nz; ++k ) {
00697           *(md++) = d[2*(j*nz+k)];
00698           *(md++) = d[2*(j*nz+k)+1];
00699         }
00700       }
00701     }
00702   }
00703 
00704 #if CHARM_VERSION > 60000
00705   CmiDirect_manytomany_start (handle, phase);  
00706 #endif
00707 }
00708 
00709 void OptPmeXPencil::many_to_many_recv_trans () {
00710   
00711   int block1 = initdata.grid.block1;
00712   int K1 = initdata.grid.K1;
00713   
00714   const float *md = many_to_many_data;
00715   for (int ib =0; ib < initdata.xBlocks; ib++ ) {
00716     int nx = many_to_many_nb[ib];    
00717     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00718       float *d = data + i*ny*nz*2;
00719       for ( int j=0; j<ny; ++j, d += nz*2 ) {
00720         for ( int k=0; k<nz; ++k ) {
00721           d[2*k] = *(md++);
00722           d[2*k+1] = *(md++);
00723         }
00724       }
00725     }
00726   }
00727 }
00728 
00729 void OptPmeXPencil::many_to_many_send_untrans() {
00730   int xBlocks = initdata.xBlocks;
00731   int block1 = initdata.grid.block1;
00732   int K1 = initdata.grid.K1;
00733 
00734   int     nx = block1;
00735   float * md = many_to_many_data;
00736   for ( int ib=0; ib<xBlocks; ++ib ) {
00737     nx = block1;
00738     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
00739     
00740     for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
00741       float *d = data + i*ny*nz*2;
00742       for ( int j=0; j<ny; ++j, d += nz*2 ) {
00743         for ( int k=0; k<nz; ++k ) {
00744           *(md++) = d[2*k];
00745           *(md++) = d[2*k+1];
00746         }
00747       }
00748     }
00749   }
00750 #if CHARM_VERSION > 60000
00751   CmiDirect_manytomany_start (handle, PHASE_YB);
00752 #endif
00753 }
00754 
00755 void OptPmeYPencil::many_to_many_recv_untrans () {  
00756   int block2 = initdata.grid.block2;
00757   int K2 = initdata.grid.K2;
00758   
00759   const float *md = many_to_many_data;
00760   for (int jb = 0; jb < initdata.yBlocks; jb++ ) {
00761     int ny = many_to_many_nb[jb];  
00762     float *d = data;
00763     for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
00764       for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
00765         for ( int k=0; k<nz; ++k ) {
00766           d[2*(j*nz+k)] = *(md++);
00767           d[2*(j*nz+k)+1] = *(md++);
00768         }
00769       }
00770     }
00771   }
00772 }
00773 
00774 void OptPmeZPencil::many_to_many_recv_untrans() {  
00775   //printf ("%d, %d In recv untrans\n", thisIndex.x, thisIndex.y);                                                                             
00776   int block3 = initdata.grid.block3;
00777   int dim3 = initdata.grid.dim3;
00778   
00779   const float *md = many_to_many_data;
00780   for (int kb = 0; kb < initdata.zBlocks; kb++ ) {
00781     int nz = many_to_many_nb[kb];
00782     float *d = data;
00783     for ( int i=0; i<nx; ++i ) {
00784       for ( int j=0; j<ny; ++j, d += dim3 ) {
00785         for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
00786           d[2*k] = *(md++);
00787           d[2*k+1] = *(md++);
00788         }
00789       }
00790     }
00791   }
00792 }
00793 
00794 void OptPmeZPencil::many_to_many_send_ungrid () {
00795   int dim3 = initdata.grid.dim3;
00796   int K3 = initdata.grid.K3;
00797   int K3_1 = initdata.grid.K3 - 1;
00798   int k = 0;
00799   for (int idx = 0; idx < grid_msgs.size(); idx++) {
00800     int xstart = m2m_recv_grid[idx].xstart; 
00801     int xlen   = m2m_recv_grid[idx].xlen; 
00802     int ystart = m2m_recv_grid[idx].ystart; 
00803     int ylen   = m2m_recv_grid[idx].ylen; 
00804     int zstart = m2m_recv_grid[idx].zstart; 
00805     int zlen   = m2m_recv_grid[idx].zlen;   
00806     
00807     float *qmsg = m2m_recv_grid[idx].data;
00808     for ( int i=xstart; i<xstart+xlen; ++i ) {
00809       for ( int j=ystart; j<ystart+ylen; ++j) {
00810         float *d = data + (i * ny + j) * dim3;  
00811 
00812 #pragma disjoint (*d, *qmsg)
00813 #pragma unroll (4)
00814         for ( k=zstart; k<zstart+zlen; ++k ) {
00815           int kz = k;
00816           //if (kz >= K3) kz -= K3;
00817           kz = kz - ((unsigned)(K3_1 - kz)>>31)*K3;
00818           //assert (kz >= 0);
00819           //assert (kz <  K3);
00820           *(qmsg++) = d[kz];
00821         }
00822       }
00823     } 
00824   }
00825   
00826 #if CHARM_VERSION > 60000
00827   CmiDirect_manytomany_start (handle, PHASE_UG);
00828 #endif
00829 }
00830 
00831 
00832 
00833 void  OptPmeZPencil::initialize_manytomany () {  
00834 #if CHARM_VERSION > 60000
00835   int idx = 0;
00836   int totalcount = 0;
00837   for (idx = 0; idx < grid_msgs.size(); idx ++) 
00838     totalcount += m2m_recv_grid[idx].xlen * m2m_recv_grid[idx].ylen * m2m_recv_grid[idx].zlen;
00839   many_to_many_gr_data = new float [totalcount];
00840   
00841   CkArrayIndex3D aidx (thisIndex.x, thisIndex.y, thisIndex.z);
00842   cbw_recvgrid.cb = CkCallback (CkIndex_OptPmeZPencil::many_to_manyRecvGrid(NULL), aidx, thisProxy.ckGetArrayID());
00843   cbw_recvgrid.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00844   PatchMap *patchMap = PatchMap::Object();
00845   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_GR, 
00846                                             call_ck_cb, &cbw_recvgrid, 
00847                                             (char *)many_to_many_gr_data, patchMap->numPatches(), -1);
00848   
00849   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_UG, NULL, NULL, (char *)many_to_many_gr_data, 
00850                                             grid_msgs.size(), thisIndex.x *initdata.yBlocks + thisIndex.y);
00851   
00852   int offset = 0;
00853   for (idx = 0; idx < grid_msgs.size(); idx ++) {
00854     m2m_recv_grid[idx].data = (float *) ((char *)many_to_many_gr_data + offset);
00855     int fcount = m2m_recv_grid[idx].xlen * m2m_recv_grid[idx].ylen * m2m_recv_grid[idx].zlen;
00856     CmiDirect_manytomany_initialize_recv (handle, PHASE_GR, m2m_recv_grid[idx].patchID, 
00857                                           offset, fcount *sizeof(float), patchMap->node(m2m_recv_grid[idx].patchID));
00858     CmiDirect_manytomany_initialize_send (handle, PHASE_UG, idx, offset, fcount *sizeof(float), 
00859                                           patchMap->node(m2m_recv_grid[idx].patchID));
00860     offset += fcount * sizeof(float);
00861   }        
00862 
00863   int zBlocks = initdata.zBlocks;
00864   int block3 = initdata.grid.block3;
00865   int dim3 = initdata.grid.dim3;
00866 
00867   //Initialize send trans
00868   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_YF, NULL, NULL, (char *)many_to_many_data, zBlocks, thisIndex.y);
00869 
00870   //Initialize recv untrans
00871   cbw_recvuntrans.cb = CkCallback(CkIndex_OptPmeZPencil::many_to_manyRecvUntrans(NULL), aidx, thisProxy.ckGetArrayID());
00872   cbw_recvuntrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00873   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_ZB, call_ck_cb, &cbw_recvuntrans, 
00874                                             (char *)many_to_many_data, initdata.zBlocks, -1);
00875 
00876   for ( int kb=0; kb<zBlocks; ++kb ) {
00877     int nz = block3;
00878     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
00879     
00880     //Initialize send trans
00881     CkArrayIndex3D index (thisIndex.x,0,kb);
00882     CProxy_OptPmePencilMapY yproxy (global_map_y);
00883     int pe = yproxy.ckLocalBranch()->procNum(0, index);
00884     CmiDirect_manytomany_initialize_send (handle, PHASE_YF, kb, kb*2*nx*ny*block3*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
00885     
00886     //Initialize recv untrans
00887     CmiDirect_manytomany_initialize_recv (handle, PHASE_ZB, kb, kb*nx*ny*block3*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
00888     many_to_many_nb [kb] = nz;
00889   }
00890 #endif
00891 }
00892 
00893 void  OptPmeYPencil::initialize_manytomany () {
00894 #if CHARM_VERSION > 60000
00895   int yBlocks = initdata.yBlocks;
00896   int block2 = initdata.grid.block2;
00897   int K2 = initdata.grid.K2;
00898 
00899   //send trans
00900   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_XF, NULL, NULL, (char *)many_to_many_data, yBlocks, thisIndex.x);
00901   //send untrans
00902   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_ZB, NULL, NULL, (char *)many_to_many_data, yBlocks, thisIndex.z);
00903 
00904   //recv trans
00905   CkArrayIndex3D idx (thisIndex.x, thisIndex.y, thisIndex.z);
00906   cbw_recvtrans.cb = CkCallback (CkIndex_OptPmeYPencil::many_to_manyRecvTrans(NULL), idx, thisProxy.ckGetArrayID());
00907   cbw_recvtrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00908   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_YF, call_ck_cb, &cbw_recvtrans, 
00909                                             (char *)many_to_many_data, initdata.yBlocks, -1);
00910 
00911   //recv untrans
00912   cbw_recvuntrans.cb = CkCallback(CkIndex_OptPmeYPencil::many_to_manyRecvUntrans(NULL), idx, thisProxy.ckGetArrayID());
00913   cbw_recvuntrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00914   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_YB, call_ck_cb, 
00915                                             &cbw_recvuntrans, (char *)many_to_many_data, initdata.yBlocks, -1);
00916   
00917   for ( int jb=0; jb<yBlocks; ++jb ) {
00918     int ny = block2;
00919     if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
00920 
00921     //send untrans
00922     CkArrayIndex3D index (thisIndex.x,jb,0);
00923     CProxy_OptPmePencilMapZ zproxy (global_map_z);
00924     int pe = zproxy.ckLocalBranch()->procNum(0, index);
00925     CmiDirect_manytomany_initialize_send (handle, PHASE_ZB, jb, 2*nx*block2*nz*jb*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
00926 
00927     //recv trans
00928     CmiDirect_manytomany_initialize_recv (handle, PHASE_YF, jb, jb*nx*block2*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
00929     
00930     //send trans
00931     index = CkArrayIndex3D (0,jb,thisIndex.z);
00932     CProxy_OptPmePencilMapX xproxy (global_map_x);
00933     pe = xproxy.ckLocalBranch()->procNum(0, index);
00934     CmiDirect_manytomany_initialize_send (handle, PHASE_XF, jb, 2*nx*block2*nz*jb*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
00935     
00936     //Recv untrans
00937     CmiDirect_manytomany_initialize_recv (handle, PHASE_YB, jb, jb*nx*block2*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
00938 
00939     many_to_many_nb [jb] = ny;
00940   }
00941 #endif
00942 }
00943 
00944 void  OptPmeXPencil::initialize_manytomany () {  
00945 #if CHARM_VERSION > 60000
00946   int xBlocks = initdata.xBlocks;
00947   int block1 = initdata.grid.block1;
00948   int K1 = initdata.grid.K1;
00949   
00950   CmiDirect_manytomany_initialize_sendbase (handle, PHASE_YB, NULL, NULL, (char *)many_to_many_data, xBlocks, thisIndex.y);
00951   CkArrayIndex3D idx (thisIndex.x, thisIndex.y, thisIndex.z);
00952   cbw_recvtrans.cb = CkCallback(CkIndex_OptPmeXPencil::many_to_manyRecvTrans(NULL), idx, thisProxy.ckGetArrayID());
00953   cbw_recvtrans.msg = new (PRIORITY_SIZE) OptPmeDummyMsg;
00954   CmiDirect_manytomany_initialize_recvbase (handle, PHASE_XF, call_ck_cb, 
00955                                             &cbw_recvtrans,  (char *)many_to_many_data, 
00956                                             initdata.xBlocks, -1);
00957   
00958   for ( int ib=0; ib<xBlocks; ++ib ) {
00959     int nx = block1;
00960     if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
00961     
00962     CkArrayIndex3D index (ib,0,thisIndex.z);
00963     CProxy_OptPmePencilMapY yproxy (global_map_y);
00964     int pe = yproxy.ckLocalBranch()->procNum(0, index);
00965     CmiDirect_manytomany_initialize_send (handle, PHASE_YB, ib, 2*block1*ny*nz*ib*sizeof(float), 2*nx*ny*nz*sizeof(float), pe);
00966     
00967     CmiDirect_manytomany_initialize_recv (handle, PHASE_XF, ib, ib*block1*ny*nz*2*sizeof(float), nx*ny*nz*2*sizeof(float), pe);
00968     many_to_many_nb [ib] = nx;
00969   }
00970 #endif
00971 }
00972 
00973 
00977 
00978 
00979 #include "PmeFFTLib.def.h"

Generated on Mon Nov 23 04:59:20 2009 for NAMD by  doxygen 1.3.9.1