Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

OptPme.C

Go to the documentation of this file.
00001 
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00009 #include <fftw.h>
00010 #include <rfftw.h>
00011 #else
00012 #include <sfftw.h>
00013 #include <srfftw.h>
00014 #endif
00015 #endif
00016 
00017 #include <assert.h>
00018 
00019 #include "InfoStream.h"
00020 #include "Node.h"
00021 #include "PatchMap.h"
00022 #include "PatchMap.inl"
00023 #include "AtomMap.h"
00024 #include "OptPme.h"
00025 #include "OptPmeMgr.decl.h"
00026 #include "OptPmeRealSpace.h"
00027 #include "PmeKSpace.h"
00028 #include "ComputeNonbondedUtil.h"
00029 #include "PatchMgr.h"
00030 #include "Molecule.h"
00031 #include "ReductionMgr.h"
00032 //#include "ComputeMgr.h"
00033 #include "ComputeMgr.decl.h"
00034 // #define DEBUGM
00035 #define MIN_DEBUG_LEVEL 3
00036 #include "Debug.h"
00037 #include "SimParameters.h"
00038 #include "WorkDistrib.h"
00039 #include "varsizemsg.h"
00040 #include "Random.h"
00041 #include "Priorities.h"
00042 
00043 
00044 extern char *pencilPMEProcessors;
00045 
00046 #include "fftlib.h"
00047 #include "fftmap.h"
00048 
00049 //Very large integer
00050 int many_to_many_start = 0x7fffffff;
00051 
00052 class OptPmeMgr : public BOCclass {
00053 public:
00054   friend class OptPmeCompute;
00055   OptPmeMgr();
00056   ~OptPmeMgr();
00057 
00058   void initialize(CkQdMsg*);
00059   void initialize_pencils(CkQdMsg*);
00060   void activate_pencils(CkQdMsg*);
00061   void recvArrays(CProxy_OptPmeXPencil, CProxy_OptPmeYPencil, CProxy_OptPmeZPencil);
00062 
00063   void recvUngrid(OptPmeGridMsg *);
00064   void ungridCalc(OptPmeDummyMsg *);
00065   void recvEvir (CkReductionMsg *msg);
00066 
00067   void setCompute(OptPmeCompute *c) { pmeCompute = c; c->setMgr(this); }
00068 
00069 private:
00070   CProxy_OptPmeMgr pmeProxy;
00071   CProxy_OptPmeMgr pmeProxyDir;
00072   OptPmeCompute *pmeCompute;
00073   PmeGrid myGrid;
00074   PmeKSpace *myKSpace;
00075 
00076   CProxy_OptPmeXPencil xPencil;
00077   CProxy_OptPmeYPencil yPencil;
00078   CProxy_OptPmeZPencil zPencil;
00079   int    numPencilsActive;
00080   int    ungrid_count, usePencils;
00081   SubmitReduction *reduction;
00082   int    _iter;
00083   void   *handle;
00084   bool   constant_pressure;    //Does the simulation need constant pressure
00085 };
00086 
00087 
00088 void pme_f2d (double *dst, float *src, int N);
00089 void pme_d2f (float *dst, double *src, int N);
00090 
00091 static inline void initializePmeGrid (SimParameters *simParams, PmeGrid &grid) {
00092     int xBlocks = 0, yBlocks = 0, zBlocks= 0;
00093 
00094     if ( simParams->PMEPencils > 1 ) {
00095       xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00096     } else {
00097       int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00098                   * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00099       if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00100       int nb = (int) sqrt((float)nb2);
00101       xBlocks = zBlocks = nb;
00102       yBlocks = nb2 / nb;
00103     }
00104     
00105     int dimx = simParams->PMEGridSizeX;
00106     int bx = 1 + ( dimx - 1 ) / xBlocks;
00107     xBlocks = 1 + ( dimx - 1 ) / bx;
00108     
00109     int dimy = simParams->PMEGridSizeY;
00110     int by = 1 + ( dimy - 1 ) / yBlocks;
00111     yBlocks = 1 + ( dimy - 1 ) / by;
00112     
00113     int dimz = simParams->PMEGridSizeZ / 2 + 1;  // complex
00114     int bz = 1 + ( dimz - 1 ) / zBlocks;
00115     zBlocks = 1 + ( dimz - 1 ) / bz;
00116 
00117     grid.xBlocks = xBlocks;
00118     grid.yBlocks = yBlocks;
00119     grid.zBlocks = zBlocks;
00120 
00121     grid.K1 = simParams->PMEGridSizeX;
00122     grid.K2 = simParams->PMEGridSizeY;
00123     grid.K3 = simParams->PMEGridSizeZ;
00124     grid.order = simParams->PMEInterpOrder;
00125     grid.dim2 = grid.K2;
00126     grid.dim3 = 2 * (grid.K3/2 + 1);
00127 
00128     grid.block1 = ( grid.K1 + xBlocks - 1 ) / xBlocks;
00129     grid.block2 = ( grid.K2 + yBlocks - 1 ) / yBlocks;
00130     grid.block3 = ( grid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;  // complex
00131 }
00132 
00133 #ifdef NAMD_FFTW
00134 static CmiNodeLock fftw_plan_lock;
00135 #endif
00136 
00137 
00138 static inline void scale_n_copy_coordinates(CompAtom *x, PmeParticle p[], 
00139                                             int &N, 
00140                                             Lattice &lattice, PmeGrid grid,
00141                                             //double **qline,
00142                                             double xmin, double xlen,
00143                                             double ymin, double ylen,
00144                                             double zmin, double zlen,
00145                                             int &scg) {
00146   Vector origin = lattice.origin();
00147   Vector recip1 = lattice.a_r();
00148   Vector recip2 = lattice.b_r();
00149   Vector recip3 = lattice.c_r();
00150   double ox = origin.x;
00151   double oy = origin.y;
00152   double oz = origin.z;
00153   double r1x = recip1.x;
00154   double r1y = recip1.y;
00155   double r1z = recip1.z;
00156   double r2x = recip2.x;
00157   double r2y = recip2.y;
00158   double r2z = recip2.z;
00159   double r3x = recip3.x;
00160   double r3y = recip3.y;
00161   double r3z = recip3.z;
00162   int K1 = grid.K1;
00163   int K2 = grid.K2;
00164   int K3 = grid.K3;
00165 
00166   const BigReal coloumb_sqrt = sqrt( COLOUMB * ComputeNonbondedUtil::scaling
00167                                      * ComputeNonbondedUtil::dielectric_1 );
00168 
00169 
00170   int natoms = 0;
00171   for (int i=0; i<N; i++) {
00172     double px = x[i].position.x - ox;
00173     double py = x[i].position.y - oy;
00174     double pz = x[i].position.z - oz;
00175     double sx = px*r1x + py*r1y + pz*r1z;
00176     double sy = px*r2x + py*r2y + pz*r2z;
00177     double sz = px*r3x + py*r3y + pz*r3z;
00178     p[natoms].x = K1 * ( sx - floor(sx) );
00179     p[natoms].y = K2 * ( sy - floor(sy) );
00180     p[natoms].z = K3 * ( sz - floor(sz) );
00181     //  Check for rare rounding condition where K * ( 1 - epsilon ) == K      
00182     //  which was observed with g++ on Intel x86 architecture. 
00183     if ( p[natoms].x == K1 ) p[natoms].x = 0;
00184     if ( p[natoms].y == K2 ) p[natoms].y = 0;
00185     if ( p[natoms].z == K3 ) p[natoms].z = 0;
00186 
00187     BigReal u1,u2,u3;
00188     u1 = (int) (p[natoms].x - xmin);
00189     if (u1 >= grid.K1) u1 -= grid.K1;    
00190     u2 = (int) (p[natoms].y - ymin);
00191     if (u2 >= grid.K2) u2 -= grid.K2;    
00192     u3 = (int) (p[natoms].z - zmin);
00193     if (u3 >= grid.K3) u3 -= grid.K3;
00194     
00195     if ( (u1 < 0.0) || (u1 >= xlen) || 
00196          (u2 < 0.0) || (u2 >= ylen) || 
00197          (u3 < 0.0) || (u3 >= zlen) ) {
00198       scg ++;
00199       continue;
00200     }
00201     
00202     p[natoms].cg = coloumb_sqrt * x[i].charge;
00203     natoms ++;
00204   }
00205   N = natoms;
00206 }
00207 
00208 
00209 OptPmeMgr::OptPmeMgr() : pmeProxy(thisgroup), 
00210                                  pmeProxyDir(thisgroup), pmeCompute(0) {
00211 
00212   CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00213 
00214   myKSpace = 0;
00215   ungrid_count = 0;
00216   usePencils = 0;
00217 
00218 #ifdef NAMD_FFTW
00219   if ( CmiMyRank() == 0 ) {
00220     fftw_plan_lock = CmiCreateLock();
00221   }
00222 #endif    
00223 }
00224 
00225 
00226 void OptPmeMgr::recvArrays(CProxy_OptPmeXPencil x, CProxy_OptPmeYPencil y, CProxy_OptPmeZPencil z) {
00227   xPencil = x;  yPencil = y;  zPencil = z;
00228 }
00229 
00230 void OptPmeMgr::initialize(CkQdMsg *msg) {
00231     delete msg;
00232 
00233     _iter = 0;
00234 
00235 #if CHARM_VERSION > 60000
00236     handle = CmiDirect_manytomany_allocate_handle ();
00237 #endif
00238 
00239     SimParameters *simParams = Node::Object()->simParameters;
00240     PatchMap *patchMap = PatchMap::Object();
00241     
00242     if (simParams->PMEPencils > 0)
00243       usePencils = 1;
00244     else return;
00245 
00246     initializePmeGrid (simParams, myGrid);    
00247 
00248     if (simParams->langevinPistonOn || simParams->berendsenPressureOn)  
00249       constant_pressure = true;
00250     else
00251       constant_pressure = false;      
00252 
00253     bool useManyToMany = simParams->useManyToMany;
00254     //Many-to-many requires that patches and pmepencils are all on different processors
00255     int npes = patchMap->numPatches() + 
00256                myGrid.xBlocks *  myGrid.yBlocks + 
00257                myGrid.zBlocks *  myGrid.xBlocks +
00258                myGrid.yBlocks *  myGrid.zBlocks;
00259 
00260     if (npes >= CkNumPes()) {
00261       if (CkMyPe() == 0)
00262         printf ("Warning : Not enough processors for the many-to-many optimization \n");      
00263       useManyToMany = false;
00264     }
00265     
00266 #if CHARM_VERSION > 60000
00267     if (useManyToMany)  {
00268       if (CkMyPe() == 0)
00269         printf ("Enabling the Many-to-many optimization\n");
00270       //defaults to max integer
00271       many_to_many_start = MANY_TO_MANY_START;
00272     }
00273 #endif
00274 
00275     if ( CkMyPe() == 0) {
00276       iout << iINFO << "PME using " << myGrid.xBlocks << " x " <<
00277         myGrid.yBlocks << " x " << myGrid.zBlocks <<
00278         " pencil grid for FFT and reciprocal sum.\n" << endi;
00279 
00280       pencilPMEProcessors = new char [CkNumPes()];
00281       memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00282       
00283       CProxy_OptPmePencilMapZ   mapz;      
00284       CProxy_OptPmePencilMapY   mapy;
00285       CProxy_OptPmePencilMapX   mapx;
00286       
00287       mapz = CProxy_OptPmePencilMapZ::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);      
00288       mapy = CProxy_OptPmePencilMapY::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00289       mapx = CProxy_OptPmePencilMapX::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00290       
00291       CkArrayOptions optsz;
00292       optsz.setMap (mapz);
00293       CkArrayOptions optsy;
00294       optsy.setMap (mapy);
00295       CkArrayOptions optsx;
00296       optsx.setMap (mapx);
00297       
00298       zPencil = CProxy_OptPmeZPencil::ckNew(optsz);  
00299       yPencil = CProxy_OptPmeYPencil::ckNew(optsy);  
00300       xPencil = CProxy_OptPmeXPencil::ckNew(optsx);  
00301       
00302       int x,y,z;
00303       for (x = 0; x < myGrid.xBlocks; ++x)
00304         for (y = 0; y < myGrid.yBlocks; ++y ) {
00305           zPencil(x,y,0).insert();
00306         }
00307       zPencil.doneInserting();
00308       
00309       for (z = 0; z < myGrid.zBlocks; ++z )
00310         for (x = 0; x < myGrid.xBlocks; ++x ) {
00311           yPencil(x,0,z).insert();
00312         }
00313       yPencil.doneInserting();
00314       
00315       for (y = 0; y < myGrid.yBlocks; ++y )     
00316         for (z = 0; z < myGrid.zBlocks; ++z ) {
00317           xPencil(0,y,z).insert();
00318         }
00319       xPencil.doneInserting();      
00320       
00321       pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00322       OptPmePencilInitMsgData msgdata;
00323       msgdata.grid = myGrid;
00324       msgdata.xBlocks = myGrid.xBlocks;
00325       msgdata.yBlocks = myGrid.yBlocks;
00326       msgdata.zBlocks = myGrid.zBlocks;
00327       msgdata.xPencil = xPencil;
00328       msgdata.yPencil = yPencil;
00329       msgdata.zPencil = zPencil;
00330       msgdata.constant_pressure = constant_pressure;
00331 
00332       CkCallback cb (CkIndex_OptPmeMgr::recvEvir(NULL), thisProxy[0]);
00333       msgdata.cb_energy = cb;
00334 
00335       msgdata.pmeProxy = pmeProxyDir;
00336       xPencil.init(new OptPmePencilInitMsg(msgdata));
00337       yPencil.init(new OptPmePencilInitMsg(msgdata));
00338       zPencil.init(new OptPmePencilInitMsg(msgdata));
00339       
00340       reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00341 
00342 #ifndef NAMD_FFTW
00343       NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00344 #endif
00345     }
00346 
00347 }
00348 
00349 void OptPmeMgr::initialize_pencils(CkQdMsg *msg) {
00350   delete msg;
00351 
00352   SimParameters *simParams = Node::Object()->simParameters;
00353 
00354   PatchMap *patchMap = PatchMap::Object();
00355   Lattice lattice = simParams->lattice;
00356   BigReal sysdima = lattice.a_r().unit() * lattice.a();
00357   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00358   BigReal cutoff = simParams->cutoff;
00359   BigReal patchdim = simParams->patchDimension;
00360   int numPatches = patchMap->numPatches();
00361 
00362   char *pencilActive = new char[myGrid.xBlocks*myGrid.yBlocks];
00363   for ( int i=0; i<myGrid.xBlocks; ++i ) {
00364     for ( int j=0; j<myGrid.yBlocks; ++j ) {
00365       pencilActive[i*myGrid.yBlocks+j] = 0;
00366     }
00367   }
00368 
00369   //Right now we only support one patch per processor
00370   assert (patchMap->numPatchesOnNode(CkMyPe()) <= 1);
00371   for ( int pid=0; pid < numPatches; ++pid ) {
00372     int pnode = patchMap->node(pid);
00373     if ( pnode != CkMyPe() ) continue;
00374 
00375     BigReal minx = patchMap->min_a(pid);
00376     BigReal maxx = patchMap->max_a(pid);
00377     BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00378     // min1 (max1) is smallest (largest) grid line for this patch
00379     int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00380     int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00381 
00382     BigReal miny = patchMap->min_b(pid);
00383     BigReal maxy = patchMap->max_b(pid);
00384     BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00385     // min2 (max2) is smallest (largest) grid line for this patch
00386     int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00387     int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00388 
00389     for ( int i=min1; i<=max1; ++i ) {
00390       int ix = i;
00391       while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00392       while ( ix < 0 ) ix += myGrid.K1;
00393       for ( int j=min2; j<=max2; ++j ) {
00394         int jy = j;
00395         while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00396         while ( jy < 0 ) jy += myGrid.K2;
00397         pencilActive[(ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2)] = 1;
00398       }
00399     }
00400   }
00401 
00402   numPencilsActive = 0;
00403   for ( int i=0; i<myGrid.xBlocks; ++i ) {
00404     for ( int j=0; j<myGrid.yBlocks; ++j ) {
00405       if ( pencilActive[i*myGrid.yBlocks+j] ) {
00406         ++numPencilsActive;
00407 
00408         zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00409       }
00410     }
00411   }
00412 
00413   ungrid_count = numPencilsActive;
00414   delete [] pencilActive;
00415 }
00416 
00417 
00418 void OptPmeMgr::activate_pencils(CkQdMsg *msg) {
00419   if ( ! usePencils ) return;
00420   if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00421 }
00422 
00423 
00424 OptPmeMgr::~OptPmeMgr() {
00425   delete myKSpace;
00426 }
00427 
00428 void OptPmeMgr::recvUngrid(OptPmeGridMsg *msg) {
00429   if ( ungrid_count == 0 ) {
00430     NAMD_bug("Message order failure in OptPmeMgr::recvUngrid\n");
00431   }
00432     
00433   pmeCompute->copyPencils(msg);
00434   delete msg;
00435   --ungrid_count;
00436 
00437   if ( ungrid_count == 0 ) {
00438     //CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
00439     ungridCalc(NULL);
00440   }
00441 }
00442 
00443 void OptPmeMgr::ungridCalc(OptPmeDummyMsg *msg) {    
00444   pmeCompute->ungridForces();
00445   ungrid_count = numPencilsActive;
00446 }
00447 
00448 
00449 void OptPmeMgr::recvEvir (CkReductionMsg *msg) {
00450 
00451   assert (CkMyPe() == 0);
00452 
00453   double *data = (double *) msg->getData();
00454   assert (msg->getSize() == 7 * sizeof(double));
00455 
00456   //printf ("[%d]: Received Evir\n", CkMyPe());
00457 
00458   double scale = 1.;
00459   reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00460   reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00461   reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00462   reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00463   reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00464   reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00465   reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00466   reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00467   reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00468   reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;   
00469 
00470   delete msg;
00471 
00472   SimParameters *simParams = Node::Object()->simParameters;
00473   int fef = simParams->fullElectFrequency;
00474   for (int i = 0; i < fef; i++)
00475     reduction->submit();
00476 }
00477 
00478 OptPmeCompute::OptPmeCompute(ComputeID c) :
00479   ComputeHomePatches(c)
00480 {
00481   DebugM(4,"OptPmeCompute created.\n");
00482 
00483   CProxy_OptPmeMgr::ckLocalBranch(
00484         CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
00485 
00486   _initialized = false;
00487 
00488   useAvgPositions = 1;
00489 
00490   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00491 }
00492 
00493 void recv_ungrid_done (void *m) {
00494   OptPmeDummyMsg *msg =  (OptPmeDummyMsg *) m;  
00495   CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00496   pmeProxy[CkMyPe()].ungridCalc (msg);
00497 }  
00498 
00499 
00500 void OptPmeCompute::resetPatchCoordinates (const Lattice &lattice) {
00501   PatchMap *patchMap = PatchMap::Object();    
00502   SimParameters *simParams = Node::Object()->simParameters;
00503 
00504   BigReal sysdima = lattice.a_r().unit() * lattice.a();
00505   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00506   BigReal sysdimc = lattice.c_r().unit() * lattice.c();
00507   BigReal cutoff = simParams->cutoff;
00508   BigReal patchdim = simParams->patchDimension;
00509 
00510   int pid = patchList[0].patchID; 
00511   assert (patchList.size() == 1);
00512 
00513   //printf ("Patch[%d]: zstart = %d, zlen = %d, maxz %f, minz %f, marginec %f\n", pid, zstart, zlen, maxz, minz, marginc);
00514 
00515   BigReal minx = patchMap->min_a(pid);
00516   BigReal maxx = patchMap->max_a(pid);
00517   BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00518   // min1 (max1) is smallest (largest) grid line for this patch
00519   int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00520   int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00521 
00522   BigReal miny = patchMap->min_b(pid);
00523   BigReal maxy = patchMap->max_b(pid);
00524   BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00525   // min2 (max2) is smallest (largest) grid line for this patch
00526   int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00527   int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00528 
00529   BigReal minz = patchMap->min_c(pid);
00530   BigReal maxz = patchMap->max_c(pid);
00531   BigReal marginc = 0.5 * ( patchdim - cutoff ) / sysdimc;
00532   // min3 (max3) is smallest (largest) grid line for this patch
00533   int min3 = ((int) floor(myGrid.K3 * (minz - marginc))) - myGrid.order + 1;
00534   int max3 = ((int) floor(myGrid.K3 * (maxz + marginc)));
00535 
00536   xlen = max1 - min1 + 1;
00537   xstart = min1;  
00538   
00539   ylen = max2 - min2 + 1;
00540   ystart = min2;  
00541   
00542   zlen = max3 - min3 + 1;
00543   zstart = min3;  
00544 }
00545 
00546 void OptPmeCompute::initializeOptPmeCompute () {
00547   
00548   _initialized = true;
00549   
00550   strayChargeErrors = 0;
00551 
00552   SimParameters *simParams = Node::Object()->simParameters;
00553   PatchMap *patchMap = PatchMap::Object();
00554 
00555   initializePmeGrid (simParams, myGrid);
00556 
00557   alchFepOn    = simParams->alchFepOn;
00558   alchThermIntOn = simParams->alchThermIntOn;
00559   lesOn    = simParams->lesOn;
00560   pairOn   = simParams->pairInteractionOn;
00561 
00562   assert (!alchFepOn);
00563   assert (!alchThermIntOn);
00564   assert (!lesOn);
00565   assert (!pairOn);
00566   
00567   qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
00568   fsize = myGrid.K1 * myGrid.dim2;
00569   q_arr = new double*[fsize];
00570   memset( (void*) q_arr, 0, fsize * sizeof(double*) );
00571 
00572   assert (myMgr != NULL);
00573 
00574   const Lattice lattice = simParams->lattice;
00575   resetPatchCoordinates (lattice);
00576   
00577   PencilElement *pencilActive = new PencilElement [myGrid.xBlocks * myGrid.yBlocks];
00578   memset (pencilActive, 0, myGrid.xBlocks * myGrid.yBlocks * sizeof(PencilElement));
00579 
00580   for ( int i=xstart; i<=xstart+xlen-1; ++i ) {
00581     int ix = i;
00582     while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00583     while ( ix < 0 ) ix += myGrid.K1;
00584     for ( int j=ystart; j<=ystart+ylen-1; ++j ) {
00585       int jy = j;
00586       while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00587       while ( jy < 0 ) jy += myGrid.K2;
00588       
00589       int pencil_idx = (ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2);
00590       //If not initialized yet, initialize this pencil
00591       if (! pencilActive[pencil_idx].isActive) {
00592         pencilActive[pencil_idx].isActive = 1;
00593         pencilActive[pencil_idx].xmin = ix;
00594         pencilActive[pencil_idx].xmax = ix;
00595         pencilActive[pencil_idx].ymin = jy;
00596         pencilActive[pencil_idx].ymax = jy;
00597         pencilActive[pencil_idx].ib = ix / myGrid.block1;
00598         pencilActive[pencil_idx].jb = jy / myGrid.block2;
00599       }
00600       else { //pencil has been initialized, update the min and max
00601         if (pencilActive[pencil_idx].xmin > ix)
00602           pencilActive[pencil_idx].xmin = ix;
00603         if (pencilActive[pencil_idx].xmax < ix)
00604           pencilActive[pencil_idx].xmax = ix;   
00605         if (pencilActive[pencil_idx].ymin > jy)
00606           pencilActive[pencil_idx].ymin = jy;
00607         if (pencilActive[pencil_idx].ymax < jy)
00608           pencilActive[pencil_idx].ymax = jy;
00609       }
00610     }
00611   }
00612   
00613   int nactive = 0; 
00614   for (int ib=0; ib<myGrid.xBlocks; ++ib) 
00615     for (int jb=0; jb<myGrid.yBlocks; ++jb) 
00616       if (pencilActive[ib * myGrid.yBlocks + jb].isActive) 
00617         nactive ++;
00618 
00619   assert (nactive == myMgr->numPencilsActive);  
00620   
00621   nzlines = xlen * ylen;
00622   zline_storage = new double [nzlines * zlen];
00623   sp_zstorage = new float [nzlines * zlen];  
00624 
00625   //printf ("%d: Allocate %d bytes of storage for %d PME Pencils\n", CkMyPe(), 
00626   //      nzlines * zlen * (int)sizeof(double), nactive);
00627   
00628   assert (zline_storage != NULL);
00629   double * zblock = zline_storage;
00630   
00631   for (int ib=0; ib<myGrid.xBlocks; ++ib) {
00632     for (int jb=0; jb<myGrid.yBlocks; ++jb) {
00633       int index = ib * myGrid.yBlocks + jb;
00634       if (pencilActive[index].isActive) {
00635         pencilActive[index].data = zblock;
00636         for (int i = pencilActive[index].xmin; i <= pencilActive[index].xmax; i++)
00637           for (int j = pencilActive[index].ymin; j <= pencilActive[index].ymax; j++) {      
00638             q_arr[i*myGrid.dim2 + j] = zblock;
00639             zblock += zlen;
00640             assert ((char *) zblock <= (char *)(zline_storage + nzlines*zlen));
00641           }
00642       }
00643     }
00644   }
00645   
00646   pencilVec.resize (nactive);
00647   nactive = 0;
00648   for (int ib=0; ib<myGrid.xBlocks; ++ib) 
00649     for (int jb=0; jb<myGrid.yBlocks; ++jb) 
00650       if (pencilActive[ib*myGrid.yBlocks + jb].isActive) 
00651         pencilVec[nactive ++] = pencilActive [ib * myGrid.yBlocks + jb];
00652 
00653   //We dont need the sparse array anymore
00654   delete [] pencilActive;
00655 
00656 #if CHARM_VERSION > 60000
00657   /******************************* Initialize Many to Many ***********************************/
00658   OptPmeDummyMsg *m = new (PRIORITY_SIZE) OptPmeDummyMsg;
00659   CmiDirect_manytomany_initialize_recvbase (myMgr->handle, PHASE_UG, 
00660                                             recv_ungrid_done, m, (char *)sp_zstorage, 
00661                                             myGrid.xBlocks*myGrid.yBlocks, -1); 
00662   
00663   CkCallback cbi (CkCallback::ignore);
00664   CmiDirect_manytomany_initialize_sendbase (myMgr->handle, PHASE_GR, NULL, NULL, 
00665                                             (char *)sp_zstorage, 
00666                                             pencilVec.size(), patchList[0].patchID);
00667 
00668   for (int idx = 0; idx < pencilVec.size(); idx++) {
00669     int ib =  pencilVec[idx].ib;
00670     int jb =  pencilVec[idx].jb;
00671     double * data = pencilVec[idx].data;
00672     int offset = (data - zline_storage)*sizeof(float);
00673     int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00674     int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00675     int fcount = xlen * ylen * zlen;
00676 
00677     CkArrayIndex3D index (ib, jb, 0);
00678     CProxy_OptPmePencilMapZ zproxy (global_map_z);
00679     int pe = zproxy.ckLocalBranch()->procNum(0, index);
00680     CmiDirect_manytomany_initialize_send (myMgr->handle, PHASE_GR, idx, 
00681                                            offset, fcount*sizeof(float), pe);
00682     
00683     int srcnode = ib * myGrid.yBlocks + jb;
00684     CmiDirect_manytomany_initialize_recv (myMgr->handle, PHASE_UG, 
00685                                           srcnode, offset, fcount*sizeof(float), pe);
00686   }
00687   /********************************** End initialize many to many ****************************/
00688 #endif
00689 
00690 }
00691 
00692 OptPmeCompute::~OptPmeCompute()
00693 {
00694   delete [] zline_storage;
00695   delete [] q_arr;
00696 }
00697 
00698 
00699 void OptPmeCompute::doWork()
00700 {
00701   DebugM(4,"Entering OptPmeCompute::doWork().\n");
00702 
00703 #ifdef TRACE_COMPUTE_OBJECTS
00704   double traceObjStartTime = CmiWallTimer();
00705 #endif
00706 
00707   if (!_initialized) initializeOptPmeCompute();
00708 
00709   ResizeArrayIter<PatchElem> ap(patchList);
00710 
00711   // Skip computations if nothing to do.
00712   if ( ! patchList[0].p->flags.doFullElectrostatics )
00713   {
00714     for (ap = ap.begin(); ap != ap.end(); ap++) {
00715       CompAtom *x = (*ap).positionBox->open();
00716       Results *r = (*ap).forceBox->open();
00717       (*ap).positionBox->close(&x);
00718       (*ap).forceBox->close(&r);
00719     }
00720     reduction->submit();
00721     return;
00722   }
00723 
00724   myMgr->_iter ++;  //this is a pme step
00725 
00726   // allocate storage
00727   numLocalAtoms = 0;
00728   for (ap = ap.begin(); ap != ap.end(); ap++) {
00729     numLocalAtoms += (*ap).p->getNumAtoms();
00730   }
00731 
00732   Lattice &lattice = patchList[0].p->flags.lattice;
00733 
00734   localData = new PmeParticle[numLocalAtoms];
00735   // get positions and charges
00736   PmeParticle * data_ptr = localData;
00737 
00738   int natoms = 0;
00739   if (myMgr->constant_pressure)
00740     resetPatchCoordinates(lattice);  //Update patch coordinates with new lattice
00741 
00742   for (ap = ap.begin(); ap != ap.end(); ap++) {
00743 #ifdef NETWORK_PROGRESS
00744     CmiNetworkProgress();
00745 #endif
00746     
00747     CompAtom *x = (*ap).positionBox->open();
00748     if ( patchList[0].p->flags.doMolly ) {
00749       (*ap).positionBox->close(&x);
00750       x = (*ap).avgPositionBox->open();
00751     }
00752     int numAtoms = (*ap).p->getNumAtoms();        
00753     int order_1  = myGrid.order - 1;
00754     scale_n_copy_coordinates(x, localData, numAtoms, 
00755                              lattice, myGrid,
00756                              xstart + order_1, xlen - order_1,
00757                              ystart + order_1, ylen - order_1,
00758                              zstart + order_1, zlen - order_1,
00759                              strayChargeErrors);
00760     natoms += numAtoms;
00761     
00762     if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
00763     else { (*ap).positionBox->close(&x); }
00764   }
00765 
00766   numLocalAtoms = natoms;  //Exclude all atoms out of range
00767 
00768   // calculate self energy
00769   BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00770   evir[0] = 0;
00771   BigReal selfEnergy = 0;
00772   data_ptr = localData;
00773   int i;
00774   for(i=0; i<numLocalAtoms; ++i)
00775   {
00776     selfEnergy += data_ptr->cg * data_ptr->cg;
00777     ++data_ptr;
00778   }
00779   selfEnergy *= -1. * ewaldcof / SQRT_PI;
00780   evir[0][0] += selfEnergy;
00781 
00782   double **q = q_arr;
00783   memset( (void*) zline_storage, 0, zlen * nzlines * sizeof(double) );
00784 
00785   myRealSpace[0] = new OptPmeRealSpace(myGrid,numLocalAtoms);
00786   //scale_coordinates(localData, numLocalAtoms, lattice, myGrid);
00787   if (!strayChargeErrors)
00788     myRealSpace[0]->fill_charges(q, localData, zstart, zlen);
00789 
00790 #ifdef TRACE_COMPUTE_OBJECTS
00791   traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
00792 #endif
00793 
00794   if (myMgr->constant_pressure && patchList[0].patchID == 0)
00795     myMgr->xPencil.recvLattice (lattice);
00796   
00797   if (myMgr->_iter <= many_to_many_start)
00798       sendPencils();
00799 #if CHARM_VERSION > 60000
00800   else {
00801       pme_d2f (sp_zstorage, zline_storage, nzlines * zlen);
00802       CmiDirect_manytomany_start (myMgr->handle, PHASE_GR);
00803   }
00804 #endif
00805 }
00806 
00807 
00808 
00809 void OptPmeCompute::sendPencils() {  
00810 
00811   assert ( myMgr->usePencils );
00812 
00813   //iout << iPE << " Sending charge grid for " << numLocalAtoms << " atoms to FFT with " << myMgr->numPencilsActive << " messages" <<".\n" << endi;
00814 
00815   int xBlocks = myGrid.xBlocks;
00816   int yBlocks = myGrid.yBlocks;
00817   int zBlocks = myGrid.zBlocks;
00818 
00819   int K1 = myGrid.K1;
00820   int K2 = myGrid.K2;
00821   int dim2 = myGrid.dim2;
00822   int dim3 = myGrid.dim3;
00823   int block1 = myGrid.block1;
00824   int block2 = myGrid.block2;
00825 
00826   //Lattice lattice = patchList[0].p->flags.lattice;
00827 
00828   int nactive = 0;  
00829   for (int idx = 0; idx < pencilVec.size(); idx++) {
00830     int xstart = pencilVec[idx].xmin;
00831     int ystart = pencilVec[idx].ymin;
00832     int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00833     int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00834     int ib     = pencilVec[idx].ib;
00835     int jb     = pencilVec[idx].jb;
00836     double *data = pencilVec[idx].data;
00837     
00838     int fcount = xlen * ylen * zlen;
00839     OptPmeGridMsg *msg = new (fcount, PRIORITY_SIZE) OptPmeGridMsg;
00840     msg->zstart = zstart;
00841     msg->zlen   = zlen;
00842     msg->xstart = xstart;
00843     msg->xlen   = xlen;
00844     msg->ystart = ystart;
00845     msg->ylen   = ylen;
00846     msg->sourceNode = CkMyPe();
00847     msg->patchID    = patchList[0].patchID;
00848     
00849     float *qmsg = msg->qgrid;   
00850 #pragma disjoint (*data, *qmsg)
00851 #pragma unroll(8)
00852     for ( int k=0; k< fcount; ++k ) 
00853       *(qmsg++) = data[k];    
00854     
00855     myMgr->zPencil(ib,jb,0).recvGrid(msg);
00856   }
00857 }
00858 
00859 
00860 void OptPmeCompute::copyPencils(OptPmeGridMsg *msg) {
00861 
00862   if (!_initialized) initializeOptPmeCompute();
00863 
00864   int ibegin = msg->xstart;
00865   int iend   = msg->xstart + msg->xlen;
00866   int jbegin = msg->ystart;
00867   int jend   = msg->ylen;
00868   int fcount = zlen * msg->xlen * msg->ylen;
00869 
00870   float *qmsg = msg->qgrid;
00871   double *data = q_arr[ibegin * myGrid.dim2 + jbegin];  
00872 
00873 #pragma disjoint (*qmsg, *data)
00874 #pragma unroll(8)
00875   for ( int k=0; k<fcount; ++k ) 
00876     data[k] = *(qmsg++);
00877 }
00878 
00879 void OptPmeCompute::ungridForces() {
00880 
00881     //printf ("%d: In OptPMECompute::ungridforces\n", CkMyPe());
00882 
00883     if (myMgr->_iter > many_to_many_start)
00884       pme_f2d (zline_storage, sp_zstorage, nzlines * zlen);
00885 
00886     SimParameters *simParams = Node::Object()->simParameters;
00887     Vector *localResults = new Vector[numLocalAtoms];
00888     //memset (localResults, 0, sizeof (Vector) * numLocalAtoms);
00889 
00890     Vector *gridResults;
00891     gridResults = localResults;
00892 
00893     Vector pairForce = 0.;
00894     Lattice lattice = patchList[0].p->flags.lattice;
00895     if(!simParams->commOnly) {
00896 #ifdef NETWORK_PROGRESS
00897       CmiNetworkProgress();
00898 #endif
00899       
00900       if (!strayChargeErrors) {
00901         myRealSpace[0]->compute_forces(q_arr, localData, gridResults, zstart, zlen);
00902         scale_forces(gridResults, numLocalAtoms, lattice);
00903       }
00904       delete myRealSpace[0];
00905     }
00906     delete [] localData;
00907     //    delete [] localPartition;
00908     
00909     Vector *results_ptr = localResults;
00910     ResizeArrayIter<PatchElem> ap(patchList);
00911     
00912     // add in forces
00913     for (ap = ap.begin(); ap != ap.end(); ap++) {
00914       Results *r = (*ap).forceBox->open();
00915       Force *f = r->f[Results::slow];
00916       int numAtoms = (*ap).p->getNumAtoms();
00917       
00918       if ( ! strayChargeErrors && ! simParams->commOnly ) {
00919         for(int i=0; i<numAtoms; ++i) {
00920           f[i].x += results_ptr->x;
00921           f[i].y += results_ptr->y;
00922           f[i].z += results_ptr->z;
00923           ++results_ptr;
00924         }
00925       }
00926   
00927       (*ap).forceBox->close(&r);
00928     }
00929 
00930     delete [] localResults;
00931    
00932     double scale = 1.;
00933 
00934     reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0][0] * scale;
00935     reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
00936     strayChargeErrors = 0;
00937     reduction->submit();
00938 }
00939 
00940 
00941 
00942 #include "fftlib.C"
00943 #include "OptPmeMgr.def.h"
00944 
00945 void pme_f2d (double *dst, float *src, int N) {
00946 #pragma disjoint (*src, *dst)  
00947 #pragma unroll (8)
00948   for (int i = 0; i < N; i++) 
00949     dst[i] = src[i];
00950 }
00951 
00952 void pme_d2f (float *dst, double *src, int N) {
00953 #pragma disjoint (*src, *dst)  
00954 #pragma unroll (8)
00955   for (int i = 0; i < N; i++) 
00956     dst[i] = src[i];
00957 }

Generated on Mon Nov 23 04:59:22 2009 for NAMD by  doxygen 1.3.9.1