OptPme.C


#ifdef NAMD_FFTW
#ifdef NAMD_FFTW_3
#include <fftw3.h>
#else
#ifdef NAMD_FFTW_NO_TYPE_PREFIX
#include <fftw.h>
#include <rfftw.h>
#else
#include <sfftw.h>
#include <srfftw.h>
#endif
#endif
#endif

#include <assert.h>

#include "InfoStream.h"
#include "Node.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "AtomMap.h"
#include "OptPme.h"
#include "OptPmeMgr.decl.h"
#include "OptPmeRealSpace.h"
#include "PmeKSpace.h"
#include "ComputeNonbondedUtil.h"
#include "PatchMgr.h"
#include "Molecule.h"
#include "ReductionMgr.h"
//#include "ComputeMgr.h"
#include "ComputeMgr.decl.h"
// #define DEBUGM
#define MIN_DEBUG_LEVEL 3
#include "Debug.h"
#include "SimParameters.h"
#include "WorkDistrib.h"
#include "varsizemsg.h"
#include "Random.h"
#include "Priorities.h"
#include "PmeBase.inl"

extern char *pencilPMEProcessors;

#include "fftlib.h"
#include "fftmap.h"

// Step index at which the many-to-many optimization kicks in; defaults to
// INT_MAX (0x7fffffff), i.e. effectively disabled unless lowered in initialize()
int many_to_many_start = 0x7fffffff;

class OptPmeMgr : public CBase_OptPmeMgr {
public:
  friend class OptPmeCompute;
  OptPmeMgr();
  ~OptPmeMgr();

  void initialize(CkQdMsg*);
  void initialize_pencils(CkQdMsg*);
  void activate_pencils(CkQdMsg*);
  void recvArrays(CProxy_OptPmeXPencil, CProxy_OptPmeYPencil, CProxy_OptPmeZPencil);

  void recvUngrid(OptPmeGridMsg *);
  void ungridCalc(OptPmeDummyMsg *);
  void ungridCalc_subcompute(OptPmeSubComputeMsg *);
  void ungridCalc_subcompute_done(OptPmeSubComputeMsg *);
  void doWorkOnPeer(OptPmeSubComputeMsg *);
  void recvEvir (CkReductionMsg *msg);

  void setCompute(OptPmeCompute *c) { pmeCompute = c; c->setMgr(this); }

private:
  CProxy_OptPmeMgr pmeProxy;
  CProxy_OptPmeMgr pmeProxyDir;
  OptPmeCompute *pmeCompute;
  PmeGrid myGrid;
  PmeKSpace *myKSpace;

  CProxy_OptPmeXPencil xPencil;
  CProxy_OptPmeYPencil yPencil;
  CProxy_OptPmeZPencil zPencil;
  int    numPencilsActive;
  int    ungrid_count;
  SubmitReduction *reduction;
  int    _iter;
  void   *handle;
  bool   constant_pressure;    // Does the simulation require constant pressure?
  int    subcompute_count;

  int peersAllocated;
  int peers [SUBCOMPUTE_NPAR];
  OptPmeSubComputeMsg *subcompute_msgs[SUBCOMPUTE_NPAR];
};


void pme_f2d (double *dst, float *src, int N);
void pme_d2f (float *dst, double *src, int N);

static inline void initializePmeGrid (SimParameters *simParams, PmeGrid &grid) {
    int xBlocks = 0, yBlocks = 0, zBlocks = 0;

    if ( simParams->PMEPencils > 1 ) {
      xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
    } else {
      int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
                  * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
      if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
      if ( nb2 < 1 ) nb2 = 1;
      int nb = (int) sqrt((float)nb2);
      if ( nb < 1 ) nb = 1;
      xBlocks = zBlocks = nb;
      yBlocks = nb2 / nb;
    }
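    // Round the per-pencil block sizes up, then recompute the pencil counts so
    // that every pencil covers at least one grid plane; the last pencil in each
    // dimension may end up narrower than the rest.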

    int dimx = simParams->PMEGridSizeX;
    int bx = 1 + ( dimx - 1 ) / xBlocks;
    xBlocks = 1 + ( dimx - 1 ) / bx;

    int dimy = simParams->PMEGridSizeY;
    int by = 1 + ( dimy - 1 ) / yBlocks;
    yBlocks = 1 + ( dimy - 1 ) / by;

    int dimz = simParams->PMEGridSizeZ / 2 + 1;  // complex
    int bz = 1 + ( dimz - 1 ) / zBlocks;
    zBlocks = 1 + ( dimz - 1 ) / bz;

    grid.xBlocks = xBlocks;
    grid.yBlocks = yBlocks;
    grid.zBlocks = zBlocks;

    grid.K1 = simParams->PMEGridSizeX;
    grid.K2 = simParams->PMEGridSizeY;
    grid.K3 = simParams->PMEGridSizeZ;
    grid.order = simParams->PMEInterpOrder;
    grid.dim2 = grid.K2;
    grid.dim3 = 2 * (grid.K3/2 + 1);

    grid.block1 = ( grid.K1 + xBlocks - 1 ) / xBlocks;
    grid.block2 = ( grid.K2 + yBlocks - 1 ) / yBlocks;
    grid.block3 = ( grid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;  // complex
}

#ifdef NAMD_FFTW
static CmiNodeLock fftw_plan_lock;
#endif


static inline void scale_n_copy_coordinates(CompAtom *x, PmeParticle p[],
                                            int &N,
                                            Lattice &lattice, PmeGrid grid,
                                            //double **qline,
                                            double xmin, double xlen,
                                            double ymin, double ylen,
                                            double zmin, double zlen,
                                            int &scg) {
  Vector origin = lattice.origin();
  Vector recip1 = lattice.a_r();
  Vector recip2 = lattice.b_r();
  Vector recip3 = lattice.c_r();
  double ox = origin.x;
  double oy = origin.y;
  double oz = origin.z;
  double r1x = recip1.x;
  double r1y = recip1.y;
  double r1z = recip1.z;
  double r2x = recip2.x;
  double r2y = recip2.y;
  double r2z = recip2.z;
  double r3x = recip3.x;
  double r3y = recip3.y;
  double r3z = recip3.z;
  int K1 = grid.K1;
  int K2 = grid.K2;
  int K3 = grid.K3;

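  // Fold the Coulomb constant, the nonbonded scaling factor and 1/dielectric
  // into each charge once, so no further prefactor is needed in the
  // reciprocal-space sum.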
  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
                                     * ComputeNonbondedUtil::dielectric_1 );


  int natoms = 0;
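  // Convert each position to scaled (fractional) lattice coordinates, wrap it
  // into the primary cell, and rescale onto the K1 x K2 x K3 charge grid.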
  for (int i=0; i<N; i++) {
    double px = x[i].position.x - ox;
    double py = x[i].position.y - oy;
    double pz = x[i].position.z - oz;
    double sx = px*r1x + py*r1y + pz*r1z;
    double sy = px*r2x + py*r2y + pz*r2z;
    double sz = px*r3x + py*r3y + pz*r3z;
    p[natoms].x = K1 * ( sx - floor(sx) );
    p[natoms].y = K2 * ( sy - floor(sy) );
    p[natoms].z = K3 * ( sz - floor(sz) );
#ifndef ARCH_POWERPC
    //  Check for rare rounding condition where K * ( 1 - epsilon ) == K
    //  which was observed with g++ on Intel x86 architecture.
    if ( p[natoms].x == K1 ) p[natoms].x = 0;
    if ( p[natoms].y == K2 ) p[natoms].y = 0;
    if ( p[natoms].z == K3 ) p[natoms].z = 0;
#endif

#if 1 //stray charge detection
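    // Atoms whose grid coordinates fall outside the region owned by this patch
    // are skipped and counted in scg; the caller reports them through the
    // REDUCTION_STRAY_CHARGE_ERRORS reduction.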
    BigReal u1,u2,u3;
    u1 = (int) (p[natoms].x - xmin);
    if (u1 >= grid.K1) u1 -= grid.K1;
    u2 = (int) (p[natoms].y - ymin);
    if (u2 >= grid.K2) u2 -= grid.K2;
    u3 = (int) (p[natoms].z - zmin);
    if (u3 >= grid.K3) u3 -= grid.K3;

    if ( (u1 < 0.0) || (u1 >= xlen) ||
         (u2 < 0.0) || (u2 >= ylen) ||
         (u3 < 0.0) || (u3 >= zlen) ) {
      scg ++;
      continue;
    }
#endif

    p[natoms].cg = coulomb_sqrt * x[i].charge;
    natoms ++;
  }
  N = natoms;
}


OptPmeMgr::OptPmeMgr() : pmeProxy(thisgroup),
                                 pmeProxyDir(thisgroup), pmeCompute(0) {

  CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;

  myKSpace = 0;
  ungrid_count = 0;
  peersAllocated = 0;

#ifdef NAMD_FFTW
  if ( CmiMyRank() == 0 ) {
    fftw_plan_lock = CmiCreateLock();
  }
#endif
}


void OptPmeMgr::recvArrays(CProxy_OptPmeXPencil x, CProxy_OptPmeYPencil y, CProxy_OptPmeZPencil z) {
  xPencil = x;  yPencil = y;  zPencil = z;
}

void OptPmeMgr::initialize(CkQdMsg *msg) {
    delete msg;

    _iter = 0;

    handle = CmiDirect_manytomany_allocate_handle ();

    SimParameters *simParams = Node::Object()->simParameters;
    PatchMap *patchMap = PatchMap::Object();

    initializePmeGrid (simParams, myGrid);

    if (simParams->langevinPistonOn || simParams->berendsenPressureOn)
      constant_pressure = true;
    else
      constant_pressure = false;

    bool useManyToMany = simParams->useManyToMany;
    // Many-to-many requires that patches and PME pencils are all on different processors
    //int npes = patchMap->numPatches() +
    //         myGrid.xBlocks *  myGrid.yBlocks +
    //         myGrid.zBlocks *  myGrid.xBlocks +
    //         myGrid.yBlocks *  myGrid.zBlocks;

    int npes = patchMap->numPatches();
    if (npes < myGrid.xBlocks *  myGrid.yBlocks)
      npes = myGrid.xBlocks *  myGrid.yBlocks;
    if (npes <  myGrid.zBlocks *  myGrid.xBlocks)
      npes = myGrid.zBlocks *  myGrid.xBlocks;
    if (npes < myGrid.yBlocks *  myGrid.zBlocks)
      npes = myGrid.yBlocks *  myGrid.zBlocks;

    if (npes >= CkNumPes()) {
      if (CkMyPe() == 0)
        printf ("Warning: Not enough processors for the many-to-many optimization\n");
      useManyToMany = false;
    }

    if (useManyToMany) {
      if (CkMyPe() == 0)
        printf ("Enabling the Many-to-many optimization\n");
      // switch to the many-to-many path once the step counter exceeds MANY_TO_MANY_START
      many_to_many_start = MANY_TO_MANY_START;
    }

    if (CkMyRank() == 0) { //create the pencil pme processor map
      pencilPMEProcessors = new char [CkNumPes()];
      memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
    }

    if ( CkMyPe() == 0) {
      iout << iINFO << "PME using " << myGrid.xBlocks << " x " <<
        myGrid.yBlocks << " x " << myGrid.zBlocks <<
        " pencil grid for FFT and reciprocal sum.\n" << endi;

      CProxy_OptPmePencilMapZ   mapz;
      CProxy_OptPmePencilMapY   mapy;
      CProxy_OptPmePencilMapX   mapx;

      mapz = CProxy_OptPmePencilMapZ::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
      mapy = CProxy_OptPmePencilMapY::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
      mapx = CProxy_OptPmePencilMapX::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);

      CkArrayOptions optsz;
      optsz.setMap (mapz);
      CkArrayOptions optsy;
      optsy.setMap (mapy);
      CkArrayOptions optsx;
      optsx.setMap (mapx);

      zPencil = CProxy_OptPmeZPencil::ckNew(optsz);
      yPencil = CProxy_OptPmeYPencil::ckNew(optsy);
      xPencil = CProxy_OptPmeXPencil::ckNew(optsx);

      int x,y,z;
      for (x = 0; x < myGrid.xBlocks; ++x)
        for (y = 0; y < myGrid.yBlocks; ++y ) {
          zPencil(x,y,0).insert();
        }
      zPencil.doneInserting();

      for (z = 0; z < myGrid.zBlocks; ++z )
        for (x = 0; x < myGrid.xBlocks; ++x ) {
          yPencil(x,0,z).insert();
        }
      yPencil.doneInserting();

      for (y = 0; y < myGrid.yBlocks; ++y )
        for (z = 0; z < myGrid.zBlocks; ++z ) {
          xPencil(0,y,z).insert();
        }
      xPencil.doneInserting();

      pmeProxy.recvArrays(xPencil,yPencil,zPencil);
      OptPmePencilInitMsgData msgdata;
      msgdata.grid = myGrid;
      msgdata.xBlocks = myGrid.xBlocks;
      msgdata.yBlocks = myGrid.yBlocks;
      msgdata.zBlocks = myGrid.zBlocks;
      msgdata.xPencil = xPencil;
      msgdata.yPencil = yPencil;
      msgdata.zPencil = zPencil;
      msgdata.constant_pressure = constant_pressure;

      CkCallback cb (CkIndex_OptPmeMgr::recvEvir(NULL), thisProxy[0]);
      msgdata.cb_energy = cb;

      msgdata.pmeProxy = pmeProxyDir;
      xPencil.init(new OptPmePencilInitMsg(msgdata));
      yPencil.init(new OptPmePencilInitMsg(msgdata));
      zPencil.init(new OptPmePencilInitMsg(msgdata));

#if 0
      reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
#endif

#ifndef NAMD_FFTW
      NAMD_die("Sorry, FFTW must be compiled in to use PME.");
#endif
    }

}

void OptPmeMgr::initialize_pencils(CkQdMsg *msg) {
  delete msg;

  SimParameters *simParams = Node::Object()->simParameters;

  PatchMap *patchMap = PatchMap::Object();
  Lattice lattice = simParams->lattice;
  BigReal sysdima = lattice.a_r().unit() * lattice.a();
  BigReal sysdimb = lattice.b_r().unit() * lattice.b();
  BigReal cutoff = simParams->cutoff;
  BigReal patchdim = simParams->patchDimension;
  int numPatches = patchMap->numPatches();

  //fprintf(stderr, "Node %d PE %d trying to allocate %d bytes\n", CmiMyNode(), CmiMyPe(), myGrid.xBlocks*myGrid.yBlocks);

  char *pencilActive = new char[myGrid.xBlocks*myGrid.yBlocks];
  for ( int i=0; i<myGrid.xBlocks; ++i ) {
    for ( int j=0; j<myGrid.yBlocks; ++j ) {
      pencilActive[i*myGrid.yBlocks+j] = 0;
    }
  }

  //Right now we only support one patch per processor
  assert (patchMap->numPatchesOnNode(CkMyPe()) <= 1);
  for ( int pid=0; pid < numPatches; ++pid ) {
    int pnode = patchMap->node(pid);
    if ( pnode != CkMyPe() ) continue;

    BigReal minx = patchMap->min_a(pid);
    BigReal maxx = patchMap->max_a(pid);
    BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
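    // The margin allows for atoms that drift outside the patch between
    // migrations, widening the range of grid lines this patch may touch.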
    // min1 (max1) is smallest (largest) grid line for this patch
    int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
    int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));

    BigReal miny = patchMap->min_b(pid);
    BigReal maxy = patchMap->max_b(pid);
    BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
    // min2 (max2) is smallest (largest) grid line for this patch
    int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
    int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));

    for ( int i=min1; i<=max1; ++i ) {
      int ix = i;
      while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
      while ( ix < 0 ) ix += myGrid.K1;
      for ( int j=min2; j<=max2; ++j ) {
        int jy = j;
        while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
        while ( jy < 0 ) jy += myGrid.K2;
        pencilActive[(ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2)] = 1;
      }
    }
  }

  numPencilsActive = 0;
  for ( int i=0; i<myGrid.xBlocks; ++i ) {
    for ( int j=0; j<myGrid.yBlocks; ++j ) {
      if ( pencilActive[i*myGrid.yBlocks+j] ) {
        ++numPencilsActive;

        zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
      }
    }
  }

  ungrid_count = numPencilsActive;
  delete [] pencilActive;
}


void OptPmeMgr::activate_pencils(CkQdMsg *msg) {
  if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
}


OptPmeMgr::~OptPmeMgr() {
  delete myKSpace;
}

void OptPmeMgr::recvUngrid(OptPmeGridMsg *msg) {
  if ( ungrid_count == 0 ) {
    NAMD_bug("Message order failure in OptPmeMgr::recvUngrid\n");
  }

  pmeCompute->copyPencils(msg);
  delete msg;
  --ungrid_count;

  if ( ungrid_count == 0 ) {
    //CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
    ungridCalc(NULL);
  }
}

void OptPmeMgr::ungridCalc(OptPmeDummyMsg *dmsg) {
  pmeCompute->ungridForces_init();
  if ( CmiMyNodeSize() >= SUBCOMPUTE_NPAR ) {
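    // Split the force interpolation into SUBCOMPUTE_NPAR pieces and farm them
    // out to other ranks on this node, preferring ranks that host no patches;
    // each piece reports back through ungridCalc_subcompute_done().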
    int npar = SUBCOMPUTE_NPAR;
    OptPmeSubComputeMsg *smsg = NULL;

    if (!peersAllocated) {
      peersAllocated = 1;
      int next_rank = CmiMyRank();
      PatchMap *patchMap = PatchMap::Object();

      for (int i = 1; i < npar; ++i) {
        smsg = new (PRIORITY_SIZE) OptPmeSubComputeMsg;
        subcompute_msgs[i] = smsg;
        smsg->src_pe  = CkMyPe();
        smsg->compute = pmeCompute;

        next_rank ++;
        if (next_rank >= CmiMyNodeSize())
          next_rank = 0;
        int n = 0;
        int nr = next_rank;
        while(n < CmiMyNodeSize() &&
              patchMap->numPatchesOnNode(CmiNodeFirst(CmiMyNode())+nr) > 0)
        {
          nr ++;
          if (nr >= CmiMyNodeSize())
            nr = 0;
          n++;
        }
        if (n < CmiMyNodeSize())
          next_rank = nr;  //we are successful, so save this rank

        smsg->dest = next_rank;
      }

      //Local subcompute msg
      smsg = new (PRIORITY_SIZE) OptPmeSubComputeMsg;
      subcompute_msgs[0] = smsg;
      smsg->src_pe  = CkMyPe();
      smsg->compute = pmeCompute;
      smsg->dest    = CmiMyRank();
    }

    int start  = 0;
    int nlocal = pmeCompute->getNumLocalAtoms();
    //CmiAssert (npar <= nlocal);
    if (nlocal < npar)
      npar = nlocal;
    if (npar == 0)
      npar = 1;
    int n_per_iter = nlocal / npar;
    // We don't handle the case where there are very few atoms
    subcompute_count = npar;

    for (int i = 0; i < npar; ++i) {
      smsg = subcompute_msgs[i];
      smsg->start   = start;
      smsg->end     = start + n_per_iter;
      start += n_per_iter;
      if (i == npar - 1)
        smsg->end = nlocal;
      pmeProxy[CmiNodeFirst(CmiMyNode())+smsg->dest].ungridCalc_subcompute(smsg);
    }
  }
  else {
    pmeCompute->ungridForces_compute(0, 0);
    pmeCompute->ungridForces_finalize();
    ungrid_count = numPencilsActive;
  }
}

void OptPmeMgr::ungridCalc_subcompute(OptPmeSubComputeMsg *msg){
  OptPmeCompute *compute = (OptPmeCompute *) msg->compute;
  compute->ungridForces_compute(msg->start, msg->end);
  pmeProxy[msg->src_pe].ungridCalc_subcompute_done(msg);
}

void OptPmeMgr::ungridCalc_subcompute_done(OptPmeSubComputeMsg *msg){
  subcompute_count --;
  //delete msg; //message pointers saved
  if (subcompute_count == 0) {
    pmeCompute->ungridForces_finalize();
    ungrid_count = numPencilsActive;
  }
}

void OptPmeMgr::doWorkOnPeer(OptPmeSubComputeMsg *msg) {
  OptPmeCompute *compute = (OptPmeCompute *) msg->compute;
  compute->doWorkOnPeer();
  //  delete msg; //saved in compute
}

void OptPmeMgr::recvEvir (CkReductionMsg *msg) {

  assert (CkMyPe() == 0);

  double *data = (double *) msg->getData();
  assert (msg->getSize() == 7 * sizeof(double));
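  // data[0] is the reciprocal-space energy; data[1..6] hold the upper triangle
  // of the symmetric virial tensor (xx, xy, xz, yy, yz, zz).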

  //printf ("[%d]: Received Evir\n", CkMyPe());

  double scale = 1.;
  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
  reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;

  delete msg;

  SimParameters *simParams = Node::Object()->simParameters;
  int fef = simParams->fullElectFrequency;
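  // PME is evaluated only every fullElectFrequency steps, but one reduction
  // submission is expected per step, so submit once for each step covered.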
  for (int i = 0; i < fef; i++) {
    reduction->submit();
  }
}

OptPmeCompute::OptPmeCompute(ComputeID c) :
  ComputeHomePatches(c)
{
  DebugM(4,"OptPmeCompute created.\n");

  CProxy_OptPmeMgr::ckLocalBranch(
        CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);

  _initialized = false;

  useAvgPositions = 1;
  localResults = NULL;

  reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
  SimParameters *simParams = Node::Object()->simParameters;
}

void recv_ungrid_done (void *m) {
  OptPmeDummyMsg *msg =  (OptPmeDummyMsg *) m;
  CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
  pmeProxy[msg->to_pe].ungridCalc (msg);
}


void OptPmeCompute::resetPatchCoordinates (const Lattice &lattice) {
  PatchMap *patchMap = PatchMap::Object();
  SimParameters *simParams = Node::Object()->simParameters;

  BigReal sysdima = lattice.a_r().unit() * lattice.a();
  BigReal sysdimb = lattice.b_r().unit() * lattice.b();
  BigReal sysdimc = lattice.c_r().unit() * lattice.c();
  BigReal cutoff = simParams->cutoff;
  BigReal patchdim = simParams->patchDimension;

  int pid = patchList[0].patchID;
  assert (patchList.size() == 1);

  //printf ("Patch[%d]: zstart = %d, zlen = %d, maxz %f, minz %f, marginec %f\n", pid, zstart, zlen, maxz, minz, marginc);

  BigReal minx = patchMap->min_a(pid);
  BigReal maxx = patchMap->max_a(pid);
  BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
  // min1 (max1) is smallest (largest) grid line for this patch
  int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
  int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));

  BigReal miny = patchMap->min_b(pid);
  BigReal maxy = patchMap->max_b(pid);
  BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
  // min2 (max2) is smallest (largest) grid line for this patch
  int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
  int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));

  BigReal minz = patchMap->min_c(pid);
  BigReal maxz = patchMap->max_c(pid);
  BigReal marginc = 0.5 * ( patchdim - cutoff ) / sysdimc;
  // min3 (max3) is smallest (largest) grid line for this patch
  int min3 = ((int) floor(myGrid.K3 * (minz - marginc))) - myGrid.order + 1;
  int max3 = ((int) floor(myGrid.K3 * (maxz + marginc)));

  xlen = max1 - min1 + 1;
  xstart = min1;

  ylen = max2 - min2 + 1;
  ystart = min2;

  zlen = max3 - min3 + 1;
  zstart = min3;
}

void OptPmeCompute::initializeOptPmeCompute () {

  _initialized = true;

  strayChargeErrors = 0;

  SimParameters *simParams = Node::Object()->simParameters;
  PatchMap *patchMap = PatchMap::Object();

  initializePmeGrid (simParams, myGrid);

  alchFepOn    = simParams->alchFepOn;
  alchThermIntOn = simParams->alchThermIntOn;
  lesOn    = simParams->lesOn;
  pairOn   = simParams->pairInteractionOn;

  assert (!alchFepOn);
  assert (!alchThermIntOn);
  assert (!lesOn);
  assert (!pairOn);

  qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
  fsize = myGrid.K1 * myGrid.dim2;
  q_arr = new double*[fsize];
  memset( (void*) q_arr, 0, fsize * sizeof(double*) );
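  // q_arr[i*dim2 + j] will point at the z line of charge-grid values for grid
  // column (i,j); only columns inside this patch's footprint get storage below.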

  assert (myMgr != NULL);

  const Lattice lattice = simParams->lattice;
  resetPatchCoordinates (lattice);

  PencilElement *pencilActive = new PencilElement [myGrid.xBlocks * myGrid.yBlocks];
  memset (pencilActive, 0, myGrid.xBlocks * myGrid.yBlocks * sizeof(PencilElement));

  for ( int i=xstart; i<=xstart+xlen-1; ++i ) {
    int ix = i;
    while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
    while ( ix < 0 ) ix += myGrid.K1;
    for ( int j=ystart; j<=ystart+ylen-1; ++j ) {
      int jy = j;
      while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
      while ( jy < 0 ) jy += myGrid.K2;

      int pencil_idx = (ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2);
      //If not initialized yet, initialize this pencil
      if (! pencilActive[pencil_idx].isActive) {
        pencilActive[pencil_idx].isActive = 1;
        pencilActive[pencil_idx].xmin = ix;
        pencilActive[pencil_idx].xmax = ix;
        pencilActive[pencil_idx].ymin = jy;
        pencilActive[pencil_idx].ymax = jy;
        pencilActive[pencil_idx].ib = ix / myGrid.block1;
        pencilActive[pencil_idx].jb = jy / myGrid.block2;
      }
      else { //pencil has been initialized, update the min and max
        if (pencilActive[pencil_idx].xmin > ix)
          pencilActive[pencil_idx].xmin = ix;
        if (pencilActive[pencil_idx].xmax < ix)
          pencilActive[pencil_idx].xmax = ix;
        if (pencilActive[pencil_idx].ymin > jy)
          pencilActive[pencil_idx].ymin = jy;
        if (pencilActive[pencil_idx].ymax < jy)
          pencilActive[pencil_idx].ymax = jy;
      }
    }
  }

  int nactive = 0;
  for (int ib=0; ib<myGrid.xBlocks; ++ib)
    for (int jb=0; jb<myGrid.yBlocks; ++jb)
      if (pencilActive[ib * myGrid.yBlocks + jb].isActive)
        nactive ++;

  assert (nactive == myMgr->numPencilsActive);

  nzlines = xlen * ylen;
  zline_storage = new double [nzlines * zlen];
  sp_zstorage = new float [nzlines * zlen];

  //printf ("%d: Allocate %d bytes of storage for %d PME Pencils\n", CkMyPe(),
  //      nzlines * zlen * (int)sizeof(double), nactive);

  assert (zline_storage != NULL);
  double * zblock = zline_storage;

  for (int ib=0; ib<myGrid.xBlocks; ++ib) {
    for (int jb=0; jb<myGrid.yBlocks; ++jb) {
      int index = ib * myGrid.yBlocks + jb;
      if (pencilActive[index].isActive) {
        pencilActive[index].data = zblock;
        for (int i = pencilActive[index].xmin; i <= pencilActive[index].xmax; i++)
          for (int j = pencilActive[index].ymin; j <= pencilActive[index].ymax; j++) {
            q_arr[i*myGrid.dim2 + j] = zblock;
            zblock += zlen;
            assert ((char *) zblock <= (char *)(zline_storage + nzlines*zlen));
          }
      }
    }
  }

  pencilVec.resize (nactive);
  nactive = 0;
  for (int ib=0; ib<myGrid.xBlocks; ++ib)
    for (int jb=0; jb<myGrid.yBlocks; ++jb)
      if (pencilActive[ib*myGrid.yBlocks + jb].isActive)
        pencilVec[nactive ++] = pencilActive [ib * myGrid.yBlocks + jb];

  // We don't need the sparse array anymore
  delete [] pencilActive;

  /******************************* Initialize Many to Many ***********************************/
  OptPmeDummyMsg *m = new (PRIORITY_SIZE) OptPmeDummyMsg;
  m->to_pe = CkMyPe();
  CmiDirect_manytomany_initialize_recvbase (myMgr->handle, PHASE_UG,
                                            recv_ungrid_done, m, (char *)sp_zstorage,
                                            myGrid.xBlocks*myGrid.yBlocks, -1);

  CkCallback cbi (CkCallback::ignore);
  CmiDirect_manytomany_initialize_sendbase (myMgr->handle, PHASE_GR, NULL, NULL,
                                            (char *)sp_zstorage,
                                            pencilVec.size(), patchList[0].patchID);

  for (int idx = 0; idx < pencilVec.size(); idx++) {
    int ib =  pencilVec[idx].ib;
    int jb =  pencilVec[idx].jb;
    double * data = pencilVec[idx].data;
    int offset = (data - zline_storage)*sizeof(float);
    int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
    int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
    int fcount = xlen * ylen * zlen;

    CkArrayIndex3D index (ib, jb, 0);
    CProxy_OptPmePencilMapZ zproxy (global_map_z);
    int pe = zproxy.ckLocalBranch()->procNum(0, index);
    CmiDirect_manytomany_initialize_send (myMgr->handle, PHASE_GR, idx,
                                           offset, fcount*sizeof(float), pe);

    int srcnode = ib * myGrid.yBlocks + jb;
    CmiDirect_manytomany_initialize_recv (myMgr->handle, PHASE_UG,
                                          srcnode, offset, fcount*sizeof(float), pe);
  }
  /********************************** End initialize many to many ****************************/

}

OptPmeCompute::~OptPmeCompute()
{
  delete [] zline_storage;
  delete [] sp_zstorage;
  delete [] q_arr;
}


void OptPmeCompute::doWork()
{
  DebugM(4,"Entering OptPmeCompute::doWork().\n");

#ifdef TRACE_COMPUTE_OBJECTS
  double traceObjStartTime = CmiWallTimer();
#endif

  if (!_initialized) initializeOptPmeCompute();

  ResizeArrayIter<PatchElem> ap(patchList);

  // Skip computations if nothing to do.
  if ( ! patchList[0].p->flags.doFullElectrostatics )
  {
    for (ap = ap.begin(); ap != ap.end(); ap++) {
      CompAtom *x = (*ap).positionBox->open();
      Results *r = (*ap).forceBox->open();
      (*ap).positionBox->close(&x);
      (*ap).forceBox->close(&r);
    }
    reduction->submit();
    return;
  }

  myMgr->_iter ++;  //this is a pme step

  // allocate storage
  numLocalAtoms = 0;
  for (ap = ap.begin(); ap != ap.end(); ap++) {
    numLocalAtoms += (*ap).p->getNumAtoms();
  }

  Lattice &lattice = patchList[0].p->flags.lattice;

  localData = new PmeParticle[numLocalAtoms];
  // get positions and charges
  PmeParticle * data_ptr = localData;

  int natoms = 0;
  //  if (myMgr->constant_pressure)
  //resetPatchCoordinates(lattice);  //Update patch coordinates with new lattice

  for (ap = ap.begin(); ap != ap.end(); ap++) {
#ifdef NETWORK_PROGRESS
    CmiNetworkProgress();
#endif

    CompAtom *x = (*ap).positionBox->open();
    if ( patchList[0].p->flags.doMolly ) {
      (*ap).positionBox->close(&x);
      x = (*ap).avgPositionBox->open();
    }
    int numAtoms = (*ap).p->getNumAtoms();
    int order_1  = myGrid.order - 1;
    scale_n_copy_coordinates(x, localData, numAtoms,
                             lattice, myGrid,
                             xstart + order_1, xlen - order_1,
                             ystart + order_1, ylen - order_1,
                             zstart + order_1, zlen - order_1,
                             strayChargeErrors);
    natoms += numAtoms;

    if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
    else { (*ap).positionBox->close(&x); }
  }

  numLocalAtoms = natoms;  //Exclude all atoms out of range

  // calculate self energy
  BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
  evir = 0;
  BigReal selfEnergy = 0;
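  // Ewald self-energy correction: -(ewaldcof / sqrt(pi)) * sum_i q_i^2, where
  // the charges already carry the sqrt(COULOMB * scaling / dielectric) factor.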
  data_ptr = localData;
  int i;
  for(i=0; i<numLocalAtoms; ++i)
  {
    selfEnergy += data_ptr->cg * data_ptr->cg;
    ++data_ptr;
  }
  selfEnergy *= -1. * ewaldcof / SQRT_PI;
  evir[0] += selfEnergy;

#if 0
  if (myMgr->_iter > many_to_many_start) {
    OptPmeSubComputeMsg *smsg = myMgr->subcompute_msgs[1]; //not self
    CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
    pmeProxy[CmiNodeFirst(CmiMyNode())+smsg->dest].doWorkOnPeer(smsg);
  }
  else
#endif
    doWorkOnPeer();

#ifdef TRACE_COMPUTE_OBJECTS
  traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
#endif

}

void OptPmeCompute::doWorkOnPeer()
{
  Lattice &lattice = patchList[0].p->flags.lattice;
  double **q = q_arr;
  memset( (void*) zline_storage, 0, zlen * nzlines * sizeof(double) );

  myRealSpace = new OptPmeRealSpace(myGrid,numLocalAtoms);
  if (!strayChargeErrors)
    myRealSpace->fill_charges(q, localData, zstart, zlen);

  if (myMgr->constant_pressure && patchList[0].patchID == 0)
    myMgr->xPencil.recvLattice (lattice);

  if (myMgr->_iter <= many_to_many_start)
    sendPencils();
  else {
    pme_d2f (sp_zstorage, zline_storage, nzlines * zlen);
    CmiDirect_manytomany_start (myMgr->handle, PHASE_GR);
  }
}



void OptPmeCompute::sendPencils() {

  //iout << iPE << " Sending charge grid for " << numLocalAtoms << " atoms to FFT with " << myMgr->numPencilsActive << " messages" <<".\n" << endi;

  int xBlocks = myGrid.xBlocks;
  int yBlocks = myGrid.yBlocks;
  int zBlocks = myGrid.zBlocks;

  int K1 = myGrid.K1;
  int K2 = myGrid.K2;
  int dim2 = myGrid.dim2;
  int dim3 = myGrid.dim3;
  int block1 = myGrid.block1;
  int block2 = myGrid.block2;

  //Lattice lattice = patchList[0].p->flags.lattice;

  int nactive = 0;
  for (int idx = 0; idx < pencilVec.size(); idx++) {
    int xstart = pencilVec[idx].xmin;
    int ystart = pencilVec[idx].ymin;
    int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
    int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
    int ib     = pencilVec[idx].ib;
    int jb     = pencilVec[idx].jb;
    double *data = pencilVec[idx].data;

    int fcount = xlen * ylen * zlen;
    OptPmeGridMsg *msg = new (fcount, PRIORITY_SIZE) OptPmeGridMsg;
    msg->zstart = zstart;
    msg->zlen   = zlen;
    msg->xstart = xstart;
    msg->xlen   = xlen;
    msg->ystart = ystart;
    msg->ylen   = ylen;
    msg->sourceNode = CkMyPe();
    msg->patchID    = patchList[0].patchID;

    float *qmsg = msg->qgrid;
#pragma disjoint (*data, *qmsg)
#pragma unroll(8)
    for ( int k=0; k< fcount; ++k )
      *(qmsg++) = data[k];

    myMgr->zPencil(ib,jb,0).recvGrid(msg);
  }
}


void OptPmeCompute::copyPencils(OptPmeGridMsg *msg) {

  if (!_initialized) initializeOptPmeCompute();

  int ibegin = msg->xstart;
  int iend   = msg->xstart + msg->xlen;
  int jbegin = msg->ystart;
  int jend   = msg->ystart + msg->ylen;
  int fcount = zlen * msg->xlen * msg->ylen;

  float *qmsg = msg->qgrid;
  double *data = q_arr[ibegin * myGrid.dim2 + jbegin];
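  // The z lines belonging to one pencil were allocated contiguously in
  // zline_storage, so the whole pencil is copied back with a single flat loop
  // starting at the first (ibegin, jbegin) line.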

#pragma disjoint (*qmsg, *data)
#pragma unroll(8)
  for ( int k=0; k<fcount; ++k )
    data[k] = *(qmsg++);
}

void OptPmeCompute::ungridForces_init() {

    //printf ("%d: In OptPMECompute::ungridforces_init\n", CkMyPe());

    if (myMgr->_iter > many_to_many_start)
      pme_f2d (zline_storage, sp_zstorage, nzlines * zlen);

    localResults = new Vector[numLocalAtoms];
    //memset (localResults, 0, sizeof (Vector) * numLocalAtoms);
}

void OptPmeCompute::ungridForces_compute(int    istart,
                                         int    iend)
{
    Vector *gridResults;
    gridResults = localResults;

    if (iend == 0)
      iend = numLocalAtoms;

    SimParameters *simParams = Node::Object()->simParameters;
    Vector pairForce = 0.;
    Lattice &lattice = patchList[0].p->flags.lattice;
    if(!simParams->commOnly) {
#ifdef NETWORK_PROGRESS
      CmiNetworkProgress();
#endif
      if (!strayChargeErrors) {
        myRealSpace->compute_forces(q_arr, localData, gridResults,
                                    zstart, zlen, istart, iend);
        scale_forces(gridResults + istart, iend - istart, lattice);
      }
    }
}

void OptPmeCompute::ungridForces_finalize() {
    SimParameters *simParams = Node::Object()->simParameters;
    delete myRealSpace;

    delete [] localData;
    //    delete [] localPartition;

    Vector *results_ptr = localResults;
    ResizeArrayIter<PatchElem> ap(patchList);

    // add in forces
    for (ap = ap.begin(); ap != ap.end(); ap++) {
      Results *r = (*ap).forceBox->open();
      Force *f = r->f[Results::slow];
      int numAtoms = (*ap).p->getNumAtoms();

      if ( ! strayChargeErrors && ! simParams->commOnly ) {
        for(int i=0; i<numAtoms; ++i) {
          f[i].x += results_ptr->x;
          f[i].y += results_ptr->y;
          f[i].z += results_ptr->z;
          ++results_ptr;
        }
      }

      (*ap).forceBox->close(&r);
    }

    delete [] localResults;

    double scale = 1.;

    reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
    reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
    strayChargeErrors = 0;
    reduction->submit();
}



#include "fftlib.C"
#include "OptPmeMgr.def.h"

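// Convert between the double-precision grid used locally and the
// single-precision buffers exchanged through the many-to-many interface.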
void pme_f2d (double *dst, float *src, int N) {
#pragma disjoint (*src, *dst)
#pragma unroll (8)
  for (int i = 0; i < N; i++)
    dst[i] = src[i];
}

void pme_d2f (float *dst, double *src, int N) {
#pragma disjoint (*src, *dst)
#pragma unroll (8)
  for (int i = 0; i < N; i++)
    dst[i] = src[i];
}
