Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

OptPme.C

Go to the documentation of this file.
00001 
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_3
00009 #include <fftw3.h>
00010 #else
00011 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00012 #include <fftw.h>
00013 #include <rfftw.h>
00014 #else
00015 #include <sfftw.h>
00016 #include <srfftw.h>
00017 #endif
00018 #endif
00019 #endif
00020 
00021 #include <assert.h>
00022 
00023 #include "InfoStream.h"
00024 #include "Node.h"
00025 #include "PatchMap.h"
00026 #include "PatchMap.inl"
00027 #include "AtomMap.h"
00028 #include "OptPme.h"
00029 #include "OptPmeMgr.decl.h"
00030 #include "OptPmeRealSpace.h"
00031 #include "PmeKSpace.h"
00032 #include "ComputeNonbondedUtil.h"
00033 #include "PatchMgr.h"
00034 #include "Molecule.h"
00035 #include "ReductionMgr.h"
00036 //#include "ComputeMgr.h"
00037 #include "ComputeMgr.decl.h"
00038 // #define DEBUGM
00039 #define MIN_DEBUG_LEVEL 3
00040 #include "Debug.h"
00041 #include "SimParameters.h"
00042 #include "WorkDistrib.h"
00043 #include "varsizemsg.h"
00044 #include "Random.h"
00045 #include "Priorities.h"
00046 
00047 
00048 extern char *pencilPMEProcessors;
00049 
00050 #include "fftlib.h"
00051 #include "fftmap.h"
00052 
00053 //Very large integer
00054 int many_to_many_start = 0x7fffffff;
00055 
00056 class OptPmeMgr : public BOCclass {
00057 public:
00058   friend class OptPmeCompute;
00059   OptPmeMgr();
00060   ~OptPmeMgr();
00061 
00062   void initialize(CkQdMsg*);
00063   void initialize_pencils(CkQdMsg*);
00064   void activate_pencils(CkQdMsg*);
00065   void recvArrays(CProxy_OptPmeXPencil, CProxy_OptPmeYPencil, CProxy_OptPmeZPencil);
00066 
00067   void recvUngrid(OptPmeGridMsg *);
00068   void ungridCalc(OptPmeDummyMsg *);
00069   void recvEvir (CkReductionMsg *msg);
00070 
00071   void setCompute(OptPmeCompute *c) { pmeCompute = c; c->setMgr(this); }
00072 
00073 private:
00074   CProxy_OptPmeMgr pmeProxy;
00075   CProxy_OptPmeMgr pmeProxyDir;
00076   OptPmeCompute *pmeCompute;
00077   PmeGrid myGrid;
00078   PmeKSpace *myKSpace;
00079 
00080   CProxy_OptPmeXPencil xPencil;
00081   CProxy_OptPmeYPencil yPencil;
00082   CProxy_OptPmeZPencil zPencil;
00083   int    numPencilsActive;
00084   int    ungrid_count, usePencils;
00085   SubmitReduction *reduction;
00086   SubmitReduction *amd_reduction;
00087   int    _iter;
00088   void   *handle;
00089   bool   constant_pressure;    //Does the simulation need constant pressure
00090 };
00091 
00092 
00093 void pme_f2d (double *dst, float *src, int N);
00094 void pme_d2f (float *dst, double *src, int N);
00095 
00096 static inline void initializePmeGrid (SimParameters *simParams, PmeGrid &grid) {
00097     int xBlocks = 0, yBlocks = 0, zBlocks= 0;
00098 
00099     if ( simParams->PMEPencils > 1 ) {
00100       xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00101     } else {
00102       int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00103                   * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00104       if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00105       int nb = (int) sqrt((float)nb2);
00106       xBlocks = zBlocks = nb;
00107       yBlocks = nb2 / nb;
00108     }
00109     
00110     int dimx = simParams->PMEGridSizeX;
00111     int bx = 1 + ( dimx - 1 ) / xBlocks;
00112     xBlocks = 1 + ( dimx - 1 ) / bx;
00113     
00114     int dimy = simParams->PMEGridSizeY;
00115     int by = 1 + ( dimy - 1 ) / yBlocks;
00116     yBlocks = 1 + ( dimy - 1 ) / by;
00117     
00118     int dimz = simParams->PMEGridSizeZ / 2 + 1;  // complex
00119     int bz = 1 + ( dimz - 1 ) / zBlocks;
00120     zBlocks = 1 + ( dimz - 1 ) / bz;
00121 
00122     grid.xBlocks = xBlocks;
00123     grid.yBlocks = yBlocks;
00124     grid.zBlocks = zBlocks;
00125 
00126     grid.K1 = simParams->PMEGridSizeX;
00127     grid.K2 = simParams->PMEGridSizeY;
00128     grid.K3 = simParams->PMEGridSizeZ;
00129     grid.order = simParams->PMEInterpOrder;
00130     grid.dim2 = grid.K2;
00131     grid.dim3 = 2 * (grid.K3/2 + 1);
00132 
00133     grid.block1 = ( grid.K1 + xBlocks - 1 ) / xBlocks;
00134     grid.block2 = ( grid.K2 + yBlocks - 1 ) / yBlocks;
00135     grid.block3 = ( grid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;  // complex
00136 }
00137 
00138 #ifdef NAMD_FFTW
00139 static CmiNodeLock fftw_plan_lock;
00140 #endif
00141 
00142 
00143 static inline void scale_n_copy_coordinates(CompAtom *x, PmeParticle p[], 
00144                                             int &N, 
00145                                             Lattice &lattice, PmeGrid grid,
00146                                             //double **qline,
00147                                             double xmin, double xlen,
00148                                             double ymin, double ylen,
00149                                             double zmin, double zlen,
00150                                             int &scg) {
00151   Vector origin = lattice.origin();
00152   Vector recip1 = lattice.a_r();
00153   Vector recip2 = lattice.b_r();
00154   Vector recip3 = lattice.c_r();
00155   double ox = origin.x;
00156   double oy = origin.y;
00157   double oz = origin.z;
00158   double r1x = recip1.x;
00159   double r1y = recip1.y;
00160   double r1z = recip1.z;
00161   double r2x = recip2.x;
00162   double r2y = recip2.y;
00163   double r2z = recip2.z;
00164   double r3x = recip3.x;
00165   double r3y = recip3.y;
00166   double r3z = recip3.z;
00167   int K1 = grid.K1;
00168   int K2 = grid.K2;
00169   int K3 = grid.K3;
00170 
00171   const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
00172                                      * ComputeNonbondedUtil::dielectric_1 );
00173 
00174 
00175   int natoms = 0;
00176   for (int i=0; i<N; i++) {
00177     double px = x[i].position.x - ox;
00178     double py = x[i].position.y - oy;
00179     double pz = x[i].position.z - oz;
00180     double sx = px*r1x + py*r1y + pz*r1z;
00181     double sy = px*r2x + py*r2y + pz*r2z;
00182     double sz = px*r3x + py*r3y + pz*r3z;
00183     p[natoms].x = K1 * ( sx - floor(sx) );
00184     p[natoms].y = K2 * ( sy - floor(sy) );
00185     p[natoms].z = K3 * ( sz - floor(sz) );
00186     //  Check for rare rounding condition where K * ( 1 - epsilon ) == K      
00187     //  which was observed with g++ on Intel x86 architecture. 
00188     if ( p[natoms].x == K1 ) p[natoms].x = 0;
00189     if ( p[natoms].y == K2 ) p[natoms].y = 0;
00190     if ( p[natoms].z == K3 ) p[natoms].z = 0;
00191 
00192     BigReal u1,u2,u3;
00193     u1 = (int) (p[natoms].x - xmin);
00194     if (u1 >= grid.K1) u1 -= grid.K1;    
00195     u2 = (int) (p[natoms].y - ymin);
00196     if (u2 >= grid.K2) u2 -= grid.K2;    
00197     u3 = (int) (p[natoms].z - zmin);
00198     if (u3 >= grid.K3) u3 -= grid.K3;
00199     
00200     if ( (u1 < 0.0) || (u1 >= xlen) || 
00201          (u2 < 0.0) || (u2 >= ylen) || 
00202          (u3 < 0.0) || (u3 >= zlen) ) {
00203       scg ++;
00204       continue;
00205     }
00206     
00207     p[natoms].cg = coulomb_sqrt * x[i].charge;
00208     natoms ++;
00209   }
00210   N = natoms;
00211 }
00212 
00213 
00214 OptPmeMgr::OptPmeMgr() : pmeProxy(thisgroup), 
00215                                  pmeProxyDir(thisgroup), pmeCompute(0) {
00216 
00217   CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00218 
00219   myKSpace = 0;
00220   ungrid_count = 0;
00221   usePencils = 0;
00222 
00223 #ifdef NAMD_FFTW
00224   if ( CmiMyRank() == 0 ) {
00225     fftw_plan_lock = CmiCreateLock();
00226   }
00227 #endif    
00228 }
00229 
00230 
00231 void OptPmeMgr::recvArrays(CProxy_OptPmeXPencil x, CProxy_OptPmeYPencil y, CProxy_OptPmeZPencil z) {
00232   xPencil = x;  yPencil = y;  zPencil = z;
00233 }
00234 
00235 void OptPmeMgr::initialize(CkQdMsg *msg) {
00236     delete msg;
00237 
00238     _iter = 0;
00239 
00240 #if CHARM_VERSION > 60000
00241     handle = CmiDirect_manytomany_allocate_handle ();
00242 #endif
00243 
00244     SimParameters *simParams = Node::Object()->simParameters;
00245     PatchMap *patchMap = PatchMap::Object();
00246     
00247     if (simParams->PMEPencils > 0)
00248       usePencils = 1;
00249     else return;
00250 
00251     initializePmeGrid (simParams, myGrid);    
00252 
00253     if (simParams->langevinPistonOn || simParams->berendsenPressureOn)  
00254       constant_pressure = true;
00255     else
00256       constant_pressure = false;      
00257 
00258     bool useManyToMany = simParams->useManyToMany;
00259     //Many-to-many requires that patches and pmepencils are all on different processors
00260     int npes = patchMap->numPatches() + 
00261                myGrid.xBlocks *  myGrid.yBlocks + 
00262                myGrid.zBlocks *  myGrid.xBlocks +
00263                myGrid.yBlocks *  myGrid.zBlocks;
00264 
00265     if (npes >= CkNumPes()) {
00266       if (CkMyPe() == 0)
00267         printf ("Warning : Not enough processors for the many-to-many optimization \n");      
00268       useManyToMany = false;
00269     }
00270     
00271 #if CHARM_VERSION > 60000
00272     if (useManyToMany)  {
00273       if (CkMyPe() == 0)
00274         printf ("Enabling the Many-to-many optimization\n");
00275       //defaults to max integer
00276       many_to_many_start = MANY_TO_MANY_START;
00277     }
00278 #endif
00279 
00280     if ( CkMyPe() == 0) {
00281       iout << iINFO << "PME using " << myGrid.xBlocks << " x " <<
00282         myGrid.yBlocks << " x " << myGrid.zBlocks <<
00283         " pencil grid for FFT and reciprocal sum.\n" << endi;
00284 
00285       pencilPMEProcessors = new char [CkNumPes()];
00286       memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00287       
00288       CProxy_OptPmePencilMapZ   mapz;      
00289       CProxy_OptPmePencilMapY   mapy;
00290       CProxy_OptPmePencilMapX   mapx;
00291       
00292       mapz = CProxy_OptPmePencilMapZ::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);      
00293       mapy = CProxy_OptPmePencilMapY::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00294       mapx = CProxy_OptPmePencilMapX::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00295       
00296       CkArrayOptions optsz;
00297       optsz.setMap (mapz);
00298       CkArrayOptions optsy;
00299       optsy.setMap (mapy);
00300       CkArrayOptions optsx;
00301       optsx.setMap (mapx);
00302       
00303       zPencil = CProxy_OptPmeZPencil::ckNew(optsz);  
00304       yPencil = CProxy_OptPmeYPencil::ckNew(optsy);  
00305       xPencil = CProxy_OptPmeXPencil::ckNew(optsx);  
00306       
00307       int x,y,z;
00308       for (x = 0; x < myGrid.xBlocks; ++x)
00309         for (y = 0; y < myGrid.yBlocks; ++y ) {
00310           zPencil(x,y,0).insert();
00311         }
00312       zPencil.doneInserting();
00313       
00314       for (z = 0; z < myGrid.zBlocks; ++z )
00315         for (x = 0; x < myGrid.xBlocks; ++x ) {
00316           yPencil(x,0,z).insert();
00317         }
00318       yPencil.doneInserting();
00319       
00320       for (y = 0; y < myGrid.yBlocks; ++y )     
00321         for (z = 0; z < myGrid.zBlocks; ++z ) {
00322           xPencil(0,y,z).insert();
00323         }
00324       xPencil.doneInserting();      
00325       
00326       pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00327       OptPmePencilInitMsgData msgdata;
00328       msgdata.grid = myGrid;
00329       msgdata.xBlocks = myGrid.xBlocks;
00330       msgdata.yBlocks = myGrid.yBlocks;
00331       msgdata.zBlocks = myGrid.zBlocks;
00332       msgdata.xPencil = xPencil;
00333       msgdata.yPencil = yPencil;
00334       msgdata.zPencil = zPencil;
00335       msgdata.constant_pressure = constant_pressure;
00336 
00337       CkCallback cb (CkIndex_OptPmeMgr::recvEvir(NULL), thisProxy[0]);
00338       msgdata.cb_energy = cb;
00339 
00340       msgdata.pmeProxy = pmeProxyDir;
00341       xPencil.init(new OptPmePencilInitMsg(msgdata));
00342       yPencil.init(new OptPmePencilInitMsg(msgdata));
00343       zPencil.init(new OptPmePencilInitMsg(msgdata));
00344       
00345       reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00346       if (simParams->accelMDOn) {
00347          amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00348       } else {
00349          amd_reduction = NULL;
00350       }
00351 
00352 #ifndef NAMD_FFTW
00353       NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00354 #endif
00355     }
00356 
00357 }
00358 
00359 void OptPmeMgr::initialize_pencils(CkQdMsg *msg) {
00360   delete msg;
00361 
00362   SimParameters *simParams = Node::Object()->simParameters;
00363 
00364   PatchMap *patchMap = PatchMap::Object();
00365   Lattice lattice = simParams->lattice;
00366   BigReal sysdima = lattice.a_r().unit() * lattice.a();
00367   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00368   BigReal cutoff = simParams->cutoff;
00369   BigReal patchdim = simParams->patchDimension;
00370   int numPatches = patchMap->numPatches();
00371 
00372   char *pencilActive = new char[myGrid.xBlocks*myGrid.yBlocks];
00373   for ( int i=0; i<myGrid.xBlocks; ++i ) {
00374     for ( int j=0; j<myGrid.yBlocks; ++j ) {
00375       pencilActive[i*myGrid.yBlocks+j] = 0;
00376     }
00377   }
00378 
00379   //Right now we only support one patch per processor
00380   assert (patchMap->numPatchesOnNode(CkMyPe()) <= 1);
00381   for ( int pid=0; pid < numPatches; ++pid ) {
00382     int pnode = patchMap->node(pid);
00383     if ( pnode != CkMyPe() ) continue;
00384 
00385     BigReal minx = patchMap->min_a(pid);
00386     BigReal maxx = patchMap->max_a(pid);
00387     BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00388     // min1 (max1) is smallest (largest) grid line for this patch
00389     int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00390     int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00391 
00392     BigReal miny = patchMap->min_b(pid);
00393     BigReal maxy = patchMap->max_b(pid);
00394     BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00395     // min2 (max2) is smallest (largest) grid line for this patch
00396     int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00397     int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00398 
00399     for ( int i=min1; i<=max1; ++i ) {
00400       int ix = i;
00401       while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00402       while ( ix < 0 ) ix += myGrid.K1;
00403       for ( int j=min2; j<=max2; ++j ) {
00404         int jy = j;
00405         while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00406         while ( jy < 0 ) jy += myGrid.K2;
00407         pencilActive[(ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2)] = 1;
00408       }
00409     }
00410   }
00411 
00412   numPencilsActive = 0;
00413   for ( int i=0; i<myGrid.xBlocks; ++i ) {
00414     for ( int j=0; j<myGrid.yBlocks; ++j ) {
00415       if ( pencilActive[i*myGrid.yBlocks+j] ) {
00416         ++numPencilsActive;
00417 
00418         zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00419       }
00420     }
00421   }
00422 
00423   ungrid_count = numPencilsActive;
00424   delete [] pencilActive;
00425 }
00426 
00427 
00428 void OptPmeMgr::activate_pencils(CkQdMsg *msg) {
00429   if ( ! usePencils ) return;
00430   if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00431 }
00432 
00433 
00434 OptPmeMgr::~OptPmeMgr() {
00435   delete myKSpace;
00436 }
00437 
00438 void OptPmeMgr::recvUngrid(OptPmeGridMsg *msg) {
00439   if ( ungrid_count == 0 ) {
00440     NAMD_bug("Message order failure in OptPmeMgr::recvUngrid\n");
00441   }
00442     
00443   pmeCompute->copyPencils(msg);
00444   delete msg;
00445   --ungrid_count;
00446 
00447   if ( ungrid_count == 0 ) {
00448     //CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
00449     ungridCalc(NULL);
00450   }
00451 }
00452 
00453 void OptPmeMgr::ungridCalc(OptPmeDummyMsg *msg) {    
00454   pmeCompute->ungridForces();
00455   ungrid_count = numPencilsActive;
00456 }
00457 
00458 
00459 void OptPmeMgr::recvEvir (CkReductionMsg *msg) {
00460 
00461   assert (CkMyPe() == 0);
00462 
00463   double *data = (double *) msg->getData();
00464   assert (msg->getSize() == 7 * sizeof(double));
00465 
00466   //printf ("[%d]: Received Evir\n", CkMyPe());
00467 
00468   double scale = 1.;
00469   reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00470   reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00471   reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00472   reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00473   reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00474   reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00475   reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00476   reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00477   reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00478   reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;   
00479 
00480   if (amd_reduction) {
00481     amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00482     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00483     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00484     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00485     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00486     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00487     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00488     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00489     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00490     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;
00491   }
00492 
00493   delete msg;
00494 
00495   SimParameters *simParams = Node::Object()->simParameters;
00496   int fef = simParams->fullElectFrequency;
00497   for (int i = 0; i < fef; i++) {
00498     reduction->submit();
00499     if (amd_reduction) amd_reduction->submit();
00500   }
00501 }
00502 
00503 OptPmeCompute::OptPmeCompute(ComputeID c) :
00504   ComputeHomePatches(c)
00505 {
00506   DebugM(4,"OptPmeCompute created.\n");
00507 
00508   CProxy_OptPmeMgr::ckLocalBranch(
00509         CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
00510 
00511   _initialized = false;
00512 
00513   useAvgPositions = 1;
00514 
00515   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00516   SimParameters *simParams = Node::Object()->simParameters;
00517   if (simParams->accelMDOn) {
00518     amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00519   } else {
00520     amd_reduction = NULL;
00521   }
00522 }
00523 
00524 void recv_ungrid_done (void *m) {
00525   OptPmeDummyMsg *msg =  (OptPmeDummyMsg *) m;  
00526   CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00527   pmeProxy[CkMyPe()].ungridCalc (msg);
00528 }  
00529 
00530 
00531 void OptPmeCompute::resetPatchCoordinates (const Lattice &lattice) {
00532   PatchMap *patchMap = PatchMap::Object();    
00533   SimParameters *simParams = Node::Object()->simParameters;
00534 
00535   BigReal sysdima = lattice.a_r().unit() * lattice.a();
00536   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00537   BigReal sysdimc = lattice.c_r().unit() * lattice.c();
00538   BigReal cutoff = simParams->cutoff;
00539   BigReal patchdim = simParams->patchDimension;
00540 
00541   int pid = patchList[0].patchID; 
00542   assert (patchList.size() == 1);
00543 
00544   //printf ("Patch[%d]: zstart = %d, zlen = %d, maxz %f, minz %f, marginec %f\n", pid, zstart, zlen, maxz, minz, marginc);
00545 
00546   BigReal minx = patchMap->min_a(pid);
00547   BigReal maxx = patchMap->max_a(pid);
00548   BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00549   // min1 (max1) is smallest (largest) grid line for this patch
00550   int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00551   int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00552 
00553   BigReal miny = patchMap->min_b(pid);
00554   BigReal maxy = patchMap->max_b(pid);
00555   BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00556   // min2 (max2) is smallest (largest) grid line for this patch
00557   int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00558   int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00559 
00560   BigReal minz = patchMap->min_c(pid);
00561   BigReal maxz = patchMap->max_c(pid);
00562   BigReal marginc = 0.5 * ( patchdim - cutoff ) / sysdimc;
00563   // min3 (max3) is smallest (largest) grid line for this patch
00564   int min3 = ((int) floor(myGrid.K3 * (minz - marginc))) - myGrid.order + 1;
00565   int max3 = ((int) floor(myGrid.K3 * (maxz + marginc)));
00566 
00567   xlen = max1 - min1 + 1;
00568   xstart = min1;  
00569   
00570   ylen = max2 - min2 + 1;
00571   ystart = min2;  
00572   
00573   zlen = max3 - min3 + 1;
00574   zstart = min3;  
00575 }
00576 
00577 void OptPmeCompute::initializeOptPmeCompute () {
00578   
00579   _initialized = true;
00580   
00581   strayChargeErrors = 0;
00582 
00583   SimParameters *simParams = Node::Object()->simParameters;
00584   PatchMap *patchMap = PatchMap::Object();
00585 
00586   initializePmeGrid (simParams, myGrid);
00587 
00588   alchFepOn    = simParams->alchFepOn;
00589   alchThermIntOn = simParams->alchThermIntOn;
00590   lesOn    = simParams->lesOn;
00591   pairOn   = simParams->pairInteractionOn;
00592 
00593   assert (!alchFepOn);
00594   assert (!alchThermIntOn);
00595   assert (!lesOn);
00596   assert (!pairOn);
00597   
00598   qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
00599   fsize = myGrid.K1 * myGrid.dim2;
00600   q_arr = new double*[fsize];
00601   memset( (void*) q_arr, 0, fsize * sizeof(double*) );
00602 
00603   assert (myMgr != NULL);
00604 
00605   const Lattice lattice = simParams->lattice;
00606   resetPatchCoordinates (lattice);
00607   
00608   PencilElement *pencilActive = new PencilElement [myGrid.xBlocks * myGrid.yBlocks];
00609   memset (pencilActive, 0, myGrid.xBlocks * myGrid.yBlocks * sizeof(PencilElement));
00610 
00611   for ( int i=xstart; i<=xstart+xlen-1; ++i ) {
00612     int ix = i;
00613     while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00614     while ( ix < 0 ) ix += myGrid.K1;
00615     for ( int j=ystart; j<=ystart+ylen-1; ++j ) {
00616       int jy = j;
00617       while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00618       while ( jy < 0 ) jy += myGrid.K2;
00619       
00620       int pencil_idx = (ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2);
00621       //If not initialized yet, initialize this pencil
00622       if (! pencilActive[pencil_idx].isActive) {
00623         pencilActive[pencil_idx].isActive = 1;
00624         pencilActive[pencil_idx].xmin = ix;
00625         pencilActive[pencil_idx].xmax = ix;
00626         pencilActive[pencil_idx].ymin = jy;
00627         pencilActive[pencil_idx].ymax = jy;
00628         pencilActive[pencil_idx].ib = ix / myGrid.block1;
00629         pencilActive[pencil_idx].jb = jy / myGrid.block2;
00630       }
00631       else { //pencil has been initialized, update the min and max
00632         if (pencilActive[pencil_idx].xmin > ix)
00633           pencilActive[pencil_idx].xmin = ix;
00634         if (pencilActive[pencil_idx].xmax < ix)
00635           pencilActive[pencil_idx].xmax = ix;   
00636         if (pencilActive[pencil_idx].ymin > jy)
00637           pencilActive[pencil_idx].ymin = jy;
00638         if (pencilActive[pencil_idx].ymax < jy)
00639           pencilActive[pencil_idx].ymax = jy;
00640       }
00641     }
00642   }
00643   
00644   int nactive = 0; 
00645   for (int ib=0; ib<myGrid.xBlocks; ++ib) 
00646     for (int jb=0; jb<myGrid.yBlocks; ++jb) 
00647       if (pencilActive[ib * myGrid.yBlocks + jb].isActive) 
00648         nactive ++;
00649 
00650   assert (nactive == myMgr->numPencilsActive);  
00651   
00652   nzlines = xlen * ylen;
00653   zline_storage = new double [nzlines * zlen];
00654   sp_zstorage = new float [nzlines * zlen];  
00655 
00656   //printf ("%d: Allocate %d bytes of storage for %d PME Pencils\n", CkMyPe(), 
00657   //      nzlines * zlen * (int)sizeof(double), nactive);
00658   
00659   assert (zline_storage != NULL);
00660   double * zblock = zline_storage;
00661   
00662   for (int ib=0; ib<myGrid.xBlocks; ++ib) {
00663     for (int jb=0; jb<myGrid.yBlocks; ++jb) {
00664       int index = ib * myGrid.yBlocks + jb;
00665       if (pencilActive[index].isActive) {
00666         pencilActive[index].data = zblock;
00667         for (int i = pencilActive[index].xmin; i <= pencilActive[index].xmax; i++)
00668           for (int j = pencilActive[index].ymin; j <= pencilActive[index].ymax; j++) {      
00669             q_arr[i*myGrid.dim2 + j] = zblock;
00670             zblock += zlen;
00671             assert ((char *) zblock <= (char *)(zline_storage + nzlines*zlen));
00672           }
00673       }
00674     }
00675   }
00676   
00677   pencilVec.resize (nactive);
00678   nactive = 0;
00679   for (int ib=0; ib<myGrid.xBlocks; ++ib) 
00680     for (int jb=0; jb<myGrid.yBlocks; ++jb) 
00681       if (pencilActive[ib*myGrid.yBlocks + jb].isActive) 
00682         pencilVec[nactive ++] = pencilActive [ib * myGrid.yBlocks + jb];
00683 
00684   //We dont need the sparse array anymore
00685   delete [] pencilActive;
00686 
00687 #if CHARM_VERSION > 60000
00688   /******************************* Initialize Many to Many ***********************************/
00689   OptPmeDummyMsg *m = new (PRIORITY_SIZE) OptPmeDummyMsg;
00690   CmiDirect_manytomany_initialize_recvbase (myMgr->handle, PHASE_UG, 
00691                                             recv_ungrid_done, m, (char *)sp_zstorage, 
00692                                             myGrid.xBlocks*myGrid.yBlocks, -1); 
00693   
00694   CkCallback cbi (CkCallback::ignore);
00695   CmiDirect_manytomany_initialize_sendbase (myMgr->handle, PHASE_GR, NULL, NULL, 
00696                                             (char *)sp_zstorage, 
00697                                             pencilVec.size(), patchList[0].patchID);
00698 
00699   for (int idx = 0; idx < pencilVec.size(); idx++) {
00700     int ib =  pencilVec[idx].ib;
00701     int jb =  pencilVec[idx].jb;
00702     double * data = pencilVec[idx].data;
00703     int offset = (data - zline_storage)*sizeof(float);
00704     int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00705     int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00706     int fcount = xlen * ylen * zlen;
00707 
00708     CkArrayIndex3D index (ib, jb, 0);
00709     CProxy_OptPmePencilMapZ zproxy (global_map_z);
00710     int pe = zproxy.ckLocalBranch()->procNum(0, index);
00711     CmiDirect_manytomany_initialize_send (myMgr->handle, PHASE_GR, idx, 
00712                                            offset, fcount*sizeof(float), pe);
00713     
00714     int srcnode = ib * myGrid.yBlocks + jb;
00715     CmiDirect_manytomany_initialize_recv (myMgr->handle, PHASE_UG, 
00716                                           srcnode, offset, fcount*sizeof(float), pe);
00717   }
00718   /********************************** End initialize many to many ****************************/
00719 #endif
00720 
00721 }
00722 
00723 OptPmeCompute::~OptPmeCompute()
00724 {
00725   delete [] zline_storage;
00726   delete [] q_arr;
00727 }
00728 
00729 
00730 void OptPmeCompute::doWork()
00731 {
00732   DebugM(4,"Entering OptPmeCompute::doWork().\n");
00733 
00734 #ifdef TRACE_COMPUTE_OBJECTS
00735   double traceObjStartTime = CmiWallTimer();
00736 #endif
00737 
00738   if (!_initialized) initializeOptPmeCompute();
00739 
00740   ResizeArrayIter<PatchElem> ap(patchList);
00741 
00742   // Skip computations if nothing to do.
00743   if ( ! patchList[0].p->flags.doFullElectrostatics )
00744   {
00745     for (ap = ap.begin(); ap != ap.end(); ap++) {
00746       CompAtom *x = (*ap).positionBox->open();
00747       Results *r = (*ap).forceBox->open();
00748       (*ap).positionBox->close(&x);
00749       (*ap).forceBox->close(&r);
00750     }
00751     reduction->submit();
00752     if (amd_reduction) amd_reduction->submit();
00753     return;
00754   }
00755 
00756   myMgr->_iter ++;  //this is a pme step
00757 
00758   // allocate storage
00759   numLocalAtoms = 0;
00760   for (ap = ap.begin(); ap != ap.end(); ap++) {
00761     numLocalAtoms += (*ap).p->getNumAtoms();
00762   }
00763 
00764   Lattice &lattice = patchList[0].p->flags.lattice;
00765 
00766   localData = new PmeParticle[numLocalAtoms];
00767   // get positions and charges
00768   PmeParticle * data_ptr = localData;
00769 
00770   int natoms = 0;
00771   if (myMgr->constant_pressure)
00772     resetPatchCoordinates(lattice);  //Update patch coordinates with new lattice
00773 
00774   for (ap = ap.begin(); ap != ap.end(); ap++) {
00775 #ifdef NETWORK_PROGRESS
00776     CmiNetworkProgress();
00777 #endif
00778     
00779     CompAtom *x = (*ap).positionBox->open();
00780     if ( patchList[0].p->flags.doMolly ) {
00781       (*ap).positionBox->close(&x);
00782       x = (*ap).avgPositionBox->open();
00783     }
00784     int numAtoms = (*ap).p->getNumAtoms();        
00785     int order_1  = myGrid.order - 1;
00786     scale_n_copy_coordinates(x, localData, numAtoms, 
00787                              lattice, myGrid,
00788                              xstart + order_1, xlen - order_1,
00789                              ystart + order_1, ylen - order_1,
00790                              zstart + order_1, zlen - order_1,
00791                              strayChargeErrors);
00792     natoms += numAtoms;
00793     
00794     if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
00795     else { (*ap).positionBox->close(&x); }
00796   }
00797 
00798   numLocalAtoms = natoms;  //Exclude all atoms out of range
00799 
00800   // calculate self energy
00801   BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00802   evir = 0;
00803   BigReal selfEnergy = 0;
00804   data_ptr = localData;
00805   int i;
00806   for(i=0; i<numLocalAtoms; ++i)
00807   {
00808     selfEnergy += data_ptr->cg * data_ptr->cg;
00809     ++data_ptr;
00810   }
00811   selfEnergy *= -1. * ewaldcof / SQRT_PI;
00812   evir[0] += selfEnergy;
00813 
00814   double **q = q_arr;
00815   memset( (void*) zline_storage, 0, zlen * nzlines * sizeof(double) );
00816 
00817   myRealSpace = new OptPmeRealSpace(myGrid,numLocalAtoms);
00818   //scale_coordinates(localData, numLocalAtoms, lattice, myGrid);
00819   if (!strayChargeErrors)
00820     myRealSpace->fill_charges(q, localData, zstart, zlen);
00821 
00822 #ifdef TRACE_COMPUTE_OBJECTS
00823   traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
00824 #endif
00825 
00826   if (myMgr->constant_pressure && patchList[0].patchID == 0)
00827     myMgr->xPencil.recvLattice (lattice);
00828   
00829   if (myMgr->_iter <= many_to_many_start)
00830       sendPencils();
00831 #if CHARM_VERSION > 60000
00832   else {
00833       pme_d2f (sp_zstorage, zline_storage, nzlines * zlen);
00834       CmiDirect_manytomany_start (myMgr->handle, PHASE_GR);
00835   }
00836 #endif
00837 }
00838 
00839 
00840 
00841 void OptPmeCompute::sendPencils() {  
00842 
00843   assert ( myMgr->usePencils );
00844 
00845   //iout << iPE << " Sending charge grid for " << numLocalAtoms << " atoms to FFT with " << myMgr->numPencilsActive << " messages" <<".\n" << endi;
00846 
00847   int xBlocks = myGrid.xBlocks;
00848   int yBlocks = myGrid.yBlocks;
00849   int zBlocks = myGrid.zBlocks;
00850 
00851   int K1 = myGrid.K1;
00852   int K2 = myGrid.K2;
00853   int dim2 = myGrid.dim2;
00854   int dim3 = myGrid.dim3;
00855   int block1 = myGrid.block1;
00856   int block2 = myGrid.block2;
00857 
00858   //Lattice lattice = patchList[0].p->flags.lattice;
00859 
00860   int nactive = 0;  
00861   for (int idx = 0; idx < pencilVec.size(); idx++) {
00862     int xstart = pencilVec[idx].xmin;
00863     int ystart = pencilVec[idx].ymin;
00864     int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00865     int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00866     int ib     = pencilVec[idx].ib;
00867     int jb     = pencilVec[idx].jb;
00868     double *data = pencilVec[idx].data;
00869     
00870     int fcount = xlen * ylen * zlen;
00871     OptPmeGridMsg *msg = new (fcount, PRIORITY_SIZE) OptPmeGridMsg;
00872     msg->zstart = zstart;
00873     msg->zlen   = zlen;
00874     msg->xstart = xstart;
00875     msg->xlen   = xlen;
00876     msg->ystart = ystart;
00877     msg->ylen   = ylen;
00878     msg->sourceNode = CkMyPe();
00879     msg->patchID    = patchList[0].patchID;
00880     
00881     float *qmsg = msg->qgrid;   
00882 #pragma disjoint (*data, *qmsg)
00883 #pragma unroll(8)
00884     for ( int k=0; k< fcount; ++k ) 
00885       *(qmsg++) = data[k];    
00886     
00887     myMgr->zPencil(ib,jb,0).recvGrid(msg);
00888   }
00889 }
00890 
00891 
00892 void OptPmeCompute::copyPencils(OptPmeGridMsg *msg) {
00893 
00894   if (!_initialized) initializeOptPmeCompute();
00895 
00896   int ibegin = msg->xstart;
00897   int iend   = msg->xstart + msg->xlen;
00898   int jbegin = msg->ystart;
00899   int jend   = msg->ylen;
00900   int fcount = zlen * msg->xlen * msg->ylen;
00901 
00902   float *qmsg = msg->qgrid;
00903   double *data = q_arr[ibegin * myGrid.dim2 + jbegin];  
00904 
00905 #pragma disjoint (*qmsg, *data)
00906 #pragma unroll(8)
00907   for ( int k=0; k<fcount; ++k ) 
00908     data[k] = *(qmsg++);
00909 }
00910 
00911 void OptPmeCompute::ungridForces() {
00912 
00913     //printf ("%d: In OptPMECompute::ungridforces\n", CkMyPe());
00914 
00915     if (myMgr->_iter > many_to_many_start)
00916       pme_f2d (zline_storage, sp_zstorage, nzlines * zlen);
00917 
00918     SimParameters *simParams = Node::Object()->simParameters;
00919     Vector *localResults = new Vector[numLocalAtoms];
00920     //memset (localResults, 0, sizeof (Vector) * numLocalAtoms);
00921 
00922     Vector *gridResults;
00923     gridResults = localResults;
00924 
00925     Vector pairForce = 0.;
00926     Lattice lattice = patchList[0].p->flags.lattice;
00927     if(!simParams->commOnly) {
00928 #ifdef NETWORK_PROGRESS
00929       CmiNetworkProgress();
00930 #endif
00931       
00932       if (!strayChargeErrors) {
00933         myRealSpace->compute_forces(q_arr, localData, gridResults, zstart, zlen);
00934         scale_forces(gridResults, numLocalAtoms, lattice);
00935       }
00936       delete myRealSpace;
00937     }
00938     delete [] localData;
00939     //    delete [] localPartition;
00940     
00941     Vector *results_ptr = localResults;
00942     ResizeArrayIter<PatchElem> ap(patchList);
00943     
00944     // add in forces
00945     for (ap = ap.begin(); ap != ap.end(); ap++) {
00946       Results *r = (*ap).forceBox->open();
00947       Force *f = r->f[Results::slow];
00948       int numAtoms = (*ap).p->getNumAtoms();
00949       
00950       if ( ! strayChargeErrors && ! simParams->commOnly ) {
00951         for(int i=0; i<numAtoms; ++i) {
00952           f[i].x += results_ptr->x;
00953           f[i].y += results_ptr->y;
00954           f[i].z += results_ptr->z;
00955           ++results_ptr;
00956         }
00957       }
00958   
00959       (*ap).forceBox->close(&r);
00960     }
00961 
00962     delete [] localResults;
00963    
00964     double scale = 1.;
00965 
00966     reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
00967     reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
00968     strayChargeErrors = 0;
00969     reduction->submit();
00970     if (amd_reduction) {
00971       amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
00972       amd_reduction->submit();
00973     }
00974 }
00975 
00976 
00977 
00978 #include "fftlib.C"
00979 #include "OptPmeMgr.def.h"
00980 
00981 void pme_f2d (double *dst, float *src, int N) {
00982 #pragma disjoint (*src, *dst)  
00983 #pragma unroll (8)
00984   for (int i = 0; i < N; i++) 
00985     dst[i] = src[i];
00986 }
00987 
00988 void pme_d2f (float *dst, double *src, int N) {
00989 #pragma disjoint (*src, *dst)  
00990 #pragma unroll (8)
00991   for (int i = 0; i < N; i++) 
00992     dst[i] = src[i];
00993 }

Generated on Fri May 25 04:07:16 2012 for NAMD by  doxygen 1.3.9.1