Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

OptPme.C

Go to the documentation of this file.
00001 
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_3
00009 #include <fftw3.h>
00010 #else
00011 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00012 #include <fftw.h>
00013 #include <rfftw.h>
00014 #else
00015 #include <sfftw.h>
00016 #include <srfftw.h>
00017 #endif
00018 #endif
00019 #endif
00020 
00021 #include <assert.h>
00022 
00023 #include "InfoStream.h"
00024 #include "Node.h"
00025 #include "PatchMap.h"
00026 #include "PatchMap.inl"
00027 #include "AtomMap.h"
00028 #include "OptPme.h"
00029 #include "OptPmeMgr.decl.h"
00030 #include "OptPmeRealSpace.h"
00031 #include "PmeKSpace.h"
00032 #include "ComputeNonbondedUtil.h"
00033 #include "PatchMgr.h"
00034 #include "Molecule.h"
00035 #include "ReductionMgr.h"
00036 //#include "ComputeMgr.h"
00037 #include "ComputeMgr.decl.h"
00038 // #define DEBUGM
00039 #define MIN_DEBUG_LEVEL 3
00040 #include "Debug.h"
00041 #include "SimParameters.h"
00042 #include "WorkDistrib.h"
00043 #include "varsizemsg.h"
00044 #include "Random.h"
00045 #include "Priorities.h"
00046 #include "PmeBase.inl"
00047 
00048 extern char *pencilPMEProcessors;
00049 
00050 #include "fftlib.h"
00051 #include "fftmap.h"
00052 
00053 //Very large integer
00054 int many_to_many_start = 0x7fffffff;
00055 
00056 class OptPmeMgr : public BOCclass {
00057 public:
00058   friend class OptPmeCompute;
00059   OptPmeMgr();
00060   ~OptPmeMgr();
00061 
00062   void initialize(CkQdMsg*);
00063   void initialize_pencils(CkQdMsg*);
00064   void activate_pencils(CkQdMsg*);
00065   void recvArrays(CProxy_OptPmeXPencil, CProxy_OptPmeYPencil, CProxy_OptPmeZPencil);
00066 
00067   void recvUngrid(OptPmeGridMsg *);
00068   void ungridCalc(OptPmeDummyMsg *);
00069   void ungridCalc_subcompute(OptPmeSubComputeMsg *);
00070   void ungridCalc_subcompute_done(OptPmeSubComputeMsg *);
00071   void doWorkOnPeer(OptPmeSubComputeMsg *);
00072   void recvEvir (CkReductionMsg *msg);
00073 
00074   void setCompute(OptPmeCompute *c) { pmeCompute = c; c->setMgr(this); }
00075 
00076 private:
00077   CProxy_OptPmeMgr pmeProxy;
00078   CProxy_OptPmeMgr pmeProxyDir;
00079   OptPmeCompute *pmeCompute;
00080   PmeGrid myGrid;
00081   PmeKSpace *myKSpace;
00082 
00083   CProxy_OptPmeXPencil xPencil;
00084   CProxy_OptPmeYPencil yPencil;
00085   CProxy_OptPmeZPencil zPencil;
00086   int    numPencilsActive;
00087   int    ungrid_count;
00088   SubmitReduction *reduction;
00089   SubmitReduction *amd_reduction;
00090   int    _iter;
00091   void   *handle;
00092   bool   constant_pressure;    //Does the simulation need constant pressure
00093   int    subcompute_count;
00094 
00095   int peersAllocated;
00096   int peers [SUBCOMPUTE_NPAR];
00097   OptPmeSubComputeMsg *subcompute_msgs[SUBCOMPUTE_NPAR];
00098 };
00099 
00100 
00101 void pme_f2d (double *dst, float *src, int N);
00102 void pme_d2f (float *dst, double *src, int N);
00103 
00104 static inline void initializePmeGrid (SimParameters *simParams, PmeGrid &grid) {
00105     int xBlocks = 0, yBlocks = 0, zBlocks= 0;
00106 
00107     if ( simParams->PMEPencils > 1 ) {
00108       xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00109     } else {
00110       int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00111                   * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00112       if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00113       if ( nb2 < 1 ) nb2 = 1;
00114       int nb = (int) sqrt((float)nb2);
00115       if ( nb < 1 ) nb = 1;
00116       xBlocks = zBlocks = nb;
00117       yBlocks = nb2 / nb;
00118     }
00119     
00120     int dimx = simParams->PMEGridSizeX;
00121     int bx = 1 + ( dimx - 1 ) / xBlocks;
00122     xBlocks = 1 + ( dimx - 1 ) / bx;
00123     
00124     int dimy = simParams->PMEGridSizeY;
00125     int by = 1 + ( dimy - 1 ) / yBlocks;
00126     yBlocks = 1 + ( dimy - 1 ) / by;
00127     
00128     int dimz = simParams->PMEGridSizeZ / 2 + 1;  // complex
00129     int bz = 1 + ( dimz - 1 ) / zBlocks;
00130     zBlocks = 1 + ( dimz - 1 ) / bz;
00131 
00132     grid.xBlocks = xBlocks;
00133     grid.yBlocks = yBlocks;
00134     grid.zBlocks = zBlocks;
00135 
00136     grid.K1 = simParams->PMEGridSizeX;
00137     grid.K2 = simParams->PMEGridSizeY;
00138     grid.K3 = simParams->PMEGridSizeZ;
00139     grid.order = simParams->PMEInterpOrder;
00140     grid.dim2 = grid.K2;
00141     grid.dim3 = 2 * (grid.K3/2 + 1);
00142 
00143     grid.block1 = ( grid.K1 + xBlocks - 1 ) / xBlocks;
00144     grid.block2 = ( grid.K2 + yBlocks - 1 ) / yBlocks;
00145     grid.block3 = ( grid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;  // complex
00146 }
00147 
00148 #ifdef NAMD_FFTW
00149 static CmiNodeLock fftw_plan_lock;
00150 #endif
00151 
00152 
00153 static inline void scale_n_copy_coordinates(CompAtom *x, PmeParticle p[], 
00154                                             int &N, 
00155                                             Lattice &lattice, PmeGrid grid,
00156                                             //double **qline,
00157                                             double xmin, double xlen,
00158                                             double ymin, double ylen,
00159                                             double zmin, double zlen,
00160                                             int &scg) {
00161   Vector origin = lattice.origin();
00162   Vector recip1 = lattice.a_r();
00163   Vector recip2 = lattice.b_r();
00164   Vector recip3 = lattice.c_r();
00165   double ox = origin.x;
00166   double oy = origin.y;
00167   double oz = origin.z;
00168   double r1x = recip1.x;
00169   double r1y = recip1.y;
00170   double r1z = recip1.z;
00171   double r2x = recip2.x;
00172   double r2y = recip2.y;
00173   double r2z = recip2.z;
00174   double r3x = recip3.x;
00175   double r3y = recip3.y;
00176   double r3z = recip3.z;
00177   int K1 = grid.K1;
00178   int K2 = grid.K2;
00179   int K3 = grid.K3;
00180 
00181   const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
00182                                      * ComputeNonbondedUtil::dielectric_1 );
00183 
00184 
00185   int natoms = 0;
00186   for (int i=0; i<N; i++) {
00187     double px = x[i].position.x - ox;
00188     double py = x[i].position.y - oy;
00189     double pz = x[i].position.z - oz;
00190     double sx = px*r1x + py*r1y + pz*r1z;
00191     double sy = px*r2x + py*r2y + pz*r2z;
00192     double sz = px*r3x + py*r3y + pz*r3z;
00193     p[natoms].x = K1 * ( sx - floor(sx) );
00194     p[natoms].y = K2 * ( sy - floor(sy) );
00195     p[natoms].z = K3 * ( sz - floor(sz) );
00196 #ifndef ARCH_POWERPC
00197     //  Check for rare rounding condition where K * ( 1 - epsilon ) == K      
00198     //  which was observed with g++ on Intel x86 architecture. 
00199     if ( p[natoms].x == K1 ) p[natoms].x = 0;
00200     if ( p[natoms].y == K2 ) p[natoms].y = 0;
00201     if ( p[natoms].z == K3 ) p[natoms].z = 0;
00202 #endif
00203 
00204 #if 1 //stray charge detection
00205     BigReal u1,u2,u3;
00206     u1 = (int) (p[natoms].x - xmin);
00207     if (u1 >= grid.K1) u1 -= grid.K1;    
00208     u2 = (int) (p[natoms].y - ymin);
00209     if (u2 >= grid.K2) u2 -= grid.K2;    
00210     u3 = (int) (p[natoms].z - zmin);
00211     if (u3 >= grid.K3) u3 -= grid.K3;
00212     
00213     if ( (u1 < 0.0) || (u1 >= xlen) || 
00214          (u2 < 0.0) || (u2 >= ylen) || 
00215          (u3 < 0.0) || (u3 >= zlen) ) {
00216       scg ++;
00217       continue;
00218     }
00219 #endif
00220     
00221     p[natoms].cg = coulomb_sqrt * x[i].charge;
00222     natoms ++;
00223   }
00224   N = natoms;
00225 }
00226 
00227 
00228 OptPmeMgr::OptPmeMgr() : pmeProxy(thisgroup), 
00229                                  pmeProxyDir(thisgroup), pmeCompute(0) {
00230 
00231   CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00232 
00233   myKSpace = 0;
00234   ungrid_count = 0;
00235   peersAllocated = 0;
00236 
00237 #ifdef NAMD_FFTW
00238   if ( CmiMyRank() == 0 ) {
00239     fftw_plan_lock = CmiCreateLock();
00240   }
00241 #endif    
00242 }
00243 
00244 
00245 void OptPmeMgr::recvArrays(CProxy_OptPmeXPencil x, CProxy_OptPmeYPencil y, CProxy_OptPmeZPencil z) {
00246   xPencil = x;  yPencil = y;  zPencil = z;
00247 }
00248 
00249 void OptPmeMgr::initialize(CkQdMsg *msg) {
00250     delete msg;
00251 
00252     _iter = 0;
00253 
00254 #if CHARM_VERSION > 60000
00255     handle = CmiDirect_manytomany_allocate_handle ();
00256 #endif
00257 
00258     SimParameters *simParams = Node::Object()->simParameters;
00259     PatchMap *patchMap = PatchMap::Object();
00260     
00261     initializePmeGrid (simParams, myGrid);    
00262 
00263     if (simParams->langevinPistonOn || simParams->berendsenPressureOn)  
00264       constant_pressure = true;
00265     else
00266       constant_pressure = false;      
00267 
00268     bool useManyToMany = simParams->useManyToMany;
00269     //Many-to-many requires that patches and pmepencils are all on different processors
00270     //int npes = patchMap->numPatches() + 
00271     //         myGrid.xBlocks *  myGrid.yBlocks + 
00272     //         myGrid.zBlocks *  myGrid.xBlocks +
00273     //         myGrid.yBlocks *  myGrid.zBlocks;
00274     
00275     int npes = patchMap->numPatches();
00276     if (npes < myGrid.xBlocks *  myGrid.yBlocks)
00277       npes = myGrid.xBlocks *  myGrid.yBlocks;
00278     if (npes <  myGrid.zBlocks *  myGrid.xBlocks)
00279       npes = myGrid.zBlocks *  myGrid.xBlocks;
00280     if (npes < myGrid.yBlocks *  myGrid.zBlocks)
00281       npes = myGrid.yBlocks *  myGrid.zBlocks;
00282     
00283    if (npes >= CkNumPes()) {
00284       if (CkMyPe() == 0)
00285         printf ("Warning : Not enough processors for the many-to-many optimization \n");      
00286       useManyToMany = false;
00287     }
00288     
00289 #if CHARM_VERSION > 60000
00290     if (useManyToMany)  {
00291       if (CkMyPe() == 0)
00292         printf ("Enabling the Many-to-many optimization\n");
00293       //defaults to max integer
00294       many_to_many_start = MANY_TO_MANY_START;
00295     }
00296 #endif
00297 
00298     if (CkMyRank() == 0) { //create the pencil pme processor map
00299       pencilPMEProcessors = new char [CkNumPes()];
00300       memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00301     }
00302 
00303     if ( CkMyPe() == 0) {
00304       iout << iINFO << "PME using " << myGrid.xBlocks << " x " <<
00305         myGrid.yBlocks << " x " << myGrid.zBlocks <<
00306         " pencil grid for FFT and reciprocal sum.\n" << endi;
00307       
00308       CProxy_OptPmePencilMapZ   mapz;      
00309       CProxy_OptPmePencilMapY   mapy;
00310       CProxy_OptPmePencilMapX   mapx;
00311       
00312       mapz = CProxy_OptPmePencilMapZ::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);      
00313       mapy = CProxy_OptPmePencilMapY::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00314       mapx = CProxy_OptPmePencilMapX::ckNew(myGrid.xBlocks, myGrid.yBlocks, myGrid.zBlocks);
00315       
00316       CkArrayOptions optsz;
00317       optsz.setMap (mapz);
00318       CkArrayOptions optsy;
00319       optsy.setMap (mapy);
00320       CkArrayOptions optsx;
00321       optsx.setMap (mapx);
00322       
00323       zPencil = CProxy_OptPmeZPencil::ckNew(optsz);  
00324       yPencil = CProxy_OptPmeYPencil::ckNew(optsy);  
00325       xPencil = CProxy_OptPmeXPencil::ckNew(optsx);  
00326       
00327       int x,y,z;
00328       for (x = 0; x < myGrid.xBlocks; ++x)
00329         for (y = 0; y < myGrid.yBlocks; ++y ) {
00330           zPencil(x,y,0).insert();
00331         }
00332       zPencil.doneInserting();
00333       
00334       for (z = 0; z < myGrid.zBlocks; ++z )
00335         for (x = 0; x < myGrid.xBlocks; ++x ) {
00336           yPencil(x,0,z).insert();
00337         }
00338       yPencil.doneInserting();
00339       
00340       for (y = 0; y < myGrid.yBlocks; ++y )     
00341         for (z = 0; z < myGrid.zBlocks; ++z ) {
00342           xPencil(0,y,z).insert();
00343         }
00344       xPencil.doneInserting();      
00345       
00346       pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00347       OptPmePencilInitMsgData msgdata;
00348       msgdata.grid = myGrid;
00349       msgdata.xBlocks = myGrid.xBlocks;
00350       msgdata.yBlocks = myGrid.yBlocks;
00351       msgdata.zBlocks = myGrid.zBlocks;
00352       msgdata.xPencil = xPencil;
00353       msgdata.yPencil = yPencil;
00354       msgdata.zPencil = zPencil;
00355       msgdata.constant_pressure = constant_pressure;
00356 
00357       CkCallback cb (CkIndex_OptPmeMgr::recvEvir(NULL), thisProxy[0]);
00358       msgdata.cb_energy = cb;
00359 
00360       msgdata.pmeProxy = pmeProxyDir;
00361       xPencil.init(new OptPmePencilInitMsg(msgdata));
00362       yPencil.init(new OptPmePencilInitMsg(msgdata));
00363       zPencil.init(new OptPmePencilInitMsg(msgdata));
00364      
00365 #if 0 
00366       reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00367       if (simParams->accelMDOn) {
00368          amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00369       } else {
00370          amd_reduction = NULL;
00371       }
00372 #endif
00373 
00374 #ifndef NAMD_FFTW
00375       NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00376 #endif
00377     }
00378 
00379 }
00380 
00381 void OptPmeMgr::initialize_pencils(CkQdMsg *msg) {
00382   delete msg;
00383 
00384   SimParameters *simParams = Node::Object()->simParameters;
00385 
00386   PatchMap *patchMap = PatchMap::Object();
00387   Lattice lattice = simParams->lattice;
00388   BigReal sysdima = lattice.a_r().unit() * lattice.a();
00389   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00390   BigReal cutoff = simParams->cutoff;
00391   BigReal patchdim = simParams->patchDimension;
00392   int numPatches = patchMap->numPatches();
00393 
00394   //fprintf(stderr, "Node %d PE %d trying to allocate %d bytes\n", CmiMyNode(), CmiMyPe(), myGrid.xBlocks*myGrid.yBlocks);
00395 
00396   char *pencilActive = new char[myGrid.xBlocks*myGrid.yBlocks];
00397   for ( int i=0; i<myGrid.xBlocks; ++i ) {
00398     for ( int j=0; j<myGrid.yBlocks; ++j ) {
00399       pencilActive[i*myGrid.yBlocks+j] = 0;
00400     }
00401   }
00402 
00403   //Right now we only support one patch per processor
00404   assert (patchMap->numPatchesOnNode(CkMyPe()) <= 1);
00405   for ( int pid=0; pid < numPatches; ++pid ) {
00406     int pnode = patchMap->node(pid);
00407     if ( pnode != CkMyPe() ) continue;
00408 
00409     BigReal minx = patchMap->min_a(pid);
00410     BigReal maxx = patchMap->max_a(pid);
00411     BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00412     // min1 (max1) is smallest (largest) grid line for this patch
00413     int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00414     int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00415 
00416     BigReal miny = patchMap->min_b(pid);
00417     BigReal maxy = patchMap->max_b(pid);
00418     BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00419     // min2 (max2) is smallest (largest) grid line for this patch
00420     int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00421     int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00422 
00423     for ( int i=min1; i<=max1; ++i ) {
00424       int ix = i;
00425       while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00426       while ( ix < 0 ) ix += myGrid.K1;
00427       for ( int j=min2; j<=max2; ++j ) {
00428         int jy = j;
00429         while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00430         while ( jy < 0 ) jy += myGrid.K2;
00431         pencilActive[(ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2)] = 1;
00432       }
00433     }
00434   }
00435 
00436   numPencilsActive = 0;
00437   for ( int i=0; i<myGrid.xBlocks; ++i ) {
00438     for ( int j=0; j<myGrid.yBlocks; ++j ) {
00439       if ( pencilActive[i*myGrid.yBlocks+j] ) {
00440         ++numPencilsActive;
00441 
00442         zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00443       }
00444     }
00445   }
00446 
00447   ungrid_count = numPencilsActive;
00448   delete [] pencilActive;  
00449 }
00450 
00451 
00452 void OptPmeMgr::activate_pencils(CkQdMsg *msg) {
00453   if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00454 }
00455 
00456 
00457 OptPmeMgr::~OptPmeMgr() {
00458   delete myKSpace;
00459 }
00460 
00461 void OptPmeMgr::recvUngrid(OptPmeGridMsg *msg) {
00462   if ( ungrid_count == 0 ) {
00463     NAMD_bug("Message order failure in OptPmeMgr::recvUngrid\n");
00464   }
00465     
00466   pmeCompute->copyPencils(msg);
00467   delete msg;
00468   --ungrid_count;
00469 
00470   if ( ungrid_count == 0 ) {
00471     //CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
00472     ungridCalc(NULL);
00473   }
00474 }
00475 
00476 void OptPmeMgr::ungridCalc(OptPmeDummyMsg *dmsg) {    
00477   pmeCompute->ungridForces_init();
00478   if ( CmiMyNodeSize() >= SUBCOMPUTE_NPAR ) {
00479     int npar = SUBCOMPUTE_NPAR;
00480     OptPmeSubComputeMsg *smsg = NULL;
00481 
00482     if (!peersAllocated) {
00483       peersAllocated = 1;
00484       int next_rank = CmiMyRank();   
00485       PatchMap *patchMap = PatchMap::Object();          
00486 
00487       for (int i = 1; i < npar; ++i) {      
00488         smsg = new (PRIORITY_SIZE) OptPmeSubComputeMsg;
00489         subcompute_msgs[i] = smsg;
00490         smsg->src_pe  = CkMyPe();
00491         smsg->compute = pmeCompute;
00492 
00493         next_rank ++;
00494         if (next_rank >= CmiMyNodeSize())
00495           next_rank = 0;
00496         int n = 0;
00497         int nr = next_rank;
00498         while(n < CmiMyNodeSize() &&
00499               patchMap->numPatchesOnNode(CmiNodeFirst(CmiMyNode())+nr) > 0)
00500         {
00501           nr ++;
00502           if (nr >= CmiMyNodeSize())
00503             nr = 0;
00504           n++;
00505         }
00506         if (n < CmiMyNodeSize()) 
00507           next_rank = nr;  //we are successful, so save this rank
00508         
00509         smsg->dest = next_rank;
00510       }
00511 
00512       //Local subcompute msg
00513       smsg = new (PRIORITY_SIZE) OptPmeSubComputeMsg;
00514       subcompute_msgs[0] = smsg;
00515       smsg->src_pe  = CkMyPe();
00516       smsg->compute = pmeCompute;
00517       smsg->dest    = CmiMyRank();
00518     }
00519 
00520     int start  = 0;
00521     int nlocal = pmeCompute->getNumLocalAtoms();
00522     //CmiAssert (npar <= nlocal);
00523     if (nlocal < npar)
00524       npar = nlocal;
00525     if (npar == 0)
00526       npar = 1;
00527     int n_per_iter = nlocal / npar;
00528     //We dont handle the case where there are very few atoms
00529     subcompute_count = npar;
00530 
00531     for (int i = 0; i < npar; ++i) {      
00532       smsg = subcompute_msgs[i];
00533       smsg->start   = start;
00534       smsg->end     = start + n_per_iter;
00535       start += n_per_iter;
00536       if (i == npar - 1)
00537         smsg->end = nlocal;
00538       pmeProxy[CmiNodeFirst(CmiMyNode())+smsg->dest].ungridCalc_subcompute(smsg);
00539     }    
00540   }
00541   else {
00542     pmeCompute->ungridForces_compute(0, 0);
00543     pmeCompute->ungridForces_finalize();
00544     ungrid_count = numPencilsActive; 
00545   }
00546 }
00547 
00548 void OptPmeMgr::ungridCalc_subcompute(OptPmeSubComputeMsg *msg){ 
00549   OptPmeCompute *compute = (OptPmeCompute *) msg->compute;
00550   compute->ungridForces_compute(msg->start, msg->end);
00551   pmeProxy[msg->src_pe].ungridCalc_subcompute_done(msg);
00552 }
00553 
00554 void OptPmeMgr::ungridCalc_subcompute_done(OptPmeSubComputeMsg *msg){  
00555   subcompute_count --;
00556   //delete msg; //message pointers saved
00557   if (subcompute_count == 0) {
00558     pmeCompute->ungridForces_finalize();
00559     ungrid_count = numPencilsActive; 
00560   }
00561 }
00562 
00563 void OptPmeMgr::doWorkOnPeer(OptPmeSubComputeMsg *msg) {
00564   OptPmeCompute *compute = (OptPmeCompute *) msg->compute;
00565   compute->doWorkOnPeer();
00566   //  delete msg; //saved in compute
00567 }
00568 
00569 void OptPmeMgr::recvEvir (CkReductionMsg *msg) {
00570 
00571   assert (CkMyPe() == 0);
00572 
00573   double *data = (double *) msg->getData();
00574   assert (msg->getSize() == 7 * sizeof(double));
00575 
00576   //printf ("[%d]: Received Evir\n", CkMyPe());
00577 
00578   double scale = 1.;
00579   reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00580   reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00581   reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00582   reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00583   reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00584   reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00585   reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00586   reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00587   reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00588   reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;   
00589 
00590   if (amd_reduction) {
00591     amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += data[0] * scale;
00592     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XX) += data[1] * scale;
00593     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XY) += data[2] * scale;
00594     amd_reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += data[3] * scale;
00595     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YX) += data[2] * scale;
00596     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YY) += data[4] * scale;
00597     amd_reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += data[5] * scale;
00598     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += data[3] * scale;
00599     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += data[5] * scale;
00600     amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += data[6] * scale;
00601   }
00602 
00603   delete msg;
00604 
00605   SimParameters *simParams = Node::Object()->simParameters;
00606   int fef = simParams->fullElectFrequency;
00607   for (int i = 0; i < fef; i++) {
00608     reduction->submit();
00609     if (amd_reduction) amd_reduction->submit();
00610   }
00611 }
00612 
00613 OptPmeCompute::OptPmeCompute(ComputeID c) :
00614   ComputeHomePatches(c)
00615 {
00616   DebugM(4,"OptPmeCompute created.\n");
00617 
00618   CProxy_OptPmeMgr::ckLocalBranch(
00619         CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
00620 
00621   _initialized = false;
00622 
00623   useAvgPositions = 1;
00624   localResults = NULL;
00625 
00626   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00627   SimParameters *simParams = Node::Object()->simParameters;
00628   if (simParams->accelMDOn) {
00629     amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
00630   } else {
00631     amd_reduction = NULL;
00632   }
00633 }
00634 
00635 void recv_ungrid_done (void *m) {
00636   OptPmeDummyMsg *msg =  (OptPmeDummyMsg *) m;  
00637   CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00638   pmeProxy[msg->to_pe].ungridCalc (msg);
00639 }  
00640 
00641 
00642 void OptPmeCompute::resetPatchCoordinates (const Lattice &lattice) {
00643   PatchMap *patchMap = PatchMap::Object();    
00644   SimParameters *simParams = Node::Object()->simParameters;
00645 
00646   BigReal sysdima = lattice.a_r().unit() * lattice.a();
00647   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00648   BigReal sysdimc = lattice.c_r().unit() * lattice.c();
00649   BigReal cutoff = simParams->cutoff;
00650   BigReal patchdim = simParams->patchDimension;
00651 
00652   int pid = patchList[0].patchID; 
00653   assert (patchList.size() == 1);
00654 
00655   //printf ("Patch[%d]: zstart = %d, zlen = %d, maxz %f, minz %f, marginec %f\n", pid, zstart, zlen, maxz, minz, marginc);
00656 
00657   BigReal minx = patchMap->min_a(pid);
00658   BigReal maxx = patchMap->max_a(pid);
00659   BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00660   // min1 (max1) is smallest (largest) grid line for this patch
00661   int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00662   int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00663 
00664   BigReal miny = patchMap->min_b(pid);
00665   BigReal maxy = patchMap->max_b(pid);
00666   BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00667   // min2 (max2) is smallest (largest) grid line for this patch
00668   int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00669   int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00670 
00671   BigReal minz = patchMap->min_c(pid);
00672   BigReal maxz = patchMap->max_c(pid);
00673   BigReal marginc = 0.5 * ( patchdim - cutoff ) / sysdimc;
00674   // min3 (max3) is smallest (largest) grid line for this patch
00675   int min3 = ((int) floor(myGrid.K3 * (minz - marginc))) - myGrid.order + 1;
00676   int max3 = ((int) floor(myGrid.K3 * (maxz + marginc)));
00677 
00678   xlen = max1 - min1 + 1;
00679   xstart = min1;  
00680   
00681   ylen = max2 - min2 + 1;
00682   ystart = min2;  
00683   
00684   zlen = max3 - min3 + 1;
00685   zstart = min3;  
00686 }
00687 
00688 void OptPmeCompute::initializeOptPmeCompute () {
00689   
00690   _initialized = true;
00691   
00692   strayChargeErrors = 0;
00693 
00694   SimParameters *simParams = Node::Object()->simParameters;
00695   PatchMap *patchMap = PatchMap::Object();
00696 
00697   initializePmeGrid (simParams, myGrid);
00698 
00699   alchFepOn    = simParams->alchFepOn;
00700   alchThermIntOn = simParams->alchThermIntOn;
00701   lesOn    = simParams->lesOn;
00702   pairOn   = simParams->pairInteractionOn;
00703 
00704   assert (!alchFepOn);
00705   assert (!alchThermIntOn);
00706   assert (!lesOn);
00707   assert (!pairOn);
00708   
00709   qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
00710   fsize = myGrid.K1 * myGrid.dim2;
00711   q_arr = new double*[fsize];
00712   memset( (void*) q_arr, 0, fsize * sizeof(double*) );
00713 
00714   assert (myMgr != NULL);
00715 
00716   const Lattice lattice = simParams->lattice;
00717   resetPatchCoordinates (lattice);
00718   
00719   PencilElement *pencilActive = new PencilElement [myGrid.xBlocks * myGrid.yBlocks];
00720   memset (pencilActive, 0, myGrid.xBlocks * myGrid.yBlocks * sizeof(PencilElement));
00721 
00722   for ( int i=xstart; i<=xstart+xlen-1; ++i ) {
00723     int ix = i;
00724     while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00725     while ( ix < 0 ) ix += myGrid.K1;
00726     for ( int j=ystart; j<=ystart+ylen-1; ++j ) {
00727       int jy = j;
00728       while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00729       while ( jy < 0 ) jy += myGrid.K2;
00730       
00731       int pencil_idx = (ix / myGrid.block1)*myGrid.yBlocks + (jy / myGrid.block2);
00732       //If not initialized yet, initialize this pencil
00733       if (! pencilActive[pencil_idx].isActive) {
00734         pencilActive[pencil_idx].isActive = 1;
00735         pencilActive[pencil_idx].xmin = ix;
00736         pencilActive[pencil_idx].xmax = ix;
00737         pencilActive[pencil_idx].ymin = jy;
00738         pencilActive[pencil_idx].ymax = jy;
00739         pencilActive[pencil_idx].ib = ix / myGrid.block1;
00740         pencilActive[pencil_idx].jb = jy / myGrid.block2;
00741       }
00742       else { //pencil has been initialized, update the min and max
00743         if (pencilActive[pencil_idx].xmin > ix)
00744           pencilActive[pencil_idx].xmin = ix;
00745         if (pencilActive[pencil_idx].xmax < ix)
00746           pencilActive[pencil_idx].xmax = ix;   
00747         if (pencilActive[pencil_idx].ymin > jy)
00748           pencilActive[pencil_idx].ymin = jy;
00749         if (pencilActive[pencil_idx].ymax < jy)
00750           pencilActive[pencil_idx].ymax = jy;
00751       }
00752     }
00753   }
00754   
00755   int nactive = 0; 
00756   for (int ib=0; ib<myGrid.xBlocks; ++ib) 
00757     for (int jb=0; jb<myGrid.yBlocks; ++jb) 
00758       if (pencilActive[ib * myGrid.yBlocks + jb].isActive) 
00759         nactive ++;
00760 
00761   assert (nactive == myMgr->numPencilsActive);  
00762   
00763   nzlines = xlen * ylen;
00764   zline_storage = new double [nzlines * zlen];
00765   sp_zstorage = new float [nzlines * zlen];  
00766 
00767   //printf ("%d: Allocate %d bytes of storage for %d PME Pencils\n", CkMyPe(), 
00768   //      nzlines * zlen * (int)sizeof(double), nactive);
00769   
00770   assert (zline_storage != NULL);
00771   double * zblock = zline_storage;
00772   
00773   for (int ib=0; ib<myGrid.xBlocks; ++ib) {
00774     for (int jb=0; jb<myGrid.yBlocks; ++jb) {
00775       int index = ib * myGrid.yBlocks + jb;
00776       if (pencilActive[index].isActive) {
00777         pencilActive[index].data = zblock;
00778         for (int i = pencilActive[index].xmin; i <= pencilActive[index].xmax; i++)
00779           for (int j = pencilActive[index].ymin; j <= pencilActive[index].ymax; j++) {      
00780             q_arr[i*myGrid.dim2 + j] = zblock;
00781             zblock += zlen;
00782             assert ((char *) zblock <= (char *)(zline_storage + nzlines*zlen));
00783           }
00784       }
00785     }
00786   }
00787   
00788   pencilVec.resize (nactive);
00789   nactive = 0;
00790   for (int ib=0; ib<myGrid.xBlocks; ++ib) 
00791     for (int jb=0; jb<myGrid.yBlocks; ++jb) 
00792       if (pencilActive[ib*myGrid.yBlocks + jb].isActive) 
00793         pencilVec[nactive ++] = pencilActive [ib * myGrid.yBlocks + jb];
00794 
00795   //We dont need the sparse array anymore
00796   delete [] pencilActive;
00797   
00798 #if CHARM_VERSION > 60000
00799   /******************************* Initialize Many to Many ***********************************/
00800   OptPmeDummyMsg *m = new (PRIORITY_SIZE) OptPmeDummyMsg;
00801   m->to_pe = CkMyPe();
00802   CmiDirect_manytomany_initialize_recvbase (myMgr->handle, PHASE_UG, 
00803                                             recv_ungrid_done, m, (char *)sp_zstorage, 
00804                                             myGrid.xBlocks*myGrid.yBlocks, -1); 
00805   
00806   CkCallback cbi (CkCallback::ignore);
00807   CmiDirect_manytomany_initialize_sendbase (myMgr->handle, PHASE_GR, NULL, NULL, 
00808                                             (char *)sp_zstorage, 
00809                                             pencilVec.size(), patchList[0].patchID);
00810 
00811   for (int idx = 0; idx < pencilVec.size(); idx++) {
00812     int ib =  pencilVec[idx].ib;
00813     int jb =  pencilVec[idx].jb;
00814     double * data = pencilVec[idx].data;
00815     int offset = (data - zline_storage)*sizeof(float);
00816     int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00817     int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00818     int fcount = xlen * ylen * zlen;
00819 
00820     CkArrayIndex3D index (ib, jb, 0);
00821     CProxy_OptPmePencilMapZ zproxy (global_map_z);
00822     int pe = zproxy.ckLocalBranch()->procNum(0, index);
00823     CmiDirect_manytomany_initialize_send (myMgr->handle, PHASE_GR, idx, 
00824                                            offset, fcount*sizeof(float), pe);
00825     
00826     int srcnode = ib * myGrid.yBlocks + jb;
00827     CmiDirect_manytomany_initialize_recv (myMgr->handle, PHASE_UG, 
00828                                           srcnode, offset, fcount*sizeof(float), pe);
00829   }
00830   /********************************** End initialize many to many ****************************/
00831 #endif
00832 
00833 }
00834 
00835 OptPmeCompute::~OptPmeCompute()
00836 {
00837   delete [] zline_storage;
00838   delete [] sp_zstorage;
00839   delete [] q_arr;
00840 }
00841 
00842 
00843 void OptPmeCompute::doWork()
00844 {
00845   DebugM(4,"Entering OptPmeCompute::doWork().\n");
00846 
00847 #ifdef TRACE_COMPUTE_OBJECTS
00848   double traceObjStartTime = CmiWallTimer();
00849 #endif
00850 
00851   if (!_initialized) initializeOptPmeCompute();
00852 
00853   ResizeArrayIter<PatchElem> ap(patchList);
00854 
00855   // Skip computations if nothing to do.
00856   if ( ! patchList[0].p->flags.doFullElectrostatics )
00857   {
00858     for (ap = ap.begin(); ap != ap.end(); ap++) {
00859       CompAtom *x = (*ap).positionBox->open();
00860       Results *r = (*ap).forceBox->open();
00861       (*ap).positionBox->close(&x);
00862       (*ap).forceBox->close(&r);
00863     }
00864     reduction->submit();
00865     if (amd_reduction) amd_reduction->submit();
00866     return;
00867   }
00868 
00869   myMgr->_iter ++;  //this is a pme step
00870 
00871   // allocate storage
00872   numLocalAtoms = 0;
00873   for (ap = ap.begin(); ap != ap.end(); ap++) {
00874     numLocalAtoms += (*ap).p->getNumAtoms();
00875   }
00876 
00877   Lattice &lattice = patchList[0].p->flags.lattice;
00878 
00879   localData = new PmeParticle[numLocalAtoms];
00880   // get positions and charges
00881   PmeParticle * data_ptr = localData;
00882 
00883   int natoms = 0;
00884   //  if (myMgr->constant_pressure)
00885   //resetPatchCoordinates(lattice);  //Update patch coordinates with new lattice
00886 
00887   for (ap = ap.begin(); ap != ap.end(); ap++) {
00888 #ifdef NETWORK_PROGRESS
00889     CmiNetworkProgress();
00890 #endif
00891     
00892     CompAtom *x = (*ap).positionBox->open();
00893     if ( patchList[0].p->flags.doMolly ) {
00894       (*ap).positionBox->close(&x);
00895       x = (*ap).avgPositionBox->open();
00896     }
00897     int numAtoms = (*ap).p->getNumAtoms();        
00898     int order_1  = myGrid.order - 1;
00899     scale_n_copy_coordinates(x, localData, numAtoms, 
00900                              lattice, myGrid,
00901                              xstart + order_1, xlen - order_1,
00902                              ystart + order_1, ylen - order_1,
00903                              zstart + order_1, zlen - order_1,
00904                              strayChargeErrors);
00905     natoms += numAtoms;
00906     
00907     if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
00908     else { (*ap).positionBox->close(&x); }
00909   }
00910 
00911   numLocalAtoms = natoms;  //Exclude all atoms out of range
00912 
00913   // calculate self energy
00914   BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
00915   evir = 0;
00916   BigReal selfEnergy = 0;
00917   data_ptr = localData;
00918   int i;
00919   for(i=0; i<numLocalAtoms; ++i)
00920   {
00921     selfEnergy += data_ptr->cg * data_ptr->cg;
00922     ++data_ptr;
00923   }
00924   selfEnergy *= -1. * ewaldcof / SQRT_PI;
00925   evir[0] += selfEnergy;
00926 
00927 #if 0
00928   if (myMgr->_iter > many_to_many_start) {
00929     OptPmeSubComputeMsg *smsg = myMgr->subcompute_msgs[1]; //not self
00930     CProxy_OptPmeMgr pmeProxy (CkpvAccess(BOCclass_group).computePmeMgr);
00931     pmeProxy[CmiNodeFirst(CmiMyNode())+smsg->dest].doWorkOnPeer(smsg);
00932   }
00933   else
00934 #endif
00935     doWorkOnPeer();
00936 
00937 #ifdef TRACE_COMPUTE_OBJECTS
00938   traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
00939 #endif
00940 
00941 }
00942 
00943 void OptPmeCompute::doWorkOnPeer()
00944 {
00945   Lattice &lattice = patchList[0].p->flags.lattice;
00946   double **q = q_arr;
00947   memset( (void*) zline_storage, 0, zlen * nzlines * sizeof(double) );
00948 
00949   myRealSpace = new OptPmeRealSpace(myGrid,numLocalAtoms);
00950   if (!strayChargeErrors)
00951     myRealSpace->fill_charges(q, localData, zstart, zlen);
00952 
00953   if (myMgr->constant_pressure && patchList[0].patchID == 0)
00954     myMgr->xPencil.recvLattice (lattice);
00955   
00956   if (myMgr->_iter <= many_to_many_start)
00957     sendPencils();
00958 #if CHARM_VERSION > 60000
00959   else {
00960     pme_d2f (sp_zstorage, zline_storage, nzlines * zlen);
00961     CmiDirect_manytomany_start (myMgr->handle, PHASE_GR);
00962   }
00963 #endif
00964 }
00965 
00966 
00967 
00968 void OptPmeCompute::sendPencils() {  
00969 
00970   //iout << iPE << " Sending charge grid for " << numLocalAtoms << " atoms to FFT with " << myMgr->numPencilsActive << " messages" <<".\n" << endi;
00971 
00972   int xBlocks = myGrid.xBlocks;
00973   int yBlocks = myGrid.yBlocks;
00974   int zBlocks = myGrid.zBlocks;
00975 
00976   int K1 = myGrid.K1;
00977   int K2 = myGrid.K2;
00978   int dim2 = myGrid.dim2;
00979   int dim3 = myGrid.dim3;
00980   int block1 = myGrid.block1;
00981   int block2 = myGrid.block2;
00982 
00983   //Lattice lattice = patchList[0].p->flags.lattice;
00984 
00985   int nactive = 0;  
00986   for (int idx = 0; idx < pencilVec.size(); idx++) {
00987     int xstart = pencilVec[idx].xmin;
00988     int ystart = pencilVec[idx].ymin;
00989     int xlen   = pencilVec[idx].xmax - pencilVec[idx].xmin + 1;
00990     int ylen   = pencilVec[idx].ymax - pencilVec[idx].ymin + 1;
00991     int ib     = pencilVec[idx].ib;
00992     int jb     = pencilVec[idx].jb;
00993     double *data = pencilVec[idx].data;
00994     
00995     int fcount = xlen * ylen * zlen;
00996     OptPmeGridMsg *msg = new (fcount, PRIORITY_SIZE) OptPmeGridMsg;
00997     msg->zstart = zstart;
00998     msg->zlen   = zlen;
00999     msg->xstart = xstart;
01000     msg->xlen   = xlen;
01001     msg->ystart = ystart;
01002     msg->ylen   = ylen;
01003     msg->sourceNode = CkMyPe();
01004     msg->patchID    = patchList[0].patchID;
01005     
01006     float *qmsg = msg->qgrid;   
01007 #pragma disjoint (*data, *qmsg)
01008 #pragma unroll(8)
01009     for ( int k=0; k< fcount; ++k ) 
01010       *(qmsg++) = data[k];    
01011     
01012     myMgr->zPencil(ib,jb,0).recvGrid(msg);
01013   }
01014 }
01015 
01016 
01017 void OptPmeCompute::copyPencils(OptPmeGridMsg *msg) {
01018 
01019   if (!_initialized) initializeOptPmeCompute();
01020 
01021   int ibegin = msg->xstart;
01022   int iend   = msg->xstart + msg->xlen;
01023   int jbegin = msg->ystart;
01024   int jend   = msg->ylen;
01025   int fcount = zlen * msg->xlen * msg->ylen;
01026 
01027   float *qmsg = msg->qgrid;
01028   double *data = q_arr[ibegin * myGrid.dim2 + jbegin];  
01029 
01030 #pragma disjoint (*qmsg, *data)
01031 #pragma unroll(8)
01032   for ( int k=0; k<fcount; ++k ) 
01033     data[k] = *(qmsg++);
01034 }
01035 
01036 void OptPmeCompute::ungridForces_init() {
01037 
01038     //printf ("%d: In OptPMECompute::ungridforces_init\n", CkMyPe());
01039 
01040     if (myMgr->_iter > many_to_many_start)
01041       pme_f2d (zline_storage, sp_zstorage, nzlines * zlen);
01042 
01043     localResults = new Vector[numLocalAtoms];
01044     //memset (localResults, 0, sizeof (Vector) * numLocalAtoms);
01045 }
01046 
01047 void OptPmeCompute::ungridForces_compute(int    istart,
01048                                          int    iend) 
01049 {
01050     Vector *gridResults;
01051     gridResults = localResults;
01052 
01053     if (iend == 0)
01054       iend = numLocalAtoms;
01055     
01056     SimParameters *simParams = Node::Object()->simParameters;
01057     Vector pairForce = 0.;
01058     Lattice &lattice = patchList[0].p->flags.lattice;
01059     if(!simParams->commOnly) {
01060 #ifdef NETWORK_PROGRESS
01061       CmiNetworkProgress();
01062 #endif      
01063       if (!strayChargeErrors) {
01064         myRealSpace->compute_forces(q_arr, localData, gridResults, 
01065                                     zstart, zlen, istart, iend);
01066         scale_forces(gridResults + istart, iend - istart, lattice);
01067       }
01068     }
01069 }
01070 
01071 void OptPmeCompute::ungridForces_finalize() {
01072     SimParameters *simParams = Node::Object()->simParameters;
01073     delete myRealSpace;
01074     
01075     delete [] localData;
01076     //    delete [] localPartition;
01077     
01078     Vector *results_ptr = localResults;
01079     ResizeArrayIter<PatchElem> ap(patchList);
01080     
01081     // add in forces
01082     for (ap = ap.begin(); ap != ap.end(); ap++) {
01083       Results *r = (*ap).forceBox->open();
01084       Force *f = r->f[Results::slow];
01085       int numAtoms = (*ap).p->getNumAtoms();
01086       
01087       if ( ! strayChargeErrors && ! simParams->commOnly ) {
01088         for(int i=0; i<numAtoms; ++i) {
01089           f[i].x += results_ptr->x;
01090           f[i].y += results_ptr->y;
01091           f[i].z += results_ptr->z;
01092           ++results_ptr;
01093         }
01094       }
01095   
01096       (*ap).forceBox->close(&r);
01097     }
01098 
01099     delete [] localResults;
01100    
01101     double scale = 1.;
01102 
01103     reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
01104     reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
01105     strayChargeErrors = 0;
01106     reduction->submit();
01107     if (amd_reduction) {
01108       amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[0] * scale;
01109       amd_reduction->submit();
01110     }
01111 }
01112 
01113 
01114 
01115 #include "fftlib.C"
01116 #include "OptPmeMgr.def.h"
01117 
01118 void pme_f2d (double *dst, float *src, int N) {
01119 #pragma disjoint (*src, *dst)  
01120 #pragma unroll (8)
01121   for (int i = 0; i < N; i++) 
01122     dst[i] = src[i];
01123 }
01124 
01125 void pme_d2f (float *dst, double *src, int N) {
01126 #pragma disjoint (*src, *dst)  
01127 #pragma unroll (8)
01128   for (int i = 0; i < N; i++) 
01129     dst[i] = src[i];
01130 }

Generated on Sun May 19 04:07:47 2013 for NAMD by  doxygen 1.3.9.1