ComputeMsmMsa.C

Go to the documentation of this file.
00001 
00007 #ifdef CHARM_HAS_MSA
00008 
00009 #include "InfoStream.h"
00010 #include "Node.h"
00011 #include "PDB.h"
00012 #include "PatchMap.h"
00013 #include "PatchMap.inl"
00014 #include "AtomMap.h"
00015 #include "ComputeMsmMsa.h"
00016 #include "ComputeMsmMsaMgr.decl.h"
00017 #include "PatchMgr.h"
00018 #include "Molecule.h"
00019 #include "ReductionMgr.h"
00020 #include "ComputeMgr.h"
00021 #include "ComputeMgr.decl.h"
00022 // #define DEBUGM
00023 #define MIN_DEBUG_LEVEL 3
00024 #include "Debug.h"
00025 #include "SimParameters.h"
00026 #include "WorkDistrib.h"
00027 #include "varsizemsg.h"
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <errno.h>
00031 #include "pup_stl.h"
00032 #include "MsmMacros.h"
00033 
00034 
00035 void MsmMsaData::pup(PUP::er &p)
00036 {
00037   p|ispx, p|ispy, p|ispz;
00038   p|hx_1, p|hy_1, p|hz_1;
00039   p|a;
00040   p|origin_x, p|origin_y, p|origin_z;
00041   p|nlevels, p|maxlevels, p|toplevel;
00042   p|approx, p|split;
00043   p|qh, p|eh;
00044   p|grid_len;
00045   p|grid_idstart;
00046   p|scaling;
00047   p|gc_len;
00048   p|gc_idstart;
00049   p|gc;
00050   p|gctop_len;
00051   p|gctop_idstart;
00052   p|gctop;
00053   p|num_clients_qh, p|num_clients_eh;
00054   p|num_anterpolation_chares;
00055   p|num_interpolation_chares;
00056   p|num_restriction_chares;
00057   p|num_prolongation_chares;
00058   p|num_gridcutoff_chares;
00059   p|dim_gridcutoff_chares;
00060   p|dim_gridtransfer_chares;
00061   p|num_total_restriction_chares;
00062   p|num_total_prolongation_chares;
00063   p|num_total_gridcutoff_chares;
00064   p|num_energy_chares;
00065   p|num_points_per_chare;
00066   p|self_energy_const;
00067 }
00068 
00069 void MsmMsaData::print()
00070 {
00071 #if 0
00072   printf("MSM data:\n");
00073   printf("ispx=%d ispy=%d ispz=%d\n", ispx, ispy, ispz);
00074   printf("hx=%g  hy=%g  hz=%g\n", 1/hx_1, 1/hy_1, 1/hz_1);
00075   printf("a=%g\n", a);
00076   printf("origin = %g %g %g\n", origin_x, origin_y, origin_z);
00077   printf("nlevels=%d maxlevels=%d toplevel=%d\n", nlevels, maxlevels, toplevel);
00078   printf("approx=%d split=%d\n", approx, split);
00079 #endif
00080 }
00081 
00082 struct ComputeMsmMsaAtom {
00083   Position position;
00084   float msmcharge;    // scaled for MSM
00085   int id;
00086 };
00087 
00088 class MsmMsaCoordMsg : public CMessage_MsmMsaCoordMsg {
00089 public:
00090   int numAtoms;
00091   Lattice lattice;
00092   ComputeMsmMsaAtom *coord;
00093 };
00094 
00095 class ComputeMsmMsaMgr : public CBase_ComputeMsmMsaMgr {
00096 public:
00097   ComputeMsmMsaMgr();                    // entry
00098   ~ComputeMsmMsaMgr();
00099 
00100   void initialize(CkQdMsg *);         // entry with message
00101 
00102   void setCompute(ComputeMsmMsa *c) { msmCompute = c; }  // local
00103 
00104   void recvMsmMsaData(const MsmMsaData &);  // entry with parameter marshalling
00105 
00106   void initWorkers(CkQdMsg *);        // entry with message
00107   void startWorkers(CkQdMsg *);       // entry with message
00108 
00109   void anterpolate(MsmMsaCoordMsg *);    // entry with coordinates message
00110   void interpolate(CkQdMsg *);        // entry with message
00111 
00112   const MsmMsaData &getMsmMsaData() const { return msmData; }  // local
00113 
00114 private:
00115   CProxy_ComputeMsmMsaMgr msmProxy;
00116   ComputeMsmMsa *msmCompute;
00117 
00118   MsmMsaCoordMsg *coordMsg;
00119   int numAtoms;
00120   MsmMsaForce *force;
00121   int numForces;
00122 
00123   MsmMsaData msmData;
00124 
00125   CProxy_MsmMsaLevel msmLevelProxy;   // 1D chare array (number of grid levels)
00126   CProxy_MsmMsaEnergy msmEnergyProxy; // 1D chare array
00127 
00128   MsmMsaGrid::Accum qhacc;  // accumulate charge grid for anterpolation
00129   MsmMsaGrid::Read qhread;  // read charge grid after anterpolation
00130   MsmMsaGrid::Accum ehacc;  // accumulate potential grid before interpolation
00131   MsmMsaGrid::Read ehread;  // read potential grid for interpolation
00132   int msa_setup_anterpolation;   // have MSAs been initially set up?
00133   int msa_setup_interpolation;   // have MSAs been initially set up?
00134 };
00135 
00136 class MsmMsaLevel : public CBase_MsmMsaLevel {
00137 public:
00138   MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh, MsmMsaGrid &q2h, MsmMsaGrid &e2h);
00139   MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh);
00140   MsmMsaLevel(CkMigrateMessage *m) { }
00141   void compute();
00142 private:
00143   int lastlevel;
00144   CProxy_MsmMsaGridCutoff gridcutoff;
00145   CProxy_MsmMsaRestriction restriction;
00146   CProxy_MsmMsaProlongation prolongation;
00147 };
00148 
00149 class MsmMsaGridCutoff : public CBase_MsmMsaGridCutoff {
00150 public:
00151   MsmMsaGridCutoff(int level, MsmMsaGrid &qh, MsmMsaGrid &eh);
00152   MsmMsaGridCutoff(CkMigrateMessage *m) { }
00153   void compute();
00154 private:
00155   MsmMsaGrid qh, eh;    // MSA handles to this level charge and potential grids
00156   MsmMsaGrid::Accum qhacc, ehacc;
00157   MsmMsaGrid::Read qhread, ehread;
00158   int msa_setup;
00159   int mylevel;       // which level am I on?
00160   int mia, mja, mka; // my lowest grid point index of my sub-cube
00161   int mib, mjb, mkb; // my highest grid point index of my sub-cube
00162 };
00163 
00164 class MsmMsaRestriction : public CBase_MsmMsaRestriction {
00165 public:
00166   MsmMsaRestriction(int level, MsmMsaGrid &qh, MsmMsaGrid &q2h);
00167   MsmMsaRestriction(CkMigrateMessage *m) { }
00168   void compute();
00169 private:
00170   MsmMsaGrid qh, q2h;   // MSA handles to charge grids mylevel, (mylevel+1)
00171   MsmMsaGrid::Accum qhacc, q2hacc;
00172   MsmMsaGrid::Read qhread, q2hread;
00173   int msa_setup;
00174   int mylevel;       // which level am I on?
00175   int mia, mja, mka; // my lowest grid point index of (mylevel+1) sub-cube
00176   int mib, mjb, mkb; // my highest grid point index of (mylevel+1) sub-cube
00177 };
00178 
00179 class MsmMsaProlongation : public CBase_MsmMsaProlongation {
00180 public:
00181   MsmMsaProlongation(int level, MsmMsaGrid &eh, MsmMsaGrid &e2h);
00182   MsmMsaProlongation(CkMigrateMessage *m) { }
00183   void compute();
00184 private:
00185   MsmMsaGrid eh, e2h;   // MSA handles to potential grids mylevel, (mylevel+1)
00186   MsmMsaGrid::Accum ehacc, e2hacc;
00187   MsmMsaGrid::Read ehread, e2hread;
00188   int msa_setup;
00189   int mylevel;       // which level am I on?
00190   int mia, mja, mka; // my lowest grid point index of (mylevel+1) sub-cube
00191   int mib, mjb, mkb; // my highest grid point index of (mylevel+1) sub-cube
00192 };
00193 
00194 class MsmMsaEnergy : public CBase_MsmMsaEnergy {
00195 public:
00196   MsmMsaEnergy(MsmMsaGrid &qh, MsmMsaGrid &eh);
00197   MsmMsaEnergy(CkMigrateMessage *m) { }
00198   ~MsmMsaEnergy();
00199   void compute();
00200 private:
00201   MsmMsaGrid qh, eh;
00202   MsmMsaGrid::Accum qhacc, ehacc;
00203   MsmMsaGrid::Read qhread, ehread;
00204   float *qhbuffer;
00205   int msa_setup;
00206   int mli, mlj, mlk; // my 3D index within level 0
00207   int mia, mja, mka; // my lowest grid point index of my sub-cube
00208   int mib, mjb, mkb; // my highest grid point index of my sub-cube
00209   SubmitReduction *reduction;
00210 };
00211 
00212 
00213 ComputeMsmMsa::ComputeMsmMsa(ComputeID c) :
00214   ComputeHomePatches(c)
00215 {
00216   CProxy_ComputeMsmMsaMgr::ckLocalBranch(
00217       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->setCompute(this);
00218   SimParameters *simParams = Node::Object()->simParameters;
00219   qscaling = sqrt(COULOMB / simParams->dielectric);
00220   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
00221 }
00222 
00223 ComputeMsmMsa::~ComputeMsmMsa()
00224 {
00225 }
00226 
00227 void ComputeMsmMsa::doWork()
00228 {
00229   ResizeArrayIter<PatchElem> ap(patchList);
00230 
00231   // Skip computations if nothing to do.
00232   if ( ! patchList[0].p->flags.doFullElectrostatics ) {
00233     for (ap = ap.begin();  ap != ap.end();  ap++) {
00234       CompAtom *x = (*ap).positionBox->open();
00235       Results *r = (*ap).forceBox->open();
00236       (*ap).positionBox->close(&x);
00237       (*ap).forceBox->close(&r);
00238       reduction->submit();
00239     }
00240     return;
00241   }
00242 
00243   // allocate message
00244   int numLocalAtoms = 0;
00245   for (ap = ap.begin();  ap != ap.end();  ap++) {
00246     numLocalAtoms += (*ap).p->getNumAtoms();
00247   }
00248 
00249   MsmMsaCoordMsg *msg = new (numLocalAtoms, 0) MsmMsaCoordMsg;
00250   msg->numAtoms = numLocalAtoms;
00251   msg->lattice = patchList[0].p->flags.lattice;
00252   ComputeMsmMsaAtom *data_ptr = msg->coord;
00253 
00254   // get positions
00255   for (ap = ap.begin();  ap != ap.end();  ap++) {
00256     CompAtom *x = (*ap).positionBox->open();
00257     CompAtomExt *xExt = (*ap).p->getCompAtomExtInfo();
00258     if ( patchList[0].p->flags.doMolly ) {
00259       (*ap).positionBox->close(&x);
00260       x = (*ap).avgPositionBox->open();
00261     }
00262     int numAtoms = (*ap).p->getNumAtoms();
00263 
00264     for (int i=0;  i < numAtoms;  i++) {
00265       data_ptr->position = x[i].position;
00266       data_ptr->msmcharge = qscaling * x[i].charge;
00267       data_ptr->id = xExt[i].id;
00268       ++data_ptr;
00269     }
00270 
00271     if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
00272     else { (*ap).positionBox->close(&x); }
00273   }
00274 
00275   CProxy_ComputeMsmMsaMgr msmProxy(
00276       CkpvAccess(BOCclass_group).computeMsmMsaMgr);
00277   msmProxy[CkMyPe()].anterpolate(msg);
00278   msmProxy[CkMyPe()].interpolate(new CkQdMsg);
00279 }
00280 
00281 
00282 void ComputeMsmMsa::saveResults(int n, const MsmMsaForce force[], double self_energy)
00283 {
00284   ResizeArrayIter<PatchElem> ap(patchList);
00285 
00286   // add in forces
00287   int j = 0;
00288   for (ap = ap.begin();  ap != ap.end();  ap++) {
00289     Results *r = (*ap).forceBox->open();
00290     Force *f = r->f[Results::slow];
00291     int numAtoms = (*ap).p->getNumAtoms();
00292     for (int i=0;  i < numAtoms;  i++, j++) {
00293       f[i].x += force[j].x;
00294       f[i].y += force[j].y;
00295       f[i].z += force[j].z;
00296     }
00297     (*ap).forceBox->close(&r);
00298   }
00299   reduction->item(REDUCTION_ELECT_ENERGY_SLOW) -= self_energy;
00300   reduction->submit();
00301 }
00302 
00303 
00304 ComputeMsmMsaMgr::ComputeMsmMsaMgr() :
00305   msmProxy(thisgroup), msmCompute(0),
00306   coordMsg(0), numAtoms(0), force(0), numForces(0),
00307   msa_setup_anterpolation(0), msa_setup_interpolation(0)
00308 {
00309   CkpvAccess(BOCclass_group).computeMsmMsaMgr = thisgroup;
00310 }
00311 
00312 ComputeMsmMsaMgr::~ComputeMsmMsaMgr()
00313 {
00314   delete [] force;
00315 }
00316 
00317 
00318 static void rescale_nonperiodic_cell(
00319     Vector &u, Vector &v, Vector &w, Vector &c,
00320     Vector &ru, Vector &rv, Vector &rw,
00321     const Vector &smin, const Vector &smax,
00322     double padding, double gridspacing, int isperiodic)
00323 {
00324   const double ZERO_TOLERANCE = 1e-4;
00325   double xlen = 1, inv_xlen = 1;  // don't rescale periodic directions
00326   double ylen = 1, inv_ylen = 1;
00327   double zlen = 1, inv_zlen = 1;
00328   double ulen_1 = u.rlength();
00329   double vlen_1 = v.rlength();
00330   double wlen_1 = w.rlength();
00331   double rpadx = padding * ulen_1;  // padding distance in recip space
00332   double rpady = padding * vlen_1;
00333   double rpadz = padding * wlen_1;
00334   double rhx = gridspacing * ulen_1;  // grid spacing in recip space
00335   double rhy = gridspacing * vlen_1;
00336   double rhz = gridspacing * wlen_1;
00337   double xmin = smin.x, xmax = smax.x;  // scaled coordinate extremes
00338   double ymin = smin.y, ymax = smax.y;
00339   double zmin = smin.z, zmax = smax.z;
00340   Vector rc = 0;  // don't move center along periodic directions
00341   int is_periodic_x = (isperiodic & PERIODIC_VEC1);
00342   int is_periodic_y = (isperiodic & PERIODIC_VEC2);
00343   int is_periodic_z = (isperiodic & PERIODIC_VEC3);
00344 
00345 #if 0
00346   /* affine linear transformation of coordinates to reciprocal space,
00347    * where periodic vector directions map into [-0.5, 0.5) */
00348   //printf("*** center=%.4f %.4f %.4f\n", c.x, c.y, c.z);
00349   d = coord[0].position - c;
00350   s.x = ru * d;  // dot product
00351   s.y = rv * d;
00352   s.z = rw * d;
00353   xmin = xmax = s.x;
00354   ymin = ymax = s.y;
00355   zmin = zmax = s.z;
00356   for (i = 1;  i < numatoms;  i++) {
00357     d = coord[i].position - c;
00358     s.x = ru * d;  // dot product
00359     s.y = rv * d;
00360     s.z = rw * d;
00361     if      (s.x < xmin)  xmin = s.x;
00362     else if (s.x > xmax)  xmax = s.x;
00363     if      (s.y < ymin)  ymin = s.y;
00364     else if (s.y > ymax)  ymax = s.y;
00365     if      (s.z < zmin)  zmin = s.z;
00366     else if (s.z > zmax)  zmax = s.z;
00367   }
00368 #if 0
00369   printf("*** xmin=%.4f  xmax=%.4f\n", xmin, xmax);
00370   printf("*** ymin=%.4f  ymax=%.4f\n", ymin, ymax);
00371   printf("*** zmin=%.4f  zmax=%.4f\n", zmin, zmax);
00372 #endif
00373 #endif
00374 
00375   if ( ! is_periodic_x) {
00376     xmax += rpadx;  // pad the edges
00377     xmin -= rpadx;
00378     if (rhx > 0) {  // restrict center to rhx lattice points
00379       double mupper = ceil(xmax / (2*rhx));
00380       double mlower = floor(xmin / (2*rhx));
00381       xmax = 2*rhx*mupper;
00382       xmin = 2*rhx*mlower;
00383     }
00384     rc.x = 0.5*(xmin + xmax);
00385     xlen = xmax - xmin;
00386     if (xlen < ZERO_TOLERANCE) {
00387       xlen = 1;  // leave scaling unchanged
00388     }
00389     else {
00390       inv_xlen = 1/xlen;
00391     }
00392   }
00393 
00394   if ( ! is_periodic_y) {
00395     ymax += rpady;  // pad the edges
00396     ymin -= rpady;
00397     if (rhy > 0) {  // restrict center to rhy lattice points
00398       double mupper = ceil(ymax / (2*rhy));
00399       double mlower = floor(ymin / (2*rhy));
00400       ymax = 2*rhy*mupper;
00401       ymin = 2*rhy*mlower;
00402     }
00403     rc.y = 0.5*(ymin + ymax);
00404     ylen = ymax - ymin;
00405     if (ylen < ZERO_TOLERANCE) {
00406       ylen = 1;  // leave scaling unchanged
00407     }
00408     else {
00409       inv_ylen = 1/ylen;
00410     }
00411   }
00412 
00413   if ( ! is_periodic_z) {
00414     zmax += rpadz;  // pad the edges
00415     zmin -= rpadz;
00416     if (rhz > 0) {  // restrict center to rhz lattice points
00417       double mupper = ceil(zmax / (2*rhz));
00418       double mlower = floor(zmin / (2*rhz));
00419       zmax = 2*rhz*mupper;
00420       zmin = 2*rhz*mlower;
00421     }
00422     rc.z = 0.5*(zmin + zmax);
00423     zlen = zmax - zmin;
00424     if (zlen < ZERO_TOLERANCE) {
00425       zlen = 1;  // leave scaling unchanged
00426     }
00427     else {
00428       inv_zlen = 1/zlen;
00429     }
00430   }
00431 
00432 #if 0
00433   printf("xmin=%g  xmax=%g\n", xmin, xmax);
00434   printf("ymin=%g  ymax=%g\n", ymin, ymax);
00435   printf("zmin=%g  zmax=%g\n", zmin, zmax);
00436   printf("xlen=%g  ylen=%g  zlen=%g\n", xlen, ylen, zlen);
00437   printf("rc_x=%g  rc_y=%g  rc_z=%g\n", rc.x, rc.y, rc.z);
00438 #endif
00439 
00440   // transform new center back to real space, then rescale basis vectors
00441   c.x = (u.x*rc.x + v.x*rc.y + w.x*rc.z) + c.x;
00442   c.y = (u.y*rc.x + v.y*rc.y + w.y*rc.z) + c.y;
00443   c.z = (u.z*rc.x + v.z*rc.y + w.z*rc.z) + c.z;
00444 
00445 #if 0
00446   printf("c_x=%g  c_y=%g  c_z=%g\n", c.x, c.y, c.z);
00447 #endif
00448 
00449   u *= xlen;
00450   v *= ylen;
00451   w *= zlen;
00452 
00453   ru *= inv_xlen;
00454   rv *= inv_ylen;
00455   rw *= inv_zlen;
00456 }
00457 
00458 
00459 struct MsmMsaInterpParams {
00460   int nu;
00461   int stencil;
00462   int omega;
00463 };
00464 
00465 // ordering must be identical to APPROX enum constants
00466 static const MsmMsaInterpParams InterpParams[] = {
00467   { 1, 4, 6 },    // cubic
00468   { 2, 6, 10 },   // quintic
00469   { 2, 6, 10 },   // quintic, C2
00470   { 3, 8, 14 },   // septic
00471   { 3, 8, 14 },   // septic, C3
00472   { 4, 10, 18 },  // nonic
00473   { 4, 10, 18 },  // nonic, C4
00474   { 1, 4, 6 },    // B-spline
00475 };
00476 
00477 
00478 static int setup_hgrid_1d(
00479     double len,           // cell length
00480     double gridspacing,   // desired grid spacing
00481     double &hh,           // determine h to fit cell length
00482     int &nn,              // determine number of h's covering cell
00483     int &aindex,          // determine smallest grid index
00484     int &bindex,          // determine largest grid index
00485     int isperiodic,       // is this dimension periodic?
00486     int approx            // which approximation
00487     ) {
00488 
00489   const int nu = InterpParams[approx].nu;  // interp stencil radius
00490 
00491   // ASSERT(hmax > 0);
00492   if (isperiodic) {
00493     const double hmin = (4./5) * gridspacing;  // minimum bound on h
00494     const double hmax = 1.5 * hmin;
00495     double h = len;
00496     int n = 1;    // start with one grid point across domain
00497     while (h >= hmax) {
00498       h *= 0.5;   // halve h
00499       n <<= 1;    // double grid points
00500     }
00501     if (h < hmin) {
00502       if (n < 4) {  // either len is too small or hmin is too large
00503         return -1;
00504       }
00505       h *= (4./3);  // scale h by 4/3
00506       n >>= 2;      // scale n by 3/4
00507       n *= 3;
00508     }
00509     // now we have:  hmin <= h < hmax
00510     // now we have:  n is power of two times no more than one power of 3
00511     hh = h;
00512     nn = n;
00513     aindex = 0;
00514     bindex = n-1;
00515   }
00516   else {  // non-periodic
00517     double h = gridspacing;
00518     int n = (int) floorf(len / h) + 1;
00519     hh = h;
00520     nn = n;
00521     aindex = -nu;
00522     bindex = n + nu;
00523   }
00524   return 0;
00525 }
00526 
00527 
00528 void ComputeMsmMsaMgr::initialize(CkQdMsg *msg)
00529 {
00530   delete msg;
00531   //printf("MSM initialize PE=%d\n", CkMyPe());
00532   if (CkMyPe() != 0) return;  // initialize only on PE 0, broadcast MsmMsaData
00533 
00534   // initialize MSM here
00535   SimParameters *simParams = Node::Object()->simParameters;
00536   const double a = simParams->cutoff;
00537   const double gridspacing = simParams->MSMGridSpacing;
00538   const double padding = simParams->MSMPadding;
00539   const int approx = simParams->MSMApprox;
00540   const int split = simParams->MSMSplit;
00541   int nlevels = simParams->MSMLevels;
00542   const Lattice &lattice = simParams->lattice;
00543   int msmflags = 0;
00544   msmflags |= (lattice.a_p() ? PERIODIC_VEC1 : 0);
00545   msmflags |= (lattice.b_p() ? PERIODIC_VEC2 : 0);
00546   msmflags |= (lattice.c_p() ? PERIODIC_VEC3 : 0);
00547   int allperiodic = (PERIODIC_VEC1 | PERIODIC_VEC2 | PERIODIC_VEC3);
00548   Vector u(lattice.a()), v(lattice.b()), w(lattice.c());
00549   Vector c(lattice.origin());
00550   Vector ru(lattice.a_r()), rv(lattice.b_r()), rw(lattice.c_r());
00551   if ( (msmflags & allperiodic) != allperiodic ) {
00552     ScaledPosition smin, smax;
00553     Node::Object()->pdb->get_extremes(smin, smax);
00554     //printf("smin = %g %g %g  smax = %g %g %g\n",
00555     //    smin.x, smin.y, smin.z, smax.x, smax.y, smax.z);
00556     rescale_nonperiodic_cell(u, v, w, c, ru, rv, rw, smin, smax,
00557         padding, gridspacing, msmflags);
00558   }
00559 
00560   if (u.x <= 0 || u.y != 0 || u.z != 0 ||
00561       v.x != 0 || v.y <= 0 || v.z != 0 ||
00562       w.x != 0 || w.y != 0 || w.z <= 0) {
00563     NAMD_die("MSM requires cell basis be along x, y, and z coordinate axes.");
00564   }
00565 
00566   // setup grids
00567   MsmMsaData &p = msmData;  // the MSM data will be broadcast to all PEs
00568 
00569   const int nu = InterpParams[approx].nu;
00570   const int omega = InterpParams[approx].omega;
00571   const int ispx = ((msmflags & PERIODIC_VEC1) != 0);
00572   const int ispy = ((msmflags & PERIODIC_VEC2) != 0);
00573   const int ispz = ((msmflags & PERIODIC_VEC3) != 0);
00574   const int ispany = ((msmflags & PERIODIC_ALL) != 0);
00575 
00576   const double xlen = u.x;  // XXX want to extend to non-orthogonal cells
00577   const double ylen = v.y;
00578   const double zlen = w.z;
00579 
00580   double scaling;
00581   double d;  // temporary for SPOLY derivative
00582 
00583   int ia, ib, ja, jb, ka, kb, ni, nj, nk;
00584   int nx, ny, nz;  // counts the grid points that span just the domain
00585 
00586   double hx, hy, hz;
00587   double gx, gy, gz;
00588 
00589   int i, j, k, n;
00590   int index;
00591   int level, toplevel, maxlevels;
00592   int lastnelems = 1;
00593   int isclamped = 0;
00594   int done, alldone;
00595 
00596   int rc = 0;  // return code
00597 
00598   //CkPrintf("ispx=%d  ispy=%d  ispz=%d\n", ispx, ispy, ispz);
00599   p.ispx = ispx;
00600   p.ispy = ispy;
00601   p.ispz = ispz;
00602 
00603   rc = setup_hgrid_1d(xlen, gridspacing, hx, nx, ia, ib, ispx, approx);
00604   if (rc)  NAMD_die("MSM failed to setup grid along x dimension.");
00605 
00606   rc = setup_hgrid_1d(ylen, gridspacing, hy, ny, ja, jb, ispy, approx);
00607   if (rc)  NAMD_die("MSM failed to setup grid along y dimension.");
00608 
00609   rc = setup_hgrid_1d(zlen, gridspacing, hz, nz, ka, kb, ispz, approx);
00610   if (rc)  NAMD_die("MSM failed to setup grid along z dimension.");
00611 
00612   p.a = (float) a;  // cutoff distance
00613 
00614   p.hx_1 = (float) (1/hx);  // inverse of grid spacing
00615   p.hy_1 = (float) (1/hy);
00616   p.hz_1 = (float) (1/hz);
00617 
00618   // XXX set coordinate for h-grid (0,0,0) point
00619   gx = c.x - ((nx >> 1) * hx);
00620   gy = c.y - ((ny >> 1) * hy);
00621   gz = c.z - ((nz >> 1) * hz);
00622 
00623   p.origin_x = (float) gx;  // grid(0,0,0) location
00624   p.origin_y = (float) gy;
00625   p.origin_z = (float) gz;
00626 
00627   ni = ib - ia + 1;
00628   nj = jb - ja + 1;
00629   nk = kb - ka + 1;
00630 
00631   p.approx = approx;
00632   p.split = split;
00633 
00634   if (nlevels <= 0) {
00635     // automatically set number of levels
00636     n = ni;
00637     if (n < nj) n = nj;
00638     if (n < nk) n = nk;
00639     for (maxlevels = 1;  n > 0;  n >>= 1)  maxlevels++;
00640     nlevels = maxlevels;
00641     if (ispany == 0) {  // no periodicity
00642       int omega3 = omega * omega * omega;
00643       int nhalf = (int) sqrtf(ni*nj*nk);  // scale down for performance?
00644       lastnelems = (nhalf > omega3 ? nhalf : omega3);
00645       isclamped = 1;
00646     }
00647   }
00648   else {
00649     // user-defined number of levels
00650     maxlevels = nlevels;
00651   }
00652 
00653   p.maxlevels = maxlevels;
00654   p.grid_len.resize(maxlevels);
00655   p.grid_idstart.resize(maxlevels);
00656   p.scaling.resize(maxlevels);
00657 
00658 #if 0
00659   /* allocate any additional levels that may be needed */
00660   if (pm->maxlevels < maxlevels) {
00661     void *vqh, *veh, *vgc;
00662     if (issprec) {
00663       vqh = realloc(pm->qh, maxlevels * sizeof(NL_MsmMsagrid_float));
00664       if (NULL == vqh) return NL_MSM_ERROR_MALLOC;
00665       veh = realloc(pm->eh, maxlevels * sizeof(NL_MsmMsagrid_float));
00666       if (NULL == veh) return NL_MSM_ERROR_MALLOC;
00667       vgc = realloc(pm->gc, maxlevels * sizeof(NL_MsmMsagrid_float));
00668       if (NULL == vgc) return NL_MSM_ERROR_MALLOC;
00669       pm->qh_f = (NL_MsmMsagrid_float *) vqh;
00670       pm->eh_f = (NL_MsmMsagrid_float *) veh;
00671       pm->gc_f = (NL_MsmMsagrid_float *) vgc;
00672       /* initialize the newest grids appended to array */
00673       for (level = pm->maxlevels;  level < maxlevels;  level++) {
00674         GRID_INIT( &(pm->qh_f[level]) );
00675         GRID_INIT( &(pm->eh_f[level]) );
00676         GRID_INIT( &(pm->gc_f[level]) );
00677       }
00678     }
00679     else {
00680       vqh = realloc(pm->qh, maxlevels * sizeof(NL_MsmMsagrid_double));
00681       if (NULL == vqh) return NL_MSM_ERROR_MALLOC;
00682       veh = realloc(pm->eh, maxlevels * sizeof(NL_MsmMsagrid_double));
00683       if (NULL == veh) return NL_MSM_ERROR_MALLOC;
00684       vgc = realloc(pm->gc, maxlevels * sizeof(NL_MsmMsagrid_double));
00685       if (NULL == vgc) return NL_MSM_ERROR_MALLOC;
00686       pm->qh = (NL_MsmMsagrid_double *) vqh;
00687       pm->eh = (NL_MsmMsagrid_double *) veh;
00688       pm->gc = (NL_MsmMsagrid_double *) vgc;
00689       /* initialize the newest grids appended to array */
00690       for (level = pm->maxlevels;  level < maxlevels;  level++) {
00691         GRID_INIT( &(pm->qh[level]) );
00692         GRID_INIT( &(pm->eh[level]) );
00693         GRID_INIT( &(pm->gc[level]) );
00694       }
00695     }
00696     pm->maxlevels = maxlevels;
00697   }
00698 #endif
00699 
00700   level = 0;
00701   done = 0;
00702   alldone = 0;
00703   scaling = 1.0;
00704   do {
00705 #if 0
00706     if (issprec) {
00707       GRID_RESIZE( &(pm->qh_f[level]), float, ia, ni, ja, nj, ka, nk);
00708       GRID_RESIZE( &(pm->eh_f[level]), float, ia, ni, ja, nj, ka, nk);
00709     }
00710     else {
00711       GRID_RESIZE( &(pm->qh[level]), double, ia, ni, ja, nj, ka, nk);
00712       GRID_RESIZE( &(pm->eh[level]), double, ia, ni, ja, nj, ka, nk);
00713     }
00714 #endif
00715     p.grid_len[level] = Int3(ni,nj,nk);
00716     p.grid_idstart[level] = Int3(ia,ja,ka);
00717     p.scaling[level] = (float) scaling;
00718     scaling *= 0.5;
00719 
00720     if (++level == nlevels)    done |= 0x07;  // user limit on levels
00721 
00722     alldone = (done == 0x07);  // make sure all dimensions are done
00723 
00724     if (isclamped) {
00725       int nelems = ni * nj * nk;
00726       if (nelems <= lastnelems)  done |= 0x07;
00727     }
00728 
00729     if (ispx) {
00730       ni >>= 1;
00731       ib = ni-1;
00732       if (ni & 1)              done |= 0x07;  // == 3 or 1
00733       else if (ni == 2)        done |= 0x01;  // can do one more
00734     }
00735     else {
00736       ia = -((-ia+1)/2) - nu;
00737       ib = (ib+1)/2 + nu;
00738       ni = ib - ia + 1;
00739       if (ni <= omega)         done |= 0x01;  // can do more restrictions
00740     }
00741 
00742     if (ispy) {
00743       nj >>= 1;
00744       jb = nj-1;
00745       if (nj & 1)              done |= 0x07;  // == 3 or 1
00746       else if (nj == 2)        done |= 0x02;  // can do one more
00747     }
00748     else {
00749       ja = -((-ja+1)/2) - nu;
00750       jb = (jb+1)/2 + nu;
00751       nj = jb - ja + 1;
00752       if (nj <= omega)         done |= 0x02;  // can do more restrictions
00753     }
00754 
00755     if (ispz) {
00756       nk >>= 1;
00757       kb = nk-1;
00758       if (nk & 1)              done |= 0x07;  // == 3 or 1
00759       else if (nk == 2)        done |= 0x04;  // can do one more
00760     }
00761     else {
00762       ka = -((-ka+1)/2) - nu;
00763       kb = (kb+1)/2 + nu;
00764       nk = kb - ka + 1;
00765       if (nk <= omega)         done |= 0x04;  // can do more restrictions
00766     }
00767 
00768   } while ( ! alldone );
00769   nlevels = level;
00770   p.nlevels = nlevels;
00771 
00772   toplevel = (ispany ? nlevels : nlevels - 1);
00773   p.toplevel = -1;  // set only for non-periodic system
00774 
00775   // ellipsoid axes for grid cutoff weights
00776   ni = (int) ceil(2*a/hx) - 1;
00777   nj = (int) ceil(2*a/hy) - 1;
00778   nk = (int) ceil(2*a/hz) - 1;
00779 
00780   p.gc_len = Int3(2*ni+1, 2*nj+1, 2*nk+1);
00781   p.gc_idstart = Int3(-ni, -nj, -nk);
00782   p.gc.resize( (2*ni+1)*(2*nj+1)*(2*nk+1) );
00783 
00784   index = 0;
00785   for (k = -nk;  k <= nk;  k++) {
00786     for (j = -nj;  j <= nj;  j++) {
00787       for (i = -ni;  i <= ni;  i++) {
00788         double s, t, gs, gt, g;
00789         s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
00790         t = 0.5 * s;
00791         if (t >= 1) {
00792           g = 0;
00793         }
00794         else if (s >= 1) {
00795           gs = 1/s;
00796           SPOLY(&gt, &d, t, split);
00797           g = (gs - 0.5 * gt) / a;
00798         }
00799         else {
00800           SPOLY(&gs, &d, s, split);
00801           SPOLY(&gt, &d, t, split);
00802           g = (gs - 0.5 * gt) / a;
00803         }
00804         p.gc[index] = (float) g;
00805         index++;
00806       }
00807     }
00808   }
00809 
00810   if (toplevel < nlevels) {
00811     // nonperiodic in all dimensions,
00812     // calculate top level weights, ellipsoid axes are length of grid
00813 #ifdef MSM_DEBUG
00814     CkPrintf("MSM setting top level weights\n");
00815 #endif
00816     ni = p.grid_len[toplevel].nx;
00817     nj = p.grid_len[toplevel].ny;
00818     nk = p.grid_len[toplevel].nz;
00819 
00820     p.toplevel = toplevel;
00821     p.gctop_len = Int3(2*ni+1, 2*nj+1, 2*nk+1);
00822     p.gctop_idstart = Int3(-ni, -nj, -nk);
00823     p.gctop.resize( (2*ni+1)*(2*nj+1)*(2*nk+1) );
00824 
00825     index = 0;
00826     for (k = -nk;  k <= nk;  k++) {
00827       for (j = -nj;  j <= nj;  j++) {
00828         for (i = -ni;  i <= ni;  i++) {
00829           double s, gs;
00830           s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
00831           if (s >= 1) {
00832             gs = 1/s;
00833           }
00834           else {
00835             SPOLY(&gs, &d, s, split);
00836           }
00837           p.gctop[index] = (float) (gs / a);
00838           index++;
00839         }
00840       }
00841     } // end loops over k-j-i for coarsest level weights
00842   }
00843 
00844   // calculate self energy factor for splitting
00845   double s, gs;
00846   s = 0;
00847   SPOLY(&gs, &d, s, split);
00848   p.self_energy_const = gs / a;
00849 #if 0
00850   s = 0;
00851   for (n = 0;  n < natoms;  n++) {
00852     double q = atom[4*n + 3];
00853     s += q * q;
00854   }
00855   self_energy *= 0.5*s;
00856 #endif
00857 
00858   // count number of client chares to the MSAs
00859   p.num_points_per_chare.nx = simParams->MSMBlockSizeX;
00860   p.num_points_per_chare.ny = simParams->MSMBlockSizeY;
00861   p.num_points_per_chare.nz = simParams->MSMBlockSizeZ;
00862   if (nlevels > 1) {
00863     p.num_restriction_chares.resize(nlevels-1);
00864     p.num_prolongation_chares.resize(nlevels-1);
00865     p.dim_gridtransfer_chares.resize(nlevels-1);
00866   }
00867   p.num_gridcutoff_chares.resize(nlevels);
00868   p.dim_gridcutoff_chares.resize(nlevels);
00869   p.num_clients_qh.resize(nlevels);
00870   p.num_clients_eh.resize(nlevels);
00871 
00872   p.num_anterpolation_chares = (PatchMap::Object())->numNodesWithPatches();
00873   p.num_interpolation_chares = p.num_anterpolation_chares;
00874   p.num_total_gridcutoff_chares = 0;
00875   p.num_total_restriction_chares = 0;
00876   p.num_total_prolongation_chares = 0;
00877   for (n = 0;  n < nlevels;  n++) {
00878     int ni = p.grid_len[n].nx;
00879     int nj = p.grid_len[n].ny;
00880     int nk = p.grid_len[n].nz;
00881     int nci = ROUNDUP_QUOTIENT(ni, p.num_points_per_chare.nx);
00882     int ncj = ROUNDUP_QUOTIENT(nj, p.num_points_per_chare.ny);
00883     int nck = ROUNDUP_QUOTIENT(nk, p.num_points_per_chare.nz);
00884     int nc = nci * ncj * nck;
00885 #ifdef MSM_DEBUG
00886     CkPrintf("n=%d  nc=%d  nci=%d ncj=%d nck=%d\n", n, nc, nci, ncj, nck);
00887 #endif
00888     p.num_gridcutoff_chares[n] = nc;
00889     p.dim_gridcutoff_chares[n].nx = nci;
00890     p.dim_gridcutoff_chares[n].ny = ncj;
00891     p.dim_gridcutoff_chares[n].nz = nck;
00892     p.num_total_gridcutoff_chares += nc;
00893     if (n > 0) {
00894       p.num_restriction_chares[n-1] = nc;
00895       p.num_prolongation_chares[n-1] = nc;
00896       p.dim_gridtransfer_chares[n-1].nx = nci;
00897       p.dim_gridtransfer_chares[n-1].ny = ncj;
00898       p.dim_gridtransfer_chares[n-1].nz = nck;
00899       p.num_total_restriction_chares += nc;
00900       p.num_total_prolongation_chares += nc;
00901     }
00902     else {
00903       p.num_energy_chares = nc;
00904     }
00905   }
00906 
00907   p.num_clients_qh[0] = p.num_anterpolation_chares +
00908     p.num_gridcutoff_chares[0] + p.num_energy_chares;
00909   p.num_clients_eh[0] = p.num_interpolation_chares +
00910     p.num_gridcutoff_chares[0] + p.num_energy_chares;
00911   if (nlevels > 1) {
00912     p.num_clients_qh[0] += p.num_restriction_chares[0];
00913     p.num_clients_eh[0] += p.num_prolongation_chares[0];
00914   }
00915   for (n = 1;  n < nlevels;  n++) {
00916     p.num_clients_qh[n] = p.num_gridcutoff_chares[n] +
00917       p.num_restriction_chares[n-1];
00918     p.num_clients_eh[n] = p.num_gridcutoff_chares[n] +
00919       p.num_prolongation_chares[n-1];
00920     if (n < nlevels-1) {
00921       p.num_clients_qh[n] += p.num_restriction_chares[n];
00922       p.num_clients_eh[n] += p.num_prolongation_chares[n];
00923     }
00924   }
00925 
00926 #ifdef MSM_DEBUG
00927   CkPrintf("nlevels = %d\n", nlevels);
00928   for (n = 0;  n < nlevels;  n++) {
00929     CkPrintf("num clients qh[%d] = %d\n", n, p.num_clients_qh[n]);
00930     CkPrintf("num clients eh[%d] = %d\n", n, p.num_clients_eh[n]);
00931   }
00932   CkPrintf("num anterpolation chares = %d\n", p.num_anterpolation_chares);
00933   CkPrintf("num interpolation chares = %d\n", p.num_interpolation_chares);
00934   for (n = 0;  n < nlevels;  n++) {
00935     CkPrintf("num grid cutoff chares[%d] = %d\n",
00936         n, p.num_gridcutoff_chares[n]);
00937   }
00938   for (n = 0;  n < nlevels-1;  n++) {
00939     CkPrintf("num restriction chares[%d] = %d\n",
00940         n, p.num_restriction_chares[n]);
00941   }
00942   for (n = 0;  n < nlevels-1;  n++) {
00943     CkPrintf("num prolongation chares[%d] = %d\n",
00944         n, p.num_prolongation_chares[n]);
00945   }
00946 #endif
00947 
00948 #if 0
00949   // allocate MSAs
00950   if (qh || eh) {
00951     CkAbort("attempted to reallocate MSAs");
00952   }
00953   char *qh_memory = new char[nlevels * sizeof(MsmMsaGrid)];
00954   qh = static_cast<MsmMsaGrid *>((void *)qh_memory);
00955   char *eh_memory = new char[nlevels * sizeof(MsmMsaGrid)];
00956   eh = static_cast<MsmMsaGrid *>((void *)eh_memory);
00957 #else
00958   p.qh.resize(nlevels);
00959   p.eh.resize(nlevels);
00960 #endif
00961   for (n = 0;  n < nlevels;  n++) {
00962     ia = p.grid_idstart[n].nx;
00963     ib = p.grid_len[n].nx + ia - 1;
00964     ja = p.grid_idstart[n].ny;
00965     jb = p.grid_len[n].ny + ja - 1;
00966     ka = p.grid_idstart[n].nz;
00967     kb = p.grid_len[n].nz + ka - 1;
00968 #if 0
00969     // using placement new to call non-default constructor
00970     // on each MsmMsaGrid element
00971     new(qh_memory + n*sizeof(MsmMsaGrid))MsmMsaGrid(ia, ib, ja, jb, ka, kb,
00972         p.num_clients_qh[n]);
00973     new(eh_memory + n*sizeof(MsmMsaGrid))MsmMsaGrid(ia, ib, ja, jb, ka, kb,
00974         p.num_clients_eh[n]);
00975 #else
00976     p.qh[n] = MsmMsaGrid(ia, ib, ja, jb, ka, kb, p.num_clients_qh[n]);
00977     p.eh[n] = MsmMsaGrid(ia, ib, ja, jb, ka, kb, p.num_clients_eh[n]);
00978 #endif
00979   }
00980   msmProxy.recvMsmMsaData(msmData);  // broadcast MsmMsaData to chare group
00981 }
00982 
00983 
00984 void ComputeMsmMsaMgr::recvMsmMsaData(const MsmMsaData &m)
00985 {
00986   //printf("MSM recvMsmMsaData PE=%d\n", CkMyPe());
00987   if (CkMyPe() != 0) {
00988     msmData = m;
00989   }
00990   else {
00991     msmData.print();  // PE 0
00992   }
00993 }
00994 
00995 
00996 void ComputeMsmMsaMgr::initWorkers(CkQdMsg *msg)
00997 {
00998   delete msg;
00999   //printf("MSM initWorkers PE=%d\n", CkMyPe());
01000   if (CkMyPe() != 0) return;
01001   // PE 0 creates the compute chare arrays
01002 
01003   MsmMsaData &p = msmData;
01004   int n;
01005   msmLevelProxy = CProxy_MsmMsaLevel::ckNew();  // create empty chare array
01006   for (n = 0;  n < p.nlevels-1;  n++) {
01007     msmLevelProxy[n].insert(p.qh[n], p.eh[n], p.qh[n+1], p.eh[n+1]);
01008   }
01009   msmLevelProxy[n].insert(p.qh[n], p.eh[n]);  // top level
01010   msmLevelProxy.doneInserting();
01011 #ifdef MSM_DEBUG
01012   CkPrintf("Created %d MsmMsaLevel chares\n", p.nlevels);
01013 #endif
01014 
01015   msmEnergyProxy = CProxy_MsmMsaEnergy::ckNew(p.qh[0], p.eh[0], p.num_energy_chares);
01016 #ifdef MSM_DEBUG
01017   CkPrintf("Created %d MsmMsaEnergy chares\n", p.num_energy_chares);
01018 #endif
01019 }
01020 
01021 
01022 void ComputeMsmMsaMgr::startWorkers(CkQdMsg *msg)
01023 {
01024   delete msg;
01025   //printf("MSM startWorkers PE=%d\n", CkMyPe());
01026   if (CkMyPe() != 0) return;
01027   // PE 0 starts the workers;
01028   // they loop forever, waiting for compute work from the MSAs
01029 
01030   msmLevelProxy.compute();
01031   msmEnergyProxy.compute();
01032 }
01033 
01034 
01036 enum { MAX_POLY_DEGREE = 9 };
01037 
01040 static const int PolyDegree[APPROX_END] = {
01041   3, 5, 5, 7, 7, 9, 9, 3,
01042 };
01043 
01044 
01045 void ComputeMsmMsaMgr::anterpolate(MsmMsaCoordMsg *msg)
01046 {
01047 #ifdef MSM_DEBUG
01048   CkPrintf("Anterpolation compute %d starting, PE %d\n", thisIndex, CkMyPe());
01049 #endif
01050   coordMsg = msg;  // save message for interpolation
01051   ComputeMsmMsaAtom *coord = coordMsg->coord;
01052   int numAtoms = coordMsg->numAtoms;
01053   MsmMsaData &p = msmData;
01054   if ( ! msa_setup_anterpolation) {
01055     p.qh[0].enroll(p.num_clients_qh[0]);
01056     qhacc = p.qh[0].getInitialAccum();
01057     qhread = qhacc.syncToRead();  // immediately sync to read phase
01058     msa_setup_anterpolation = 1;
01059   }
01060   int ni = p.grid_len[0].nx;
01061   int nj = p.grid_len[0].ny;
01062   int nk = p.grid_len[0].nz;
01063   int ia = p.grid_idstart[0].nx;
01064   int ja = p.grid_idstart[0].ny;
01065   int ka = p.grid_idstart[0].nz;
01066   int ib = ia + ni - 1;
01067   int jb = ja + nj - 1;
01068   int kb = ka + nk - 1;
01069   qhacc = qhread.syncToEAccum();  // we accumulate to it
01070 #ifdef MSM_DEBUG
01071   CkPrintf("Anterpolation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
01072 #endif
01073   //
01074   // here is the compute work
01075   //
01076 
01077   float xphi[MAX_POLY_DEGREE+1];  // Phi stencil along x-dimension
01078   float yphi[MAX_POLY_DEGREE+1];  // Phi stencil along y-dimension
01079   float zphi[MAX_POLY_DEGREE+1];  // Phi stencil along z-dimension
01080 
01081   int n;
01082   const int s_size = PolyDegree[p.approx] + 1;         // stencil size
01083   const int s_edge = (PolyDegree[p.approx] - 1) >> 1;  // stencil "edge" size
01084 
01085   for (n = 0;  n < numAtoms;  n++) {
01086     // atomic charge
01087     float q = coord[n].msmcharge;
01088     if (0==q) continue;
01089 
01090     // distance between atom and origin measured in grid points
01091     // (does subtraction and division in double, then converts to float)
01092     float rx_hx = (coord[n].position.x - p.origin_x) * p.hx_1;
01093     float ry_hy = (coord[n].position.y - p.origin_y) * p.hy_1;
01094     float rz_hz = (coord[n].position.z - p.origin_z) * p.hz_1;
01095 
01096     // find smallest numbered grid point in stencil
01097     int ilo = (int) floorf(rx_hx) - s_edge;
01098     int jlo = (int) floorf(ry_hy) - s_edge;
01099     int klo = (int) floorf(rz_hz) - s_edge;
01100 
01101     // calculate Phi stencils along each dimension
01102     float delta;
01103     delta = rx_hx - (float) ilo;
01104     STENCIL_1D(xphi, delta, p.approx);
01105     delta = ry_hy - (float) jlo;
01106     STENCIL_1D(yphi, delta, p.approx);
01107     delta = rz_hz - (float) klo;
01108     STENCIL_1D(zphi, delta, p.approx);
01109 
01110     // test to see if stencil is within edges of grid
01111     int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
01112                      ja <= jlo && (jlo+(s_size-1)) <= jb &&
01113                      ka <= klo && (klo+(s_size-1)) <= kb );
01114 
01115     if ( iswithin ) {  // no wrapping needed
01116       // determine charge on cube of grid points around atom
01117       int i, j, k;
01118       for (k = 0;  k < s_size;  k++) {
01119         float ck = zphi[k] * q;
01120         for (j = 0;  j < s_size;  j++) {
01121           float cjk = yphi[j] * ck;
01122           for (i = 0;  i < s_size;  i++) {
01123             qhacc.accumulate(i+ilo,j+jlo,k+klo) += xphi[i] * cjk;
01124           }
01125         }
01126       }
01127     } // if is within
01128     else {  // requires wrapping around grid
01129       int ip, jp, kp;
01130 
01131       // adjust ilo, jlo, klo so they are within grid indexing
01132       if (p.ispx) {
01133         if      (ilo < ia) do { ilo += ni; } while (ilo < ia);
01134         else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
01135       }
01136       else if (ilo < ia || (ilo+(s_size-1)) > ib) {
01137         CkPrintf("Error anterpolation:  ilo=%d  out-of-range [%d,%d]\n",
01138             ilo, ia, (ib-s_size+1));
01139         continue /* XXX NL_MSM_ERROR_RANGE */;
01140       }
01141 
01142       if (p.ispy) {
01143         if      (jlo < ja) do { jlo += nj; } while (jlo < ja);
01144         else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
01145       }
01146       else if (jlo < ja || (jlo+(s_size-1)) > jb) {
01147         CkPrintf("Error anterpolation:  jlo=%d  out-of-range [%d,%d]\n",
01148             jlo, ja, (jb-s_size+1));
01149         continue /* XXX NL_MSM_ERROR_RANGE */;
01150       }
01151 
01152       if (p.ispz) {
01153         if      (klo < ka) do { klo += nk; } while (klo < ka);
01154         else if (klo > kb) do { klo -= nk; } while (klo > kb);
01155       }
01156       else if (klo < ka || (klo+(s_size-1)) > kb) {
01157         CkPrintf("Error anterpolation:  klo=%d  out-of-range [%d,%d]\n",
01158             klo, ka, (kb-s_size+1));
01159         continue /* XXX NL_MSM_ERROR_RANGE */;
01160       }
01161 
01162       // determine charge on cube of grid points around atom, with wrapping
01163       int i, j, k;
01164       for (k = 0, kp = klo;  k < s_size;  k++, kp++) {
01165         if (kp > kb) kp = ka;  /* wrap stencil around grid */
01166         float ck = zphi[k] * q;
01167         for (j = 0, jp = jlo;  j < s_size;  j++, jp++) {
01168           if (jp > jb) jp = ja;  /* wrap stencil around grid */
01169           float cjk = yphi[j] * ck;
01170           for (i = 0, ip = ilo;  i < s_size;  i++, ip++) {
01171             if (ip > ib) ip = ia;  /* wrap stencil around grid */
01172             qhacc.accumulate(ip,jp,kp) += xphi[i] * cjk;
01173           }
01174         }
01175       }
01176     } // else
01177   } // end loop over atoms
01178 
01179   //
01180   // end of compute work
01181   //
01182   qhread = qhacc.syncToRead();  // let consumers read
01183 #ifdef MSM_DEBUG
01184   CkPrintf("Anterpolation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
01185 #endif
01186 }
01187 
01188 
01189 void ComputeMsmMsaMgr::interpolate(CkQdMsg *msg)
01190 {
01191   delete msg;
01192 #ifdef MSM_DEBUG
01193   CkPrintf("Interpolation compute %d starting, PE %d\n", thisIndex, CkMyPe());
01194 #endif
01195   ComputeMsmMsaAtom *coord = coordMsg->coord;
01196   int numAtoms = coordMsg->numAtoms;
01197   if (numForces < numAtoms) {
01198     delete [] force;
01199     force = new MsmMsaForce[numAtoms];
01200     numForces = numAtoms;
01201   }
01202   MsmMsaData &p = msmData;
01203   if ( ! msa_setup_interpolation) {
01204     p.eh[0].enroll(p.num_clients_eh[0]);
01205     ehacc = p.eh[0].getInitialAccum();
01206     ehread = ehacc.syncToRead();  // immediately sync to read phase
01207     msa_setup_interpolation = 1;
01208   }
01209   int ni = p.grid_len[0].nx;
01210   int nj = p.grid_len[0].ny;
01211   int nk = p.grid_len[0].nz;
01212   int ia = p.grid_idstart[0].nx;
01213   int ja = p.grid_idstart[0].ny;
01214   int ka = p.grid_idstart[0].nz;
01215   int ib = ia + ni - 1;
01216   int jb = ja + nj - 1;
01217   int kb = ka + nk - 1;
01218   ehacc = ehread.syncToEAccum();  // let producers accumulate
01219   ehread = ehacc.syncToRead();    // then we read it
01220 #ifdef MSM_DEBUG
01221   CkPrintf("Interpolation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
01222 #endif
01223 #ifdef MSM_DEBUG
01224   if (thisIndex==0) {
01225 #if 1
01226     CkPrintf("+++ eh[0,0,0] = %g\n", ehread.get(0,0,0));
01227 #else
01228     int i, j, k;
01229     for (k = ka;  k <= kb;  k++) {
01230       for (j = ja;  j <= jb;  j++) {
01231         for (i = ia;  i <= ib;  i++) {
01232           CkPrintf("+++ eh[%d,%d,%d] = %g\n", i, j, k, ehread.get(i,j,k));
01233         }
01234       }
01235     }
01236 #endif
01237   }
01238 #endif
01239   //
01240   // here is the compute work
01241   //
01242   double qself = 0;  // accumulate charge for self-energy
01243 
01244   float xphi[MAX_POLY_DEGREE+1];  // Phi stencil along x-dimension
01245   float yphi[MAX_POLY_DEGREE+1];  // Phi stencil along y-dimension
01246   float zphi[MAX_POLY_DEGREE+1];  // Phi stencil along z-dimension
01247   float dxphi[MAX_POLY_DEGREE+1]; // derivative of Phi along x-dimension
01248   float dyphi[MAX_POLY_DEGREE+1]; // derivative of Phi along y-dimension
01249   float dzphi[MAX_POLY_DEGREE+1]; // derivative of Phi along z-dimension
01250   float fx, fy, fz;  // accumulate each force
01251 
01252   int n;
01253   const int s_size = PolyDegree[p.approx] + 1;         // stencil size
01254   const int s_edge = (PolyDegree[p.approx] - 1) >> 1;  // stencil "edge" size
01255 
01256   for (n = 0;  n < numAtoms;  n++) {
01257     // atomic charge
01258     float q = coord[n].msmcharge;
01259     if (0==q) {
01260       force[n].x = 0;
01261       force[n].y = 0;
01262       force[n].z = 0;
01263       continue;
01264     }
01265     qself += q*q;
01266 
01267     // distance between atom and origin measured in grid points
01268     // (does subtraction and division in double, then converts to float)
01269     float rx_hx = (coord[n].position.x - p.origin_x) * p.hx_1;
01270     float ry_hy = (coord[n].position.y - p.origin_y) * p.hy_1;
01271     float rz_hz = (coord[n].position.z - p.origin_z) * p.hz_1;
01272 
01273     // find smallest numbered grid point in stencil
01274     int ilo = (int) floorf(rx_hx) - s_edge;
01275     int jlo = (int) floorf(ry_hy) - s_edge;
01276     int klo = (int) floorf(rz_hz) - s_edge;
01277 
01278     // calculate Phi stencils along each dimension
01279     float delta;
01280     delta = rx_hx - (float) ilo;
01281     D_STENCIL_1D(dxphi, xphi, p.hx_1, delta, p.approx);
01282     delta = ry_hy - (float) jlo;
01283     D_STENCIL_1D(dyphi, yphi, p.hy_1, delta, p.approx);
01284     delta = rz_hz - (float) klo;
01285     D_STENCIL_1D(dzphi, zphi, p.hz_1, delta, p.approx);
01286 
01287     // test to see if stencil is within edges of grid
01288     int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
01289                      ja <= jlo && (jlo+(s_size-1)) <= jb &&
01290                      ka <= klo && (klo+(s_size-1)) <= kb );
01291 
01292     if ( iswithin ) {  // no wrapping needed
01293       // determine charge on cube of grid points around atom
01294       int i, j, k;
01295       fx = fy = fz = 0;
01296       for (k = 0;  k < s_size;  k++) {
01297         for (j = 0;  j < s_size;  j++) {
01298           float cx = yphi[j] * zphi[k];
01299           float cy = dyphi[j] * zphi[k];
01300           float cz = yphi[j] * dzphi[k];
01301           for (i = 0;  i < s_size;  i++) {
01302             float e = ehread.get(i+ilo,j+jlo,k+klo);
01303             fx += e * dxphi[i] * cx;
01304             fy += e * xphi[i] * cy;
01305             fz += e * xphi[i] * cz;
01306           }
01307         }
01308       }
01309     } // if is within
01310     else {  // requires wrapping around grid
01311       int ip, jp, kp;
01312 
01313       // adjust ilo, jlo, klo so they are within grid indexing
01314       if (p.ispx) {
01315         if      (ilo < ia) do { ilo += ni; } while (ilo < ia);
01316         else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
01317       }
01318       else if (ilo < ia || (ilo+(s_size-1)) > ib) {
01319         CkPrintf("Error interpolation:  ilo=%d  out-of-range [%d,%d]\n",
01320             ilo, ia, (ib-s_size+1));
01321         continue /* XXX NL_MSM_ERROR_RANGE */;
01322       }
01323 
01324       if (p.ispy) {
01325         if      (jlo < ja) do { jlo += nj; } while (jlo < ja);
01326         else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
01327       }
01328       else if (jlo < ja || (jlo+(s_size-1)) > jb) {
01329         CkPrintf("Error interpolation:  jlo=%d  out-of-range [%d,%d]\n",
01330             jlo, ja, (jb-s_size+1));
01331         continue /* XXX NL_MSM_ERROR_RANGE */;
01332       }
01333 
01334       if (p.ispz) {
01335         if      (klo < ka) do { klo += nk; } while (klo < ka);
01336         else if (klo > kb) do { klo -= nk; } while (klo > kb);
01337       }
01338       else if (klo < ka || (klo+(s_size-1)) > kb) {
01339         CkPrintf("Error interpolation:  klo=%d  out-of-range [%d,%d]\n",
01340             klo, ka, (kb-s_size+1));
01341         continue /* XXX NL_MSM_ERROR_RANGE */;
01342       }
01343 
01344       // determine charge on cube of grid points around atom, with wrapping
01345       int i, j, k;
01346       fx = fy = fz = 0;
01347       for (k = 0, kp = klo;  k < s_size;  k++, kp++) {
01348         if (kp > kb) kp = ka;  /* wrap stencil around grid */
01349         for (j = 0, jp = jlo;  j < s_size;  j++, jp++) {
01350           if (jp > jb) jp = ja;  /* wrap stencil around grid */
01351           float cx = yphi[j] * zphi[k];
01352           float cy = dyphi[j] * zphi[k];
01353           float cz = yphi[j] * dzphi[k];
01354           for (i = 0, ip = ilo;  i < s_size;  i++, ip++) {
01355             if (ip > ib) ip = ia;  /* wrap stencil around grid */
01356             float e = ehread.get(ip,jp,kp);
01357             fx += e * dxphi[i] * cx;
01358             fy += e * xphi[i] * cy;
01359             fz += e * xphi[i] * cz;
01360           }
01361         }
01362       }
01363     } // else
01364 
01365     // update force
01366     force[n].x = -q * fx;
01367     force[n].y = -q * fy;
01368     force[n].z = -q * fz;
01369 
01370   } // end loop over atoms
01371   double self_energy = 0.5*p.self_energy_const*qself;
01372 
01373   //
01374   // end of compute work
01375   //
01376 #ifdef MSM_DEBUG
01377   CkPrintf("Interpolation compute %d read sync done, PE %d\n",
01378       thisIndex, CkMyPe());
01379 #endif
01380   delete coordMsg;  // get rid of earlier coordMsg
01381   msmCompute->saveResults(numAtoms, force, self_energy);
01382 #if 0
01383   int startAtomID = thisIndex * MsmMsa::NUM_ATOMS_PER_CHARE;
01384   msmProxy.doneForces(startAtomID, nforces, forcebuffer);
01385 #endif
01386 #ifdef MSM_DEBUG
01387   CkPrintf("Interpolation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
01388 #endif
01389 }
01390 
01391 
01392 MsmMsaLevel::MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh, MsmMsaGrid &q2h, MsmMsaGrid &e2h)
01393 {
01394   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01395       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01396   int level = thisIndex;
01397   lastlevel = -1;  // this is an intermediate level
01398   const Int3 &dg = p.dim_gridcutoff_chares[level];
01399   gridcutoff = CProxy_MsmMsaGridCutoff::ckNew(level, qh, eh, dg.nx, dg.ny, dg.nz);
01400   const Int3 &dt = p.dim_gridtransfer_chares[level];
01401   restriction = CProxy_MsmMsaRestriction::ckNew(level, qh, q2h, dt.nx, dt.ny, dt.nz);
01402   prolongation =CProxy_MsmMsaProlongation::ckNew(level, eh, e2h, dt.nx, dt.ny, dt.nz);
01403 #ifdef MSM_DEBUG
01404   CkPrintf("Created %d grid cutoff chares\n", p.num_gridcutoff_chares[level]);
01405 #endif
01406 }
01407 
01408 
01409 MsmMsaLevel::MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh)
01410 {
01411   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01412       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01413   int level = thisIndex;
01414   lastlevel = level;
01415   const Int3 &dg = p.dim_gridcutoff_chares[level];
01416   gridcutoff = CProxy_MsmMsaGridCutoff::ckNew(level, qh, eh, dg.nx, dg.ny, dg.nz);
01417 #ifdef MSM_DEBUG
01418   CkPrintf("Created %d grid cutoff chares\n", p.num_gridcutoff_chares[level]);
01419 #endif
01420 }
01421 
01422 
01423 void MsmMsaLevel::compute()
01424 {
01425   if (thisIndex != lastlevel) {
01426     restriction.compute();
01427     prolongation.compute();
01428   }
01429   gridcutoff.compute();
01430 }
01431 
01432 
01433 MsmMsaGridCutoff::MsmMsaGridCutoff(int level, MsmMsaGrid &qh_, MsmMsaGrid &eh_)
01434   : qh(qh_), eh(eh_)
01435 {
01436   mylevel = level;
01437   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01438       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01439   // find the points on my part of my level's qh and eh grids
01440   const Int3 &len = p.grid_len[mylevel];
01441   const Int3 &idstart = p.grid_idstart[mylevel];
01442   mia = idstart.nx + thisIndex.x * p.num_points_per_chare.nx;
01443   mja = idstart.ny + thisIndex.y * p.num_points_per_chare.ny;
01444   mka = idstart.nz + thisIndex.z * p.num_points_per_chare.nz;
01445   mib = mia + p.num_points_per_chare.nx - 1;
01446   mjb = mja + p.num_points_per_chare.ny - 1;
01447   mkb = mka + p.num_points_per_chare.nz - 1;
01448   if (mib > idstart.nx + len.nx - 1) {
01449     mib = idstart.nx + len.nx - 1;
01450   }
01451   if (mjb > idstart.ny + len.ny - 1) {
01452     mjb = idstart.ny + len.ny - 1;
01453   }
01454   if (mkb > idstart.nz + len.nz - 1) {
01455     mkb = idstart.nz + len.nz - 1;
01456   }
01457   msa_setup = 0;
01458 }
01459 
01460 
01461 void MsmMsaGridCutoff::compute()
01462 {
01463 #ifdef MSM_DEBUG
01464   CkPrintf("Grid cutoff compute (%d,%d,%d) PE %d\n",
01465       thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
01466 #endif
01467   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01468       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01469   if ( ! msa_setup) {
01470     qh.enroll(p.num_clients_qh[mylevel]);
01471     eh.enroll(p.num_clients_eh[mylevel]);
01472     qhacc = qh.getInitialAccum();
01473     qhread = qhacc.syncToRead();  // immediately sync to read phase
01474     ehacc = eh.getInitialAccum();
01475     ehread = ehacc.syncToRead();  // immediately sync to read phase
01476     msa_setup = 1;
01477   }
01478 for (;;) {  // loop forever
01479   qhacc = qhread.syncToEAccum();  // let producers accumulate
01480   qhread = qhacc.syncToRead();    // then we read charge
01481   ehacc = ehread.syncToEAccum();  // and we accumulate potential
01482   //
01483   // here is the compute work
01484   //
01485 
01486   int ni = p.grid_len[mylevel].nx;
01487   int nj = p.grid_len[mylevel].ny;
01488   int nk = p.grid_len[mylevel].nz;
01489   int ia = p.grid_idstart[mylevel].nx;
01490   int ja = p.grid_idstart[mylevel].ny;
01491   int ka = p.grid_idstart[mylevel].nz;
01492   int ib = ia + ni - 1;
01493   int jb = ja + nj - 1;
01494   int kb = ka + nk - 1;
01495   int ispnone = ! ( p.ispx || p.ispy || p.ispz );
01496   int gia, gib, gja, gjb, gka, gkb, gni, gnj;
01497   const float *gc = 0;
01498   if (mylevel != p.toplevel) {
01499     gc = &(p.gc[0]);
01500     gia = p.gc_idstart.nx;
01501     gib = gia + p.gc_len.nx - 1;
01502     gja = p.gc_idstart.ny;
01503     gjb = gja + p.gc_len.ny - 1;
01504     gka = p.gc_idstart.nz;
01505     gkb = gka + p.gc_len.nz - 1;
01506     gni = p.gc_len.nx;
01507     gnj = p.gc_len.ny;
01508   }
01509   else {
01510     gc = &(p.gctop[0]);
01511     gia = p.gctop_idstart.nx;
01512     gib = gia + p.gctop_len.nx - 1;
01513     gja = p.gctop_idstart.ny;
01514     gjb = gja + p.gctop_len.ny - 1;
01515     gka = p.gctop_idstart.nz;
01516     gkb = gka + p.gctop_len.nz - 1;
01517     gni = p.gctop_len.nx;
01518     gnj = p.gctop_len.ny;
01519   }
01520   int i, j, k;
01521   int gia_clip, gib_clip;
01522   int gja_clip, gjb_clip;
01523   int gka_clip, gkb_clip;
01524   int id, jd, kd;
01525   int kgoff, jkgoff, ngindex;
01526 
01527   float scaling = p.scaling[mylevel];
01528   float eh_sum = 0;
01529 
01530 #ifdef MSM_DEBUG
01531   if (mylevel==0 && thisIndex.x==0 && thisIndex.y==0 && thisIndex.z==0) {
01532     int index = 0;
01533     CkPrintf("+++ qh[0,0,0] = %g\n", qhread.get(0,0,0));
01534     CkPrintf("+++ p.toplevel = %d\n", p.toplevel);
01535     CkPrintf("+++ scaling = %g\n", scaling);
01536 #if 0
01537     for (k = gka;  k <= gkb;  k++) {
01538       for (j = gja;  j <= gjb;  j++) {
01539         for (i = gia;  i <= gib;  i++, index++) {
01540           printf("+++ gc[%d,%d,%d] = %g\n", i, j, k, gc[index]);
01541         }
01542       }
01543     }
01544 #endif
01545   }
01546 #endif
01547 
01548   if ( ispnone ) {  // non-periodic boundaries
01549 
01550     // loop over my sub-grid points
01551     for (k = mka;  k <= mkb;  k++) {
01552 
01553       // clip gc ranges to keep offset for k index within grid
01554       gka_clip = (k + gka < ka ? ka - k : gka);
01555       gkb_clip = (k + gkb > kb ? kb - k : gkb);
01556 
01557       for (j = mja;  j <= mjb;  j++) {
01558 
01559         // clip gc ranges to keep offset for j index within grid
01560         gja_clip = (j + gja < ja ? ja - j : gja);
01561         gjb_clip = (j + gjb > jb ? jb - j : gjb);
01562 
01563         for (i = mia;  i <= mib;  i++) {
01564 
01565           // clip gc ranges to keep offset for i index within grid
01566           gia_clip = (i + gia < ia ? ia - i : gia);
01567           gib_clip = (i + gib > ib ? ib - i : gib);
01568 
01569           // sum over "sphere" of weighted charge
01570           eh_sum = 0;
01571           for (kd = gka_clip;  kd <= gkb_clip;  kd++) {
01572             kgoff = (kd-gka) * gnj;       // find gc flat index
01573 
01574             for (jd = gja_clip;  jd <= gjb_clip;  jd++) {
01575               jkgoff = (kgoff + (jd-gja)) * gni;       // find gc flat index
01576 
01577               for (id = gia_clip;  id <= gib_clip;  id++) {
01578                 ngindex = jkgoff + (id-gia);       // gc flat index
01579                 // sum weighted charge
01580                 eh_sum += qhread.get(i+id,j+jd,k+kd) * gc[ngindex];
01581               }
01582             }
01583           } // end loop over "sphere" of charge
01584 
01585           // add contribution into MSA
01586           ehacc.accumulate(i,j,k) += scaling * eh_sum;
01587         }
01588       }
01589     } // end loop over my sub-grid points
01590 
01591   } // if non-periodic boundaries
01592   else {
01593     // some boundary is periodic
01594     int ilo, jlo, klo;
01595     int ip, jp, kp;
01596 
01597     // loop over my sub-grid points
01598     for (k = mka;  k <= mkb;  k++) {
01599       klo = k + gka;
01600       if ( ! p.ispz ) {  // non-periodic z
01601         // clip gc ranges to keep offset for k index within grid
01602         gka_clip = (k + gka < ka ? ka - k : gka);
01603         gkb_clip = (k + gkb > kb ? kb - k : gkb);
01604         if (klo < ka) klo = ka;  // keep lowest qh index within grid
01605       }
01606       else {  // periodic z
01607         gka_clip = gka;
01608         gkb_clip = gkb;
01609         if (klo < ka) do { klo += nk; } while (klo < ka);
01610       }
01611       // ASSERT(klo <= kb);
01612 
01613       for (j = mja;  j <= mjb;  j++) {
01614         jlo = j + gja;
01615         if ( ! p.ispy ) {  // non-periodic y
01616           // clip gc ranges to keep offset for j index within grid
01617           gja_clip = (j + gja < ja ? ja - j : gja);
01618           gjb_clip = (j + gjb > jb ? jb - j : gjb);
01619           if (jlo < ja) jlo = ja;  // keep lowest qh index within grid
01620         }
01621         else {  // periodic y
01622           gja_clip = gja;
01623           gjb_clip = gjb;
01624           if (jlo < ja) do { jlo += nj; } while (jlo < ja);
01625         }
01626         // ASSERT(jlo <= jb);
01627 
01628         for (i = mia;  i <= mib;  i++) {
01629           ilo = i + gia;
01630           if ( ! p.ispx ) {  // nonperiodic x
01631             // clip gc ranges to keep offset for i index within grid
01632             gia_clip = (i + gia < ia ? ia - i : gia);
01633             gib_clip = (i + gib > ib ? ib - i : gib);
01634             if (ilo < ia) ilo = ia;  // keep lowest qh index within grid
01635           }
01636           else {  // periodic x
01637             gia_clip = gia;
01638             gib_clip = gib;
01639             if (ilo < ia) do { ilo += ni; } while (ilo < ia);
01640           }
01641           // ASSERT(ilo <= ib);
01642 
01643           // sum over "sphere" of weighted charge
01644           eh_sum = 0;
01645           for (kd = gka_clip, kp = klo;  kd <= gkb_clip;  kd++, kp++) {
01646             // clipping makes conditional always fail for nonperiodic
01647             if (kp > kb) kp = ka;  // wrap z direction
01648             kgoff = (kd-gka) * gnj;      // find gc flat index
01649 
01650             for (jd = gja_clip, jp = jlo;  jd <= gjb_clip;  jd++, jp++) {
01651               // clipping makes conditional always fail for nonperiodic
01652               if (jp > jb) jp = ja;              // wrap y direction
01653               jkgoff = (kgoff + (jd-gja)) * gni;       // find gc flat index */
01654 
01655               for (id = gia_clip, ip = ilo;  id <= gib_clip;  id++, ip++) {
01656                 // clipping makes conditional always fail for nonperiodic
01657                 if (ip > ib) ip = ia;   // wrap x direction
01658                 ngindex = jkgoff + (id-gia);  // gc flat index
01659                 // sum weighted charge
01660                 eh_sum += qhread.get(ip,jp,kp) * gc[ngindex];
01661               }
01662             }
01663           } // end loop over "sphere" of charge
01664 
01665           // add contribution into MSA
01666           ehacc.accumulate(i,j,k) += scaling * eh_sum;
01667         }
01668       }
01669     } // end loop over my sub-grid points
01670 
01671   } // else some boundary is periodic
01672 
01673   //
01674   // end of compute work
01675   //
01676   ehread = ehacc.syncToRead();  // let consumers read potential
01677 #ifdef MSM_DEBUG
01678   CkPrintf("Grid cutoff compute %d exiting, PE %d\n", thisIndex, CkMyPe());
01679 #endif
01680 } // loop forever
01681 }
01682 
01683 
01686 enum { MAX_NSTENCIL = 11 };
01687 
01689 static const int Nstencil[APPROX_END] = {
01690   5, 7, 7, 9, 9, 11, 11, 7
01691 };
01692 
01695 static const int IndexOffset[APPROX_END][MAX_NSTENCIL] = {
01696   /* cubic */
01697   {-3, -1, 0, 1, 3},
01698 
01699   /* quintic C1 */
01700   {-5, -3, -1, 0, 1, 3, 5},
01701 
01702   /* quintic C2  (same as quintic C1) */
01703   {-5, -3, -1, 0, 1, 3, 5},
01704 
01705   /* septic C1 */
01706   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
01707 
01708   /* septic C3  (same as septic C3) */
01709   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
01710 
01711   /* nonic C1 */
01712   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
01713 
01714   /* nonic C4  (same as nonic C1) */
01715   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
01716 
01717   /* bspline */
01718   {-3, -2, -1, 0, 1, 2, 3},
01719 };
01720 
01723 static const float PhiStencil[APPROX_END][MAX_NSTENCIL] = {
01724   /* cubic */
01725   {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
01726 
01727   /* quintic C1 */
01728   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
01729 
01730   /* quintic C2  (same as quintic C1) */
01731   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
01732 
01733   /* septic C1 */
01734   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
01735     -245.f/2048, 49.f/2048, -5.f/2048 },
01736 
01737   /* septic C3  (same as septic C3) */
01738   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
01739     -245.f/2048, 49.f/2048, -5.f/2048 },
01740 
01741   /* nonic C1 */
01742   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
01743     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
01744     -405.f/65536, 35.f/65536 },
01745 
01746   /* nonic C4  (same as nonic C1) */
01747   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
01748     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
01749     -405.f/65536, 35.f/65536 },
01750 
01751   /* bspline */
01752   { 1.f/48, 1.f/6, 23.f/48, 2.f/3, 23.f/48, 1.f/6, 1.f/48 },
01753 };
01754 
01755 
01756 MsmMsaRestriction::MsmMsaRestriction(int level, MsmMsaGrid &qh_, MsmMsaGrid &q2h_)
01757   : qh(qh_), q2h(q2h_)
01758 {
01759   mylevel = level;
01760   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01761       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01762   // find the points on my sub-grid of (mylevel+1) grid
01763   const Int3 &len = p.grid_len[mylevel+1];
01764   const Int3 &idstart = p.grid_idstart[mylevel+1];
01765   mia = idstart.nx + thisIndex.x * p.num_points_per_chare.nx;
01766   mja = idstart.ny + thisIndex.y * p.num_points_per_chare.ny;
01767   mka = idstart.nz + thisIndex.z * p.num_points_per_chare.nz;
01768   mib = mia + p.num_points_per_chare.nx - 1;
01769   mjb = mja + p.num_points_per_chare.ny - 1;
01770   mkb = mka + p.num_points_per_chare.nz - 1;
01771   if (mib > idstart.nx + len.nx - 1) {
01772     mib = idstart.nx + len.nx - 1;
01773   }
01774   if (mjb > idstart.ny + len.ny - 1) {
01775     mjb = idstart.ny + len.ny - 1;
01776   }
01777   if (mkb > idstart.nz + len.nz - 1) {
01778     mkb = idstart.nz + len.nz - 1;
01779   }
01780   msa_setup = 0;
01781 }
01782 
01783 
01784 void MsmMsaRestriction::compute()
01785 {
01786 #ifdef MSM_DEBUG
01787   CkPrintf("Restriction compute %d, PE %d\n", thisIndex, CkMyPe());
01788 #endif
01789   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01790       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01791   if ( ! msa_setup) {
01792     qh.enroll(p.num_clients_qh[mylevel]);
01793     q2h.enroll(p.num_clients_qh[mylevel+1]);
01794     qhacc = qh.getInitialAccum();
01795     qhread = qhacc.syncToRead();  // immediately sync to read phase
01796     q2hacc = q2h.getInitialAccum();
01797     q2hread = q2hacc.syncToRead();  // immediately sync to read phase
01798     msa_setup = 1;
01799   }
01800 for (;;) {  // loop forever
01801   qhacc = qhread.syncToEAccum();  // let producers accumulate h-level charge
01802   qhread = qhacc.syncToRead();  // then we read h-level charge
01803   q2hacc = q2hread.syncToEAccum();  // and we accumulate 2h-level charge
01804 #ifdef MSM_DEBUG
01805   CkPrintf("Restriction compute %d doing work, PE %d\n", thisIndex, CkMyPe());
01806 #endif
01807   //
01808   // here is the compute work
01809   //
01810 
01811   const int nstencil = Nstencil[p.approx];
01812   const int *offset = IndexOffset[p.approx];
01813   const float *phi = PhiStencil[p.approx];
01814 
01815   int ni1 = p.grid_len[mylevel].nx;
01816   int nj1 = p.grid_len[mylevel].ny;
01817   int nk1 = p.grid_len[mylevel].nz;
01818   int ia1 = p.grid_idstart[mylevel].nx;
01819   int ja1 = p.grid_idstart[mylevel].ny;
01820   int ka1 = p.grid_idstart[mylevel].nz;
01821   int ib1 = ia1 + ni1 - 1;
01822   int jb1 = ja1 + nj1 - 1;
01823   int kb1 = ka1 + nk1 - 1;
01824   int i, j, k, i1, j1, k1, i2, j2, k2;
01825   int i1off, j1off, k1off;
01826 
01827   float q2h_sum, cjk;
01828 
01829   for (k2 = mka;  k2 <= mkb;  k2++) {
01830     k1 = k2 * 2;
01831     for (j2 = mja;  j2 <= mjb;  j2++) {
01832       j1 = j2 * 2;
01833       for (i2 = mia;  i2 <= mib;  i2++) {
01834         i1 = i2 * 2;
01835 
01836         q2h_sum = 0;
01837         for (k = 0;  k < nstencil;  k++) {
01838           k1off = k1 + offset[k];
01839           if (k1off < ka1) {
01840             if (p.ispz) do { k1off += nk1; } while (k1off < ka1);
01841             else continue;
01842           }
01843           else if (k1off > kb1) {
01844             if (p.ispz) do { k1off -= nk1; } while (k1off > kb1);
01845             else break;
01846           }
01847           for (j = 0;  j < nstencil;  j++) {
01848             j1off = j1 + offset[j];
01849             if (j1off < ja1) {
01850               if (p.ispy) do { j1off += nj1; } while (j1off < ja1);
01851               else continue;
01852             }
01853             else if (j1off > jb1) {
01854               if (p.ispy) do { j1off -= nj1; } while (j1off > jb1);
01855               else break;
01856             }
01857             cjk = phi[j] * phi[k];
01858             for (i = 0;  i < nstencil;  i++) {
01859               i1off = i1 + offset[i];
01860               if (i1off < ia1) {
01861                 if (p.ispx) do { i1off += ni1; } while (i1off < ia1);
01862                 else continue;
01863               }
01864               else if (i1off > ib1) {
01865                 if (p.ispx) do { i1off -= ni1; } while (i1off > ib1);
01866                 else break;
01867               }
01868               q2h_sum += qhread.get(i1off,j1off,k1off) * phi[i] * cjk;
01869             }
01870           }
01871         } // end loop over finer grid stencil
01872 
01873         q2hacc.accumulate(i2,j2,k2) += q2h_sum;
01874       }
01875     }
01876   } // end loop over each coarser grid point
01877 
01878   //
01879   // end of compute work
01880   //
01881   q2hread = q2hacc.syncToRead();  // let consumers read 2h-level charge
01882 #ifdef MSM_DEBUG
01883   CkPrintf("Restriction compute %d exiting, PE %d\n", thisIndex, CkMyPe());
01884 #endif
01885 } // loop forever
01886 }
01887 
01888 
01889 MsmMsaProlongation::MsmMsaProlongation(int level, MsmMsaGrid &eh_, MsmMsaGrid &e2h_) 
01890   : eh(eh_), e2h(e2h_)
01891 {
01892   mylevel = level;
01893   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01894       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01895   // find the points on my sub-grid of (mylevel+1) grid
01896   const Int3 &len = p.grid_len[mylevel+1];
01897   const Int3 &idstart = p.grid_idstart[mylevel+1];
01898   mia = idstart.nx + thisIndex.x * p.num_points_per_chare.nx;
01899   mja = idstart.ny + thisIndex.y * p.num_points_per_chare.ny;
01900   mka = idstart.nz + thisIndex.z * p.num_points_per_chare.nz;
01901   mib = mia + p.num_points_per_chare.nx - 1;
01902   mjb = mja + p.num_points_per_chare.ny - 1;
01903   mkb = mka + p.num_points_per_chare.nz - 1;
01904   if (mib > idstart.nx + len.nx - 1) {
01905     mib = idstart.nx + len.nx - 1;
01906   }
01907   if (mjb > idstart.ny + len.ny - 1) {
01908     mjb = idstart.ny + len.ny - 1;
01909   }
01910   if (mkb > idstart.nz + len.nz - 1) {
01911     mkb = idstart.nz + len.nz - 1;
01912   }
01913   msa_setup = 0;
01914 }
01915 
01916 
01917 void MsmMsaProlongation::compute()
01918 {
01919 #ifdef MSM_DEBUG
01920   CkPrintf("Prolongation compute %d, PE %d\n", thisIndex, CkMyPe());
01921 #endif
01922   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
01923       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
01924   if ( ! msa_setup) {
01925     eh.enroll(p.num_clients_eh[mylevel]);
01926     e2h.enroll(p.num_clients_eh[mylevel+1]);
01927     ehacc = eh.getInitialAccum();
01928     ehread = ehacc.syncToRead();  // immediately sync to read phase
01929     e2hacc = e2h.getInitialAccum();
01930     e2hread = e2hacc.syncToRead();  // immediately sync to read phase
01931     msa_setup = 1;
01932   }
01933 for (;;) {  // loop forever
01934   ehacc = ehread.syncToEAccum();  // we accumulate h-level potential
01935   e2hacc = e2hread.syncToEAccum();  // let producers accumulate 2h-level
01936   e2hread = e2hacc.syncToRead();  // then we read 2h-level potentials
01937 #ifdef MSM_DEBUG
01938   CkPrintf("Prolongation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
01939 #endif
01940   //
01941   // here is the compute work
01942   //
01943 
01944   const int nstencil = Nstencil[p.approx];
01945   const int *offset = IndexOffset[p.approx];
01946   const float *phi = PhiStencil[p.approx];
01947 
01948   int ni1 = p.grid_len[mylevel].nx;
01949   int nj1 = p.grid_len[mylevel].ny;
01950   int nk1 = p.grid_len[mylevel].nz;
01951   int ia1 = p.grid_idstart[mylevel].nx;
01952   int ja1 = p.grid_idstart[mylevel].ny;
01953   int ka1 = p.grid_idstart[mylevel].nz;
01954   int ib1 = ia1 + ni1 - 1;
01955   int jb1 = ja1 + nj1 - 1;
01956   int kb1 = ka1 + nk1 - 1;
01957   int i, j, k, i1, j1, k1, i2, j2, k2;
01958   int i1off, j1off, k1off;
01959 
01960   float cjk;
01961 
01962   for (k2 = mka;  k2 <= mkb;  k2++) {
01963     k1 = k2 * 2;
01964     for (j2 = mja;  j2 <= mjb;  j2++) {
01965       j1 = j2 * 2;
01966       for (i2 = mia;  i2 <= mib;  i2++) {
01967         i1 = i2 * 2;
01968 
01969         for (k = 0;  k < nstencil;  k++) {
01970           k1off = k1 + offset[k];
01971           if (k1off < ka1) {
01972             if (p.ispz) do { k1off += nk1; } while (k1off < ka1);
01973             else continue;
01974           }
01975           else if (k1off > kb1) {
01976             if (p.ispz) do { k1off -= nk1; } while (k1off > kb1);
01977             else break;
01978           }
01979           for (j = 0;  j < nstencil;  j++) {
01980             j1off = j1 + offset[j];
01981             if (j1off < ja1) {
01982               if (p.ispy) do { j1off += nj1; } while (j1off < ja1);
01983               else continue;
01984             }
01985             else if (j1off > jb1) {
01986               if (p.ispy) do { j1off -= nj1; } while (j1off > jb1);
01987               else break;
01988             }
01989             cjk = phi[j] * phi[k];
01990             for (i = 0;  i < nstencil;  i++) {
01991               i1off = i1 + offset[i];
01992               if (i1off < ia1) {
01993                 if (p.ispx) do { i1off += ni1; } while (i1off < ia1);
01994                 else continue;
01995               }
01996               else if (i1off > ib1) {
01997                 if (p.ispx) do { i1off -= ni1; } while (i1off > ib1);
01998                 else break;
01999               }
02000               ehacc.accumulate(i1off,j1off,k1off) +=
02001                 e2hread(i2,j2,k2) * phi[i] * cjk;
02002             }
02003           }
02004         } // end loop over finer grid stencil
02005 
02006       }
02007     }
02008   } // end loop over each coarser grid point
02009 
02010   //
02011   // end of compute work
02012   //
02013   ehread = ehacc.syncToRead();  // let consumers read h-level potential
02014 #ifdef MSM_DEBUG
02015   CkPrintf("Prolongation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
02016 #endif
02017 } // loop forever
02018 }
02019 
02020 
02021 MsmMsaEnergy::MsmMsaEnergy(MsmMsaGrid &qh_, MsmMsaGrid &eh_) : qh(qh_), eh(eh_)
02022 {
02023   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
02024       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
02025   int npoints = p.num_points_per_chare.nx * 
02026     p.num_points_per_chare.ny * p.num_points_per_chare.nz;
02027   qhbuffer = new float[npoints];
02028   int ni = p.grid_len[0].nx;
02029   int nj = p.grid_len[0].ny;
02030   int nk = p.grid_len[0].nz;
02031   int nci = ROUNDUP_QUOTIENT(ni, p.num_points_per_chare.nx);
02032   int ncj = ROUNDUP_QUOTIENT(nj, p.num_points_per_chare.ny);
02033   int nck = ROUNDUP_QUOTIENT(nk, p.num_points_per_chare.nz);
02034   mlk = thisIndex / (ncj * nci);
02035   int krem = thisIndex % (ncj * nci);
02036   mlj = krem / nci;
02037   mli = krem % nci;
02038   // find the points on my part of my level's qh and eh grids
02039   mia = p.grid_idstart[0].nx + mli * p.num_points_per_chare.nx;
02040   mja = p.grid_idstart[0].ny + mlj * p.num_points_per_chare.ny;
02041   mka = p.grid_idstart[0].nz + mlk * p.num_points_per_chare.nz;
02042   mib = mia + p.num_points_per_chare.nx - 1;
02043   mjb = mja + p.num_points_per_chare.ny - 1;
02044   mkb = mka + p.num_points_per_chare.nz - 1;
02045   if (mib > p.grid_idstart[0].nx + ni - 1) {
02046     mib = p.grid_idstart[0].nx + ni - 1;
02047   }
02048   if (mjb > p.grid_idstart[0].ny + nj - 1) {
02049     mjb = p.grid_idstart[0].ny + nj - 1;
02050   }
02051   if (mkb > p.grid_idstart[0].nz + nk - 1) {
02052     mkb = p.grid_idstart[0].nz + nk - 1;
02053   }
02054   msa_setup = 0;
02055   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
02056 }
02057 
02058 MsmMsaEnergy::~MsmMsaEnergy()
02059 {
02060   delete[] qhbuffer;
02061 }
02062 
02063 
02064 void MsmMsaEnergy::compute()
02065 {
02066 #ifdef MSM_DEBUG
02067   CkPrintf("Energy compute %d, PE %d\n", thisIndex, CkMyPe());
02068 #endif
02069   const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
02070       CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
02071   if ( ! msa_setup) {
02072     qh.enroll(p.num_clients_qh[0]);
02073     eh.enroll(p.num_clients_eh[0]);
02074     qhacc = qh.getInitialAccum();
02075     qhread = qhacc.syncToRead();  // immediately sync to read phase
02076     ehacc = eh.getInitialAccum();
02077     ehread = ehacc.syncToRead();
02078     msa_setup = 1;
02079   }
02080 for (;;) {  // loop forever
02081   double sum = 0;
02082   qhacc = qhread.syncToEAccum();  // let producers accumulate charge
02083   qhread = qhacc.syncToRead();  // then we read it
02084   //
02085   // Must be very careful of phase sync order between eh and qh
02086   // qhread sync done must be after enrollment in eh
02087   // (otherwise block gridcutoff)
02088   // but before ehacc sync to read
02089   // (otherwise block prolongation)
02090   //
02091   // Since we can't have both qh and eh open for reading at once,
02092   // we read qh values into local storage
02093   //
02094   int i, j, k, index;
02095   for (index = 0, k = mka;  k <= mkb;  k++) {
02096     for (j = mja;  j <= mjb;  j++) {
02097       for (i = mia;  i <= mib;  i++, index++) {
02098         qhbuffer[index] = qhread.get(i,j,k);
02099       }
02100     }
02101   }
02102   //
02103   // Must do qhread sync done BEFORE ehacc sync to read
02104   //
02105   ehacc = ehread.syncToEAccum();  // let producers accumulate potential
02106   ehread = ehacc.syncToRead();  // then we read it
02107 #ifdef MSM_DEBUG
02108   CkPrintf("Energy compute %d doing work, PE %d\n", thisIndex, CkMyPe());
02109 #endif
02110   for (index = 0, k = mka;  k <= mkb;  k++) {
02111     for (j = mja;  j <= mjb;  j++) {
02112       for (i = mia;  i <= mib;  i++, index++) {
02113         sum += qhbuffer[index] * ehread.get(i,j,k);
02114       }
02115     }
02116   }
02117   //contribute(sizeof(double), &sum, CkReduction::sum_double);
02118   reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += 0.5*sum;
02119   reduction->submit();
02120 #ifdef MSM_DEBUG
02121   CkPrintf("Energy compute %d exiting, PE %d\n", thisIndex, CkMyPe());
02122 #endif
02123 } // loop forever
02124 }
02125 
02126 
02127 #include "ComputeMsmMsaMgr.def.h"
02128 
02129 #endif // CHARM_HAS_MSA
02130 

Generated on Mon Nov 20 01:17:11 2017 for NAMD by  doxygen 1.4.7