ComputeMsm.C

Go to the documentation of this file.
00001 
00007 #include "InfoStream.h"
00008 #include "Node.h"
00009 #include "PDB.h"
00010 #include "PatchMap.h"
00011 #include "PatchMap.inl"
00012 #include "AtomMap.h"
00013 #include "ComputeMsm.h"
00014 #include "PatchMgr.h"
00015 #include "Molecule.h"
00016 #include "ReductionMgr.h"
00017 #include "ComputeMgr.h"
00018 #include "ComputeMgr.decl.h"
00019 #include "Debug.h"
00020 #include "SimParameters.h"
00021 #include "WorkDistrib.h"
00022 #include "Priorities.h"
00023 #include "varsizemsg.h"
00024 //#include "ckmulticast.h"
00025 #include <stdio.h>
00026 #include "MsmMap.h"
00027 
00028 // MSM (multilevel summation method)
00029 // has O(N) algorithmic complexity
00030 
00031 // use multicast reduction of grids from sections of MsmGridCutoff
00032 #define MSM_REDUCE_GRID
00033 //#undef MSM_REDUCE_GRID
00034 
00035 // use the decomposition of grid cutoff to create more work units
00036 #define MSM_GRID_CUTOFF_DECOMP
00037 //#undef MSM_GRID_CUTOFF_DECOMP
00038 
00039 // skip over pairs of blocks that do not actually interact
00040 #define MSM_SKIP_TOO_DISTANT_BLOCKS
00041 //#undef MSM_SKIP_TOO_DISTANT_BLOCKS
00042 
00043 // skip over pairs of blocks whose overlap is beyond nonzero gc sphere
00044 // this search is more expensive than MSM_SKIP_TOO_DISTANT_BLOCKS
00045 // and does not eliminate many block pairs
00046 #define MSM_SKIP_BEYOND_SPHERE
00047 //#undef MSM_SKIP_BEYOND_SPHERE
00048 
00049 // node aware mapping of chare arrays
00050 #define MSM_NODE_MAPPING
00051 //#undef MSM_NODE_MAPPING
00052 
00053 #define MSM_NODE_MAPPING_STATS
00054 #undef MSM_NODE_MAPPING_STATS
00055 
00056 // top of hierarchy calculates smaller blocks of charge to 
00057 // unfolded image blocks of potential, up to the desired block size,
00058 // then sums the unfolded images of potential back into the 
00059 // actual potential block, thereby greatly reducing the number of 
00060 // block pairs that would otherwise be scheduled
00061 #define MSM_FOLD_FACTOR
00062 //#undef MSM_FOLD_FACTOR
00063 
00064 // report timings for compute routines
00065 // for diagnostic purposes only
00066 #define MSM_TIMING
00067 #undef MSM_TIMING
00068 
00069 // report profiling for compute routines
00070 // for diagnostic purposes only
00071 #define MSM_PROFILING
00072 #undef MSM_PROFILING
00073 
00074 // use fixed size grid message
00075 // XXX probably does not work anymore
00076 #define MSM_FIXED_SIZE_GRID_MSG
00077 #undef MSM_FIXED_SIZE_GRID_MSG
00078 
00079 // turn off computation
00080 // for diagnostic purposes only
00081 //#define MSM_COMM_ONLY
00082 
00083 // print diagnostics for memory alignment (for grid cutoff calculation)
00084 // for diagnostic purposes only
00085 #define DEBUG_MEMORY_ALIGNMENT
00086 #undef DEBUG_MEMORY_ALIGNMENT
00087 
00088 
00089 //
00090 // This is the main message that gets passed between compute chares.
00091 // It is used to bundle blocks of charge (sendUp and send to MsmGridCutoff) 
00092 // and blocks of potential (sendAcross, sendDown, and sendPatch).  
00093 //
00094 // Higher priority has a numerically lower value.  
00095 //
00096 // The priorities are set as follows:
00097 //
00098 //   sendUp priority = level+1
00099 //
00100 //   (send to MsmGridCutoff) and sendAcross priority
00101 //       = nlevels + 2*(nlevels - level) - 1
00102 //
00103 //   sendDown and sendPatch priority
00104 //       = nlevels + 2*(nlevels - level)
00105 //
00106 // This puts the priority on going up the hierarchy before going across 
00107 // and puts the priority on finishing the top levels and down before 
00108 // finishing the lower levels.
00109 //
00110 
00111 class GridMsg : public CkMcastBaseMsg, public CMessage_GridMsg {
00112   public:
00113     char *gdata;
00114     int idnum;
00115     int nlower_i;
00116     int nlower_j;
00117     int nlower_k;
00118     int nextent_i;
00119     int nextent_j;
00120     int nextent_k;
00121     int nbytes;
00122     int seqnum;  // sequence number is used for message priority
00123 
00124     // put a grid into an allocated message to be sent
00125     template <class T>
00126     void put(const msm::Grid<T>& g, int id, int seq) {
00127       idnum = id;
00128       nlower_i = g.lower().i;
00129       nlower_j = g.lower().j;
00130       nlower_k = g.lower().k;
00131       nextent_i = g.extent().i;
00132       nextent_j = g.extent().j;
00133       nextent_k = g.extent().k;
00134       nbytes = g.data().len()*sizeof(T);
00135       seqnum = seq;
00136       memcpy(gdata, g.data().buffer(), nbytes);
00137     }
00138 
00139     // get the grid from a received message
00140     template <class T>
00141     void get(msm::Grid<T>& g, int& id, int& seq) {
00142       id = idnum;
00143       g.set(nlower_i, nextent_i, nlower_j, nextent_j,
00144           nlower_k, nextent_k);
00145       seq = seqnum;
00146       ASSERT(g.data().len()*sizeof(T) == nbytes);
00147       memcpy(g.data().buffer(), gdata, nbytes);
00148     }
00149 };
00150 
00151 
00152 class MsmBlockProxyMsg : public CMessage_MsmBlockProxyMsg {
00153   public:
00154     enum { maxlevels = 32 };
00155     char msmBlockProxyData[maxlevels*sizeof(CProxy_MsmBlock)];
00156     int nlevels;
00157 
00158     // put an array into an allocated message to be sent
00159     void put(const msm::Array<CProxy_MsmBlock>& a) {
00160       nlevels = a.len();
00161       if (nlevels > maxlevels) {
00162         NAMD_die("Exceeded maximum number of MSM levels\n");
00163       }
00164       memcpy(msmBlockProxyData, a.buffer(), nlevels*sizeof(CProxy_MsmBlock));
00165     }
00166 
00167     // get the array from a received message
00168     void get(msm::Array<CProxy_MsmBlock>& a) {
00169       a.resize(nlevels);
00170       memcpy(a.buffer(), msmBlockProxyData, nlevels*sizeof(CProxy_MsmBlock));
00171     }
00172 };
00173 
00174 
00175 class MsmC1HermiteBlockProxyMsg : public CMessage_MsmC1HermiteBlockProxyMsg {
00176   public:
00177     enum { maxlevels = 32 };
00178     char msmBlockProxyData[maxlevels*sizeof(CProxy_MsmC1HermiteBlock)];
00179     int nlevels;
00180 
00181     // put an array into an allocated message to be sent
00182     void put(const msm::Array<CProxy_MsmC1HermiteBlock>& a) {
00183       nlevels = a.len();
00184       if (nlevels > maxlevels) {
00185         NAMD_die("Exceeded maximum number of MSM levels\n");
00186       }
00187       memcpy(msmBlockProxyData, a.buffer(),
00188           nlevels*sizeof(CProxy_MsmC1HermiteBlock));
00189     }
00190 
00191     // get the array from a received message
00192     void get(msm::Array<CProxy_MsmC1HermiteBlock>& a) {
00193       a.resize(nlevels);
00194       memcpy(a.buffer(), msmBlockProxyData,
00195           nlevels*sizeof(CProxy_MsmC1HermiteBlock));
00196     }
00197 };
00198 
00199 
00200 class MsmGridCutoffProxyMsg : public CMessage_MsmGridCutoffProxyMsg {
00201   public:
00202     char msmGridCutoffProxyData[sizeof(CProxy_MsmGridCutoff)];
00203 
00204     // put proxy into an allocated message to be sent
00205     void put(const CProxy_MsmGridCutoff *p) {
00206       memcpy(msmGridCutoffProxyData, p, sizeof(CProxy_MsmGridCutoff));
00207     }
00208 
00209     // get the proxy from a received message
00210     void get(CProxy_MsmGridCutoff *p) {
00211       memcpy(p, msmGridCutoffProxyData, sizeof(CProxy_MsmGridCutoff));
00212     }
00213 };
00214 
00215 
00216 class MsmC1HermiteGridCutoffProxyMsg :
00217   public CMessage_MsmC1HermiteGridCutoffProxyMsg
00218 {
00219   public:
00220     char msmGridCutoffProxyData[sizeof(CProxy_MsmC1HermiteGridCutoff)];
00221 
00222     // put proxy into an allocated message to be sent
00223     void put(const CProxy_MsmC1HermiteGridCutoff *p) {
00224       memcpy(msmGridCutoffProxyData, p,
00225           sizeof(CProxy_MsmC1HermiteGridCutoff));
00226     }
00227 
00228     // get the proxy from a received message
00229     void get(CProxy_MsmC1HermiteGridCutoff *p) {
00230       memcpy(p, msmGridCutoffProxyData,
00231           sizeof(CProxy_MsmC1HermiteGridCutoff));
00232     }
00233 };
00234 
00235 
00236 class MsmGridCutoffInitMsg : public CMessage_MsmGridCutoffInitMsg {
00237   public:
00238     msm::BlockIndex qhBlockIndex;  // charge block index
00239     msm::BlockSend ehBlockSend;    // potential block sending address
00240     MsmGridCutoffInitMsg(const msm::BlockIndex& i, const msm::BlockSend& b)
00241       : qhBlockIndex(i), ehBlockSend(b) { }
00242 };
00243 
00244 
00245 class MsmGridCutoffSetupMsg :
00246   public CkMcastBaseMsg, public CMessage_MsmGridCutoffSetupMsg
00247 {
00248   public:
00249     char msmBlockElementProxyData[sizeof(CProxyElement_MsmBlock)];
00250 
00251     // put proxy into an allocated message to be sent
00252     void put(
00253         const CProxyElement_MsmBlock *q //,
00254         ) {
00255       memcpy(msmBlockElementProxyData, q, sizeof(CProxyElement_MsmBlock));
00256     }
00257 
00258     // get the proxy from a received message
00259     void get(
00260         CProxyElement_MsmBlock *q //,
00261         ) {
00262       memcpy(q, msmBlockElementProxyData, sizeof(CProxyElement_MsmBlock));
00263     }
00264 };
00265 
00266 
00267 class MsmC1HermiteGridCutoffSetupMsg :
00268   public CkMcastBaseMsg, public CMessage_MsmC1HermiteGridCutoffSetupMsg
00269 {
00270   public:
00271     char msmBlockElementProxyData[sizeof(CProxyElement_MsmC1HermiteBlock)];
00272 
00273     // put proxy into an allocated message to be sent
00274     void put(
00275         const CProxyElement_MsmC1HermiteBlock *q //,
00276         ) {
00277       memcpy(msmBlockElementProxyData, q,
00278           sizeof(CProxyElement_MsmC1HermiteBlock));
00279     }
00280 
00281     // get the proxy from a received message
00282     void get(
00283         CProxyElement_MsmC1HermiteBlock *q //,
00284         ) {
00285       memcpy(q, msmBlockElementProxyData,
00286           sizeof(CProxyElement_MsmC1HermiteBlock));
00287     }
00288 };
00289 
00290 
00291 // Used only when MSM_TIMING is defined
00292 class MsmTimer : public CBase_MsmTimer {
00293   public:
00294     enum { ANTERP=0, INTERP, RESTRICT, PROLONGATE, GRIDCUTOFF, COMM, MAX };
00295 
00296     MsmTimer() {
00297       for (int i = 0;  i < MAX;  i++)  timing[i] = 0;
00298     }
00299     void done(double tm[], int n) {
00300       for (int i = 0;  i < MAX;  i++)  timing[i] = tm[i];
00301       print();
00302     }
00303     void print() {
00304       CkPrintf("MSM timings:\n");
00305       CkPrintf("   anterpolation   %8.6f sec\n", timing[ANTERP]);
00306       CkPrintf("   interpolation   %8.6f sec\n", timing[INTERP]);
00307       CkPrintf("   restriction     %8.6f sec\n", timing[RESTRICT]);
00308       CkPrintf("   prolongation    %8.6f sec\n", timing[PROLONGATE]);
00309       CkPrintf("   grid cutoff     %8.6f sec\n", timing[GRIDCUTOFF]);
00310       CkPrintf("   communication   %8.6f sec\n", timing[COMM]);
00311     }
00312 
00313     double timing[MAX];
00314 };
00315 
00316 
00317 // Used only when MSM_PROFILING is defined
00318 class MsmProfiler : public CBase_MsmProfiler {
00319   public:
00320     enum { MAX = MSM_MAX_BLOCK_SIZE+1 };
00321 
00322     MsmProfiler() {
00323       for (int i = 0;  i < MAX;  i++)  xloopcnt[i] = 0;
00324     }
00325     void done(int lc[], int n) {
00326       for (int i = 0;  i < MAX;  i++)  xloopcnt[i] = lc[i];
00327       print();
00328     }
00329     void print() {
00330       int sum = 0;
00331       for (int i = 0;  i < MAX;  i++)  sum += xloopcnt[i];
00332       CkPrintf("MSM profiling:\n");
00333       CkPrintf("   total executions of inner loop:   %d\n", sum);
00334       for (int i = 0;  i < MAX;  i++) {
00335         CkPrintf("   executing %d times:   %d  (%5.2f%%)\n",
00336             i, xloopcnt[i], 100*double(xloopcnt[i])/sum);
00337       }
00338     }
00339 
00340     int xloopcnt[MAX];
00341 };
00342 
00343 
// used with PriorityQueue
// when determining work mapped to node or PE
//
// Pairs an amount of work with an index.
struct WorkIndex {
  float work;
  int index;

  // Default: no work, index 0.
  WorkIndex() : work(0), index(0) { }
  WorkIndex(float w, int i) : work(w), index(i) { }

  // Compare by work only; index does not take part in the ordering.
  // Declared const so it can be applied to const objects and to elements
  // reached through const references.
  bool operator<=(const WorkIndex& wn) const {
    return (work <= wn.work);
  }
};
00355 
00356 
00358 //
00359 //  ComputeMsmMgr
00360 //  chare group containing MSM parameters and constants;
00361 //  one chare object per PE
00362 //
00363 
00364 class ComputeMsmMgr : public CBase_ComputeMsmMgr {
00365   friend struct msm::PatchData;
00366   friend class MsmBlock;
00367   //friend class MsmGridCutoff;
00368   friend class MsmBlockMap;
00369   friend class MsmGridCutoffMap;
00370 
00371 public:
00372   ComputeMsmMgr();                    // entry
00373   ~ComputeMsmMgr();
00374 
00375   void initialize(MsmInitMsg *);      // entry with message
00376   void initialize_create();           // entry no message
00377 private:
00378   void initialize2();                 // split in two
00379 public:
00380 
00381   void recvMsmBlockProxy(MsmBlockProxyMsg *);  // entry with message
00382   void recvMsmGridCutoffProxy(MsmGridCutoffProxyMsg *);  // entry with message
00383 
00384   void recvMsmC1HermiteBlockProxy(MsmC1HermiteBlockProxyMsg *);
00385     // entry with message
00386   void recvMsmC1HermiteGridCutoffProxy(MsmC1HermiteGridCutoffProxyMsg *);
00387     // entry with message
00388 
00389   void update(CkQdMsg *);             // entry with message
00390 
00391   void compute(msm::Array<int>& patchIDList);
00392                                       // called by local ComputeMsm object
00393 
00394   void addPotential(GridMsg *);  // entry with message
00395   void doneCompute();  // called by each local patch
00396 
00397 #ifdef MSM_TIMING
00398   void initTiming() {
00399     for (int i = 0;  i < MsmTimer::MAX;  i++)  msmTiming[i] = 0;
00400     cntTiming = 0;
00401   }
00402   // every local object being timed should call this during initialization
00403   void addTiming() {
00404     numTiming++;
00405   }
00406   // object calls before being migrated
00407   void subtractTiming() {
00408     numTiming--;
00409   }
00410   void doneTiming() {
00411     if (++cntTiming >= numTiming) {
00412       CkCallback cb(CkReductionTarget(MsmTimer, done), msmTimer);
00413       contribute(MsmTimer::MAX*sizeof(double), msmTiming,
00414           CkReduction::sum_double, cb);
00415       initTiming();
00416     }
00417   }
00418 #endif
00419 
00420 #ifdef MSM_PROFILING
00421   void initProfiling() {
00422     for (int i = 0;  i < MsmProfiler::MAX;  i++)  xLoopCnt[i] = 0;
00423     cntProfiling = 0;
00424   }
00425   // every local object being profiled should call this during initialization
00426   void addProfiling() {
00427     numProfiling++;
00428   }
00429   // object calls before being migrated
00430   void subtractProfiling() {
00431     numProfiling--;
00432   }
00433   void doneProfiling() {
00434     if (++cntProfiling >= numProfiling) {
00435       CkCallback cb(CkReductionTarget(MsmProfiler, done), msmProfiler);
00436       contribute(MsmProfiler::MAX*sizeof(int), xLoopCnt,
00437           CkReduction::sum_int, cb);
00438       initProfiling();  // reset accumulators for next visit
00439     }
00440   }
00441 #endif
00442 
00443   void setCompute(ComputeMsm *c) { msmCompute = c;  c->setMgr(this); } // local
00444 
00445   msm::PatchPtrArray& patchPtrArray() { return patchPtr; }
00446 
00447   msm::Map& mapData() { return map; }
00448 
00449   int numLevels() const { return nlevels; }
00450 
// Signum of an integer: -1 for negative n, 0 for zero, 1 for positive n.
static inline int sign(int n) {
  // each comparison yields 0 or 1, so the difference is -1, 0, or 1
  return (n > 0) - (n < 0);
}
00455 
00456 //private:
00457   void setup_hgrid_1d(BigReal len, BigReal& hh, int& nn,
00458       int& ia, int& ib, int isperiodic);
00459   void setup_periodic_blocksize(int& bsize, int n);
00460 
00461   CProxy_ComputeMsmMgr msmProxy;
00462   ComputeMsm *msmCompute;
00463 
00464   msm::Array<CProxy_MsmBlock> msmBlock;
00465   msm::Array<CProxy_MsmC1HermiteBlock> msmC1HermiteBlock;
00466 
00467   CProxy_MsmGridCutoff msmGridCutoff;
00468   CProxy_MsmC1HermiteGridCutoff msmC1HermiteGridCutoff;
00469   int numGridCutoff;  // length of msmGridCutoff chare array
00470 
00471   msm::Map map;
00472 
00473   // find patch by patchID
00474   // array is length number of patches, initialized to NULL
00475   // allocate PatchData for only those patches on this PE
00476   msm::PatchPtrArray patchPtr;
00477 
00478   // allocate subgrid used for receiving message data in addPotential()
00479   // and sending on to PatchData::addPotential()
00480   msm::Grid<Float> subgrid;
00481   msm::Grid<C1Vector> subgrid_c1hermite;
00482 
00483 #ifdef MSM_NODE_MAPPING
00484   msm::Array<int> blockAssign;
00485   msm::Array<int> gcutAssign;
00486   //msm::Array<int> nodecnt;
00487   int blockFlatIndex(int level, int i, int j, int k) {
00488     int n = 0;
00489     for (int l = 0;  l < level;  l++) {
00490       n += map.blockLevel[l].nn();
00491     }
00492     return (n + map.blockLevel[level].flatindex(i,j,k));
00493   }
00494   float calcBlockWork(const msm::BlockDiagram& b) {
00495     // XXX ratio of work for MsmBlock to MsmGridCutoff?
00496     const float scalingFactor = 3;
00497     const int volumeFullBlock = map.bsx[0] * map.bsy[0] * map.bsz[0];
00498     msm::Ivec gn;
00499     if (approx == C1HERMITE) {
00500       gn = map.gc_c1hermite[0].extent();
00501     }
00502     else {
00503       gn = map.gc[0].extent();
00504     }
00505     const int volumeFullCutoff = (map.bsx[0] + gn.i - 1) *
00506       (map.bsy[0] + gn.j - 1) * (map.bsz[0] + gn.k - 1);
00507     msm::Ivec n = b.nrange.extent();
00508     int volumeBlock = n.i * n.j * n.k;
00509     msm::Ivec nc = b.nrangeCutoff.extent();
00510     int volumeCutoff = nc.i * nc.j * nc.k;
00511     return( scalingFactor * (float(volumeBlock) / volumeFullBlock) *
00512         (float(volumeCutoff) / volumeFullCutoff) );
00513   }
00514   float calcGcutWork(const msm::BlockSend& bs) {
00515     const int volumeFullBlock = map.bsx[0] * map.bsy[0] * map.bsz[0];
00516     msm::Ivec n = bs.nrange_wrap.extent();;
00517     int volumeBlock = n.i * n.j * n.k;
00518     return( float(volumeBlock) / volumeFullBlock );
00519   }
00520 #endif
00521 
00522   // sum local virial factors
00523   msm::Grid<Float> gvsum;
00524   int numVirialContrib;
00525   int cntVirialContrib;
00526   enum { VXX=0, VXY, VXZ, VYY, VYZ, VZZ, VMAX };
00527   Float virial[VMAX];
00528 
00529   void initVirialContrib() {
00530     gvsum.reset(0);
00531     cntVirialContrib = 0;
00532   }
00533   void addVirialContrib() {
00534     numVirialContrib++;
00535   }
00536   void subtractVirialContrib() {
00537     numVirialContrib--;
00538   }
00539   void doneVirialContrib() {
00540     if (++cntVirialContrib >= numVirialContrib) {
00541       // reduce all gvsum contributions into virial tensor
00542       for (int n = 0;  n < VMAX;  n++) { virial[n] = 0; }
00543       int ia = gvsum.ia();
00544       int ib = gvsum.ib();
00545       int ja = gvsum.ja();
00546       int jb = gvsum.jb();
00547       int ka = gvsum.ka();
00548       int kb = gvsum.kb();
00549       for (int k = ka;  k <= kb;  k++) {
00550         for (int j = ja;  j <= jb;  j++) {
00551           for (int i = ia;  i <= ib;  i++) {
00552             Float cu = Float(i);
00553             Float cv = Float(j);
00554             Float cw = Float(k);
00555             Float c = gvsum(i,j,k);
00556             Float vx = cu*hufx + cv*hvfx + cw*hwfx;
00557             Float vy = cu*hufy + cv*hvfy + cw*hwfy;
00558             Float vz = cu*hufz + cv*hvfz + cw*hwfz;
00559             virial[VXX] -= c * vx * vx;
00560             virial[VXY] -= c * vx * vy;
00561             virial[VXZ] -= c * vx * vz;
00562             virial[VYY] -= c * vy * vy;
00563             virial[VYZ] -= c * vy * vz;
00564             virial[VZZ] -= c * vz * vz;
00565           }
00566         }
00567       }
00568       initVirialContrib();
00569     }
00570   }
00571 
00572 #ifdef MSM_TIMING
00573   CProxy_MsmTimer msmTimer;
00574   double msmTiming[MsmTimer::MAX];
00575   int numTiming;  // total number of objects being timed
00576   int cntTiming;  // count the objects as they provide timing results
00577   CkCallback *cbTiming;
00578 #endif
00579 
00580 #ifdef MSM_PROFILING
00581   CProxy_MsmProfiler msmProfiler;
00582   int xLoopCnt[MsmProfiler::MAX];
00583   int numProfiling;  // total number of objects being profiled
00584   int cntProfiling;  // count the objects as they provide profiling results
00585   CkCallback *cbProfiling;
00586 #endif
00587 
00588   Vector c, u, v, w;    // rescaled center and lattice vectors
00589   Vector ru, rv, rw;    // row vectors to transform to unit space
00590   int ispu, ispv, ispw; // is periodic along u, v, w?
00591 
00592   Lattice lattice;      // keep local copy of lattice
00593   ScaledPosition smin;  // keep min values for non-periodic dimensions
00594   ScaledPosition smax;  // keep max values for non-periodic dimensions
00595   BigReal gridspacing;  // preferred grid spacing
00596   BigReal padding;      // padding for non-periodic boundaries
00597   BigReal gridScalingFactor;  // scaling for Hermite interpolation
00598   BigReal a;            // cutoff distance
00599   BigReal hxlen, hylen, hzlen;  // first level grid spacings along basis vectors
00600   BigReal hxlen_1, hylen_1, hzlen_1;  // inverses of grid spacings
00601   Vector hu, hv, hw;    // first level grid spacing vectors
00602   Float hufx, hufy, hufz, hvfx, hvfy, hvfz, hwfx, hwfy, hwfz;
00603   int nhx, nhy, nhz;    // number of h spacings that cover cell
00604   int approx;           // ID for approximation
00605   int split;            // ID for splitting
00606   int nlevels;          // number of grid levels
00607   int dispersion;       // calculating dispersion forces?
00608   BigReal gzero;        // self energy factor from splitting
00609 
00610   Vector sglower;       // lower corner of grid in scaled space
00611                         // corresponds to index (0,0,0)
00612 
00613   BigReal shx, shy, shz;  // grid spacings in scaled space
00614   BigReal shx_1, shy_1, shz_1;
00615   Vector sx_shx;          // row vector to transform interpolated force x
00616   Vector sy_shy;          // row vector to transform interpolated force y
00617   Vector sz_shz;          // row vector to transform interpolated force z
00618   Float srx_x, srx_y, srx_z;  // float version of sx_shx
00619   Float sry_x, sry_y, sry_z;  // float version of sy_shy
00620   Float srz_x, srz_y, srz_z;  // float version of sz_shz
00621 
00622   int s_edge;
00623   int omega;
00624 
00625   enum Approx { CUBIC=0, QUINTIC, QUINTIC2,
00626     SEPTIC, SEPTIC3, NONIC, NONIC4, C1HERMITE, NUM_APPROX };
00627 
00628   enum Split { TAYLOR2=0, TAYLOR3, TAYLOR4,
00629     TAYLOR5, TAYLOR6, TAYLOR7, TAYLOR8,
00630     TAYLOR2_DISP, TAYLOR3_DISP, TAYLOR4_DISP, TAYLOR5_DISP,
00631     TAYLOR6_DISP, TAYLOR7_DISP, TAYLOR8_DISP, NUM_SPLIT };
00632 
00633   enum {
00634     // Approximation formulas with up to degree 9 polynomials.
00635     MAX_POLY_DEGREE = 9,
00636 
00637     // Max stencil length for polynomial approximation.
00638     MAX_NSTENCIL_SIZE = (2*MAX_POLY_DEGREE + 1),
00639 
00640     // Max stencil length when skipping zeros
00641     // (almost half entries are zero for interpolating polynomials).
00642     MAX_NSTENCIL_SKIP_ZERO = (MAX_POLY_DEGREE + 2),
00643 
00644     // Number of scalar approximation formulas
00645     NUM_APPROX_FORMS = (NONIC4 - CUBIC) + 1
00646   };
00647 
00648   // Degree of polynomial basis function Phi.
00649   static const int PolyDegree[NUM_APPROX];
00650 
00651   // The stencil array lengths below.
00652   static const int Nstencil[NUM_APPROX];
00653 
00654   // Index offsets from the stencil-centered grid element, to get
00655   // to the correct contributing grid element.
00656   static const int IndexOffset[NUM_APPROX][MAX_NSTENCIL_SKIP_ZERO];
00657 
00658   // The grid transfer stencils for the non-factored restriction and
00659   // prolongation procedures.
00660   static const Float PhiStencil[NUM_APPROX_FORMS][MAX_NSTENCIL_SKIP_ZERO];
00661 
00662   // Calculate the smoothing function and its derivative:
00663   // g(R) and (d/dR)g(R), where R=r/a.
00664   // Use double precision for calculating the MSM constant weights 
00665   // and coefficients.  The resulting coefficents to be used in 
00666   // the repeatedly called algorithm are stored in single precision.
00667   static void splitting(BigReal& g, BigReal& dg, BigReal r_a, int _split) {
00668     BigReal s = r_a * r_a;  // s = (r/a)^2, assuming 0 <= s <= 1
00669     switch (_split) {
00670       case TAYLOR2:
00671         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
00672         dg = (2*r_a)*(-1./2 + (s-1)*(3./4));
00673         break;
00674       case TAYLOR3:
00675         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
00676         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16)));
00677         break;
00678       case TAYLOR4:
00679         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
00680                 + (s-1)*(35./128))));
00681         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
00682                 + (s-1)*(35./32))));
00683         break;
00684       case TAYLOR5:
00685         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
00686                 + (s-1)*(35./128 + (s-1)*(-63./256)))));
00687         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
00688                 + (s-1)*(35./32 + (s-1)*(-315./256)))));
00689         break;
00690       case TAYLOR6:
00691         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
00692                 + (s-1)*(35./128 + (s-1)*(-63./256
00693                     + (s-1)*(231./1024))))));
00694         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
00695                 + (s-1)*(35./32 + (s-1)*(-315./256
00696                     + (s-1)*(693./512))))));
00697         break;
00698       case TAYLOR7:
00699         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
00700             + (s-1)*(35./128 + (s-1)*(-63./256
00701                 + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
00702         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
00703                 + (s-1)*(35./32 + (s-1)*(-315./256
00704                     + (s-1)*(693./512 + (s-1)*(-3003./2048)))))));
00705         break;
00706       case TAYLOR8:
00707         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
00708                 + (s-1)*(35./128 + (s-1)*(-63./256
00709                     + (s-1)*(231./1024 + (s-1)*(-429./2048
00710                         + (s-1)*(6435./32768))))))));
00711         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
00712                 + (s-1)*(35./32 + (s-1)*(-315./256
00713                     + (s-1)*(693./512 + (s-1)*(-3003./2048
00714                         + (s-1)*(6435./4096))))))));
00715         break;
00716       case TAYLOR2_DISP:
00717         g = 1 + (s-1)*(-3 + (s-1)*(6));
00718         dg = (2*r_a)*(-3 + (s-1)*(12));
00719         break;
00720       case TAYLOR3_DISP:
00721         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10)));
00722         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30)));
00723         break;
00724       case TAYLOR4_DISP:
00725         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10 + (s-1)*(15))));
00726         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30 + (s-1)*(60))));
00727         break;
00728       case TAYLOR5_DISP:
00729         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
00730                 + (s-1)*(15 + (s-1)*(-21)))));
00731         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
00732                 + (s-1)*(60 + (s-1)*(-105)))));
00733         break;
00734       case TAYLOR6_DISP:
00735         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
00736                 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28))))));
00737         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
00738                 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168))))));
00739         break;
00740       case TAYLOR7_DISP:
00741         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
00742                 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
00743                       + (s-1)*(-36)))))));
00744         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
00745                 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
00746                       + (s-1)*(-252)))))));
00747         break;
00748       case TAYLOR8_DISP:
00749         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
00750                 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
00751                       + (s-1)*(-36 + (s-1)*(45))))))));
00752         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
00753                 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
00754                       + (s-1)*(-252 + (s-1)*(360))))))));
00755         break;
00756       default:
00757         NAMD_die("Unknown MSM splitting.");
00758     } // switch
00759   } // splitting()
00760 
00761   void stencil_1d(Float phi[], Float t) {
00762     switch (approx) {
00763       case CUBIC:
00764         phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00765         t--;
00766         phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00767         t--;
00768         phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00769         t--;
00770         phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00771         break;
00772       case QUINTIC:
00773         phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
00774         t--;
00775         phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
00776             + t * (0.375f - (5.f/24)*t));
00777         t--;
00778         phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
00779         t--;
00780         phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
00781         t--;
00782         phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
00783             - t * (0.375f + (5.f/24)*t));
00784         t--;
00785         phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
00786         break;
00787       case QUINTIC2:
00788         phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
00789         t--;
00790         phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
00791         t--;
00792         phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
00793         t--;
00794         phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
00795         t--;
00796         phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
00797         t--;
00798         phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
00799         break;
00800       case SEPTIC:
00801         phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
00802         t--;
00803         phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
00804         t--;
00805         phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
00806         t--;
00807         phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
00808         t--;
00809         phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
00810         t--;
00811         phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
00812         t--;
00813         phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
00814         t--;
00815         phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
00816         break;
00817       case SEPTIC3:
00818         phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
00819                 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
00820                       + t*(-89.f/720)))))));
00821         t--;
00822         phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
00823                 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
00824                       + t*(623.f/720)))))));
00825         t--;
00826         phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
00827                 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
00828                       + t*(-623.f/240)))))));
00829         t--;
00830         phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
00831                 + t*((-727.f/48) + t*(623.f/144)))));
00832         t--;
00833         phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
00834                 + t*((-727.f/48) + t*(-623.f/144)))));
00835         t--;
00836         phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
00837                 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
00838                       + t*(623.f/240)))))));
00839         t--;
00840         phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
00841                 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
00842                       + t*(-623.f/720)))))));
00843         t--;
00844         phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
00845                 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
00846                       + t*(89.f/720)))))));
00847         break;
00848       case NONIC:
00849         phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
00850           (t-2)*(t-1);
00851         t--;
00852         phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
00853           (-8+t*(-35+9*t));
00854         t--;
00855         phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
00856           (-14+t*(-25+9*t));
00857         t--;
00858         phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
00859           (-6+t*(-5+3*t));
00860         t--;
00861         phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
00862           (-20+t*(-5+9*t));
00863         t--;
00864         phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
00865           (-20+t*(5+9*t));
00866         t--;
00867         phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
00868           (-6+t*(5+3*t));
00869         t--;
00870         phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
00871           (-14+t*(25+9*t));
00872         t--;
00873         phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
00874           (-8+t*(35+9*t));
00875         t--;
00876         phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
00877           (t+7)*(t+8);
00878         break;
00879       case NONIC4:
00880       { // begin grouping to define local variables
00881         double Tphi[10];
00882         double T=t;
00883         Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
00884               +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
00885                     +T*(129391./192+T*(-259715./4032+T*(28909./8064
00886                           +T*(-3569./40320)))))))));
00887         T--;
00888         Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
00889                 +T*(-19648027./384+T*(9469163./640+T*(-545977./192
00890                       +T*(156927./448+T*(-28909./1152
00891                           +T*(3569./4480)))))))));
00892         T--;
00893         Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
00894                 +T*(3548009./96+T*(-2422263./160+T*(197255./48
00895                       +T*(-19959./28+T*(144545./2016
00896                           +T*(-3569./1120)))))))));
00897         T--;
00898         Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
00899                 +T*(-576499./96+T*(686147./160+T*(-96277./48
00900                       +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
00901         T--;
00902         Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
00903                 +T*(6337./96+T*(-2745./32+T*(28909./576
00904                       +T*(-3569./320)))))));
00905         T--;
00906         Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
00907                 +T*(6337./96+T*(2745./32+T*(28909./576
00908                       +T*(3569./320)))))));
00909         T--;
00910         Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
00911                 +T*(-576499./96+T*(-686147./160+T*(-96277./48
00912                       +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
00913         T--;
00914         Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
00915                 +T*(3548009./96+T*(2422263./160+T*(197255./48
00916                       +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
00917         T--;
00918         Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
00919                 +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
00920                       +T*(-156927./448+T*(-28909./1152
00921                           +T*(-3569./4480)))))))));
00922         T--;
00923         Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
00924               +T*(17306975./288+T*(7761805./384+T*(2895587./640
00925                     +T*(129391./192+T*(259715./4032+T*(28909./8064
00926                           +T*(3569./40320)))))))));
00927         for (int i=0;  i < 10;  i++) {
00928           phi[i] = Float(Tphi[i]);
00929         }
00930       } // end grouping to define local variables
00931         break;
00932       default:
00933         NAMD_die("Unknown MSM approximation.");
00934     } // switch
00935   } // stencil_1d()
00936 
00937   void d_stencil_1d(Float dphi[], Float phi[], Float t, Float h_1) {
          // Evaluates, for the approximation selected by the member `approx`,
          // every entry of the 1D stencil together with its derivative.
          //   phi[]  - receives the stencil values
          //   dphi[] - receives the derivatives; every entry carries the
          //            factor h_1
          //   t      - evaluation offset; it is decremented by 1 before each
          //            successive entry, so entry i is evaluated at (t - i)
          //   h_1    - scale applied to each derivative (presumably 1/h, the
          //            reciprocal grid spacing -- confirm against callers)
          // Entries written: 4 for CUBIC, 6 for QUINTIC and QUINTIC2,
          // 8 for SEPTIC and SEPTIC3, 10 for NONIC and NONIC4; both arrays
          // must be at least that long.
          // Any other value of approx is reported through NAMD_die().
00938     switch (approx) {
00939       case CUBIC:
              // 4 entries, phi[0..3] / dphi[0..3]
00940         phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
00941         dphi[0] = (1.5f * t - 2) * (2 - t) * h_1;
00942         t--;
00943         phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
00944         dphi[1] = (-5 + 4.5f * t) * t * h_1;
00945         t--;
00946         phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
00947         dphi[2] = (-5 - 4.5f * t) * t * h_1;
00948         t--;
00949         phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
00950         dphi[3] = (1.5f * t + 2) * (2 + t) * h_1;
00951         break;
00952       case QUINTIC:
              // 6 entries; derivatives are written in product-rule form
00953         phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
00954         dphi[0] = ((-1.f/24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t))
00955               + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1;
00956         t--;
00957         phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
00958             + t * (0.375f - (5.f/24)*t));
00959         dphi[1] = (-((1.f/6) + t * (0.375f - (5.f/24)*t)) *
00960             (11 + t * (-12 + 3*t)) + (1-t) * (2-t) * (3-t) *
00961             (0.375f - (5.f/12)*t)) * h_1;
00962         t--;
00963         phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
00964         dphi[2] = (-(0.5f + t * (0.25f - (5.f/12)*t)) * (1 + t * (4 - 3*t))
00965             + (1-t*t) * (2-t) * (0.25f - (5.f/6)*t)) * h_1;
00966         t--;
00967         phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
00968         dphi[3] = ((0.5f + t * (-0.25f - (5.f/12)*t)) * (1 + t * (-4 - 3*t))
00969             - (1-t*t) * (2+t) * (0.25f + (5.f/6)*t)) * h_1;
00970         t--;
00971         phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
00972             - t * (0.375f + (5.f/24)*t));
00973         dphi[4] = (((1.f/6) + t * (-0.375f - (5.f/24)*t)) *
00974             (11 + t * (12 + 3*t)) - (1+t) * (2+t) * (3+t) *
00975             (0.375f + (5.f/12)*t)) * h_1;
00976         t--;
00977         phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
00978         dphi[5] = ((1.f/24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t))
00979               + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1;
00980         break;
00981       case QUINTIC2:
              // 6 entries
00982         phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
00983         dphi[0] = ((1.f/24) * (3-t) * (3-t) * ((3-t)*(5*t-8)
00984               - 3*(t-2)*(5*t-8) + 5*(t-2)*(3-t))) * h_1;
00985         t--;
00986         phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
00987         dphi[1] = ((-1.f/24) * ((2-t)*(-48+t*(153+t*(-114+t*25)))
00988               - (t-1)* (-48+t*(153+t*(-114+t*25)))
00989               + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1;
00990         t--;
00991         phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
00992         dphi[2] = ((1.f/12) * (-(12+t*(12+t*(-3+t*(-38+t*25))))
00993               + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1;
00994         t--;
00995         phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
00996         dphi[3] = ((1.f/12) * ((12+t*(-12+t*(-3+t*(38+t*25))))
00997               + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1;
00998         t--;
00999         phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
01000         dphi[4] = ((-1.f/24) * ((2+t)*(48+t*(153+t*(114+t*25)))
01001               + (t+1)* (48+t*(153+t*(114+t*25)))
01002               + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1;
01003         t--;
01004         phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
01005         dphi[5] = ((1.f/24) * (3+t) * (3+t) * ((3+t)*(5*t+8)
01006               + 3*(t+2)*(5*t+8) + 5*(t+2)*(3+t))) * h_1;
01007         break;
01008       case SEPTIC:
              // 8 entries; derivatives are pre-expanded polynomials in t
01009         phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
01010         dphi[0] = (-1.f/720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807
01011                   +t*(-122+t*7))))) * h_1;
01012         t--;
01013         phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
01014         dphi[1] = (1.f/720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445
01015                     +t*(-750+t*49)))))) * h_1;
01016         t--;
01017         phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
01018         dphi[2] = (-1.f/240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365
01019                     +t*(-450+t*49)))))) * h_1;
01020         t--;
01021         phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
01022         dphi[3] = (1.f/144)*t*(-560+t*(84+t*(644+t*(-175
01023                   +t*(-150+t*49))))) * h_1;
01024         t--;
01025         phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
01026         dphi[4] = (-1.f/144)*t*(560+t*(84+t*(-644+t*(-175
01027                   +t*(150+t*49))))) * h_1;
01028         t--;
01029         phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
01030         dphi[5] = (1.f/240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365
01031                     +t*(450+t*49)))))) * h_1;
01032         t--;
01033         phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
01034         dphi[6] = (-1.f/720)*(756+t*(9940+t*(17724+t*(12740+t*(4445
01035                     +t*(750+t*49)))))) * h_1;
01036         t--;
01037         phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
01038         dphi[7] = (1.f/720)*(t+4)*(1944+t*(3644+t*(2512+t*(807
01039                   +t*(122+t*7))))) * h_1;
01040         break;
01041       case SEPTIC3:
              // 8 entries; values and derivatives are Horner-form polynomials,
              // each dphi being the term-by-term derivative of its phi
01042         phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
01043                 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
01044                       + t*(-89.f/720)))))));
01045         dphi[0] = ((-7456.f/5) + t*((117572.f/45) + t*(-1899
01046                 + t*((26383.f/36) + t*((-22807.f/144) + t*((727.f/40)
01047                       + t*(-623.f/720))))))) * h_1;
01048         t--;
01049         phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
01050                 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
01051                       + t*(623.f/720)))))));
01052         dphi[1] = ((25949.f/20) + t*((-117131.f/36) + t*((6741.f/2)
01053                 + t*((-66437.f/36) + t*((81109.f/144) + t*((-727.f/8)
01054                       + t*(4361.f/720))))))) * h_1;
01055         t--;
01056         phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
01057                 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
01058                       + t*(-623.f/240)))))));
01059         dphi[2] = ((-8617.f/60) + t*((12873.f/20) + t*((-2373.f/2)
01060                 + t*((4557.f/4) + t*((-9583.f/16) + t*((6543.f/40)
01061                       + t*(-4361.f/240))))))) * h_1;
01062         t--;
01063         phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
01064                 + t*((-727.f/48) + t*(623.f/144)))));
01065         dphi[3] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((12845.f/144)
01066                   + t*((-727.f/8) + t*(4361.f/144)))))) * h_1;
01067         t--;
01068         phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
01069                 + t*((-727.f/48) + t*(-623.f/144)))));
01070         dphi[4] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((-12845.f/144)
01071                   + t*((-727.f/8) + t*(-4361.f/144)))))) * h_1;
01072         t--;
01073         phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
01074                 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
01075                       + t*(623.f/240)))))));
01076         dphi[5] = ((8617.f/60) + t*((12873.f/20) + t*((2373.f/2)
01077                 + t*((4557.f/4) + t*((9583.f/16) + t*((6543.f/40)
01078                       + t*(4361.f/240))))))) * h_1;
01079         t--;
01080         phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
01081                 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
01082                       + t*(-623.f/720)))))));
01083         dphi[6] = ((-25949.f/20) + t*((-117131.f/36) + t*((-6741.f/2)
01084                 + t*((-66437.f/36) + t*((-81109.f/144) + t*((-727.f/8)
01085                       + t*(-4361.f/720))))))) * h_1;
01086         t--;
01087         phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
01088                 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
01089                       + t*(89.f/720)))))));
01090         dphi[7] = ((7456.f/5) + t*((117572.f/45) + t*(1899
01091                 + t*((26383.f/36) + t*((22807.f/144) + t*((727.f/40)
01092                       + t*(623.f/720))))))) * h_1;
01093         break;
01094       case NONIC:
              // 10 entries; derivatives are pre-expanded polynomials in t
01095         phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
01096           (t-2)*(t-1);
01097         dphi[0] = (-1.f/40320)*(t-5)*(-117648+t*(256552+t*(-221416
01098                 +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1;
01099         t--;
01100         phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
01101           (-8+t*(-35+9*t));
01102         dphi[1] = (1.f/40320)*(71856+t*(-795368+t*(1569240+t*(-1357692
01103                   +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1;
01104         t--;
01105         phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
01106           (-14+t*(-25+9*t));
01107         dphi[2] = (1.f/10080)*(3384+t*(-69080+t*(55026
01108                 +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640
01109                           +t*(-81)))))))))*h_1;
01110         t--;
01111         phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
01112           (-6+t*(-5+3*t));
01113         dphi[3] = (1.f/1440)*(72+t*(-6344+t*(2070
01114                 +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1;
01115         t--;
01116         phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
01117           (-20+t*(-5+9*t));
01118         dphi[4] = (-1.f/2880)*t*(10792+t*(-972+t*(-12516
01119                 +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1;
01120         t--;
01121         phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
01122           (-20+t*(5+9*t));
01123         dphi[5] = (1.f/2880)*t*(-10792+t*(-972+t*(12516
01124                 +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1;
01125         t--;
01126         phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
01127           (-6+t*(5+3*t));
01128         dphi[6] = (1.f/1440)*(-72+t*(-6344+t*(-2070
01129                 +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1;
01130         t--;
01131         phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
01132           (-14+t*(25+9*t));
01133         dphi[7] = (1.f/10080)*(-3384+t*(-69080+t*(-55026
01134                 +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1;
01135         t--;
01136         phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
01137           (-8+t*(35+9*t));
01138         dphi[8] = (-1.f/40320)*(71856+t*(795368+t*(1569240
01139                 +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296
01140                           +t*81))))))))*h_1;
01141         t--;
01142         phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
01143           (t+7)*(t+8);
01144         dphi[9] = (1.f/40320)*(t+5)*(117648+t*(256552+t*(221416
01145                 +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1;
01146         break;
01147       case NONIC4:
01148       { // begin grouping to define local variables
              // 10 entries.  Unlike the other cases, the polynomials are
              // accumulated in double (T, Tphi, Tdphi) and only converted to
              // Float in the final loop.
01149         double Tphi[10], Tdphi[10];
01150         double T=t;
01151         Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
01152               +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
01153                     +T*(129391./192+T*(-259715./4032+T*(28909./8064
01154                           +T*(-3569./40320)))))))));
01155         Tdphi[0] = (-64188125./504+T*(231125375./1008
01156               +T*(-17306975./96+T*(7761805./96+T*(-2895587./128
01157                     +T*(129391./32+T*(-259715./576+T*(28909./1008
01158                           +T*(-3569./4480))))))))) * h_1;
01159         T--;
01160         Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
01161                 +T*(-19648027./384+T*(9469163./640+T*(-545977./192
01162                       +T*(156927./448+T*(-28909./1152
01163                           +T*(3569./4480)))))))));
01164         Tdphi[1] = (8314091./56+T*(-49901303./144+T*(11290587./32
01165                 +T*(-19648027./96+T*(9469163./128+T*(-545977./32
01166                       +T*(156927./64+T*(-28909./144
01167                           +T*(32121./4480))))))))) * h_1;
01168         T--;
01169         Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
01170                 +T*(3548009./96+T*(-2422263./160+T*(197255./48
01171                       +T*(-19959./28+T*(144545./2016
01172                           +T*(-3569./1120)))))))));
01173         Tdphi[2] = (-1038011./28+T*(31157515./252+T*(-2870007./16
01174                 +T*(3548009./24+T*(-2422263./32+T*(197255./8
01175                       +T*(-19959./4+T*(144545./252
01176                           +T*(-32121./1120))))))))) * h_1;
01177         T--;
01178         Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
01179                 +T*(-576499./96+T*(686147./160+T*(-96277./48
01180                       +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
01181         Tdphi[3] = (12757./12+T*(-230123./36+T*(264481./16
01182                 +T*(-576499./24+T*(686147./32+T*(-96277./8
01183                       +T*(99547./24+T*(-28909./36
01184                           +T*(10707./160))))))))) * h_1;
01185         T--;
01186         Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
01187                 +T*(6337./96+T*(-2745./32+T*(28909./576
01188                       +T*(-3569./320)))))));
01189         Tdphi[4] = T*(-205./72+T*T*(91./48+T*(-6181./64
01190                 +T*(6337./16+T*(-19215./32+T*(28909./72
01191                       +T*(-32121./320))))))) * h_1;
01192         T--;
01193         Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
01194                 +T*(6337./96+T*(2745./32+T*(28909./576
01195                       +T*(3569./320)))))));
01196         Tdphi[5] = T*(-205./72+T*T*(91./48+T*(6181./64
01197                 +T*(6337./16+T*(19215./32+T*(28909./72
01198                       +T*(32121./320))))))) * h_1;
01199         T--;
01200         Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
01201                 +T*(-576499./96+T*(-686147./160+T*(-96277./48
01202                       +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
01203         Tdphi[6] = (-12757./12+T*(-230123./36+T*(-264481./16
01204                 +T*(-576499./24+T*(-686147./32+T*(-96277./8
01205                       +T*(-99547./24+T*(-28909./36
01206                           +T*(-10707./160))))))))) * h_1;
01207         T--;
01208         Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
01209                 +T*(3548009./96+T*(2422263./160+T*(197255./48
01210                       +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
01211         Tdphi[7] = (1038011./28+T*(31157515./252+T*(2870007./16
01212                 +T*(3548009./24+T*(2422263./32+T*(197255./8
01213                       +T*(19959./4+T*(144545./252
01214                           +T*(32121./1120))))))))) * h_1;
01215         T--;
01216         Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
01217                 +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
01218                       +T*(-156927./448+T*(-28909./1152
01219                           +T*(-3569./4480)))))))));
01220         Tdphi[8] = (-8314091./56+T*(-49901303./144+T*(-11290587./32
01221                 +T*(-19648027./96+T*(-9469163./128+T*(-545977./32
01222                       +T*(-156927./64+T*(-28909./144
01223                           +T*(-32121./4480))))))))) * h_1;
01224         T--;
01225         Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
01226               +T*(17306975./288+T*(7761805./384+T*(2895587./640
01227                     +T*(129391./192+T*(259715./4032+T*(28909./8064
01228                           +T*(3569./40320)))))))));
01229         Tdphi[9] = (64188125./504+T*(231125375./1008
01230               +T*(17306975./96+T*(7761805./96+T*(2895587./128
01231                     +T*(129391./32+T*(259715./576+T*(28909./1008
01232                           +T*(3569./4480))))))))) * h_1;
              // copy the double-precision results out as Float
01233         for (int i=0;  i < 10;  i++) {
01234           phi[i] = Float(Tphi[i]);
01235           dphi[i] = Float(Tdphi[i]);
01236         }
01237       } // end grouping to define local variables
01238         break;
01239       default:
01240         NAMD_die("Unknown MSM approximation.");
01241     } // switch
01242   } // d_stencil_1d()
01243 
01244   void stencil_1d_c1hermite(Float phi[], Float psi[], Float t, Float h) {
01245     phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
01246     psi[0] = h * t * (1 - t) * (1 - t);
01247     t--;
01248     phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
01249     psi[1] = h * t * (1 + t) * (1 + t);
01250   }
01251 
01252   void d_stencil_1d_c1hermite(
01253       Float dphi[], Float phi[], Float dpsi[], Float psi[],
01254       Float t, Float h, Float h_1) {
01255     phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
01256     dphi[0] = -6 * t * (1 - t) * h_1;
01257     psi[0] = h * t * (1 - t) * (1 - t);
01258     dpsi[0] = (1 - t) * (1 - 3*t);
01259     t--;
01260     phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
01261     dphi[1] = -6 * t * (1 + t) * h_1;
01262     psi[1] = h * t * (1 + t) * (1 + t);
01263     dpsi[1] = (1 + t) * (1 + 3*t);
01264   }
01265 
01266   static void ndsplitting(BigReal pg[], BigReal s, int n, int _split) {
01267     int k = 0;
01268     if (k == n) return;
01269     if (s <= 1) {
01270       // compute derivatives of smoothed part
01271       switch (_split) {
01272         case TAYLOR2:
01273           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
01274           if (k == n) break;
01275           pg[k++] = -1./2 + (s-1)*(3./4);
01276           if (k == n) break;
01277           pg[k++] = 3./4;
01278           break;
01279         case TAYLOR3:
01280           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
01281           if (k == n) break;
01282           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16));
01283           if (k == n) break;
01284           pg[k++] = 3./4 + (s-1)*(-15./8);
01285           if (k == n) break;
01286           pg[k++] = -15./8;
01287           break;
01288         case TAYLOR4:
01289           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
01290                   + (s-1)*(35./128))));
01291           if (k == n) break;
01292           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32)));
01293           if (k == n) break;
01294           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32));
01295           if (k == n) break;
01296           pg[k++] = -15./8 + (s-1)*(105./16);
01297           if (k == n) break;
01298           pg[k++] = 105./16;
01299           break;
01300         case TAYLOR5:
01301           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
01302                   + (s-1)*(35./128 + (s-1)*(-63./256)))));
01303           if (k == n) break;
01304           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
01305                   + (s-1)*(-315./256))));
01306           if (k == n) break;
01307           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64)));
01308           if (k == n) break;
01309           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64));
01310           if (k == n) break;
01311           pg[k++] = 105./16 + (s-1)*(-945./32);
01312           if (k == n) break;
01313           pg[k++] = -945./32;
01314           break;
01315         case TAYLOR6:
01316           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
01317                   + (s-1)*(35./128 + (s-1)*(-63./256 + (s-1)*(231./1024))))));
01318           if (k == n) break;
01319           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
01320                   + (s-1)*(-315./256 + (s-1)*(693./512)))));
01321           if (k == n) break;
01322           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
01323                   + (s-1)*(3465./512))));
01324           if (k == n) break;
01325           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64
01326                 + (s-1)*(3465./128)));
01327           if (k == n) break;
01328           pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128));
01329           if (k == n) break;
01330           pg[k++] = -945./32 + (s-1)*(10395./64);
01331           if (k == n) break;
01332           pg[k++] = 10395./64;
01333           break;
01334         case TAYLOR7:
01335           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
01336                   + (s-1)*(35./128 + (s-1)*(-63./256
01337                       + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
01338           if (k == n) break;
01339           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
01340                   + (s-1)*(-315./256 + (s-1)*(693./512
01341                       + (s-1)*(-3003./2048))))));
01342           if (k == n) break;
01343           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
01344                   + (s-1)*(3465./512 + (s-1)*(-9009./1024)))));
01345           if (k == n) break;
01346           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
01347                   + (s-1)*(-45045./1024))));
01348           if (k == n) break;
01349           pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
01350                 + (s-1)*(-45045./256)));
01351           if (k == n) break;
01352           pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256));
01353           if (k == n) break;
01354           pg[k++] = 10395./64 + (s-1)*(-135135./128);
01355           if (k == n) break;
01356           pg[k++] = -135135./128;
01357           break;
01358         case TAYLOR8:
01359           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
01360                   + (s-1)*(35./128 + (s-1)*(-63./256
01361                       + (s-1)*(231./1024 + (s-1)*(-429./2048
01362                           + (s-1)*(6435./32768))))))));
01363           if (k == n) break;
01364           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
01365                   + (s-1)*(-315./256 + (s-1)*(693./512
01366                       + (s-1)*(-3003./2048 + (s-1)*(6435./4096)))))));
01367           if (k == n) break;
01368           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
01369                   + (s-1)*(3465./512 + (s-1)*(-9009./1024
01370                       + (s-1)*(45045./4096))))));
01371           if (k == n) break;
01372           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
01373                   + (s-1)*(-45045./1024 + (s-1)*(135135./2048)))));
01374           if (k == n) break;
01375           pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
01376                 + (s-1)*(-45045./256 + (s-1)*(675675./2048))));
01377           if (k == n) break;
01378           pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256
01379                 + (s-1)*(675675./512)));
01380           if (k == n) break;
01381           pg[k++] = 10395./64 + (s-1)*(-135135./128 + (s-1)*(2027025./512));
01382           if (k == n) break;
01383           pg[k++] = -135135./128 + (s-1)*(2027025./256);
01384           if (k == n) break;
01385           pg[k++] = 2027025./256;
01386           break;
01387         default:
01388           NAMD_die("Unknown MSM splitting.");
01389       }
01390     } // if (s <= 1)
01391     else { // (s > 1)
01392       // compute derivatives of s^(-1/2)
01393       const BigReal s_1 = 1./s;
01394       BigReal s_p = sqrt(s_1);
01395       BigReal p = -0.5;
01396       BigReal _c = 1;
01397       pg[k++] = _c * s_p;
01398       while (k < n) {
01399         s_p *= s_1;
01400         _c *= p;
01401         p -= 1;
01402         pg[k++] = _c * s_p;
01403       }
01404     } // else (s > 1)
01405     // higher derivatives are zero
01406     while (k < n) pg[k++] = 0;
01407   } // ndsplitting()
01408   // Accumulates the contribution of the vector rv into a C1 Hermite weight matrix:
01409   // each touched entry of matrix.melem receives +/- (_c/_a) times a mixed partial of g(s).
01410   static void gc_c1hermite_elem_accum(C1Matrix& matrix, BigReal _c,
01411       Vector rv, BigReal _a, int _split) {
01412     const BigReal a_1 = 1./_a;
01413     const BigReal a_2 = a_1 * a_1;
01414     const BigReal s = (rv * rv) * a_2;  // (rv*rv)/_a^2; rv*rv is presumably the dot product -- confirm
01415     const BigReal dx = -2 * rv.x * a_2;  // ds/dx  NOTE(review): negated relative to d/d(rv.x) of s; presumably taken w.r.t. the other end of rv -- confirm
01416     const BigReal dy = -2 * rv.y * a_2;  // ds/dy
01417     const BigReal dz = -2 * rv.z * a_2;  // ds/dz
01418     const BigReal dd = 2 * a_2;  // d^2s/dx^2 = d^2s/dy^2 = d^2s/dz^2
01419     BigReal tmp;  // scratch: the value added to / subtracted from the entries below
01420     enum { nderiv = C1_VECTOR_SIZE-1 };  // length of p[]; p[6] is read below, so this must be at least 7
01421     BigReal p[nderiv];  // p[k] = k-th derivative of g with respect to s (filled by ndsplitting)
01422     Float *g = matrix.melem;  // element storage, addressed through C1INDEX(.,.)
01423 
01424     // fold 1/_a into the coefficient that scales every accumulated term
01425     _c = _c * a_1;
01426 
01427     // compute derivatives (d/ds)^k of splitting g(s), s=r^2
01428     ndsplitting(p, s, nderiv, _split);
01429     // Sign rule visible below: "+=" when the second C1INDEX argument has even total order (going by the Dxyz names), "-=" when odd.
01430     // weight 0
01431     tmp = _c * p[0];
01432     g[C1INDEX(D000,D000)] += tmp;
01433 
01434     // weight 1
01435     tmp = _c * p[1] * dx;
01436     g[C1INDEX(D100,D000)] += tmp;
01437     g[C1INDEX(D000,D100)] -= tmp;
01438 
01439     tmp = _c * p[1] * dy;
01440     g[C1INDEX(D010,D000)] += tmp;
01441     g[C1INDEX(D000,D010)] -= tmp;
01442 
01443     tmp = _c * p[1] * dz;
01444     g[C1INDEX(D001,D000)] += tmp;
01445     g[C1INDEX(D000,D001)] -= tmp;
01446 
01447     // C1 splitting returns here  (NOTE(review): no return statement follows; weight 2 is always accumulated)
01448 
01449     // weight 2
01450     tmp = _c * p[2] * dx * dy;
01451     g[C1INDEX(D110,D000)] += tmp;
01452     g[C1INDEX(D000,D110)] += tmp;
01453     g[C1INDEX(D100,D010)] -= tmp;
01454     g[C1INDEX(D010,D100)] -= tmp;
01455 
01456     tmp = _c * p[2] * dx * dz;
01457     g[C1INDEX(D101,D000)] += tmp;
01458     g[C1INDEX(D000,D101)] += tmp;
01459     g[C1INDEX(D100,D001)] -= tmp;
01460     g[C1INDEX(D001,D100)] -= tmp;
01461 
01462     tmp = _c * p[2] * dy * dz;
01463     g[C1INDEX(D011,D000)] += tmp;
01464     g[C1INDEX(D000,D011)] += tmp;
01465     g[C1INDEX(D010,D001)] -= tmp;
01466     g[C1INDEX(D001,D010)] -= tmp;
01467     // pure second derivatives along one axis: p[2]*(ds/dx)^2 + p[1]*(d^2s/dx^2), likewise for y and z
01468     tmp = _c * (p[2] * dx*dx + p[1] * dd);
01469     g[C1INDEX(D100,D100)] -= tmp;
01470     tmp = _c * (p[2] * dy*dy + p[1] * dd);
01471     g[C1INDEX(D010,D010)] -= tmp;
01472     tmp = _c * (p[2] * dz*dz + p[1] * dd);
01473     g[C1INDEX(D001,D001)] -= tmp;
01474 
01475     // C2 splitting returns here
01476     if (_split == TAYLOR2) return;
01477 
01478     // weight 3
01479     tmp = _c * p[3] * dx * dy * dz;
01480     g[C1INDEX(D111,D000)] += tmp;
01481     g[C1INDEX(D110,D001)] -= tmp;
01482     g[C1INDEX(D101,D010)] -= tmp;
01483     g[C1INDEX(D011,D100)] -= tmp;
01484     g[C1INDEX(D100,D011)] += tmp;
01485     g[C1INDEX(D010,D101)] += tmp;
01486     g[C1INDEX(D001,D110)] += tmp;
01487     g[C1INDEX(D000,D111)] -= tmp;
01488     // remaining weight-3 terms: second order along one axis, first order along another
01489     tmp = _c * (p[3] * dx*dx * dy + p[2] * dd * dy);
01490     g[C1INDEX(D110,D100)] -= tmp;
01491     g[C1INDEX(D100,D110)] += tmp;
01492 
01493     tmp = _c * (p[3] * dx*dx * dz + p[2] * dd * dz);
01494     g[C1INDEX(D101,D100)] -= tmp;
01495     g[C1INDEX(D100,D101)] += tmp;
01496 
01497     tmp = _c * (p[3] * dy*dy * dx + p[2] * dd * dx);
01498     g[C1INDEX(D110,D010)] -= tmp;
01499     g[C1INDEX(D010,D110)] += tmp;
01500 
01501     tmp = _c * (p[3] * dy*dy * dz + p[2] * dd * dz);
01502     g[C1INDEX(D011,D010)] -= tmp;
01503     g[C1INDEX(D010,D011)] += tmp;
01504 
01505     tmp = _c * (p[3] * dz*dz * dx + p[2] * dd * dx);
01506     g[C1INDEX(D101,D001)] -= tmp;
01507     g[C1INDEX(D001,D101)] += tmp;
01508 
01509     tmp = _c * (p[3] * dz*dz * dy + p[2] * dd * dy);
01510     g[C1INDEX(D011,D001)] -= tmp;
01511     g[C1INDEX(D001,D011)] += tmp;
01512 
01513     // C3 splitting returns here
01514     if (_split == TAYLOR3) return;
01515 
01516     // weight 4
01517     tmp = _c * (p[4] * dx*dx * dy * dz + p[3] * dd * dy * dz);
01518     g[C1INDEX(D111,D100)] -= tmp;
01519     g[C1INDEX(D100,D111)] -= tmp;
01520     g[C1INDEX(D110,D101)] += tmp;
01521     g[C1INDEX(D101,D110)] += tmp;
01522 
01523     tmp = _c * (p[4] * dy*dy * dx * dz + p[3] * dd * dx * dz);
01524     g[C1INDEX(D111,D010)] -= tmp;
01525     g[C1INDEX(D010,D111)] -= tmp;
01526     g[C1INDEX(D110,D011)] += tmp;
01527     g[C1INDEX(D011,D110)] += tmp;
01528 
01529     tmp = _c * (p[4] * dz*dz * dx * dy + p[3] * dd * dx * dy);
01530     g[C1INDEX(D111,D001)] -= tmp;
01531     g[C1INDEX(D001,D111)] -= tmp;
01532     g[C1INDEX(D101,D011)] += tmp;
01533     g[C1INDEX(D011,D101)] += tmp;
01534     // second order along two different axes (diagonal entries)
01535     tmp = _c * (p[4] * dx*dx * dy*dy + p[3] * dx*dx * dd
01536         + p[3] * dd * dy*dy + p[2] * dd * dd);
01537     g[C1INDEX(D110,D110)] += tmp;
01538     tmp = _c * (p[4] * dx*dx * dz*dz + p[3] * dx*dx * dd
01539         + p[3] * dd * dz*dz + p[2] * dd * dd);
01540     g[C1INDEX(D101,D101)] += tmp;
01541     tmp = _c * (p[4] * dy*dy * dz*dz + p[3] * dy*dy * dd
01542         + p[3] * dd * dz*dz + p[2] * dd * dd);
01543     g[C1INDEX(D011,D011)] += tmp;
01544 
01545     // C4 splitting returns here
01546     if (_split == TAYLOR4) return;
01547 
01548     // weight 5
01549     tmp = _c * (p[5] * dx*dx * dy*dy * dz + p[4] * dx*dx * dd * dz
01550         + p[4] * dd * dy*dy * dz + p[3] * dd * dd * dz);
01551     g[C1INDEX(D111,D110)] += tmp;
01552     g[C1INDEX(D110,D111)] -= tmp;
01553 
01554     tmp = _c * (p[5] * dx*dx * dz*dz * dy + p[4] * dx*dx * dd * dy
01555         + p[4] * dd * dz*dz * dy + p[3] * dd * dd * dy);
01556     g[C1INDEX(D111,D101)] += tmp;
01557     g[C1INDEX(D101,D111)] -= tmp;
01558 
01559     tmp = _c * (p[5] * dy*dy * dz*dz * dx + p[4] * dy*dy * dd * dx
01560         + p[4] * dd * dz*dz * dx + p[3] * dd * dd * dx);
01561     g[C1INDEX(D111,D011)] += tmp;
01562     g[C1INDEX(D011,D111)] -= tmp;
01563 
01564     // C5 splitting returns here
01565     if (_split == TAYLOR5) return;
01566 
01567     // weight 6
01568     tmp = _c * (p[6] * dx*dx * dy*dy * dz*dz + p[5] * dx*dx * dy*dy * dd
01569         + p[5] * dx*dx * dd * dz*dz + p[5] * dd * dy*dy * dz*dz
01570         + p[4] * dx*dx * dd * dd + p[4] * dd * dy*dy * dd
01571         + p[4] * dd * dd * dz*dz + p[3] * dd * dd * dd);
01572     g[C1INDEX(D111,D111)] -= tmp;
01573 
01574     // calculate full matrix for C6 or higher splitting (every entry above has been accumulated when execution reaches here)
01575 
01576   } // gc_c1hermite_elem_accum()
01577 
01578 
01579 }; // ComputeMsmMgr
01580 
01581 
01582 // Degree of polynomial basis function Phi, one entry per approximation.
01583 // For the purpose of finding the stencil width, Hermite interpolation 
01584 // sets this value to 1 (the last entry).
01585 const int ComputeMsmMgr::PolyDegree[NUM_APPROX] = {
01586   3, 5, 5, 7, 7, 9, 9, 1,  // presumably in the order of the labelled IndexOffset rows (cubic ... C1 Hermite) -- confirm against the header enum
01587 };
01588 
01589 // The stencil array lengths below: the number of entries in each row of IndexOffset.
01590 const int ComputeMsmMgr::Nstencil[NUM_APPROX] = {
01591   5, 7, 7, 9, 9, 11, 11, 3,  // each value equals the matching PolyDegree entry + 2
01592 };
01593 
01594 // Index offsets from the stencil-centered grid element, to get
01595 // to the correct contributing grid element.
01596 const int
01597 ComputeMsmMgr::IndexOffset[NUM_APPROX][MAX_NSTENCIL_SKIP_ZERO] = {  // rows hold 0 and odd offsets only; row length matches Nstencil
01598   // cubic
01599   {-3, -1, 0, 1, 3},
01600 
01601   // quintic C1
01602   {-5, -3, -1, 0, 1, 3, 5},
01603 
01604   // quintic C2  (same as quintic C1)
01605   {-5, -3, -1, 0, 1, 3, 5},
01606 
01607   // septic C1
01608   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
01609 
01610   // septic C3  (same as septic C1)
01611   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
01612 
01613   // nonic C1
01614   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
01615 
01616   // nonic C4  (same as nonic C1)
01617   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
01618 
01619   // C1 Hermite
01620   {-1, 0, 1},
01621 };
01622 
01623 // The grid transfer stencils for the non-factored restriction and
01624 // prolongation procedures.  Each row is symmetric about its center weight of 1.
01625 const Float
01626 ComputeMsmMgr::PhiStencil[NUM_APPROX_FORMS][MAX_NSTENCIL_SKIP_ZERO] = {  // 7 rows are listed; there is no row for C1 Hermite
01627   // cubic
01628   {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
01629 
01630   // quintic C1
01631   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
01632 
01633   // quintic C2  (same as quintic C1)
01634   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
01635 
01636   // septic C1
01637   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
01638     -245.f/2048, 49.f/2048, -5.f/2048 },
01639 
01640   // septic C3  (same as septic C1)
01641   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
01642     -245.f/2048, 49.f/2048, -5.f/2048 },
01643 
01644   // nonic C1
01645   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
01646     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
01647     -405.f/65536, 35.f/65536 },
01648 
01649   // nonic C4  (same as nonic C1)
01650   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
01651     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
01652     -405.f/65536, 35.f/65536 },
01653 };
01654 
01655 
01656 // Designates PE assignment for static load balancing of 
01657 // MsmBlock-related arrays: procNum() looks the PE up in ComputeMsmMgr::blockAssign.
01658 class MsmBlockMap : public CkArrayMap {
01659   private:
01660     ComputeMsmMgr *mgrLocal;  // local branch of the ComputeMsmMgr group
01661     int *penum;               // blockAssign buffer (0 without MSM_NODE_MAPPING)
01662     int level;                // level passed to blockFlatIndex() in procNum()
01663   public:
01664     MsmBlockMap(int lvl) {
01665       mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
01666           CkpvAccess(BOCclass_group).computeMsmMgr);
01667 #ifdef MSM_NODE_MAPPING
01668       penum = mgrLocal->blockAssign.buffer();  // raw pointer is cached; assumes the buffer is not reallocated afterwards -- confirm
01669 #else
01670       penum = 0;
01671 #endif
01672       level = lvl;
01673     }
01674     MsmBlockMap(CkMigrateMessage *m) { }  // NOTE(review): leaves mgrLocal, penum and level unset
01675     int registerArray(CkArrayIndex& numElements, CkArrayID aid) {
01676       return 0;  // arguments are ignored
01677     }
01678     int procNum(int /*arrayHdl*/, const CkArrayIndex &idx) {
01679       int *pn = (int *)idx.data();  // first three ints are used as the block's (i,j,k)
01680 #ifdef MSM_NODE_MAPPING
01681       int n = mgrLocal->blockFlatIndex(level, pn[0], pn[1], pn[2]);
01682       return penum[n];
01683 #else
01684       return 0;
01685 #endif
01686     }
01687 };
01688 
01689 
01690 // Designates PE assignment for static load balancing of 
01691 // MsmGridCutoff-related arrays: procNum() looks the PE up in ComputeMsmMgr::gcutAssign.
01692 class MsmGridCutoffMap : public CkArrayMap {
01693   private:
01694     int *penum;  // gcutAssign buffer (0 without MSM_NODE_MAPPING)
01695   public:
01696     MsmGridCutoffMap() {  // NOTE(review): unlike MsmBlockMap, no CkMigrateMessage constructor is declared
01697       ComputeMsmMgr *mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
01698           CkpvAccess(BOCclass_group).computeMsmMgr);
01699 #ifdef MSM_NODE_MAPPING
01700       penum = mgrLocal->gcutAssign.buffer();
01701 #else
01702       penum = 0;
01703 #endif
01704     }
01705     int registerArray(CkArrayIndex& numElements, CkArrayID aid) {
01706       return 0;  // arguments are ignored
01707     }
01708     int procNum(int /*arrayHdl*/, const CkArrayIndex &idx) {
01709 #if 1
01710       int n = *((int *)idx.data());  // first int of the index is used directly as the table position
01711 #ifdef MSM_NODE_MAPPING
01712       return penum[n];
01713 #else
01714       return 0;
01715 #endif
01716 #else
01717       return 0;  // XXX to test load balancing
01718 #endif
01719     }
01720 };
01721 
01722 
01723 namespace msm {
01724 
01725   //
01726   // PatchData
01727   //
01728   // Performs anterpolation and interpolation algorithms.
01729   //
01730   // Surround each NAMD patch with enough grid points to perform 
01731   // anterpolation and interpolation without having to do any 
01732   // grid wrapping.  This does not give a partitioning of the 
01733   // MSM finest level grid --- rather, the edges of adjacent 
01734   // PatchData grids will overlap or contain image points along 
01735   // the periodic boundaries.  
01736   // Only declarations appear here; the member functions are defined elsewhere in the file.
01737  
01738   struct PatchData {
01739     ComputeMsmMgr *mgr;
01740     Map *map;
01741     PatchDiagram *pd;
01742     AtomCoordArray coord;  // returned by coordArray()
01743     ForceArray force;      // returned by forceArray()
01744     Grid<Float> qh;        // presumably charges (the kernel class below labels qh "source of charges")
01745     Grid<Float> eh;        // presumably potentials (the kernel class below labels eh "destination for potentials")
01746     Grid<Float> subgrid;
01747     Grid<C1Vector> qh_c1hermite;  // C1Vector-valued counterparts of the three grids above
01748     Grid<C1Vector> eh_c1hermite;
01749     Grid<C1Vector> subgrid_c1hermite;
01750     BigReal energy;
01751     //BigReal virial[3][3];
01752     int cntRecvs;
01753     int patchID;
01754     int sequence;  // from Compute object for message priority
01755 
01756     AtomCoordArray& coordArray() { return coord; }
01757     ForceArray& forceArray() { return force; }
01758 
01759     PatchData(ComputeMsmMgr *pmgr, int pid);
01760     void init(int natoms);
01761     // scalar (Float) grid path
01762     void anterpolation();
01763     void sendCharge();
01764     void addPotential(const Grid<Float>& epart);
01765     void interpolation();
01766     // C1 Hermite (C1Vector) grid path
01767     void anterpolationC1Hermite();
01768     void sendChargeC1Hermite();
01769     void addPotentialC1Hermite(const Grid<C1Vector>& epart);
01770     void interpolationC1Hermite();
01771   };
01772 
01773 } // namespace msm
01774 
01775 
01777 //
01778 // MsmGridCutoff
01779 //
01780 // Performs grid cutoff part of the computation.
01781 //
01782 // The grid cutoff part is the most computationally intensive part 
01783 // of MSM.  The templated MsmGridCutoffKernel class takes Vtype 
01784 // for charge and potential data (generalizes to vector for Hermite
01785 // interpolation) and takes Mtype for the pre-computed grid coefficient 
01786 // weights (generalizes to matrix for Hermite interpolation).
01787 // Call order used by the wrappers below: setup(), setupWeights(), then compute() per message.
01788 
01789 template <class Vtype, class Mtype>
01790 class MsmGridCutoffKernel {
01791   public:
01792     ComputeMsmMgr *mgrLocal;     // for quick access to data
01793     msm::Map *map;               // &mgrLocal->mapData(), set in init()
01794     msm::BlockIndex qhblockIndex;  // source of charges
01795     msm::BlockSend ehblockSend;    // destination for potentials
01796     int eia, eib, eja, ejb, eka, ekb, eni, enj, enk;  // for "fold factor"
01797     int isfold;  // for "fold factor"
01798     msm::Grid<Vtype> qh;      // charge block, filled from the GridMsg in compute()
01799     msm::Grid<Vtype> eh;      // potential block, produced by compute()
01800     msm::Grid<Vtype> ehfold;  // for "fold factor"
01801     const msm::Grid<Mtype> *pgc;   // weights read by compute(); set by setupWeights()
01802     const msm::Grid<Mtype> *pgvc;  // set by setupWeights(); only referenced by commented-out code in compute()
01803     int priority;  // computed in setup()
01804     int sequence;  // filled from the GridMsg in compute()
01805 
01806     MsmGridCutoffKernel() { init(); }
01807     // Binds to the local ComputeMsmMgr branch and calls its add*() bookkeeping hooks.
01808     void init() {
01809       isfold = 0;
01810       mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
01811           CkpvAccess(BOCclass_group).computeMsmMgr);
01812       map = &(mgrLocal->mapData());
01813       mgrLocal->addVirialContrib();
01814 #ifdef MSM_TIMING
01815       mgrLocal->addTiming();
01816 #endif
01817 #ifdef MSM_PROFILING
01818       mgrLocal->addProfiling();
01819 #endif
01820     }
01821     // Serializes block indices, fold index range and isfold; grids, weight pointers, priority and sequence are not pupped.
01822 #ifdef MSM_MIGRATION
01823     void pup(PUP::er& p) {
01824 #ifdef MSM_TIMING
01825       mgrLocal->subtractTiming();  // NOTE(review): runs on every pup() call, whatever p is doing -- confirm intended
01826 #endif
01827 #ifdef MSM_PROFILING
01828       mgrLocal->subtractProfiling();
01829 #endif
01830       p | qhblockIndex;
01831       p | ehblockSend;
01832       p | eia, p | eib, p | eja, p | ejb, p | eka, p | ekb;
01833       p | eni, p | enj, p | enk;
01834       p | isfold;
01835     }
01836 #endif // MSM_MIGRATION
01837     // Consumes (deletes) bmsg, sizes qh and eh, and precomputes the folded index range if the fold factor is active.
01838     void setup(MsmGridCutoffInitMsg *bmsg) {
01839       qhblockIndex = bmsg->qhBlockIndex;
01840       ehblockSend = bmsg->ehBlockSend;
01841       delete bmsg;
01842 
01843       // set message priority
01844       priority = mgrLocal->nlevels
01845         + 2*(mgrLocal->nlevels - ehblockSend.nblock_wrap.level) - 1;
01846       // allocate qh buffer
01847       qh.init(map->blockLevel[qhblockIndex.level](qhblockIndex.n).nrange);
01848       // allocate eh buffer
01849       eh.init(ehblockSend.nrange);
01850       // preprocess "fold factor" if active for this level
01851       if (map->foldfactor[qhblockIndex.level].active) {
01852         // allocate ehfold buffer
01853         ehfold = eh;
01854         // set index range of potentials
01855         eia = eh.ia();
01856         eib = eh.ib();
01857         eja = eh.ja();
01858         ejb = eh.jb();
01859         eka = eh.ka();
01860         ekb = eh.kb();
01861         eni = eh.ni();
01862         enj = eh.nj();
01863         enk = eh.nk();
01864         if (map->blockLevel[qhblockIndex.level].nn() == 1) {  // single block on this level: folded range is the charge block's range
01865           if (map->ispx) { eia = qh.ia();  eib = qh.ib();  eni = qh.ni(); }
01866           if (map->ispy) { eja = qh.ja();  ejb = qh.jb();  enj = qh.nj(); }
01867           if (map->ispz) { eka = qh.ka();  ekb = qh.kb();  enk = qh.nk(); }
01868         }
01869         else {
01870           // find destination block index
01871           int level = qhblockIndex.level;
01872           msm::BlockIndex bn = map->blockOfGridIndex(
01873               ehblockSend.nrange_wrap.lower(), level);
01874           map->wrapBlockIndex(bn);
01875           if (map->ispx) {  // only axes flagged ispx/ispy/ispz are overridden
01876             eia = bn.n.i * map->bsx[level];
01877             eib = eia + qh.ni() - 1;
01878             eni = qh.ni();
01879           }
01880           if (map->ispy) {
01881             eja = bn.n.j * map->bsy[level];
01882             ejb = eja + qh.nj() - 1;
01883             enj = qh.nj();
01884           }
01885           if (map->ispz) {
01886             eka = bn.n.k * map->bsz[level];
01887             ekb = eka + qh.nk() - 1;
01888             enk = qh.nk();
01889           }
01890         }
01891         isfold = 1;
01892       } // if fold factor
01893     } // setup()
01894     // Stores the weight grid pointers; the grids are not copied, so they must outlive this object.
01895     void setupWeights(
01896         const msm::Grid<Mtype> *ptrgc,
01897         const msm::Grid<Mtype> *ptrgvc
01898         ) {
01899       pgc = ptrgc;
01900       pgvc = ptrgvc;
01901     } // setupWeights()
01902     // Consumes (deletes) gmsg: unpacks charges into qh, computes eh as the clipped sum of
01903     // weight * charge products, then folds eh or shifts its index range; sending is left to the caller.
01904     void compute(GridMsg *gmsg) {
01905 #ifdef MSM_TIMING
01906       double startTime, stopTime;
01907       startTime = CkWallTimer();
01908 #endif
01909       //
01910       // receive block of charges
01911       //
01912       int pid;
01913       // qh is resized only the first time, memory allocation persists
01914       gmsg->get(qh, pid, sequence);
01915       delete gmsg;
01916 #ifdef MSM_TIMING
01917       stopTime = CkWallTimer();
01918       mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
01919 #endif
01920 
01921       //
01922       // grid cutoff calculation
01923       // this charge block -> this potential block
01924       //
01925 
01926 #ifdef MSM_TIMING
01927       startTime = stopTime;
01928 #endif
01929       // resets indexing on block
01930       eh.init(ehblockSend.nrange);  // (always have to re-init nrange for eh)
01931       eh.reset(0);
01932       // index range of weights
01933       int gia = pgc->ia();
01934       int gib = pgc->ib();
01935       int gja = pgc->ja();
01936       int gjb = pgc->jb();
01937       int gka = pgc->ka();
01938       int gkb = pgc->kb();
01939       int gni = pgc->ni();
01940       int gnj = pgc->nj();
01941       // index range of charge grid
01942       int qia = qh.ia();
01943       int qib = qh.ib();
01944       int qja = qh.ja();
01945       int qjb = qh.jb();
01946       int qka = qh.ka();
01947       int qkb = qh.kb();
01948       int qni = qh.ni();
01949       int qnj = qh.nj();
01950       // index range of potentials
01951       int ia = eh.ia();
01952       int ib = eh.ib();
01953       int ja = eh.ja();
01954       int jb = eh.jb();
01955       int ka = eh.ka();
01956       int kb = eh.kb();
01957 
01958       int index = 0;  // running flat position in ehbuffer; advanced once per (k,j,i) in the loop below
01959 
01960       // access buffers directly
01961       const Mtype *gcbuffer = pgc->data().buffer();
01962       //const Mtype *gvcbuffer = pgvc->data().buffer();
01963       const Vtype *qhbuffer = qh.data().buffer();
01964       Vtype *ehbuffer = eh.data().buffer();
01965       //Vtype *gvsumbuffer = mgrLocal->gvsum.data().buffer();
01966 
01967 #ifndef MSM_COMM_ONLY
01968       // loop over potentials
01969       for (int k = ka;  k <= kb;  k++) {
01970         // clip charges to weights along k
01971         int mka = ( qka >= gka + k ? qka : gka + k );
01972         int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
01973 
01974         for (int j = ja;  j <= jb;  j++) {
01975           // clip charges to weights along j
01976           int mja = ( qja >= gja + j ? qja : gja + j );
01977           int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
01978 
01979           for (int i = ia;  i <= ib;  i++) {
01980             // clip charges to weights along i
01981             int mia = ( qia >= gia + i ? qia : gia + i );
01982             int mib = ( qib <= gib + i ? qib : gib + i );
01983 
01984             // accumulate sum to this eh point
01985             Vtype ehsum = 0;
01986             // Three variants of the inner sum follow; only the last (hard-coded/variable length) one is compiled.
01987 #if 0
01988             // loop over charge grid
01989             for (int qk = mka;  qk <= mkb;  qk++) {
01990               int qkoff = (qk - qka) * qnj;
01991               int gkoff = ((qk-k) - gka) * gnj;
01992 
01993               for (int qj = mja;  qj <= mjb;  qj++) {
01994                 int qjkoff = (qkoff + qj - qja) * qni;
01995                 int gjkoff = (gkoff + (qj-j) - gja) * gni;
01996 
01997 // help the vectorizer make reasonable decisions
01998 #if defined(__INTEL_COMPILER)
01999 #pragma vector always 
02000 #endif
02001                 for (int qi = mia;  qi <= mib;  qi++) {
02002                   int qijkoff = qjkoff + qi - qia;
02003                   int gijkoff = gjkoff + (qi-i) - gia;
02004 
02005                   ehsum += gcbuffer[gijkoff] * qhbuffer[qijkoff];
02006                 }
02007               }
02008             } // end loop over charge grid
02009 #else
02010 
02011 #if 0
02012             // loop over charge grid
02013             int nn = mib - mia + 1;
02014             for (int qk = mka;  qk <= mkb;  qk++) {
02015               int qkoff = (qk - qka) * qnj;
02016               int gkoff = ((qk-k) - gka) * gnj;
02017 
02018               for (int qj = mja;  qj <= mjb;  qj++) {
02019                 int qjkoff = (qkoff + qj - qja) * qni;
02020                 int gjkoff = (gkoff + (qj-j) - gja) * gni;
02021 
02022                 const Float *qbuf = qhbuffer + (qjkoff - qia + mia);
02023                 const Float *gbuf = gcbuffer + (gjkoff - i - gia + mia);
02024 #ifdef MSM_PROFILING
02025                 mgrLocal->xLoopCnt[nn]++;
02026 #endif
02027 // help the vectorizer make reasonable decisions
02028 #if defined(__INTEL_COMPILER)
02029 #pragma vector always 
02030 #endif
02031                 for (int ii = 0;  ii < nn;  ii++) {
02032                   ehsum += gbuf[ii] * qbuf[ii];
02033                 }
02034               }
02035             } // end loop over charge grid
02036 #else
02037             // loop over charge grid
02038             int nn = mib - mia + 1;  // clipped run length along i; NOTE(review): <= 0 when mib < mia, which would index xLoopCnt[nn] under MSM_PROFILING -- confirm
02039             if (nn == 8) {  // hard coded inner loop = 8
02040               int qnji = qnj * qni;
02041               int qkoff = -qka*qnji - qja*qni - qia + mia;
02042               int gnji = gnj * gni;
02043               int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
02044 
02045               for (int qk = mka;  qk <= mkb;  qk++) {
02046                 int qjkoff = qkoff + qk*qnji;
02047                 int gjkoff = gkoff + qk*gnji;
02048 
02049                 for (int qj = mja;  qj <= mjb;  qj++) {
02050                   const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
02051                   const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
02052                   //const Mtype *gvcbuf = gvcbuffer + (gjkoff + qj*gni);
02053                   //Vtype *gvsumbuf = gvsumbuffer + (gjkoff + qj*gni);
02054 #ifdef MSM_PROFILING
02055                   mgrLocal->xLoopCnt[nn]++;
02056 #endif
02057 // help the vectorizer make reasonable decisions
02058 #if defined(__INTEL_COMPILER)
02059 #pragma vector always 
02060 #endif
02061                   for (int ii = 0;  ii < 8;  ii++) {
02062                     ehsum += gbuf[ii] * qbuf[ii];
02063                     //gvsumbuf[ii] += qbuf[ii] * qbuf[ii] * gvcbuf[ii];
02064                   }
02065                 }
02066               } // end loop over charge grid
02067             }
02068             else {  // variable length inner loop: taken for every nn != 8; identical to the branch above except for the loop bound
02069               int qnji = qnj * qni;
02070               int qkoff = -qka*qnji - qja*qni - qia + mia;
02071               int gnji = gnj * gni;
02072               int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
02073 
02074               for (int qk = mka;  qk <= mkb;  qk++) {
02075                 int qjkoff = qkoff + qk*qnji;
02076                 int gjkoff = gkoff + qk*gnji;
02077 
02078                 for (int qj = mja;  qj <= mjb;  qj++) {
02079                   const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
02080                   const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
02081                   //const Mtype *gvcbuf = gvcbuffer + (gjkoff + qj*gni);
02082                   //Vtype *gvsumbuf = gvsumbuffer + (gjkoff + qj*gni);
02083 #ifdef MSM_PROFILING
02084                   mgrLocal->xLoopCnt[nn]++;
02085 #endif
02086 // help the vectorizer make reasonable decisions
02087 #if defined(__INTEL_COMPILER)
02088 #pragma vector always 
02089 #endif
02090                   for (int ii = 0;  ii < nn;  ii++) {
02091                     ehsum += gbuf[ii] * qbuf[ii];
02092                     //gvsumbuf[ii] += qbuf[ii] * qbuf[ii] * gvcbuf[ii];
02093                   }
02094                 }
02095               } // end loop over charge grid
02096             }
02097 #endif // 0
02098 
02099 #endif // 0
02100             // store in loop order (k outermost, i innermost)
02101             ehbuffer[index] = ehsum;
02102             index++;
02103           }
02104         }
02105       } // end loop over potentials
02106 #endif // !MSM_COMM_ONLY
02107 
02108 #ifdef MSM_PROFILING
02109       mgrLocal->doneProfiling();
02110 #endif
02111 
02112       //
02113       // send block of potentials
02114       // (this method only prepares eh; the wrapper class's compute() does the actual send)
02115 
02116 #ifdef MSM_FOLD_FACTOR
02117       // if "fold factor" is active for this level,
02118       // need to sum unfolded potential grid back into periodic grid
02119       if (isfold) {
02120         // copy unfolded grid
02121         ehfold = eh;
02122         // reset eh indexing to correctly folded size
02123         eh.set(eia, eni, eja, enj, eka, enk);
02124         eh.reset(0);
02125 #ifdef DEBUG_MSM_GRID
02126         printf("level=%d   ehfold:  [%d..%d] x [%d..%d] x [%d..%d]  "
02127             "(%d x %d x %d)\n"
02128                 "              eh:  [%d..%d] x [%d..%d] x [%d..%d]  "
02129             "(%d x %d x %d)\n"
02130                "         eh lower:  %d %d %d\n",
02131             qhblockIndex.level,
02132             ehfold.ia(), ehfold.ib(), 
02133             ehfold.ja(), ehfold.jb(),
02134             ehfold.ka(), ehfold.kb(),
02135             ehfold.ni(), ehfold.nj(), ehfold.nk(),
02136             eh.ia(), eh.ib(), 
02137             eh.ja(), eh.jb(),
02138             eh.ka(), eh.kb(),
02139             eh.ni(), eh.nj(), eh.nk(),
02140             ehblockSend.nrange_wrap.lower().i,
02141             ehblockSend.nrange_wrap.lower().j,
02142             ehblockSend.nrange_wrap.lower().k
02143             );
02144 #endif
02145         const Vtype *ehfoldbuf = ehfold.data().buffer();
02146         Vtype *ehbuf = eh.data().buffer();
02147         // now we "fold" eh by calculating the
02148         // wrap around sum of ehfold into correctly sized eh
02149         int index = 0;  // shadows the outer index; walks ehfoldbuf in the unfolded (ka..kb, ja..jb, ia..ib) order
02150         for (int k = ka;  k <= kb;  k++) {
02151           int kk = k;  // k shifted by multiples of enk into [eka, ekb]
02152           if      (kk < eka)  do { kk += enk; } while (kk < eka);
02153           else if (kk > ekb)  do { kk -= enk; } while (kk > ekb);
02154           int koff = (kk - eka) * enj;
02155           for (int j = ja;  j <= jb;  j++) {
02156             int jj = j;
02157             if      (jj < eja)  do { jj += enj; } while (jj < eja);
02158             else if (jj > ejb)  do { jj -= enj; } while (jj > ejb);
02159             int jkoff = (koff + (jj - eja)) * eni;
02160             for (int i = ia;  i <= ib;  i++, index++) {
02161               int ii = i;
02162               if      (ii < eia)  do { ii += eni; } while (ii < eia);
02163               else if (ii > eib)  do { ii -= eni; } while (ii > eib);
02164               int ijkoff = jkoff + (ii - eia);
02165               ehbuf[ijkoff] += ehfoldbuf[index];
02166             }
02167           }
02168         }
02169       }
02170       else {
02171         // shift grid index range to its true (wrapped) values
02172         eh.updateLower( ehblockSend.nrange_wrap.lower() );
02173       }
02174 #else    // !MSM_FOLD_FACTOR
02175       // shift grid index range to its true (wrapped) values
02176       eh.updateLower( ehblockSend.nrange_wrap.lower() );
02177 #endif   // MSM_FOLD_FACTOR
02178 
02179 #ifdef MSM_TIMING
02180       stopTime = CkWallTimer();
02181       mgrLocal->msmTiming[MsmTimer::GRIDCUTOFF] += stopTime - startTime;
02182 #endif
02183     } // compute()
02184 
02185 };
02186 
02187 
02188 //
02189 // MsmGridCutoff wraps kernel template for approximations 
02190 // that involve only function values (e.g., CUBIC, QUINTIC).
02191 // Elements of 1D chare array.  Adds the send of the computed potentials to the kernel's compute().
02192 //
02193 class MsmGridCutoff :
02194   public CBase_MsmGridCutoff,
02195   public MsmGridCutoffKernel<Float,Float>
02196 {
02197   public:
02198     CProxyElement_MsmBlock msmBlockElementProxy;  // root of reduction
02199     CkSectionInfo cookie;  // need to save cookie for section reduction
02200 #ifdef MSM_REDUCE_GRID
02201     msm::Grid<Float> ehfull;  // full-block buffer contributed to the section reduction
02202 #endif // MSM_REDUCE_GRID
02203 
02204     MsmGridCutoff() { }
02205     // Migration constructor; does nothing unless MSM_MIGRATION is defined.
02206     MsmGridCutoff(CkMigrateMessage *m)
02207 #if  ! defined(MSM_MIGRATION)
02208     { }
02209 #else // MSM_MIGRATION
02210       : CBase_MsmGridCutoff(m) {
02211 #ifdef DEBUG_MSM_MIGRATE
02212       printf("MsmGridCutoff element %d migrated to processor %d\n",
02213           thisIndex, CkMyPe());
02214 #endif
02215       init();  // NOTE(review): the kernel base constructor has already called init() once -- confirm the second call is intended
02216       // access type dependent constants from map
02217       MsmGridCutoffKernel<Float,Float>::setupWeights(  // NOTE(review): ehblockSend is read right after default construction; presumably pup() restores it later -- confirm the level used here
02218           &(map->gc[ehblockSend.nblock_wrap.level]),
02219           &(map->gvc[ehblockSend.nblock_wrap.level])
02220           );
02221     }
02222 
02223     virtual void pup(PUP::er& p) {
02224 #ifdef DEBUG_MSM_MIGRATE
02225       printf("MsmGridCutoff element %d pupped on processor %d\n",
02226           thisIndex, CkMyPe());
02227 #endif
02228       CBase_MsmGridCutoff::pup(p);  // pack our superclass
02229       MsmGridCutoffKernel<Float,Float>::pup(p);  // msmBlockElementProxy, cookie and ehfull are not pupped here
02230     }
02231 #endif // MSM_MIGRATION
02232 
02233     void init() {
02234       MsmGridCutoffKernel<Float,Float>::init();
02235     }
02236     // Runs the kernel setup, selects the scalar weight grids for the destination level, and sizes ehfull.
02237     void setup(MsmGridCutoffInitMsg *bmsg) {
02238       // base class consumes this init proxy  message
02239       MsmGridCutoffKernel<Float,Float>::setup(bmsg);
02240       // access type dependent constants from map
02241       MsmGridCutoffKernel<Float,Float>::setupWeights(
02242           &(map->gc[ehblockSend.nblock_wrap.level]),
02243           &(map->gvc[ehblockSend.nblock_wrap.level])
02244           );
02245 #ifdef MSM_REDUCE_GRID
02246       // allocate full buffer space needed for section reduction
02247       int level = ehblockSend.nblock_wrap.level;
02248       int i = ehblockSend.nblock_wrap.n.i;
02249       int j = ehblockSend.nblock_wrap.n.j;
02250       int k = ehblockSend.nblock_wrap.n.k;
02251       ehfull.init( map->blockLevel[level](i,j,k).nrange );
02252 #endif // MSM_REDUCE_GRID
02253 #ifdef DEBUG_MSM_GRID
02254       printf("MsmGridCutoff[%d]:  setup()"
02255           " send to level=%d block=(%d,%d,%d)\n",
02256           thisIndex, ehblockSend.nblock_wrap.level,
02257           ehblockSend.nblock_wrap.n.i,
02258           ehblockSend.nblock_wrap.n.j,
02259           ehblockSend.nblock_wrap.n.k);
02260 #endif
02261     }
02262     // Saves the section cookie and the MsmBlock proxy carried by msg, then deletes msg.
02263     void setupSections(MsmGridCutoffSetupMsg *msg) {
02264 #ifdef DEBUG_MSM_GRID
02265       CkPrintf("MSM GRID CUTOFF %d setup section on PE %d\n",
02266           thisIndex, CkMyPe());
02267 #endif
02268       CkGetSectionInfo(cookie, msg);  // init the cookie
02269       msg->get(&msmBlockElementProxy);  // get proxy to MsmBlock
02270       delete msg;
02271     }
02272     // Runs the kernel on gmsg, then either contributes eh to the section reduction or sends it as a GridMsg.
02273     void compute(GridMsg *gmsg) {
02274 #ifdef DEBUG_MSM_GRID
02275       printf("MsmGridCutoff %d:  compute()\n", thisIndex);
02276 #endif
02277       // base class consumes this grid message
02278       MsmGridCutoffKernel<Float,Float>::compute(gmsg);
02279 
02280 #ifdef MSM_TIMING
02281       double startTime, stopTime;
02282       startTime = CkWallTimer();
02283 #endif
02284 #ifdef MSM_REDUCE_GRID
02285 
02286       // perform section reduction over potential grids
02287       CProxy_CkMulticastMgr mcastProxy =
02288         CkpvAccess(BOCclass_group).multicastMgr;
02289       CkMulticastMgr *mcastPtr =
02290         CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
02291       CkCallback cb(CkIndex_MsmBlock::sumReducedPotential(NULL),
02292           msmBlockElementProxy);  // reduced result is delivered to MsmBlock::sumReducedPotential
02293       // sum into "full" sized buffer needed for contribute
02294       ehfull.reset(0);
02295       ehfull += eh;
02296       mcastPtr->contribute(
02297           ehfull.nn() * sizeof(Float), ehfull.data().buffer(), 
02298           CkReduction::sum_float, cookie, cb);  // sum_float assumes Float is float -- confirm
02299 
02300 #else
02301       // place eh into message
02302       const msm::BlockIndex& bindex = ehblockSend.nblock_wrap;
02303       int msgsz = eh.data().len() * sizeof(Float);
02304       GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
02305       SET_PRIORITY(gm, sequence, priority);
02306       gm->put(eh, bindex.level, sequence);
02307       // lookup in ComputeMsmMgr proxy array by level
02308       mgrLocal->msmBlock[bindex.level](
02309           bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
02310 
02311 #endif // MSM_REDUCE_GRID
02312 
02313 #ifdef MSM_TIMING
02314       stopTime = CkWallTimer();
02315       mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
02316       mgrLocal->doneTiming();
02317 #endif
02318     } // compute()
02319 
02320 }; // MsmGridCutoff
02321 
02322 
02323 //
02324 // MsmC1HermiteGridCutoff wraps kernel template for
02325 // C1 Hermite approximation.  Elements of 1D chare array.
02326 //
02327 class MsmC1HermiteGridCutoff :
02328   public CBase_MsmC1HermiteGridCutoff,
02329   public MsmGridCutoffKernel<C1Vector,C1Matrix>
02330 {
02331   public:
02332     CProxyElement_MsmC1HermiteBlock msmBlockElementProxy;  // root of reduction
02333     CkSectionInfo cookie;  // need to save cookie for section reduction
02334 #ifdef MSM_REDUCE_GRID
02335     msm::Grid<C1Vector> ehfull;
02336 #endif // MSM_REDUCE_GRID
02337 
02338     MsmC1HermiteGridCutoff() { }
02339 
02340     MsmC1HermiteGridCutoff(CkMigrateMessage *m)
02341 #if  ! defined(MSM_MIGRATION)
02342     { }
02343 #else // MSM_MIGRATION
02344       : CBase_MsmC1HermiteGridCutoff(m) {
02345 #ifdef DEBUG_MSM_MIGRATE
02346       printf("MsmC1HermiteGridCutoff element %d migrated to processor %d\n",
02347           thisIndex, CkMyPe());
02348 #endif
02349       init();
02350       // access type dependent constants from map
02351       MsmGridCutoffKernel<C1Vector,C1Matrix>::setupWeights(
02352           &(map->gc_c1hermite[ehblockSend.nblock_wrap.level]),
02353           NULL
02354           );
02355     }
02356 
02357     virtual void pup(PUP::er& p) {
02358 #ifdef DEBUG_MSM_MIGRATE
02359       printf("MsmC1HermiteGridCutoff element %d pupped on processor %d\n",
02360           thisIndex, CkMyPe());
02361 #endif
02362       CBase_MsmC1HermiteGridCutoff::pup(p);  // pack our superclass
02363       MsmGridCutoffKernel<C1Vector,C1Matrix>::pup(p);
02364     }
02365 #endif // MSM_MIGRATION
02366 
02367     void init() {
02368       MsmGridCutoffKernel<C1Vector,C1Matrix>::init();
02369     }
02370 
02371     void setup(MsmGridCutoffInitMsg *bmsg) {
02372       // base class consumes this init proxy  message
02373       MsmGridCutoffKernel<C1Vector,C1Matrix>::setup(bmsg);
02374       // access type dependent constants from map
02375       MsmGridCutoffKernel<C1Vector,C1Matrix>::setupWeights(
02376           &(map->gc_c1hermite[ehblockSend.nblock_wrap.level]),
02377           NULL
02378           );
02379 #ifdef DEBUG_MSM_GRID
02380       printf("MsmC1HermiteGridCutoff[%d]:  setup()"
02381           " send to level=%d block=(%d,%d,%d)\n",
02382           thisIndex, ehblockSend.nblock_wrap.level,
02383           ehblockSend.nblock_wrap.n.i,
02384           ehblockSend.nblock_wrap.n.j,
02385           ehblockSend.nblock_wrap.n.k);
02386 #endif
02387 #ifdef MSM_REDUCE_GRID
02388       // allocate full buffer space needed for section reduction
02389       int level = ehblockSend.nblock_wrap.level;
02390       int i = ehblockSend.nblock_wrap.n.i;
02391       int j = ehblockSend.nblock_wrap.n.j;
02392       int k = ehblockSend.nblock_wrap.n.k;
02393       ehfull.init( map->blockLevel[level](i,j,k).nrange );
02394 #endif // MSM_REDUCE_GRID
02395     }
02396 
02397     void setupSections(MsmC1HermiteGridCutoffSetupMsg *msg) {
02398 #ifdef DEBUG_MSM_GRID
02399       CkPrintf("MSM C1 HERMITE GRID CUTOFF %d setup section on PE %d\n",
02400           thisIndex, CkMyPe());
02401 #endif
02402       CkGetSectionInfo(cookie, msg);  // init the cookie
02403       msg->get(&msmBlockElementProxy);  // get proxy to MsmC1HermiteBlock
02404       delete msg;
02405     }
02406 
02407     void compute(GridMsg *gmsg) {
02408 #ifdef DEBUG_MSM_GRID
02409       printf("MsmC1HermiteGridCutoff %d:  compute()\n", thisIndex);
02410 #endif
02411 #if 0
02412       // base class consumes this grid message
02413       MsmGridCutoffKernel<C1Vector,C1Matrix>::compute(gmsg);
02414 #else
02415       compute_specialized(gmsg);
02416 #endif
02417 
02418 #ifdef MSM_TIMING
02419       double startTime, stopTime;
02420       startTime = CkWallTimer();
02421 #endif
02422 #ifdef MSM_REDUCE_GRID
02423 
02424       // perform section reduction over potential grids
02425       CProxy_CkMulticastMgr mcastProxy =
02426         CkpvAccess(BOCclass_group).multicastMgr;
02427       CkMulticastMgr *mcastPtr =
02428         CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
02429       CkCallback cb(CkIndex_MsmC1HermiteBlock::sumReducedPotential(NULL),
02430           msmBlockElementProxy);
02431       // sum into "full" sized buffer needed for contribute
02432       ehfull.reset(0);
02433       ehfull += eh;
02434       mcastPtr->contribute(
02435           ehfull.nn() * sizeof(C1Vector), ehfull.data().buffer(), 
02436           CkReduction::sum_float, cookie, cb);
02437 
02438 #else
02439       // place eh into message
02440       const msm::BlockIndex& bindex = ehblockSend.nblock_wrap;
02441       int msgsz = eh.data().len() * sizeof(C1Vector);
02442       GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
02443       SET_PRIORITY(gm, sequence, priority);
02444       gm->put(eh, bindex.level, sequence);
02445       // lookup in ComputeMsmMgr proxy array by level
02446       mgrLocal->msmC1HermiteBlock[bindex.level](
02447           bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
02448 
02449 #endif // MSM_REDUCE_GRID
02450 
02451 #ifdef MSM_TIMING
02452       stopTime = CkWallTimer();
02453       mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
02454       mgrLocal->doneTiming();
02455 #endif
02456     } // compute()
02457 
02458     // try to improve performance of the major computational part
02459     void compute_specialized(GridMsg *gmsg);
02460 
02461 }; // MsmC1HermiteGridCutoff
02462 
02463 void MsmC1HermiteGridCutoff::compute_specialized(GridMsg *gmsg) {
02464 #ifdef MSM_TIMING
02465       double startTime, stopTime;
02466       startTime = CkWallTimer();
02467 #endif
02468       //
02469       // receive block of charges
02470       //
02471       int pid;
02472       // qh is resized only the first time, memory allocation persists
02473       gmsg->get(qh, pid, sequence);
02474       delete gmsg;
02475 #ifdef MSM_TIMING
02476       stopTime = CkWallTimer();
02477       mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
02478 #endif
02479 
02480       //
02481       // grid cutoff calculation
02482       // this charge block -> this potential block
02483       //
02484 
02485 #ifdef MSM_TIMING
02486       startTime = stopTime;
02487 #endif
02488       // resets indexing on block
02489       eh.init(ehblockSend.nrange);  // (always have to re-init nrange for eh)
02490       eh.reset(0);
02491       // index range of weights
02492       int gia = pgc->ia();
02493       int gib = pgc->ib();
02494       int gja = pgc->ja();
02495       int gjb = pgc->jb();
02496       int gka = pgc->ka();
02497       int gkb = pgc->kb();
02498       int gni = pgc->ni();
02499       int gnj = pgc->nj();
02500       // index range of charge grid
02501       int qia = qh.ia();
02502       int qib = qh.ib();
02503       int qja = qh.ja();
02504       int qjb = qh.jb();
02505       int qka = qh.ka();
02506       int qkb = qh.kb();
02507       int qni = qh.ni();
02508       int qnj = qh.nj();
02509       // index range of potentials
02510       int ia = eh.ia();
02511       int ib = eh.ib();
02512       int ja = eh.ja();
02513       int jb = eh.jb();
02514       int ka = eh.ka();
02515       int kb = eh.kb();
02516 
02517       int index = 0;
02518 
02519       // access buffers directly
02520       const C1Matrix *gcbuffer = pgc->data().buffer();
02521       const C1Vector *qhbuffer = qh.data().buffer();
02522       C1Vector *ehbuffer = eh.data().buffer();
02523 #ifdef DEBUG_MEMORY_ALIGNMENT
02524       printf("gcbuffer mem:  addr=%p  div32=%lu  mod32=%lu\n",
02525           gcbuffer,
02526           (unsigned long)(gcbuffer)/32,
02527           (unsigned long)(gcbuffer)%32);
02528       printf("qhbuffer mem:  addr=%p  div32=%lu  mod32=%lu\n",
02529           qhbuffer,
02530           (unsigned long)(qhbuffer)/32,
02531           (unsigned long)(qhbuffer)%32);
02532       printf("ehbuffer mem:  addr=%p  div32=%lu  mod32=%lu\n",
02533           ehbuffer,
02534           (unsigned long)(ehbuffer)/32,
02535           (unsigned long)(ehbuffer)%32);
02536 #endif
02537 
02538 #ifndef MSM_COMM_ONLY
02539       // loop over potentials
02540       for (int k = ka;  k <= kb;  k++) {
02541         // clip charges to weights along k
02542         int mka = ( qka >= gka + k ? qka : gka + k );
02543         int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
02544 
02545         for (int j = ja;  j <= jb;  j++) {
02546           // clip charges to weights along j
02547           int mja = ( qja >= gja + j ? qja : gja + j );
02548           int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
02549 
02550           for (int i = ia;  i <= ib;  i++) {
02551             // clip charges to weights along i
02552             int mia = ( qia >= gia + i ? qia : gia + i );
02553             int mib = ( qib <= gib + i ? qib : gib + i );
02554 
02555             // accumulate sum to this eh point
02556             C1Vector ehsum = 0;
02557 
02558             // loop over charge grid
02559             int nn = mib - mia + 1;
02560 
02561             {
02562               int qnji = qnj * qni;
02563               int qkoff = -qka*qnji - qja*qni - qia + mia;
02564               int gnji = gnj * gni;
02565               int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
02566 
02567               for (int qk = mka;  qk <= mkb;  qk++) {
02568                 int qjkoff = qkoff + qk*qnji;
02569                 int gjkoff = gkoff + qk*gnji;
02570 
02571                 for (int qj = mja;  qj <= mjb;  qj++) {
02572                   const C1Vector *qbuf = qhbuffer + (qjkoff + qj*qni);
02573                   const C1Matrix *gbuf = gcbuffer + (gjkoff + qj*gni);
02574 #ifdef MSM_PROFILING
02575                   mgrLocal->xLoopCnt[nn]++;
02576 #endif
02577 // help the vectorizer make reasonable decisions
02578 #if defined(__INTEL_COMPILER)
02579 #pragma vector always 
02580 #endif
02581                   for (int ii = 0;  ii < nn;  ii++) {
02582 
02583 #if 0
02584                     ehsum += gbuf[ii] * qbuf[ii];
02585 #else
02586                     // skip matvec when matrix is 0
02587                     // first matrix element tells us if this is the case
02588                     if ( *((int *)(gbuf)) != 0) {
02589 
02590                       // expand matrix-vector multiply
02591 #if defined(__INTEL_COMPILER)
02592 #pragma vector always
02593 #endif
02594                       for (int km=0, jm=0;  jm < C1_VECTOR_SIZE;  jm++) {
02595                         for (int im=0;  im < C1_VECTOR_SIZE;  im++, km++) {
02596                           ehsum.velem[jm] += gbuf->melem[km] * qbuf->velem[im];
02597                         }
02598                       }
02599                     } // if
02600                     gbuf++;
02601                     qbuf++;
02602 #endif
02603                   }
02604                 }
02605               } // end loop over charge grid
02606 
02607             }
02608 
02609             ehbuffer[index] = ehsum;
02610             index++;
02611           }
02612         }
02613       } // end loop over potentials
02614 #endif // !MSM_COMM_ONLY
02615 
02616 #ifdef MSM_PROFILING
02617       mgrLocal->doneProfiling();
02618 #endif
02619 
02620       //
02621       // send block of potentials
02622       //
02623 
02624 #ifdef MSM_FOLD_FACTOR
02625       // if "fold factor" is active for this level,
02626       // need to sum unfolded potential grid back into periodic grid
02627       if (isfold) {
02628         // copy unfolded grid
02629         ehfold = eh;
02630         // reset eh indexing to correctly folded size
02631         eh.set(eia, eni, eja, enj, eka, enk);
02632         eh.reset(0);
02633 #ifdef DEBUG_MSM_GRID
02634         printf("level=%d   ehfold:  [%d..%d] x [%d..%d] x [%d..%d]  "
02635             "(%d x %d x %d)\n"
02636                 "              eh:  [%d..%d] x [%d..%d] x [%d..%d]  "
02637             "(%d x %d x %d)\n"
02638                "         eh lower:  %d %d %d\n",
02639             qhblockIndex.level,
02640             ehfold.ia(), ehfold.ib(), 
02641             ehfold.ja(), ehfold.jb(),
02642             ehfold.ka(), ehfold.kb(),
02643             ehfold.ni(), ehfold.nj(), ehfold.nk(),
02644             eh.ia(), eh.ib(), 
02645             eh.ja(), eh.jb(),
02646             eh.ka(), eh.kb(),
02647             eh.ni(), eh.nj(), eh.nk(),
02648             ehblockSend.nrange_wrap.lower().i,
02649             ehblockSend.nrange_wrap.lower().j,
02650             ehblockSend.nrange_wrap.lower().k
02651             );
02652 #endif
02653         const C1Vector *ehfoldbuf = ehfold.data().buffer();
02654         C1Vector *ehbuf = eh.data().buffer();
02655         // now we "fold" eh by calculating the
02656         // wrap around sum of ehfold into correctly sized eh
02657         int index = 0;
02658         for (int k = ka;  k <= kb;  k++) {
02659           int kk = k;
02660           if      (kk < eka)  do { kk += enk; } while (kk < eka);
02661           else if (kk > ekb)  do { kk -= enk; } while (kk > ekb);
02662           int koff = (kk - eka) * enj;
02663           for (int j = ja;  j <= jb;  j++) {
02664             int jj = j;
02665             if      (jj < eja)  do { jj += enj; } while (jj < eja);
02666             else if (jj > ejb)  do { jj -= enj; } while (jj > ejb);
02667             int jkoff = (koff + (jj - eja)) * eni;
02668             for (int i = ia;  i <= ib;  i++, index++) {
02669               int ii = i;
02670               if      (ii < eia)  do { ii += eni; } while (ii < eia);
02671               else if (ii > eib)  do { ii -= eni; } while (ii > eib);
02672               int ijkoff = jkoff + (ii - eia);
02673               ehbuf[ijkoff] += ehfoldbuf[index];
02674             }
02675           }
02676         }
02677       }
02678       else {
02679         // shift grid index range to its true (wrapped) values
02680         eh.updateLower( ehblockSend.nrange_wrap.lower() );
02681       }
02682 #else    // !MSM_FOLD_FACTOR
02683       // shift grid index range to its true (wrapped) values
02684       eh.updateLower( ehblockSend.nrange_wrap.lower() );
02685 #endif   // MSM_FOLD_FACTOR
02686 
02687 #ifdef MSM_TIMING
02688       stopTime = CkWallTimer();
02689       mgrLocal->msmTiming[MsmTimer::GRIDCUTOFF] += stopTime - startTime;
02690 #endif
02691 } // MsmC1HermiteGridCutoff::compute_specialized()
02692 
02693 // MsmGridCutoff
02694 //
02696 
02697 
02699 //
02700 // MsmBlock
02701 //
02702 // Performs restriction and prolongation.
02703 //
02704 // Each level of the MSM grid hierarchy is partitioned into MsmBlocks,
02705 // holding both charge and potential grid blocks.
02706 //
// The MsmBlockKernel provides templated routines for the MSM
// restriction and prolongation algorithms.  Overall, this is a very
// small part of the computational work (less than 2% of the total for
// C1 Hermite, less than 4% of the total for cubic).
02711 // XXX Could be made faster with factored restriction and prolongation 
02712 // algorithms --- especially important for higher order or for 
02713 // generalizing to coarser grid spacing that is not 2h.
02714 // XXX Haven't yet determined factorization for C1 Hermite.
02715 //
02716 // The classes that inherit from MsmBlockKernel provide 
02717 // 3D chare array elements for each level with significant management:
02718 // - receive and sum charges from below
02719 //   (either PatchData or lower level MsmBlock)
02720 // - calculate restriction to 2h grid
02721 // - send up (if not on highest level)
02722 // - section broadcast to MsmGridCutoff
02723 // - receive and sum potentials from above and from 
02724 //   section reduction of MsmGridCutoff
02725 // - calculate prolongation to (1/2)h grid and send down,
02726 //   OR send to PatchData
02727 //
02728 // XXX Grid cutoff calculation below is now replaced with 
02729 // MsmGridCutoff to provide enough parallel work units.
02730 // 
02731 
02732 template <class Vtype, class Mtype>
02733 class MsmBlockKernel {
02734   public:
02735     CProxy_ComputeMsmMgr mgrProxy;
02736     ComputeMsmMgr *mgrLocal;  // for quick access to data
02737     msm::Map *map;
02738     msm::BlockDiagram *bd;
02739     msm::Grid<Vtype> qh;
02740     msm::Grid<Vtype> eh;
02741 #ifndef MSM_GRID_CUTOFF_DECOMP
02742     const msm::Grid<Mtype> *gcWeights;
02743     msm::Grid<Vtype> ehCutoff;
02744 #endif
02745     const msm::Grid<Mtype> *resStencil;
02746     const msm::Grid<Mtype> *proStencil;
02747     msm::Grid<Vtype> qhRestricted;
02748     msm::Grid<Vtype> ehProlongated;
02749     int cntRecvsCharge;
02750     int cntRecvsPotential;
02751     msm::BlockIndex blockIndex;
02752  
02753     msm::Grid<Vtype> subgrid;
02754 
02755     int sequence;  // from incoming message for message priority
02756 
02757     MsmBlockKernel(const msm::BlockIndex&);
02758     MsmBlockKernel(CkMigrateMessage *m) { }
02759 
02760     void init();
02761 
02762 #ifndef MSM_GRID_CUTOFF_DECOMP
02763     void setupStencils(
02764         const msm::Grid<Mtype> *res,
02765         const msm::Grid<Mtype> *pro,
02766         const msm::Grid<Mtype> *gc
02767         )
02768     {
02769       resStencil = res;
02770       proStencil = pro;
02771       gcWeights = gc;
02772     }
02773 #else
02774     void setupStencils(
02775         const msm::Grid<Mtype> *res,
02776         const msm::Grid<Mtype> *pro
02777         )
02778     {
02779       resStencil = res;
02780       proStencil = pro;
02781     }
02782 #endif
02783 
02784     void restrictionKernel();
02785 #ifndef MSM_GRID_CUTOFF_DECOMP
02786     void gridCutoffKernel();
02787 #endif
02788     void prolongationKernel();
02789 
02790 }; // class MsmBlockKernel<Vtype,Mtype>
02791 
02792 template <class Vtype, class Mtype>
02793 MsmBlockKernel<Vtype,Mtype>::MsmBlockKernel(const msm::BlockIndex& bindex) {
02794   blockIndex = bindex;
02795   mgrProxy = CProxy_ComputeMsmMgr(CkpvAccess(BOCclass_group).computeMsmMgr);
02796   mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
02797       CkpvAccess(BOCclass_group).computeMsmMgr);
02798   map = &(mgrLocal->mapData());
02799   bd = &(map->blockLevel[blockIndex.level](blockIndex.n));
02800   qh.init( bd->nrange );
02801   eh.init( bd->nrange );
02802 #ifndef MSM_GRID_CUTOFF_DECOMP
02803   ehCutoff.init( bd->nrangeCutoff );
02804 #endif
02805   qhRestricted.init( bd->nrangeRestricted );
02806   ehProlongated.init( bd->nrangeProlongated );
02807 #ifdef DEBUG_MSM_GRID
02808   printf("MsmBlockKernel level=%d, n=%d %d %d:  constructor\n",
02809       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
02810 #endif
02811 #ifdef MSM_TIMING
02812   mgrLocal->addTiming();
02813 #endif
02814   init();
02815 } // MsmBlockKernel<Vtype,Mtype>::MsmBlockKernel()
02816 
02817 
02818 template <class Vtype, class Mtype>
02819 void MsmBlockKernel<Vtype,Mtype>::init() {
02820   qh.reset(0);
02821   eh.reset(0);
02822 #ifndef MSM_GRID_CUTOFF_DECOMP
02823   ehCutoff.reset(0);
02824 #endif
02825   qhRestricted.reset(0);
02826   ehProlongated.reset(0);
02827   cntRecvsCharge = 0;
02828   cntRecvsPotential = 0;
02829 } // MsmBlockKernel<Vtype,Mtype>::init()
02830 
02831 
02832 template <class Vtype, class Mtype>
02833 void MsmBlockKernel<Vtype,Mtype>::restrictionKernel()
02834 {
02835 #ifdef DEBUG_MSM_GRID
02836   printf("MsmBlockKernel level=%d, id=%d %d %d:  restriction\n",
02837       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
02838 #endif
02839 
02840 #ifdef MSM_TIMING
02841   double startTime, stopTime;
02842   startTime = CkWallTimer();
02843 #endif
02844 
02845 #ifndef MSM_COMM_ONLY
02846   // stencil data for approximating charge on restricted grid
02847   const int approx = mgrLocal->approx;
02848   const int nstencil = ComputeMsmMgr::Nstencil[approx];
02849   const int *offset = ComputeMsmMgr::IndexOffset[approx];
02850   const msm::Grid<Mtype>& res = *resStencil;
02851 
02852   // index range for h grid charges
02853   int ia1 = qh.ia();
02854   int ib1 = qh.ib();
02855   int ja1 = qh.ja();
02856   int jb1 = qh.jb();
02857   int ka1 = qh.ka();
02858   int kb1 = qh.kb();
02859 
02860   // index range for restricted (2h) grid charges
02861   int ia2 = qhRestricted.ia();
02862   int ib2 = qhRestricted.ib();
02863   int ja2 = qhRestricted.ja();
02864   int jb2 = qhRestricted.jb();
02865   int ka2 = qhRestricted.ka();
02866   int kb2 = qhRestricted.kb();
02867 
02868   // reset grid
02869   qhRestricted.reset(0);
02870 
02871   // loop over restricted (2h) grid
02872   for (int k2 = ka2;  k2 <= kb2;  k2++) {
02873     int k1 = 2 * k2;
02874     for (int j2 = ja2;  j2 <= jb2;  j2++) {
02875       int j1 = 2 * j2;
02876       for (int i2 = ia2;  i2 <= ib2;  i2++) {
02877         int i1 = 2 * i2;
02878 
02879         // loop over stencils on h grid
02880         Vtype& q2hsum = qhRestricted(i2,j2,k2);
02881 
02882         for (int k = 0;  k < nstencil;  k++) {
02883           int kn = k1 + offset[k];
02884           if      (kn < ka1) continue;
02885           else if (kn > kb1) break;
02886 
02887           for (int j = 0;  j < nstencil;  j++) {
02888             int jn = j1 + offset[j];
02889             if      (jn < ja1) continue;
02890             else if (jn > jb1) break;
02891 
02892             for (int i = 0;  i < nstencil;  i++) {
02893               int in = i1 + offset[i];
02894               if      (in < ia1) continue;
02895               else if (in > ib1) break;
02896 
02897               q2hsum += res(i,j,k) * qh(in,jn,kn);
02898             }
02899           }
02900         } // end loop over stencils on h grid
02901 
02902       }
02903     }
02904   } // end loop over restricted (2h) grid
02905 #else
02906   qhRestricted.reset(0);
02907 #endif // !MSM_COMM_ONLY
02908 
02909 #ifdef MSM_TIMING
02910   stopTime = CkWallTimer();
02911   mgrLocal->msmTiming[MsmTimer::RESTRICT] += stopTime - startTime;
02912 #endif
02913 } // MsmBlockKernel<Vtype,Mtype>::restrictionKernel()
02914 
02915 
02916 #ifndef MSM_GRID_CUTOFF_DECOMP
02917 template <class Vtype, class Mtype>
02918 void MsmBlockKernel<Vtype,Mtype>::gridCutoffKernel()
02919 {
02920 #ifdef DEBUG_MSM_GRID
02921   printf("MsmBlockKernel level=%d, id=%d %d %d:  grid cutoff\n",
02922       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
02923 #endif
02924 #ifdef MSM_TIMING
02925   double startTime, stopTime;
02926   startTime = CkWallTimer();
02927 #endif
02928 #ifndef MSM_COMM_ONLY
02929   // need grid of weights for this level
02930   msm::Grid<Mtype>& gc = *gcWeights;
02931   // index range of weights
02932   int gia = gc.ia();
02933   int gib = gc.ib();
02934   int gja = gc.ja();
02935   int gjb = gc.jb();
02936   int gka = gc.ka();
02937   int gkb = gc.kb();
02938   // index range of charge grid
02939   int qia = qh.ia();
02940   int qib = qh.ib();
02941   int qja = qh.ja();
02942   int qjb = qh.jb();
02943   int qka = qh.ka();
02944   int qkb = qh.kb();
02945   // index range of potentials
02946   int ia = ehCutoff.ia();
02947   int ib = ehCutoff.ib();
02948   int ja = ehCutoff.ja();
02949   int jb = ehCutoff.jb();
02950   int ka = ehCutoff.ka();
02951   int kb = ehCutoff.kb();
02952   // reset grid
02953   ehCutoff.reset(0);
02954   // loop over potentials
02955   for (int k = ka;  k <= kb;  k++) {
02956     for (int j = ja;  j <= jb;  j++) {
02957       for (int i = ia;  i <= ib;  i++) {
02958         // clip charges to weights
02959         int mia = ( qia >= gia + i ? qia : gia + i );
02960         int mib = ( qib <= gib + i ? qib : gib + i );
02961         int mja = ( qja >= gja + j ? qja : gja + j );
02962         int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
02963         int mka = ( qka >= gka + k ? qka : gka + k );
02964         int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
02965         // accumulate sum to this eh point
02966         Vtype& ehsum = ehCutoff(i,j,k);
02967         // loop over smaller charge grid
02968         for (int qk = mka;  qk <= mkb;  qk++) {
02969           for (int qj = mja;  qj <= mjb;  qj++) {
02970             for (int qi = mia;  qi <= mib;  qi++) {
02971               ehsum += gc(qi-i, qj-j, qk-k) * qh(qi,qj,qk);
02972             }
02973           }
02974         } // end loop over smaller charge grid
02975 
02976       }
02977     }
02978   } // end loop over potentials
02979 #else
02980   ehCutoff.reset(0);
02981 #endif // !MSM_COMM_ONLY
02982 #ifdef MSM_TIMING
02983   stopTime = CkWallTimer();
02984   mgrLocal->msmTiming[MsmTimer::GRIDCUTOFF] += stopTime - startTime;
02985 #endif
02986 } // MsmBlockKernel<Vtype,Mtype>::gridCutoffKernel()
02987 #endif // MSM_GRID_CUTOFF_DECOMP
02988 
02989 
02990 template <class Vtype, class Mtype>
02991 void MsmBlockKernel<Vtype,Mtype>::prolongationKernel()
02992 {
02993 #ifdef DEBUG_MSM_GRID
02994   printf("MsmBlockKernel level=%d, id=%d %d %d:  prolongation\n",
02995       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
02996 #endif
02997 
02998 #ifdef MSM_TIMING
02999   double startTime, stopTime;
03000   startTime = CkWallTimer();
03001 #endif
03002 #ifndef MSM_COMM_ONLY
03003   // stencil data for approximating potential on prolongated grid
03004   const int approx = mgrLocal->approx;
03005   const int nstencil = ComputeMsmMgr::Nstencil[approx];
03006   const int *offset = ComputeMsmMgr::IndexOffset[approx];
03007   const msm::Grid<Mtype>& pro = *proStencil;
03008 
03009   // index range for prolongated h grid potentials
03010   int ia1 = ehProlongated.ia();
03011   int ib1 = ehProlongated.ib();
03012   int ja1 = ehProlongated.ja();
03013   int jb1 = ehProlongated.jb();
03014   int ka1 = ehProlongated.ka();
03015   int kb1 = ehProlongated.kb();
03016 
03017   // index range for 2h grid potentials
03018   int ia2 = eh.ia();
03019   int ib2 = eh.ib();
03020   int ja2 = eh.ja();
03021   int jb2 = eh.jb();
03022   int ka2 = eh.ka();
03023   int kb2 = eh.kb();
03024 
03025   // loop over 2h grid
03026   for (int k2 = ka2;  k2 <= kb2;  k2++) {
03027     int k1 = 2 * k2;
03028     for (int j2 = ja2;  j2 <= jb2;  j2++) {
03029       int j1 = 2 * j2;
03030       for (int i2 = ia2;  i2 <= ib2;  i2++) {
03031         int i1 = 2 * i2;
03032 
03033         // loop over stencils on prolongated h grid
03034         for (int k = 0;  k < nstencil;  k++) {
03035           int kn = k1 + offset[k];
03036           if      (kn < ka1) continue;
03037           else if (kn > kb1) break;
03038 
03039           for (int j = 0;  j < nstencil;  j++) {
03040             int jn = j1 + offset[j];
03041             if      (jn < ja1) continue;
03042             else if (jn > jb1) break;
03043 
03044             for (int i = 0;  i < nstencil;  i++) {
03045               int in = i1 + offset[i];
03046               if      (in < ia1) continue;
03047               else if (in > ib1) break;
03048 
03049               ehProlongated(in,jn,kn) += pro(i,j,k) * eh(i2,j2,k2);
03050             }
03051           }
03052         } // end loop over stencils on prolongated h grid
03053 
03054       }
03055     }
03056   } // end loop over 2h grid
03057 #else
03058   ehProlongated.reset(0);
03059 #endif // !MSM_COMM_ONLY
03060 #ifdef MSM_TIMING
03061   stopTime = CkWallTimer();
03062   mgrLocal->msmTiming[MsmTimer::PROLONGATE] += stopTime - startTime;
03063 #endif
03064 } // MsmBlockKernel<Vtype,Mtype>::prolongationKernel()
03065 
03066 
03067 //
03068 // MsmBlock handles grids of function values only
03069 // (for cubic, quintic, etc., approximation)
03070 //
03071 class MsmBlock :
03072   public CBase_MsmBlock,
03073   public MsmBlockKernel<Float,Float>
03074 {
03075   public:
03076     CProxySection_MsmGridCutoff msmGridCutoffBroadcast;
03077     CProxySection_MsmGridCutoff msmGridCutoffReduction;
03078  
03079     MsmBlock(int level) :
03080       MsmBlockKernel<Float,Float>(
03081           msm::BlockIndex(level,
03082             msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
03083           )
03084     {
03085 #ifndef MSM_GRID_CUTOFF_DECOMP
03086       setupStencils(&(map->grespro), &(map->grespro), &(map->gc[level]));
03087 #else
03088       setupStencils(&(map->grespro), &(map->grespro));
03089 #endif
03090     }
03091     MsmBlock(CkMigrateMessage *m) : MsmBlockKernel<Float,Float>(m) { }
03092 
03093     void setupSections();
03094 
03095     void sumReducedPotential(CkReductionMsg *msg) {
03096 #ifdef MSM_TIMING
03097       double startTime, stopTime;
03098       startTime = CkWallTimer();
03099 #endif
03100       msm::Grid<Float> ehfull;
03101       ehfull.init( msm::IndexRange(eh) );
03102       memcpy(ehfull.data().buffer(), msg->getData(), msg->getSize());
03103       delete msg;
03104       int priority = mgrLocal->nlevels
03105         + 2*(mgrLocal->nlevels - blockIndex.level)-1;
03106       int msgsz = ehfull.data().len() * sizeof(Float);
03107       GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03108       SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03109       gm->put(ehfull, blockIndex.level, sequence);  // send my level
03110 #ifdef MSM_TIMING
03111       stopTime = CkWallTimer();
03112       mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03113 #endif
03114       addPotential(gm);
03115     }
03116 
03117     void addCharge(GridMsg *);  // entry
03118 
03119     void restriction() {
03120       restrictionKernel();
03121       sendUpCharge();
03122     }
03123     void sendUpCharge();
03124     void gridCutoff();
03125 #ifndef MSM_GRID_CUTOFF_DECOMP
03126     void sendAcrossPotential();
03127 #endif
03128 
03129     void addPotential(GridMsg *);  // entry
03130 
03131     void prolongation() {
03132       prolongationKernel();
03133       sendDownPotential();
03134     }
03135     void sendDownPotential();
03136     void sendPatch();
03137 }; // class MsmBlock
03138 
03139 
03140 void MsmBlock::setupSections()
03141 {
03142 #ifdef DEBUG_MSM_GRID
03143   CkPrintf("LEVEL %d MSM BLOCK (%d,%d,%d):  "
03144       "creating broadcast section on PE %d\n",
03145       blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
03146 #endif
03147   CkVec<CkArrayIndex1D> elems;
03148   for (int n = 0;  n < bd->indexGridCutoff.len();  n++) {
03149     elems.push_back(CkArrayIndex1D( bd->indexGridCutoff[n] ));
03150   }
03151   msmGridCutoffBroadcast = CProxySection_MsmGridCutoff::ckNew(
03152       mgrLocal->msmGridCutoff, elems.getVec(), elems.size()
03153       );
03154   CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
03155   CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
03156   msmGridCutoffBroadcast.ckSectionDelegate(mcastPtr);
03157   
03158 #ifdef DEBUG_MSM_GRID
03159   char s[1024];
03160   sprintf(s, "LEVEL %d MSM BLOCK (%d,%d,%d):  "
03161       "creating reduction section on PE %d\n",
03162       blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
03163 #endif
03164   CkVec<CkArrayIndex1D> elems2;
03165 #ifdef DEBUG_MSM_GRID
03166   strcat(s, "receiving from MsmGridCutoff ID:");
03167 #endif
03168   for (int n = 0;  n < bd->recvGridCutoff.len();  n++) {
03169 #ifdef DEBUG_MSM_GRID
03170     char t[20];
03171     sprintf(t, "  %d", bd->recvGridCutoff[n]);
03172     strcat(s, t);
03173 #endif
03174     elems2.push_back(CkArrayIndex1D( bd->recvGridCutoff[n] ));
03175   }
03176 #ifdef DEBUG_MSM_GRID
03177   strcat(s, "\n");
03178   CkPrintf(s);
03179 #endif
03180   msmGridCutoffReduction = CProxySection_MsmGridCutoff::ckNew(
03181       mgrLocal->msmGridCutoff, elems2.getVec(), elems2.size()
03182       );
03183   msmGridCutoffReduction.ckSectionDelegate(mcastPtr);
03184   MsmGridCutoffSetupMsg *msg = new MsmGridCutoffSetupMsg;
03185   CProxyElement_MsmBlock thisElementProxy = thisProxy(thisIndex);
03186   msg->put(&thisElementProxy);
03187 
03188   msmGridCutoffReduction.setupSections(msg);  // broadcast to entire section
03189 
03190   /* XXX alternatively, setup default reduction client
03191    *
03192   mcastPtr->setReductionClient(msmGridCutoffReduction,
03193       new CkCallback(CkIndex_MsmBlock::myReductionEntry(NULL),
03194         thisElementProxy));
03195    *
03196    */
03197 }
03198 
03199 
03200 void MsmBlock::addCharge(GridMsg *gm)
03201 {
03202 #ifdef MSM_TIMING
03203   double startTime, stopTime;
03204   startTime = CkWallTimer();
03205 #endif
03206   int pid;
03207   gm->get(subgrid, pid, sequence);
03208   delete gm;
03209   qh += subgrid;
03210 #ifdef MSM_TIMING
03211   stopTime = CkWallTimer();
03212   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03213 #endif
03214   if (++cntRecvsCharge == bd->numRecvsCharge) {
03215     int nlevels = mgrLocal->numLevels();
03216     if (blockIndex.level < nlevels-1) {
03217       restriction();
03218     }
03219     gridCutoff();
03220   }
03221 } // MsmBlock::addCharge()
03222 
03223 
03224 void MsmBlock::sendUpCharge()
03225 {
03226 #ifdef MSM_TIMING
03227   double startTime, stopTime;
03228   startTime = CkWallTimer();
03229 #endif
03230   int lnext = blockIndex.level + 1;
03231   // buffer portions of grid to send to Blocks on next level
03232   for (int n = 0;  n < bd->sendUp.len();  n++) {
03233     // initialize the proper subgrid indexing range
03234     subgrid.init( bd->sendUp[n].nrange );
03235     // extract the values from the larger grid into the subgrid
03236     qhRestricted.extract(subgrid);
03237     // translate the subgrid indexing range to match the MSM block
03238     subgrid.updateLower( bd->sendUp[n].nrange_wrap.lower() );
03239     // add the subgrid charges into the block
03240     msm::BlockIndex& bindex = bd->sendUp[n].nblock_wrap;
03241     ASSERT(bindex.level == lnext);
03242     // place subgrid into message
03243     // SET MESSAGE PRIORITY
03244     int msgsz = subgrid.nn() * sizeof(Float);
03245     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03246     SET_PRIORITY(gm, sequence, MSM_PRIORITY + lnext);
03247     gm->put(subgrid, blockIndex.level, sequence);  // send my level
03248     // lookup in ComputeMsmMgr proxy array by level
03249     mgrLocal->msmBlock[lnext](
03250         bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
03251   } // for
03252 #ifdef MSM_TIMING
03253   stopTime = CkWallTimer();
03254   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03255 #endif
03256 } // MsmBlock::sendUpCharge()
03257 
03258 
03259 void MsmBlock::gridCutoff()
03260 {
03261 #ifdef DEBUG_MSM_GRID
03262   printf("MsmBlock level=%d, id=%d %d %d:  grid cutoff\n",
03263       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
03264 #endif
03265 #ifndef MSM_GRID_CUTOFF_DECOMP
03266   gridCutoffKernel();
03267   sendAcrossPotential();
03268 #else // MSM_GRID_CUTOFF_DECOMP
03269 
03270   // send charge block to MsmGridCutoff compute objects
03271 #ifdef MSM_TIMING
03272   double startTime, stopTime;
03273   startTime = CkWallTimer();
03274 #endif
03275   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
03276   int msgsz = qh.data().len() * sizeof(Float);
03277   int len = bd->indexGridCutoff.len();
03278 
03279 #if 0
03280   // send charge message to each MsmGridCutoff compute element in list
03281   for (int n = 0;  n < len;  n++) {
03282 #ifdef MSM_TIMING
03283     startTime = CkWallTimer();
03284 #endif
03285     int index = bd->indexGridCutoff[n];
03286     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03287     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03288     gm->put(qh, blockIndex.level, sequence);  // send my level
03289 #ifdef MSM_TIMING
03290     stopTime = CkWallTimer();
03291     mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03292 #endif
03293     mgrLocal->msmGridCutoff[index].compute(gm);
03294   }
03295 #else
03296 
03297   // broadcast charge message to section
03298   GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03299   SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03300   gm->put(qh, blockIndex.level, sequence);  // send my level
03301   msmGridCutoffBroadcast.compute(gm);
03302 #ifdef MSM_TIMING
03303   stopTime = CkWallTimer();
03304   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03305 #endif
03306 
03307 #endif // 0
03308 
03309 #endif // MSM_GRID_CUTOFF_DECOMP
03310 
03311 } // MsmBlock::gridCutoff()
03312 
03313 
03314 #ifndef MSM_GRID_CUTOFF_DECOMP
03315 void MsmBlock::sendAcrossPotential()
03316 {
03317 #ifdef MSM_TIMING
03318   double startTime, stopTime;
03319   startTime = CkWallTimer();
03320 #endif
03321   int lnext = blockIndex.level;
03322   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
03323   // buffer portions of grid to send to Blocks on this level
03324   for (int n = 0;  n < bd->sendAcross.len();  n++) {
03325     // initialize the proper subgrid indexing range
03326     subgrid.init( bd->sendAcross[n].nrange );
03327     // extract the values from the larger grid into the subgrid
03328     ehCutoff.extract(subgrid);
03329     // translate the subgrid indexing range to match the MSM block
03330     subgrid.updateLower( bd->sendAcross[n].nrange_wrap.lower() );
03331     // add the subgrid charges into the block
03332     msm::BlockIndex& bindex = bd->sendAcross[n].nblock_wrap;
03333     ASSERT(bindex.level == lnext);
03334     // place subgrid into message
03335     int msgsz = subgrid.nn() * sizeof(Float);
03336     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03337     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03338     gm->put(subgrid, blockIndex.level, sequence);  // send my level
03339     // lookup in ComputeMsmMgr proxy array by level
03340     mgrLocal->msmBlock[lnext](
03341         bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
03342   } // for
03343 #ifdef MSM_TIMING
03344   stopTime = CkWallTimer();
03345   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03346 #endif
03347 } // MsmBlock::sendAcrossPotential()
03348 #endif
03349 
03350 
03351 void MsmBlock::addPotential(GridMsg *gm)
03352 {
03353 #ifdef MSM_TIMING
03354   double startTime, stopTime;
03355   startTime = CkWallTimer();
03356 #endif
03357   int pid;
03358   int pseq;
03359   gm->get(subgrid, pid, pseq);  // receive sender's level
03360   delete gm;
03361   eh += subgrid;
03362 #ifdef MSM_TIMING
03363   stopTime = CkWallTimer();
03364   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03365 #endif
03366   if (++cntRecvsPotential == bd->numRecvsPotential) {
03367     if (blockIndex.level > 0) {
03368       prolongation();
03369     }
03370     else {
03371       sendPatch();
03372     }
03373   }
03374 } // MsmBlock::addPotential()
03375 
03376 
03377 void MsmBlock::sendDownPotential()
03378 {
03379 #ifdef MSM_TIMING
03380   double startTime, stopTime;
03381   startTime = CkWallTimer();
03382 #endif
03383   int lnext = blockIndex.level - 1;
03384   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
03385   // buffer portions of grid to send to Blocks on next level
03386   for (int n = 0;  n < bd->sendDown.len();  n++) {
03387     // initialize the proper subgrid indexing range
03388     subgrid.init( bd->sendDown[n].nrange );
03389     // extract the values from the larger grid into the subgrid
03390     ehProlongated.extract(subgrid);
03391     // translate the subgrid indexing range to match the MSM block
03392     subgrid.updateLower( bd->sendDown[n].nrange_wrap.lower() );
03393     // add the subgrid charges into the block
03394     msm::BlockIndex& bindex = bd->sendDown[n].nblock_wrap;
03395     ASSERT(bindex.level == lnext);
03396     // place subgrid into message
03397     int msgsz = subgrid.nn() * sizeof(Float);
03398     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03399     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03400     gm->put(subgrid, blockIndex.level, sequence);  // send my level
03401     // lookup in ComputeMsmMgr proxy array by level
03402     mgrLocal->msmBlock[lnext](
03403         bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
03404   } // for
03405 #ifdef MSM_TIMING
03406   stopTime = CkWallTimer();
03407   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03408   mgrLocal->doneTiming();
03409 #endif
03410   init();  // reinitialize for next computation
03411 } // MsmBlock::sendDownPotential()
03412 
03413 
03414 void MsmBlock::sendPatch()
03415 {
03416 #ifdef MSM_TIMING
03417   double startTime, stopTime;
03418   startTime = CkWallTimer();
03419 #endif
03420   int lnext = blockIndex.level;
03421   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
03422   ASSERT(lnext == 0);
03423   // buffer portions of grid to send to Blocks on next level
03424   for (int n = 0;  n < bd->sendPatch.len();  n++) {
03425     // initialize the proper subgrid indexing range
03426     subgrid.init( bd->sendPatch[n].nrange );
03427     // extract the values from the larger grid into the subgrid
03428     eh.extract(subgrid);
03429     // translate the subgrid indexing range to match the MSM block
03430     subgrid.updateLower( bd->sendPatch[n].nrange_unwrap.lower() );
03431     // add the subgrid charges into the block, need its patch ID
03432     int pid = bd->sendPatch[n].patchID;
03433     // place subgrid into message
03434     int msgsz = subgrid.nn() * sizeof(Float);
03435     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03436     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03437     gm->put(subgrid, pid, sequence);  // send patch ID
03438     // lookup which PE has this patch
03439     PatchMap *pm = PatchMap::Object();
03440     int pe = pm->node(pid);
03441     mgrProxy[pe].addPotential(gm);
03442   }
03443 #ifdef MSM_TIMING
03444   stopTime = CkWallTimer();
03445   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03446   mgrLocal->doneTiming();
03447 #endif
03448   init();  // reinitialize for next computation
03449 } // MsmBlock::sendPatch()
03450 
03451 
03452 //
03453 // MsmC1HermiteBlock handles grids of vector elements
03454 // for C1 Hermite approximation
03455 //
03456 class MsmC1HermiteBlock :
03457   public CBase_MsmC1HermiteBlock,
03458   public MsmBlockKernel<C1Vector,C1Matrix>
03459 {
03460   public:
03461     CProxySection_MsmC1HermiteGridCutoff msmGridCutoffBroadcast;
03462     CProxySection_MsmC1HermiteGridCutoff msmGridCutoffReduction;
03463  
03464     MsmC1HermiteBlock(int level) :
03465       MsmBlockKernel<C1Vector,C1Matrix>(
03466           msm::BlockIndex(level,
03467             msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
03468           )
03469     {
03470       int isfirstlevel = (level == 0);
03471       int istoplevel = (level == map->gridrange.len()-1);
03472       const msm::Grid<C1Matrix> *res =
03473         (istoplevel ? NULL : &(map->gres_c1hermite[level]));
03474       const msm::Grid<C1Matrix> *pro =
03475         (isfirstlevel ? NULL : &(map->gpro_c1hermite[level-1]));
03476 #ifndef MSM_GRID_CUTOFF_DECOMP
03477       const msm::Grid<C1Matrix> *gc = &(map->gc_c1hermite[level]);
03478       setupStencils(res, pro, gc);
03479 #else
03480       setupStencils(res, pro);
03481 #endif
03482     }
03483     MsmC1HermiteBlock(CkMigrateMessage *m) :
03484       MsmBlockKernel<C1Vector,C1Matrix>(m) { }
03485 
03486     void setupSections();
03487 
03488     void sumReducedPotential(CkReductionMsg *msg) {
03489 #ifdef MSM_TIMING
03490       double startTime, stopTime;
03491       startTime = CkWallTimer();
03492 #endif
03493       msm::Grid<C1Vector> ehfull;
03494       ehfull.init( msm::IndexRange(eh) );
03495       memcpy(ehfull.data().buffer(), msg->getData(), msg->getSize());
03496       delete msg;
03497       int priority = mgrLocal->nlevels
03498         + 2*(mgrLocal->nlevels - blockIndex.level)-1;
03499       int msgsz = ehfull.data().len() * sizeof(C1Vector);
03500       GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03501       SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03502       gm->put(ehfull, blockIndex.level, sequence);  // send my level
03503 #ifdef MSM_TIMING
03504       stopTime = CkWallTimer();
03505       mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03506 #endif
03507       addPotential(gm);
03508     }
03509 
03510     void addCharge(GridMsg *);  // entry
03511 
03512     void restriction() {
03513       restrictionKernel();
03514       sendUpCharge();
03515     }
03516     void sendUpCharge();
03517     void gridCutoff();
03518 #ifndef MSM_GRID_CUTOFF_DECOMP
03519     void sendAcrossPotential();
03520 #endif
03521 
03522     void addPotential(GridMsg *);  // entry
03523 
03524     void prolongation() {
03525       prolongationKernel();
03526       sendDownPotential();
03527     }
03528     void sendDownPotential();
03529     void sendPatch();
03530 }; // class MsmC1HermiteBlock
03531 
03532 
03533 void MsmC1HermiteBlock::setupSections()
03534 {
03535 #ifdef DEBUG_MSM_GRID
03536   CkPrintf("LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d):  "
03537       "creating broadcast section on PE %d\n",
03538       blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
03539 #endif
03540   CkVec<CkArrayIndex1D> elems;
03541   for (int n = 0;  n < bd->indexGridCutoff.len();  n++) {
03542     elems.push_back(CkArrayIndex1D( bd->indexGridCutoff[n] ));
03543   }
03544   msmGridCutoffBroadcast = CProxySection_MsmC1HermiteGridCutoff::ckNew(
03545       mgrLocal->msmC1HermiteGridCutoff, elems.getVec(), elems.size()
03546       );
03547   CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
03548   CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
03549   msmGridCutoffBroadcast.ckSectionDelegate(mcastPtr);
03550   
03551 #ifdef DEBUG_MSM_GRID
03552   char s[1024];
03553   sprintf(s, "LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d):  "
03554       "creating reduction section on PE %d\n",
03555       blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
03556 #endif
03557   CkVec<CkArrayIndex1D> elems2;
03558 #ifdef DEBUG_MSM_GRID
03559   strcat(s, "receiving from MsmC1HermiteGridCutoff ID:");
03560 #endif
03561   for (int n = 0;  n < bd->recvGridCutoff.len();  n++) {
03562 #ifdef DEBUG_MSM_GRID
03563     char t[20];
03564     sprintf(t, "  %d", bd->recvGridCutoff[n]);
03565     strcat(s, t);
03566 #endif
03567     elems2.push_back(CkArrayIndex1D( bd->recvGridCutoff[n] ));
03568   }
03569 #ifdef DEBUG_MSM_GRID
03570   strcat(s, "\n");
03571   CkPrintf(s);
03572 #endif
03573   msmGridCutoffReduction = CProxySection_MsmC1HermiteGridCutoff::ckNew(
03574       mgrLocal->msmC1HermiteGridCutoff, elems2.getVec(), elems2.size()
03575       );
03576   msmGridCutoffReduction.ckSectionDelegate(mcastPtr);
03577   MsmC1HermiteGridCutoffSetupMsg *msg = new MsmC1HermiteGridCutoffSetupMsg;
03578   CProxyElement_MsmC1HermiteBlock thisElementProxy = thisProxy(thisIndex);
03579   msg->put(&thisElementProxy);
03580 
03581   msmGridCutoffReduction.setupSections(msg);  // broadcast to entire section
03582 
03583   /* XXX alternatively, setup default reduction client
03584    *
03585   mcastPtr->setReductionClient(msmGridCutoffReduction,
03586       new CkCallback(CkIndex_MsmC1HermiteBlock::myReductionEntry(NULL),
03587         thisElementProxy));
03588    *
03589    */
03590 } // MsmC1HermiteBlock::setupSections()
03591 
03592 
03593 void MsmC1HermiteBlock::addCharge(GridMsg *gm)
03594 {
03595 #ifdef MSM_TIMING
03596   double startTime, stopTime;
03597   startTime = CkWallTimer();
03598 #endif
03599   int pid;
03600   gm->get(subgrid, pid, sequence);
03601   delete gm;
03602   qh += subgrid;
03603 #ifdef MSM_TIMING
03604   stopTime = CkWallTimer();
03605   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03606 #endif
03607   if (++cntRecvsCharge == bd->numRecvsCharge) {
03608     int nlevels = mgrLocal->numLevels();
03609     if (blockIndex.level < nlevels-1) {
03610       restriction();
03611     }
03612     gridCutoff();
03613   }
03614 } // MsmC1HermiteBlock::addCharge()
03615 
03616 
03617 void MsmC1HermiteBlock::sendUpCharge()
03618 {
03619 #ifdef MSM_TIMING
03620   double startTime, stopTime;
03621   startTime = CkWallTimer();
03622 #endif
03623   int lnext = blockIndex.level + 1;
03624   // buffer portions of grid to send to Blocks on next level
03625   for (int n = 0;  n < bd->sendUp.len();  n++) {
03626     // initialize the proper subgrid indexing range
03627     subgrid.init( bd->sendUp[n].nrange );
03628     // extract the values from the larger grid into the subgrid
03629     qhRestricted.extract(subgrid);
03630     // translate the subgrid indexing range to match the MSM block
03631     subgrid.updateLower( bd->sendUp[n].nrange_wrap.lower() );
03632     // add the subgrid charges into the block
03633     msm::BlockIndex& bindex = bd->sendUp[n].nblock_wrap;
03634     ASSERT(bindex.level == lnext);
03635     // place subgrid into message
03636     // SET MESSAGE PRIORITY
03637     int msgsz = subgrid.nn() * sizeof(C1Vector);
03638     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03639     SET_PRIORITY(gm, sequence, MSM_PRIORITY + lnext);
03640     gm->put(subgrid, blockIndex.level, sequence);  // send my level
03641     // lookup in ComputeMsmMgr proxy array by level
03642     mgrLocal->msmC1HermiteBlock[lnext](
03643         bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
03644   } // for
03645 #ifdef MSM_TIMING
03646   stopTime = CkWallTimer();
03647   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03648 #endif
03649 } // MsmC1HermiteBlock::sendUpCharge()
03650 
03651 
03652 void MsmC1HermiteBlock::gridCutoff()
03653 {
03654 #ifdef DEBUG_MSM_GRID
03655   printf("MsmC1HermiteBlock level=%d, id=%d %d %d:  grid cutoff\n",
03656       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
03657 #endif
03658 #ifndef MSM_GRID_CUTOFF_DECOMP
03659   gridCutoffKernel();
03660   sendAcrossPotential();
03661 #else // MSM_GRID_CUTOFF_DECOMP
03662 
03663   // send charge block to MsmGridCutoff compute objects
03664 #ifdef MSM_TIMING
03665   double startTime, stopTime;
03666   startTime = CkWallTimer();
03667 #endif
03668   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
03669   int msgsz = qh.data().len() * sizeof(C1Vector);
03670   int len = bd->indexGridCutoff.len();
03671 
03672 #if 0
03673   // send charge message to each MsmGridCutoff compute element in list
03674   for (int n = 0;  n < len;  n++) {
03675 #ifdef MSM_TIMING
03676     startTime = CkWallTimer();
03677 #endif
03678     int index = bd->indexGridCutoff[n];
03679     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03680     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03681     gm->put(qh, blockIndex.level, sequence);  // send my level
03682 #ifdef MSM_TIMING
03683     stopTime = CkWallTimer();
03684     mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03685 #endif
03686     mgrLocal->msmGridCutoff[index].compute(gm);
03687   }
03688 #else
03689 
03690   // broadcast charge message to section
03691   GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03692   SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03693   gm->put(qh, blockIndex.level, sequence);  // send my level
03694   msmGridCutoffBroadcast.compute(gm);
03695 #ifdef MSM_TIMING
03696   stopTime = CkWallTimer();
03697   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03698 #endif
03699 
03700 #endif // 0
03701 
03702 #endif // MSM_GRID_CUTOFF_DECOMP
03703 
03704 } // MsmC1HermiteBlock::gridCutoff()
03705 
03706 
03707 #ifndef MSM_GRID_CUTOFF_DECOMP
03708 void MsmC1HermiteBlock::sendAcrossPotential()
03709 {
03710 #ifdef MSM_TIMING
03711   double startTime, stopTime;
03712   startTime = CkWallTimer();
03713 #endif
03714   int lnext = blockIndex.level;
03715   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
03716   // buffer portions of grid to send to Blocks on this level
03717   for (int n = 0;  n < bd->sendAcross.len();  n++) {
03718     // initialize the proper subgrid indexing range
03719     subgrid.init( bd->sendAcross[n].nrange );
03720     // extract the values from the larger grid into the subgrid
03721     ehCutoff.extract(subgrid);
03722     // translate the subgrid indexing range to match the MSM block
03723     subgrid.updateLower( bd->sendAcross[n].nrange_wrap.lower() );
03724     // add the subgrid charges into the block
03725     msm::BlockIndex& bindex = bd->sendAcross[n].nblock_wrap;
03726     ASSERT(bindex.level == lnext);
03727     // place subgrid into message
03728     int msgsz = subgrid.nn() * sizeof(C1Vector);
03729     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03730     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03731     gm->put(subgrid, blockIndex.level, sequence);  // send my level
03732     // lookup in ComputeMsmMgr proxy array by level
03733     mgrLocal->msmC1HermiteBlock[lnext](
03734         bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
03735   } // for
03736 #ifdef MSM_TIMING
03737   stopTime = CkWallTimer();
03738   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03739 #endif
03740 } // MsmC1HermiteBlock::sendAcrossPotential()
03741 #endif
03742 
03743 
03744 void MsmC1HermiteBlock::addPotential(GridMsg *gm)
03745 {
03746 #ifdef MSM_TIMING
03747   double startTime, stopTime;
03748   startTime = CkWallTimer();
03749 #endif
03750   int pid;
03751   int pseq;
03752   gm->get(subgrid, pid, pseq);  // receive sender's level
03753   delete gm;
03754   eh += subgrid;
03755 #ifdef MSM_TIMING
03756   stopTime = CkWallTimer();
03757   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03758 #endif
03759   if (++cntRecvsPotential == bd->numRecvsPotential) {
03760     if (blockIndex.level > 0) {
03761       prolongation();
03762     }
03763     else {
03764       sendPatch();
03765     }
03766   }
03767 } // MsmC1HermiteBlock::addPotential()
03768 
03769 
03770 void MsmC1HermiteBlock::sendDownPotential()
03771 {
03772 #ifdef MSM_TIMING
03773   double startTime, stopTime;
03774   startTime = CkWallTimer();
03775 #endif
03776   int lnext = blockIndex.level - 1;
03777   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
03778   // buffer portions of grid to send to Blocks on next level
03779   for (int n = 0;  n < bd->sendDown.len();  n++) {
03780     // initialize the proper subgrid indexing range
03781     subgrid.init( bd->sendDown[n].nrange );
03782     // extract the values from the larger grid into the subgrid
03783     ehProlongated.extract(subgrid);
03784     // translate the subgrid indexing range to match the MSM block
03785     subgrid.updateLower( bd->sendDown[n].nrange_wrap.lower() );
03786     // add the subgrid charges into the block
03787     msm::BlockIndex& bindex = bd->sendDown[n].nblock_wrap;
03788     ASSERT(bindex.level == lnext);
03789     // place subgrid into message
03790     int msgsz = subgrid.nn() * sizeof(C1Vector);
03791     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03792     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03793     gm->put(subgrid, blockIndex.level, sequence);  // send my level
03794     // lookup in ComputeMsmMgr proxy array by level
03795     mgrLocal->msmC1HermiteBlock[lnext](
03796         bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
03797   } // for
03798 #ifdef MSM_TIMING
03799   stopTime = CkWallTimer();
03800   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03801   mgrLocal->doneTiming();
03802 #endif
03803   init();  // reinitialize for next computation
03804 } // MsmC1HermiteBlock::sendDownPotential()
03805 
03806 
03807 void MsmC1HermiteBlock::sendPatch()
03808 {
03809 #ifdef MSM_TIMING
03810   double startTime, stopTime;
03811   startTime = CkWallTimer();
03812 #endif
03813   int lnext = blockIndex.level;
03814   int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
03815   ASSERT(lnext == 0);
03816   // buffer portions of grid to send to Blocks on next level
03817   for (int n = 0;  n < bd->sendPatch.len();  n++) {
03818     // initialize the proper subgrid indexing range
03819     subgrid.init( bd->sendPatch[n].nrange );
03820     // extract the values from the larger grid into the subgrid
03821     eh.extract(subgrid);
03822     // translate the subgrid indexing range to match the MSM block
03823     subgrid.updateLower( bd->sendPatch[n].nrange_unwrap.lower() );
03824     // add the subgrid charges into the block, need its patch ID
03825     int pid = bd->sendPatch[n].patchID;
03826     // place subgrid into message
03827     int msgsz = subgrid.nn() * sizeof(C1Vector);
03828     GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
03829     SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
03830     gm->put(subgrid, pid, sequence);  // send patch ID
03831     // lookup which PE has this patch
03832     PatchMap *pm = PatchMap::Object();
03833     int pe = pm->node(pid);
03834     mgrProxy[pe].addPotential(gm);
03835   }
03836 #ifdef MSM_TIMING
03837   stopTime = CkWallTimer();
03838   mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
03839   mgrLocal->doneTiming();
03840 #endif
03841   init();  // reinitialize for next computation
03842 } // MsmC1HermiteBlock::sendPatch()
03843 
03844 
03845 // MsmBlock
03846 //
03848 
03849 
03850 ComputeMsmMgr::ComputeMsmMgr() :
03851   msmProxy(thisgroup), msmCompute(0)
03852 {
03853 #ifdef DEBUG_MSM_VERBOSE
03854   printf("ComputeMsmMgr:  (constructor) PE %d\n", CkMyPe());
03855 #endif
03856   CkpvAccess(BOCclass_group).computeMsmMgr = thisgroup;
03857 
03858 #ifdef MSM_TIMING
03859   if (CkMyPe() == 0) {
03860     msmTimer = CProxy_MsmTimer::ckNew();
03861   }
03862   initTiming();
03863 #endif
03864 #ifdef MSM_PROFILING
03865   if (CkMyPe() == 0) {
03866     msmProfiler = CProxy_MsmProfiler::ckNew();
03867   }
03868   initProfiling();
03869 #endif
03870 }
03871 
03872 ComputeMsmMgr::~ComputeMsmMgr()
03873 {
03874 #ifdef DEBUG_MSM_VERBOSE
03875   printf("ComputeMsmMgr:  (destructor) PE %d\n", CkMyPe());
03876 #endif
03877   // free memory?
03878 }
03879 
03880 
03881 //
03882 // Given basis vector length "len" (and with user grid spacing)
03883 // If using periodic boundary conditions along this basis vector,
03884 // h is calculated to be close to desired grid spacing such that 
03885 // nn = 2^k or 3*2^k.  For non-periodic boundaries, we can set h
03886 // to the desired grid spacing, and set ia and ib to pad 1/2 the 
03887 // interpolating stencil width.  
03888 //
03889 void ComputeMsmMgr::setup_hgrid_1d(BigReal len, BigReal& hh, int& nn,
03890     int& ia, int& ib, int isperiodic)
03891 {
03892   ASSERT(gridspacing > 0);
03893   if (isperiodic) {
03894     const BigReal hmin = (4./5) * gridspacing;
03895     const BigReal hmax = 1.5 * hmin;
03896     hh = len;
03897     nn = 1;  // start with one grid point across length
03898     while (hh >= hmax) {
03899       hh *= 0.5;  // halve spacing and double grid points
03900       nn <<= 1;
03901     }
03902     if (hh < hmin) {
03903       if (nn < 4) {
03904         NAMD_die("Basis vector is too short or MSM grid spacing is too large");
03905       }
03906       hh *= (4./3);  // scale hh by 4/3 and nn by 3/4
03907       nn >>= 2;
03908       nn *= 3;
03909     }
03910     // now we have:  hmin <= h < hmax,
03911     // where nn is a power of 2 times no more than one power of 3
03912     ia = 0;
03913     ib = nn-1;
03914   }
03915   else {
03916     hh = gridspacing;
03917     // Instead of "nn = (int) ceil(len / hh);"
03918     // len is divisible by hh, up to roundoff error, so round to closest nn
03919     nn = (int) floor(len/hh + 0.5);
03920     ia = -s_edge;
03921     ib = nn + s_edge;
03922   }
03923 } // ComputeMsmMgr::setup_hgrid_1d()
03924 
03925 
03926 // make sure that block sizes divide evenly into periodic dimensions
03927 // call only for periodic dimensions
03928 void ComputeMsmMgr::setup_periodic_blocksize(int& bsize, int n)
03929 {
03930   if (n % bsize != 0) {
03931     // n is either 2^k or 3*2^k
03932     int newbsize = 1;
03933     if (n % 3 == 0) newbsize = 3;
03934     while (newbsize < bsize && newbsize < n) newbsize *= 2;
03935     if (bsize < newbsize) newbsize /= 2;
03936     if (n % newbsize != 0) {
03937       NAMD_die("MSM grid size for periodic dimensions must be "
03938           "a power of 2 times at most one power of 3");
03939     }
03940     bsize = newbsize;
03941   }
03942   return;
03943 }
03944 
03945 
03946 //
03947 // This is the major routine that sets everything up for MSM based on 
03948 // 1. cell basis vectors and/or max and min coordinates plus padding
03949 // 2. cutoff and MSM-related parameters from SimParameter
03950 // Includes determining grid spacings along periodic dimensions, 
03951 // determining grid dimensions and number of levels for system,
03952 // then calculating all needed coefficients for grid cutoff part
03953 // and grid transfer parts (restriction and prolongation).
03954 //
03955 // Then sets up Map for parallel decomposition based on 
03956 // MSM block size parameters from SimParameter.
03957 //
03958 // Then determines chare array element placement of MsmBlock and 
03959 // MsmGridCutoff arrays based on number of PEs and number of nodes.
03960 //
03961 // Then allocates (on PE 0) MsmBlock (3D chare arrays, one per level) 
03962 // and MsmGridCutoff (one 1D chare array for all block-block interactions)
03963 // and then broadcasts array proxies across group.
03964 //
03965 void ComputeMsmMgr::initialize(MsmInitMsg *msg)
03966 {
03967 #ifdef DEBUG_MSM_VERBOSE
03968   printf("ComputeMsmMgr:  initialize() PE %d\n", CkMyPe());
03969 #endif
03970 
03971   smin = msg->smin;
03972   smax = msg->smax;
03973   delete msg;
03974 
03975 #if 0
03976   printf("PE%d: initializing MSM\n", CkMyPe());
03977 #endif
03978 
03979   SimParameters *simParams = Node::Object()->simParameters;
03980 
03981   // get required sim params, check validity
03982   lattice = simParams->lattice;
03983 
03984   // set user-defined extent of system
03985   Vector rmin(simParams->MSMxmin, simParams->MSMymin, simParams->MSMzmin);
03986   Vector rmax(simParams->MSMxmax, simParams->MSMymax, simParams->MSMzmax);
03987   Vector sdmin = lattice.scale(rmin);
03988   Vector sdmax = lattice.scale(rmax);
03989   // swap coordinates between min and max to correct for possible rotation
03990   if (sdmin.x > sdmax.x) { double t=sdmin.x; sdmin.x=sdmax.x; sdmax.x=t; }
03991   if (sdmin.y > sdmax.y) { double t=sdmin.y; sdmin.y=sdmax.y; sdmax.y=t; }
03992   if (sdmin.z > sdmax.z) { double t=sdmin.z; sdmin.z=sdmax.z; sdmax.z=t; }
03993   // extend smin, smax by user-defined extent, where appropriate
03994   if ( ! lattice.a_p() && (sdmin.x != 0 || sdmax.x != 0)) {
03995     if (sdmin.x < smin.x) {
03996       smin.x = sdmin.x;
03997       if (CkMyPe() == 0) {
03998         iout << iINFO << "MSM extending minimum X to "
03999           << simParams->MSMxmin << " A\n" << endi;
04000       }
04001     }
04002     if (sdmax.x > smax.x) {
04003       smax.x = sdmax.x;
04004       if (CkMyPe() == 0) {
04005         iout << iINFO << "MSM extending maximum X to "
04006           << simParams->MSMxmax << " A\n" << endi;
04007       }
04008     }
04009   }
04010   if ( ! lattice.b_p() && (sdmin.y != 0 || sdmax.y != 0)) {
04011     if (sdmin.y < smin.y) {
04012       smin.y = sdmin.y;
04013       if (CkMyPe() == 0) {
04014         iout << iINFO << "MSM extending minimum Y to "
04015           << simParams->MSMymin << " A\n" << endi;
04016       }
04017     }
04018     if (sdmax.y > smax.y) {
04019       smax.y = sdmax.y;
04020       if (CkMyPe() == 0) {
04021         iout << iINFO << "MSM extending maximum Y to "
04022           << simParams->MSMymax << " A\n" << endi;
04023       }
04024     }
04025   }
04026   if ( ! lattice.c_p() && (sdmin.z != 0 || sdmax.z != 0)) {
04027     if (sdmin.z < smin.z) {
04028       smin.z = sdmin.z;
04029       if (CkMyPe() == 0) {
04030         iout << iINFO << "MSM extending minimum Z to "
04031           << simParams->MSMzmin << " A\n" << endi;
04032       }
04033     }
04034     if (sdmax.z > smax.z) {
04035       smax.z = sdmax.z;
04036       if (CkMyPe() == 0) {
04037         iout << iINFO << "MSM extending maximum Z to "
04038           << simParams->MSMzmax << " A\n" << endi;
04039       }
04040     }
04041   }
04042 
04043 #ifdef DEBUG_MSM_VERBOSE
04044   printf("smin = %g %g %g  smax = %g %g %g\n",
04045       smin.x, smin.y, smin.z, smax.x, smax.y, smax.z);
04046 #endif
04047 
04048   approx = simParams->MSMApprox;
04049   if (approx < 0 || approx >= NUM_APPROX) {
04050     NAMD_die("MSM: unknown approximation requested (MSMApprox)");
04051   }
04052 
04053   split = simParams->MSMSplit;
04054   if (split < 0 || split >= NUM_SPLIT) {
04055     NAMD_die("MSM: unknown splitting requested (MSMSplit)");
04056   }
04057 
04058   if (CkMyPe() == 0) {
04059     const char *approx_str, *split_str;
04060     switch (approx) {
04061       case CUBIC:      approx_str = "C1 cubic";    break;
04062       case QUINTIC:    approx_str = "C1 quintic";  break;
04063       case QUINTIC2:   approx_str = "C2 quintic";  break;
04064       case SEPTIC:     approx_str = "C1 septic";   break;
04065       case SEPTIC3:    approx_str = "C3 septic";   break;
04066       case NONIC:      approx_str = "C1 nonic";    break;
04067       case NONIC4:     approx_str = "C4 nonic";    break;
04068       case C1HERMITE:  approx_str = "C1 Hermite";  break;
04069       default:         approx_str = "unknown";     break;
04070     }
04071     switch (split) {
04072       case TAYLOR2:  split_str = "C2 Taylor";   break;
04073       case TAYLOR3:  split_str = "C3 Taylor";   break;
04074       case TAYLOR4:  split_str = "C4 Taylor";   break;
04075       case TAYLOR5:  split_str = "C5 Taylor";   break;
04076       case TAYLOR6:  split_str = "C6 Taylor";   break;
04077       case TAYLOR7:  split_str = "C7 Taylor";   break;
04078       case TAYLOR8:  split_str = "C8 Taylor";   break;
04079       default:       split_str = "unknown";     break;
04080     }
04081     iout << iINFO << "MSM using "
04082                   << approx_str << " interpolation\n";
04083     iout << iINFO << "MSM using "
04084                   << split_str << " splitting function\n";
04085   }
04086 
04087   a = simParams->cutoff;
04088 
04089   if (approx == C1HERMITE) {
04090     gridScalingFactor = 2;
04091   }
04092   else {
04093     gridScalingFactor = 1;
04094   }
04095 
04096   gridspacing = gridScalingFactor * simParams->MSMGridSpacing;
04097   if (gridspacing <= 0) {
04098     NAMD_die("MSM: grid spacing must be greater than 0 (MSMGridSpacing)");
04099   }
04100   else if (gridspacing >= a) {
04101     NAMD_die("MSM: grid spacing must be less than cutoff (MSMGridSpacing)");
04102   }
04103 
04104   padding = gridScalingFactor * simParams->MSMPadding;
04105   if (padding < 0) {
04106     NAMD_die("MSM: padding must be non-negative (MSMPadding)");
04107   }
04108 
04109   // set maximum number of levels (default 0 adapts levels to system)
04110   nlevels = simParams->MSMLevels;
04111 
04112   // XXX dispersion unused for now
04113   dispersion = 0;
04114   if ( ! dispersion && split >= TAYLOR2_DISP) {
04115     NAMD_die("MSM: requested splitting for long-range dispersion "
04116         "(not implemented)");
04117   }
04118 
04119   // set block sizes for grid decomposition
04120   int bsx = simParams->MSMBlockSizeX / int(gridScalingFactor);
04121   int bsy = simParams->MSMBlockSizeY / int(gridScalingFactor);
04122   int bsz = simParams->MSMBlockSizeZ / int(gridScalingFactor);
04123   if (bsx <= 0 || bsy <= 0 || bsz <= 0) {
04124     NAMD_die("MSM: invalid block size requested (MSMBlockSize[XYZ])");
04125   }
04126 #ifdef MSM_FIXED_SIZE_GRID_MSG
04127   else if (bsx * bsy * bsz > MSM_MAX_BLOCK_VOLUME) {
04128     NAMD_die("MSM: requested block size (MSMBlockSize[XYZ]) too big");
04129   }
04130 #endif
04131   if (CkMyPe() == 0) {
04132     iout << iINFO << "MSM block size decomposition along X is "
04133                   << bsx << " grid points\n";
04134     iout << iINFO << "MSM block size decomposition along Y is "
04135                   << bsy << " grid points\n";
04136     iout << iINFO << "MSM block size decomposition along Z is "
04137                   << bsz << " grid points\n";
04138   }
04139 
04140   s_edge = (PolyDegree[approx] - 1) / 2;  // stencil edge size
04141   omega = 2 * PolyDegree[approx];         // smallest non-periodic grid length
04142 
04143   BigReal xlen, ylen, zlen;
04144   Vector sgmin, sgmax;  // grid min and max, in scaled coordinates
04145   int ispx = lattice.a_p();
04146   int ispy = lattice.b_p();
04147   int ispz = lattice.c_p();
04148   int ispany = (ispx || ispy || ispz);  // is there any periodicity?
04149 
04150   if (ispx) {  // periodic along basis vector
04151     xlen = lattice.a().length();
04152     sgmax.x = 0.5;
04153     sgmin.x = -0.5;
04154   }
04155   else {  // non-periodic
04156     sgmax.x = smax.x + padding;  // pad the edges
04157     sgmin.x = smin.x - padding;
04158     ASSERT(gridspacing > 0);
04159     // restrict center to be on a grid point
04160     BigReal mupper = ceil(sgmax.x / (2*gridspacing));
04161     BigReal mlower = floor(sgmin.x / (2*gridspacing));
04162     sgmax.x = 2*gridspacing*mupper;
04163     sgmin.x = 2*gridspacing*mlower;
04164     xlen = sgmax.x - sgmin.x;
04165   }
04166 #ifdef DEBUG_MSM_VERBOSE
04167   printf("xlen = %g   sgmin.x = %g   sgmax.x = %g\n", xlen, sgmin.x, sgmax.x);
04168 #endif
04169 
04170   if (ispy) {  // periodic along basis vector
04171     ylen = lattice.b().length();
04172     sgmax.y = 0.5;
04173     sgmin.y = -0.5;
04174   }
04175   else {  // non-periodic
04176     sgmax.y = smax.y + padding;  // pad the edges
04177     sgmin.y = smin.y - padding;
04178     ASSERT(gridspacing > 0);
04179     // restrict center to be on a grid point
04180     BigReal mupper = ceil(sgmax.y / (2*gridspacing));
04181     BigReal mlower = floor(sgmin.y / (2*gridspacing));
04182     sgmax.y = 2*gridspacing*mupper;
04183     sgmin.y = 2*gridspacing*mlower;
04184     ylen = sgmax.y - sgmin.y;
04185   }
04186 #ifdef DEBUG_MSM_VERBOSE
04187   printf("ylen = %g   sgmin.y = %g   sgmax.y = %g\n", ylen, sgmin.y, sgmax.y);
04188 #endif
04189 
04190   if (ispz) {  // periodic along basis vector
04191     zlen = lattice.c().length();
04192     sgmax.z = 0.5;
04193     sgmin.z = -0.5;
04194   }
04195   else {  // non-periodic
04196     sgmax.z = smax.z + padding;  // pad the edges
04197     sgmin.z = smin.z - padding;
04198     ASSERT(gridspacing > 0);
04199     // restrict center to be on a grid point
04200     BigReal mupper = ceil(sgmax.z / (2*gridspacing));
04201     BigReal mlower = floor(sgmin.z / (2*gridspacing));
04202     sgmax.z = 2*gridspacing*mupper;
04203     sgmin.z = 2*gridspacing*mlower;
04204     zlen = sgmax.z - sgmin.z;
04205   }
04206 #ifdef DEBUG_MSM_VERBOSE
04207   printf("zlen = %g   sgmin.z = %g   sgmax.z = %g\n", zlen, sgmin.z, sgmax.z);
04208 #endif
04209   sglower = sgmin;
04210 
04211   int ia, ib, ja, jb, ka, kb;
04212   setup_hgrid_1d(xlen, hxlen, nhx, ia, ib, ispx);
04213   setup_hgrid_1d(ylen, hylen, nhy, ja, jb, ispy);
04214   setup_hgrid_1d(zlen, hzlen, nhz, ka, kb, ispz);
04215   hxlen_1 = 1 / hxlen;
04216   hylen_1 = 1 / hylen;
04217   hzlen_1 = 1 / hzlen;
04218   if (CkMyPe() == 0) {
04219     if (ispx || ispy || ispz) {
04220       iout << iINFO << "MSM grid spacing along X is "<< hxlen << " A\n";
04221       iout << iINFO << "MSM grid spacing along Y is "<< hylen << " A\n";
04222       iout << iINFO << "MSM grid spacing along Z is "<< hzlen << " A\n";
04223     }
04224     else {
04225       iout << iINFO << "MSM grid spacing is " << gridspacing << " A\n";
04226     }
04227     if ( ! ispx || ! ispy || ! ispz ) {
04228       iout << iINFO<<"MSM non-periodic padding is "<< padding << " A\n";
04229     }
04230   }
04231 
04232   int ni = ib - ia + 1;
04233   int nj = jb - ja + 1;
04234   int nk = kb - ka + 1;
04235   int n;
04236 
04237 #if 0
04238   // reserve temp space for factored grid transfer operation
04239   n = (nk > omega ? nk : omega);  // row along z-dimension
04240   lzd.resize(n);
04241   n *= (nj > omega ? nj : omega);  // plane along yz-dimensions
04242   lyzd.resize(n);
04243 #endif
04244 
04245   int lastnelems = 1;
04246   int smallestnbox = 1;
04247   int isclamped = 0;
04248   int maxlevels = nlevels;  // user-defined number of levels
04249 
04250 #ifdef DEBUG_MSM_VERBOSE
04251   printf("maxlevels = %d\n", maxlevels);
04252 #endif
04253   if (nlevels <= 0) {  // instead we set number of levels
04254     n = ni;
04255     if (n < nj) n = nj;
04256     if (n < nk) n = nk;
04257     for (maxlevels = 1;  n > 0;  n >>= 1)  maxlevels++;
04258     if (ispany == 0) {  // no periodicity
04259       // use rule of thumb 3/4 diameter of grid cutoff sphere
04260       int ngci = (int) ceil(3*a / hxlen) - 1;
04261       int ngcj = (int) ceil(3*a / hylen) - 1;
04262       int ngck = (int) ceil(3*a / hzlen) - 1;
04263       int omega3 = omega * omega * omega;
04264       int nhalf = (int) sqrt((double)ni * nj * nk);
04265       lastnelems = (nhalf > omega3 ? nhalf : omega3);
04266       smallestnbox = ngci * ngcj * ngck;  // smaller grids don't reduce work
04267       isclamped = 1;
04268     }
04269   }
04270 #ifdef DEBUG_MSM_VERBOSE
04271   printf("maxlevels = %d\n", maxlevels);
04272 #endif
04273 
04274   // allocate space for storing grid dimensions for each level
04275   map.gridrange.resize(maxlevels);
04276 
04277   // set periodicity flags
04278   map.ispx = ispx;
04279   map.ispy = ispy;
04280   map.ispz = ispz;
04281 
04282   int level = 0;
04283   int done = 0;
04284   int alldone = 0;
04285   do {
04286     map.gridrange[level].setbounds(ia, ib, ja, jb, ka, kb);
04287 
04288     // Msm index?
04289 
04290     if (++level == nlevels)  done |= 0x07;  // user limit on levels
04291 
04292     if (isclamped) {
04293       int nelems = ni * nj * nk;
04294       if (nelems <= lastnelems)    done |= 0x07;
04295       if (nelems <= smallestnbox)  done |= 0x07;
04296     }
04297 
04298     alldone = (done == 0x07);  // make sure all dimensions are done
04299 
04300     if (ispx) {
04301       ni >>= 1;
04302       ib = ni-1;
04303       if (ni & 1)        done |= 0x07;  // == 3 or 1
04304       else if (ni == 2)  done |= 0x01;  // can do one more
04305     }
04306     else {
04307       ia = -((-ia+1)/2) - s_edge;
04308       ib = (ib+1)/2 + s_edge;
04309       ni = ib - ia + 1;
04310       if (ni <= omega)   done |= 0x01;  // can do more restrictions
04311     }
04312 
04313     if (ispy) {
04314       nj >>= 1;
04315       jb = nj-1;
04316       if (nj & 1)        done |= 0x07;  // == 3 or 1
04317       else if (nj == 2)  done |= 0x02;  // can do one more
04318     }
04319     else {
04320       ja = -((-ja+1)/2) - s_edge;
04321       jb = (jb+1)/2 + s_edge;
04322       nj = jb - ja + 1;
04323       if (nj <= omega)   done |= 0x02;  // can do more restrictions
04324     }
04325 
04326     if (ispz) {
04327       nk >>= 1;
04328       kb = nk-1;
04329       if (nk & 1)        done |= 0x07;  // == 3 or 1
04330       else if (nk == 2)  done |= 0x04;  // can do one more
04331     }
04332     else {
04333       ka = -((-ka+1)/2) - s_edge;
04334       kb = (kb+1)/2 + s_edge;
04335       nk = kb - ka + 1;
04336       if (nk <= omega)   done |= 0x04;  // can do more restrictions
04337     }
04338   } while ( ! alldone );
04339   nlevels = level;
04340 
04341   // for periodic boundaries, don't visit top level (all 0)
04342   // toplevel visited only for all nonperiodic boundaries
04343   int toplevel = (ispany ? nlevels : nlevels - 1);
04344 
04345   // resize down to the actual number of levels (does not change alloc)
04346   map.gridrange.resize(nlevels);
04347 
04348   // print out some information about MSM
04349   if (CkMyPe() == 0) {
04350     iout << iINFO << "MSM using " << nlevels << " levels\n";
04351     for (n = 0;  n < nlevels;  n++) {
04352       char s[100];
04353       snprintf(s, sizeof(s), "    level %d:  "
04354           "[%d..%d] x [%d..%d] x [%d..%d]\n", n,
04355           map.gridrange[n].ia(), map.gridrange[n].ib(),
04356           map.gridrange[n].ja(), map.gridrange[n].jb(),
04357           map.gridrange[n].ka(), map.gridrange[n].kb());
04358       iout << iINFO << s;
04359     }
04360     iout << endi;
04361   }
04362 
04363   // find grid spacing basis vectors
04364   hu = hxlen * lattice.a().unit();
04365   hv = hylen * lattice.b().unit();
04366   hw = hzlen * lattice.c().unit();
04367   hufx = Float(hu.x);
04368   hufy = Float(hu.y);
04369   hufz = Float(hu.z);
04370   hvfx = Float(hv.x);
04371   hvfy = Float(hv.y);
04372   hvfz = Float(hv.z);
04373   hwfx = Float(hw.x);
04374   hwfy = Float(hw.y);
04375   hwfz = Float(hw.z);
04376 
04377   ru = lattice.a_r();
04378   rv = lattice.b_r();
04379   rw = lattice.c_r();
04380 
04381   // determine grid spacings in scaled space
04382   shx = ru * hu;
04383   shy = rv * hv;
04384   shz = rw * hw;
04385   shx_1 = 1 / shx;
04386   shy_1 = 1 / shy;
04387   shz_1 = 1 / shz;
04388 
04389   // row vectors to transform interpolated force back to real space
04390   // XXX Is not needed.
04391   sx_shx = shx_1 * Vector(ru.x, rv.x, rw.x);
04392   sy_shy = shy_1 * Vector(ru.y, rv.y, rw.y);
04393   sz_shz = shz_1 * Vector(ru.z, rv.z, rw.z);
04394   srx_x = Float(sx_shx.x);
04395   srx_y = Float(sx_shx.y);
04396   srx_z = Float(sx_shx.z);
04397   sry_x = Float(sy_shy.x);
04398   sry_y = Float(sy_shy.y);
04399   sry_z = Float(sy_shy.z);
04400   srz_x = Float(sz_shz.x);
04401   srz_y = Float(sz_shz.y);
04402   srz_z = Float(sz_shz.z);
04403 
04404   Vector pu = cross(hv, hw);
04405   BigReal s = (hu * pu) / (pu * pu);
04406   pu *= s;  // pu is orthogonal projection of hu onto hv CROSS hw
04407 
04408   Vector pv = cross(hw, hu);
04409   s = (hv * pv) / (pv * pv);
04410   pv *= s;  // pv is orthogonal projection of hv onto hw CROSS hu
04411 
04412   Vector pw = cross(hu, hv);
04413   s = (hw * pw) / (pw * pw);
04414   pw *= s;  // pw is orthogonal projection of hw onto hu CROSS hv
04415 
04416   // radii for parallelepiped of weights enclosing grid cutoff sphere
04417   ni = (int) ceil(2*a / pu.length() ) - 1;
04418   nj = (int) ceil(2*a / pv.length() ) - 1;
04419   nk = (int) ceil(2*a / pw.length() ) - 1;
04420 
04421   Float scaling = 1;
04422   Float scaling_factor = 0.5f;
04423   BigReal a_1 = 1/a;
04424   BigReal a_p = a_1;
04425   if (dispersion) {
04426     a_p = a_p * a_p * a_p;   // = 1/a^3
04427     a_p = a_p * a_p;         // = 1/a^6
04428     scaling_factor = 1.f/64;  // = 1/2^6
04429   }
04430   int i, j, k;
04431   if (approx < C1HERMITE) {
04432     // resize gc and gvc constants for number of levels
04433     map.gc.resize(nlevels);
04434     map.gvc.resize(nlevels);
04435 
04436     for (level = 0;  level < toplevel;  level++) {
04437       map.gc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
04438       map.gvc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
04439 
04440       for (k = -nk;  k <= nk;  k++) {
04441         for (j = -nj;  j <= nj;  j++) {
04442           for (i = -ni;  i <= ni;  i++) {
04443             if (level == 0) {
04444               BigReal s, t, gs=0, gt=0, g=0, dgs=0, dgt=0, dg=0;
04445               BigReal vlen = (i*hu + j*hv + k*hw).length();
04446               s = vlen * a_1;
04447               t = 0.5 * s;
04448               if (t >= 1) {
04449                 g = 0;
04450                 dg = 0;
04451               }
04452               else {
04453                 splitting(gt, dgt, t, split);
04454                 if (s >= 1) {
04455                   BigReal s_1 = 1/s;
04456                   if (dispersion) {
04457                     gs = s_1 * s_1 * s_1;  // = 1/s^3
04458                     gs = gs * gs;  // = 1/s^6
04459                     dgs = -6 * gs * s_1;
04460                   }
04461                   else {
04462                     gs = s_1;
04463                     dgs = -gs * s_1;
04464                   }
04465                 }
04466                 else {
04467                   splitting(gs, dgs, s, split);
04468                 }
04469                 g = (gs - scaling_factor * gt) * a_p;
04470                 BigReal c=0;
04471                 if (i || j || k) {
04472                   c = a_p * a_1 / vlen;
04473                 }
04474                 dg = 0.5 * (dgs - 0.5*scaling_factor * dgt) * c;
04475 
04476                 // Msm index?
04477 
04478               }
04479               map.gc[0](i,j,k) = Float(g);
04480               map.gvc[0](i,j,k) = Float(dg);
04481             } // if level 0
04482             else {
04483               map.gc[level](i,j,k) = scaling * map.gc[0](i,j,k);
04484               map.gvc[level](i,j,k) = scaling * map.gvc[0](i,j,k);
04485             }
04486 
04487           } // for i
04488         } // for j
04489       } // for k
04490       scaling *= scaling_factor;
04491 
04492     } // for level
04493 
04494     // for summed virial factors
04495     gvsum.setbounds(-ni, ni, -nj, nj, -nk, nk);
04496     // make sure final virial sum is initialized to 0
04497     for (i = 0;  i < VMAX;  i++) { virial[i] = 0; }
04498 
04499     if (toplevel < nlevels) {
04500       // nonperiodic along all basis vector directions
04501       // calculate top level weights where all grid points
04502       // interact with each other
04503       ni = map.gridrange[toplevel].ni();
04504       nj = map.gridrange[toplevel].nj();
04505       nk = map.gridrange[toplevel].nk();
04506       map.gc[toplevel].setbounds(-ni, ni, -nj, nj, -nk, nk);
04507 
04508       // Msm index?
04509 
04510       for (k = -nk;  k <= nk;  k++) {
04511         for (j = -nj;  j <= nj;  j++) {
04512           for (i = -ni;  i <= ni;  i++) {
04513             BigReal s, gs, d;
04514             BigReal vlen = (i*hu + j*hv + k*hw).length();
04515             s = vlen * a_1;
04516             if (s >= 1) {
04517               BigReal s_1 = 1/s;
04518               if (dispersion) {
04519                 gs = s_1 * s_1 * s_1;  // = 1/s^3
04520                 gs = gs * gs;  // = 1/s^6
04521               }
04522               else {
04523                 gs = s_1;
04524               }
04525             }
04526             else {
04527               splitting(gs, d, s, split);
04528             }
04529             map.gc[toplevel](i,j,k) = scaling * Float(gs * a_p);
04530           } // for i
04531         } // for j
04532       } // for k
04533     } // if toplevel
04534 
04535     // generate grespro stencil
04536     const int nstencil = Nstencil[approx];
04537     const Float *phi = PhiStencil[approx];
04538     map.grespro.set(0, nstencil, 0, nstencil, 0, nstencil);
04539     for (k = 0;  k < nstencil;  k++) {
04540       for (j = 0;  j < nstencil;  j++) {
04541         for (i = 0;  i < nstencil;  i++) {
04542           map.grespro(i,j,k) = phi[i] * phi[j] * phi[k];
04543         }
04544       }
04545     }
04546 
04547   } // end if approx < C1HERMITE
04548   else {
04549     // C1HERMITE
04550     // resize gc_c1hermite constants for number of levels
04551     map.gc_c1hermite.resize(nlevels);
04552     scaling = 1;
04553 
04554     for (level = 0;  level < toplevel;  level++) {
04555 
04556       Vector hmu = scaling * hu;
04557       Vector hmv = scaling * hv;
04558       Vector hmw = scaling * hw;
04559       BigReal am = scaling * a;
04560 
04561       map.gc_c1hermite[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
04562 
04563       for (k = -nk;  k <= nk;  k++) {
04564         for (j = -nj;  j <= nj;  j++) {
04565           for (i = -ni;  i <= ni;  i++) {
04566             C1Matrix& m = map.gc_c1hermite[level](i,j,k);
04567             Vector rv = i*hmu + j*hmv + k*hmw;
04568             BigReal r2 = rv * rv;
04569             m.set(0);
04570             if (r2 < 4*am*am) {
04571               // accumulate D( g_{a}(0,r) ) term for this level
04572               gc_c1hermite_elem_accum(m, 1, rv, am, split);
04573               // accumulate D( -g_{2a}(0,r) ) term for this level
04574               gc_c1hermite_elem_accum(m, -1, rv, 2*am, split);
04575             } // if within cutoff
04576           }
04577         }
04578       } // end loop over gc_c1hermite elements for this level
04579       scaling *= 2;  // double grid spacing and cutoff at each iteration
04580 
04581     } // end loop over levels
04582 
04583     if (toplevel < nlevels) {
04584       Vector hmu = scaling * hu;
04585       Vector hmv = scaling * hv;
04586       Vector hmw = scaling * hw;
04587       BigReal am = scaling * a;
04588 
04589       // nonperiodic along all basis vector directions
04590       // calculate top level weights where all grid points
04591       // interact with each other
04592       ni = map.gridrange[toplevel].ni();
04593       nj = map.gridrange[toplevel].nj();
04594       nk = map.gridrange[toplevel].nk();
04595       map.gc_c1hermite[toplevel].setbounds(-ni, ni, -nj, nj, -nk, nk);
04596 
04597       for (k = -nk;  k <= nk;  k++) {
04598         for (j = -nj;  j <= nj;  j++) {
04599           for (i = -ni;  i <= ni;  i++) {
04600             C1Matrix& m = map.gc_c1hermite[level](i,j,k);
04601             Vector rv = i*hmu + j*hmv + k*hmw;
04602             m.set(0);
04603             // accumulate D( g_{a}(0,r) ) term for this level
04604             gc_c1hermite_elem_accum(m, 1, rv, am, split);
04605           }
04606         }
04607       } // end loop over gc_c1hermite elements for top level
04608 
04609     } // end if top level
04610 
04611     // C1 Hermite restriction and prolongation stencils
04612     map.gres_c1hermite.resize(nlevels-1);
04613     map.gpro_c1hermite.resize(nlevels-1);
04614 
04615     enum {
04616       ND = 3,    // stencil diameter
04617       NR = ND/2  // stencil radius
04618     };
04619 
04620     // the master basis functions PHI0 and PHI1 for the 3-point stencil
04621     // and their derivatives DPHI0 and DPHI1
04622     const double  PHI0[ND] = { 0.5, 1, 0.5 };
04623     const double DPHI0[ND] = { 1.5, 0, -1.5 };
04624     const double  PHI1[ND] = { -0.125, 0, 0.125 };
04625     const double DPHI1[ND] = { -0.25, 1, -0.25 };
04626 
04627     // for intermediate calculations
04628     double  xphi0_base_array[ND];
04629     double dxphi0_base_array[ND];
04630     double  yphi0_base_array[ND];
04631     double dyphi0_base_array[ND];
04632     double  zphi0_base_array[ND];
04633     double dzphi0_base_array[ND];
04634     double  xphi1_base_array[ND];
04635     double dxphi1_base_array[ND];
04636     double  yphi1_base_array[ND];
04637     double dyphi1_base_array[ND];
04638     double  zphi1_base_array[ND];
04639     double dzphi1_base_array[ND];
04640     // will point to center of stencil arrays
04641     double *xphi0, *dxphi0, *xphi1, *dxphi1;
04642     double *yphi0, *dyphi0, *yphi1, *dyphi1;
04643     double *zphi0, *dzphi0, *zphi1, *dzphi1;
04644 
04645     for (n = 0;  n < ND;  n++) {
04646       xphi0_base_array[n]  = PHI0[n];
04647       dxphi0_base_array[n] = hxlen_1 * DPHI0[n];  // scale by grid spacing
04648       xphi1_base_array[n]  = hxlen * PHI1[n];     // scale by grid spacing
04649       dxphi1_base_array[n] = DPHI1[n];
04650       yphi0_base_array[n]  = PHI0[n];
04651       dyphi0_base_array[n] = hylen_1 * DPHI0[n];  // scale by grid spacing
04652       yphi1_base_array[n]  = hylen * PHI1[n];     // scale by grid spacing
04653       dyphi1_base_array[n] = DPHI1[n];
04654       zphi0_base_array[n]  = PHI0[n];
04655       dzphi0_base_array[n] = hzlen_1 * DPHI0[n];  // scale by grid spacing
04656       zphi1_base_array[n]  = hzlen * PHI1[n];     // scale by grid spacing
04657       dzphi1_base_array[n] = DPHI1[n];
04658     }
04659     xphi0  =  xphi0_base_array + NR;  // point into center of arrays
04660     dxphi0 = dxphi0_base_array + NR;
04661     xphi1  =  xphi1_base_array + NR;
04662     dxphi1 = dxphi1_base_array + NR;
04663     yphi0  =  yphi0_base_array + NR;
04664     dyphi0 = dyphi0_base_array + NR;
04665     yphi1  =  yphi1_base_array + NR;
04666     dyphi1 = dyphi1_base_array + NR;
04667     zphi0  =  zphi0_base_array + NR;
04668     dzphi0 = dzphi0_base_array + NR;
04669     zphi1  =  zphi1_base_array + NR;
04670     dzphi1 = dzphi1_base_array + NR;
04671 
04672     for (level = 0;  level < nlevels-1;  level++) {
04673       // allocate space for restriction and prolongation stencils
04674       map.gres_c1hermite[level].set(0, ND, 0, ND, 0, ND);
04675       map.gpro_c1hermite[level].set(0, ND, 0, ND, 0, ND);
04676 
04677       // scale up to next level grid spacing
04678       //
04679       // have to determine for each dimension whether or not 
04680       // a periodic grid spacing has increased 
04681       // (equivalent to if there are fewer grid points)
04682       for (n = -NR;  n <= NR;  n++) {
04683         if ( ! ispx ||
04684               map.gridrange[level+1].ni() < map.gridrange[level].ni() ) {
04685           dxphi0[n] *= 0.5;
04686           xphi1[n] *= 2;
04687         }
04688         if ( ! ispy ||
04689               map.gridrange[level+1].nj() < map.gridrange[level].nj() ) {
04690           dyphi0[n] *= 0.5;
04691           yphi1[n] *= 2;
04692         }
04693         if ( ! ispz ||
04694               map.gridrange[level+1].nk() < map.gridrange[level].nk() ) {
04695           dzphi0[n] *= 0.5;
04696           zphi1[n] *= 2;
04697         }
04698       }
04699 
04700       // loop over restriction stencil matrices
04701       // calculate from partial derivatives
04702       for (k = -NR;  k <= NR;  k++) {
04703         for (j = -NR;  j <= NR;  j++) {
04704           for (i = -NR;  i <= NR;  i++) {
04705             Float *t = map.gres_c1hermite[level](i+NR,j+NR,k+NR).melem;
04706 
04707             t[C1INDEX(D000,D000)] =  xphi0[i] *  yphi0[j]  *  zphi0[k];
04708             t[C1INDEX(D000,D100)] = dxphi0[i] *  yphi0[j]  *  zphi0[k];
04709             t[C1INDEX(D000,D010)] =  xphi0[i] * dyphi0[j]  *  zphi0[k];
04710             t[C1INDEX(D000,D001)] =  xphi0[i] *  yphi0[j]  * dzphi0[k];
04711             t[C1INDEX(D000,D110)] = dxphi0[i] * dyphi0[j]  *  zphi0[k];
04712             t[C1INDEX(D000,D101)] = dxphi0[i] *  yphi0[j]  * dzphi0[k];
04713             t[C1INDEX(D000,D011)] =  xphi0[i] * dyphi0[j]  * dzphi0[k];
04714             t[C1INDEX(D000,D111)] = dxphi0[i] * dyphi0[j]  * dzphi0[k];
04715 
04716             t[C1INDEX(D100,D000)] =  xphi1[i] *  yphi0[j]  *  zphi0[k];
04717             t[C1INDEX(D100,D100)] = dxphi1[i] *  yphi0[j]  *  zphi0[k];
04718             t[C1INDEX(D100,D010)] =  xphi1[i] * dyphi0[j]  *  zphi0[k];
04719             t[C1INDEX(D100,D001)] =  xphi1[i] *  yphi0[j]  * dzphi0[k];
04720             t[C1INDEX(D100,D110)] = dxphi1[i] * dyphi0[j]  *  zphi0[k];
04721             t[C1INDEX(D100,D101)] = dxphi1[i] *  yphi0[j]  * dzphi0[k];
04722             t[C1INDEX(D100,D011)] =  xphi1[i] * dyphi0[j]  * dzphi0[k];
04723             t[C1INDEX(D100,D111)] = dxphi1[i] * dyphi0[j]  * dzphi0[k];
04724 
04725             t[C1INDEX(D010,D000)] =  xphi0[i] *  yphi1[j]  *  zphi0[k];
04726             t[C1INDEX(D010,D100)] = dxphi0[i] *  yphi1[j]  *  zphi0[k];
04727             t[C1INDEX(D010,D010)] =  xphi0[i] * dyphi1[j]  *  zphi0[k];
04728             t[C1INDEX(D010,D001)] =  xphi0[i] *  yphi1[j]  * dzphi0[k];
04729             t[C1INDEX(D010,D110)] = dxphi0[i] * dyphi1[j]  *  zphi0[k];
04730             t[C1INDEX(D010,D101)] = dxphi0[i] *  yphi1[j]  * dzphi0[k];
04731             t[C1INDEX(D010,D011)] =  xphi0[i] * dyphi1[j]  * dzphi0[k];
04732             t[C1INDEX(D010,D111)] = dxphi0[i] * dyphi1[j]  * dzphi0[k];
04733 
04734             t[C1INDEX(D001,D000)] =  xphi0[i] *  yphi0[j]  *  zphi1[k];
04735             t[C1INDEX(D001,D100)] = dxphi0[i] *  yphi0[j]  *  zphi1[k];
04736             t[C1INDEX(D001,D010)] =  xphi0[i] * dyphi0[j]  *  zphi1[k];
04737             t[C1INDEX(D001,D001)] =  xphi0[i] *  yphi0[j]  * dzphi1[k];
04738             t[C1INDEX(D001,D110)] = dxphi0[i] * dyphi0[j]  *  zphi1[k];
04739             t[C1INDEX(D001,D101)] = dxphi0[i] *  yphi0[j]  * dzphi1[k];
04740             t[C1INDEX(D001,D011)] =  xphi0[i] * dyphi0[j]  * dzphi1[k];
04741             t[C1INDEX(D001,D111)] = dxphi0[i] * dyphi0[j]  * dzphi1[k];
04742 
04743             t[C1INDEX(D110,D000)] =  xphi1[i] *  yphi1[j]  *  zphi0[k];
04744             t[C1INDEX(D110,D100)] = dxphi1[i] *  yphi1[j]  *  zphi0[k];
04745             t[C1INDEX(D110,D010)] =  xphi1[i] * dyphi1[j]  *  zphi0[k];
04746             t[C1INDEX(D110,D001)] =  xphi1[i] *  yphi1[j]  * dzphi0[k];
04747             t[C1INDEX(D110,D110)] = dxphi1[i] * dyphi1[j]  *  zphi0[k];
04748             t[C1INDEX(D110,D101)] = dxphi1[i] *  yphi1[j]  * dzphi0[k];
04749             t[C1INDEX(D110,D011)] =  xphi1[i] * dyphi1[j]  * dzphi0[k];
04750             t[C1INDEX(D110,D111)] = dxphi1[i] * dyphi1[j]  * dzphi0[k];
04751 
04752             t[C1INDEX(D101,D000)] =  xphi1[i] *  yphi0[j]  *  zphi1[k];
04753             t[C1INDEX(D101,D100)] = dxphi1[i] *  yphi0[j]  *  zphi1[k];
04754             t[C1INDEX(D101,D010)] =  xphi1[i] * dyphi0[j]  *  zphi1[k];
04755             t[C1INDEX(D101,D001)] =  xphi1[i] *  yphi0[j]  * dzphi1[k];
04756             t[C1INDEX(D101,D110)] = dxphi1[i] * dyphi0[j]  *  zphi1[k];
04757             t[C1INDEX(D101,D101)] = dxphi1[i] *  yphi0[j]  * dzphi1[k];
04758             t[C1INDEX(D101,D011)] =  xphi1[i] * dyphi0[j]  * dzphi1[k];
04759             t[C1INDEX(D101,D111)] = dxphi1[i] * dyphi0[j]  * dzphi1[k];
04760 
04761             t[C1INDEX(D011,D000)] =  xphi0[i] *  yphi1[j]  *  zphi1[k];
04762             t[C1INDEX(D011,D100)] = dxphi0[i] *  yphi1[j]  *  zphi1[k];
04763             t[C1INDEX(D011,D010)] =  xphi0[i] * dyphi1[j]  *  zphi1[k];
04764             t[C1INDEX(D011,D001)] =  xphi0[i] *  yphi1[j]  * dzphi1[k];
04765             t[C1INDEX(D011,D110)] = dxphi0[i] * dyphi1[j]  *  zphi1[k];
04766             t[C1INDEX(D011,D101)] = dxphi0[i] *  yphi1[j]  * dzphi1[k];
04767             t[C1INDEX(D011,D011)] =  xphi0[i] * dyphi1[j]  * dzphi1[k];
04768             t[C1INDEX(D011,D111)] = dxphi0[i] * dyphi1[j]  * dzphi1[k];
04769 
04770             t[C1INDEX(D111,D000)] =  xphi1[i] *  yphi1[j]  *  zphi1[k];
04771             t[C1INDEX(D111,D100)] = dxphi1[i] *  yphi1[j]  *  zphi1[k];
04772             t[C1INDEX(D111,D010)] =  xphi1[i] * dyphi1[j]  *  zphi1[k];
04773             t[C1INDEX(D111,D001)] =  xphi1[i] *  yphi1[j]  * dzphi1[k];
04774             t[C1INDEX(D111,D110)] = dxphi1[i] * dyphi1[j]  *  zphi1[k];
04775             t[C1INDEX(D111,D101)] = dxphi1[i] *  yphi1[j]  * dzphi1[k];
04776             t[C1INDEX(D111,D011)] =  xphi1[i] * dyphi1[j]  * dzphi1[k];
04777             t[C1INDEX(D111,D111)] = dxphi1[i] * dyphi1[j]  * dzphi1[k];
04778           }
04779         }
04780       } // end loops over restriction stencil matrices
04781 
04782       // loop over prolongation stencil matrices
04783       // prolongation stencil matrices are the transpose of restriction
04784       for (k = -NR;  k <= NR;  k++) {
04785         for (j = -NR;  j <= NR;  j++) {
04786           for (i = -NR;  i <= NR;  i++) {
04787             Float *t = map.gres_c1hermite[level](i+NR,j+NR,k+NR).melem;
04788             Float *tt = map.gpro_c1hermite[level](i+NR,j+NR,k+NR).melem;
04789 
04790             tt[C1INDEX(D000,D000)] = t[C1INDEX(D000,D000)];
04791             tt[C1INDEX(D000,D100)] = t[C1INDEX(D100,D000)];
04792             tt[C1INDEX(D000,D010)] = t[C1INDEX(D010,D000)];
04793             tt[C1INDEX(D000,D001)] = t[C1INDEX(D001,D000)];
04794             tt[C1INDEX(D000,D110)] = t[C1INDEX(D110,D000)];
04795             tt[C1INDEX(D000,D101)] = t[C1INDEX(D101,D000)];
04796             tt[C1INDEX(D000,D011)] = t[C1INDEX(D011,D000)];
04797             tt[C1INDEX(D000,D111)] = t[C1INDEX(D111,D000)];
04798 
04799             tt[C1INDEX(D100,D000)] = t[C1INDEX(D000,D100)];
04800             tt[C1INDEX(D100,D100)] = t[C1INDEX(D100,D100)];
04801             tt[C1INDEX(D100,D010)] = t[C1INDEX(D010,D100)];
04802             tt[C1INDEX(D100,D001)] = t[C1INDEX(D001,D100)];
04803             tt[C1INDEX(D100,D110)] = t[C1INDEX(D110,D100)];
04804             tt[C1INDEX(D100,D101)] = t[C1INDEX(D101,D100)];
04805             tt[C1INDEX(D100,D011)] = t[C1INDEX(D011,D100)];
04806             tt[C1INDEX(D100,D111)] = t[C1INDEX(D111,D100)];
04807 
04808             tt[C1INDEX(D010,D000)] = t[C1INDEX(D000,D010)];
04809             tt[C1INDEX(D010,D100)] = t[C1INDEX(D100,D010)];
04810             tt[C1INDEX(D010,D010)] = t[C1INDEX(D010,D010)];
04811             tt[C1INDEX(D010,D001)] = t[C1INDEX(D001,D010)];
04812             tt[C1INDEX(D010,D110)] = t[C1INDEX(D110,D010)];
04813             tt[C1INDEX(D010,D101)] = t[C1INDEX(D101,D010)];
04814             tt[C1INDEX(D010,D011)] = t[C1INDEX(D011,D010)];
04815             tt[C1INDEX(D010,D111)] = t[C1INDEX(D111,D010)];
04816 
04817             tt[C1INDEX(D001,D000)] = t[C1INDEX(D000,D001)];
04818             tt[C1INDEX(D001,D100)] = t[C1INDEX(D100,D001)];
04819             tt[C1INDEX(D001,D010)] = t[C1INDEX(D010,D001)];
04820             tt[C1INDEX(D001,D001)] = t[C1INDEX(D001,D001)];
04821             tt[C1INDEX(D001,D110)] = t[C1INDEX(D110,D001)];
04822             tt[C1INDEX(D001,D101)] = t[C1INDEX(D101,D001)];
04823             tt[C1INDEX(D001,D011)] = t[C1INDEX(D011,D001)];
04824             tt[C1INDEX(D001,D111)] = t[C1INDEX(D111,D001)];
04825 
04826             tt[C1INDEX(D110,D000)] = t[C1INDEX(D000,D110)];
04827             tt[C1INDEX(D110,D100)] = t[C1INDEX(D100,D110)];
04828             tt[C1INDEX(D110,D010)] = t[C1INDEX(D010,D110)];
04829             tt[C1INDEX(D110,D001)] = t[C1INDEX(D001,D110)];
04830             tt[C1INDEX(D110,D110)] = t[C1INDEX(D110,D110)];
04831             tt[C1INDEX(D110,D101)] = t[C1INDEX(D101,D110)];
04832             tt[C1INDEX(D110,D011)] = t[C1INDEX(D011,D110)];
04833             tt[C1INDEX(D110,D111)] = t[C1INDEX(D111,D110)];
04834 
04835             tt[C1INDEX(D101,D000)] = t[C1INDEX(D000,D101)];
04836             tt[C1INDEX(D101,D100)] = t[C1INDEX(D100,D101)];
04837             tt[C1INDEX(D101,D010)] = t[C1INDEX(D010,D101)];
04838             tt[C1INDEX(D101,D001)] = t[C1INDEX(D001,D101)];
04839             tt[C1INDEX(D101,D110)] = t[C1INDEX(D110,D101)];
04840             tt[C1INDEX(D101,D101)] = t[C1INDEX(D101,D101)];
04841             tt[C1INDEX(D101,D011)] = t[C1INDEX(D011,D101)];
04842             tt[C1INDEX(D101,D111)] = t[C1INDEX(D111,D101)];
04843 
04844             tt[C1INDEX(D011,D000)] = t[C1INDEX(D000,D011)];
04845             tt[C1INDEX(D011,D100)] = t[C1INDEX(D100,D011)];
04846             tt[C1INDEX(D011,D010)] = t[C1INDEX(D010,D011)];
04847             tt[C1INDEX(D011,D001)] = t[C1INDEX(D001,D011)];
04848             tt[C1INDEX(D011,D110)] = t[C1INDEX(D110,D011)];
04849             tt[C1INDEX(D011,D101)] = t[C1INDEX(D101,D011)];
04850             tt[C1INDEX(D011,D011)] = t[C1INDEX(D011,D011)];
04851             tt[C1INDEX(D011,D111)] = t[C1INDEX(D111,D011)];
04852 
04853             tt[C1INDEX(D111,D000)] = t[C1INDEX(D000,D111)];
04854             tt[C1INDEX(D111,D100)] = t[C1INDEX(D100,D111)];
04855             tt[C1INDEX(D111,D010)] = t[C1INDEX(D010,D111)];
04856             tt[C1INDEX(D111,D001)] = t[C1INDEX(D001,D111)];
04857             tt[C1INDEX(D111,D110)] = t[C1INDEX(D110,D111)];
04858             tt[C1INDEX(D111,D101)] = t[C1INDEX(D101,D111)];
04859             tt[C1INDEX(D111,D011)] = t[C1INDEX(D011,D111)];
04860             tt[C1INDEX(D111,D111)] = t[C1INDEX(D111,D111)];
04861           }
04862         }
04863       } // end loops over prolongation stencil matrices
04864 
04865     } // end loop over levels
04866 
04867   } // end if C1HERMITE
04868 
04869   // calculate self energy factor for splitting
04870   BigReal gs=0, d=0;
04871   splitting(gs, d, 0, split);
04872   gzero = gs * a_p;
04873 
04874   if (CkMyPe() == 0) {
04875     iout << iINFO << "MSM finished calculating stencils\n" << endi;
04876   }
04877 
04878   // allocate map for patches
04879   PatchMap *pm = PatchMap::Object();
04880   int numpatches = pm->numPatches();
04881   map.patchList.resize(numpatches);
04882 #ifdef DEBUG_MSM_VERBOSE
04883   printf("numPatches = %d\n", numpatches);
04884 #endif
04885 
04886   // allocate map for blocks for each grid level
04887   map.blockLevel.resize(nlevels);
04888   map.bsx.resize(nlevels);
04889   map.bsy.resize(nlevels);
04890   map.bsz.resize(nlevels);
04891 #ifdef MSM_FOLD_FACTOR
04892   map.foldfactor.resize(nlevels);
04893 #endif
04894   for (level = 0;  level < nlevels;  level++) {
04895     msm::IndexRange& g = map.gridrange[level];
04896     msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
04897     int gia = g.ia();
04898     int gni = g.ni();
04899     int gja = g.ja();
04900     int gnj = g.nj();
04901     int gka = g.ka();
04902     int gnk = g.nk();
04903     map.bsx[level] = bsx;
04904     map.bsy[level] = bsy;
04905     map.bsz[level] = bsz;
04906     if (/* map.bsx[level] < gni ||
04907         map.bsy[level] < gnj ||
04908         map.bsz[level] < gnk */ 1) {
04909       // make sure that block sizes divide evenly into periodic dimensions
04910       if (ispx) setup_periodic_blocksize(map.bsx[level], gni);
04911       if (ispy) setup_periodic_blocksize(map.bsy[level], gnj);
04912       if (ispz) setup_periodic_blocksize(map.bsz[level], gnk);
04913 #ifdef MSM_DEBUG_VERBOSE
04914       if (CkMyPe() == 0) {
04915         printf("level = %d\n  map.bs* = %d %d %d  gn* = %d %d %d\n",
04916             level, map.bsx[level], map.bsy[level], map.bsz[level],gni,gnj,gnk);
04917       }
04918 #endif
04919       // subdivide grid into multiple blocks
04920       //   == ceil(gni / bsx), etc.
04921       int bni = (gni / map.bsx[level]) + (gni % map.bsx[level] != 0);
04922       int bnj = (gnj / map.bsy[level]) + (gnj % map.bsy[level] != 0);
04923       int bnk = (gnk / map.bsz[level]) + (gnk % map.bsz[level] != 0);
04924 #ifdef MSM_FOLD_FACTOR
04925       if (/* level > 2 && */ (bni == 1 || bnj == 1 || bnk == 1)) {
04926         map.foldfactor[level].set(bsx / gni, bsy / gnj, bsz / gnk);
04927 #if 0
04928         if (CkMyPe() == 0) {
04929           printf("Setting MSM FoldFactor level %d:  %d %d %d\n",
04930               level, bsx / gni, bsy / gnj, bsz / gnk);
04931         }
04932 #endif
04933       }
04934 #endif
04935       b.set(0, bni, 0, bnj, 0, bnk);
04936       for (k = 0;  k < bnk;  k++) {
04937         for (j = 0;  j < bnj;  j++) {
04938           for (i = 0;  i < bni;  i++) {
04939             b(i,j,k).reset();
04940             int ia = gia + i*map.bsx[level];
04941             int ib = ia + map.bsx[level] - 1;
04942             int ja = gja + j*map.bsy[level];
04943             int jb = ja + map.bsy[level] - 1;
04944             int ka = gka + k*map.bsz[level];
04945             int kb = ka + map.bsz[level] - 1;
04946             if (ib >= gia + gni) ib = gia + gni - 1;
04947             if (jb >= gja + gnj) jb = gja + gnj - 1;
04948             if (kb >= gka + gnk) kb = gka + gnk - 1;
04949             b(i,j,k).nrange.setbounds(ia, ib, ja, jb, ka, kb);
04950           }
04951         }
04952       }
04953     }
04954     /*
04955     else {
04956       // make entire grid into single block
04957       b.set(0, 1, 0, 1, 0, 1);
04958       b(0,0,0).reset();
04959       b(0,0,0).nrange.set(gia, gni, gja, gnj, gka, gnk);
04960       // set correct block dimensions
04961       map.bsx[level] = gni;
04962       map.bsy[level] = gnj;
04963       map.bsz[level] = gnk;
04964     }
04965     */
04966   }
04967   //CkExit();
04968 #ifdef DEBUG_MSM_VERBOSE
04969   printf("Done allocating map for grid levels\n");
04970   printf("Grid level decomposition:\n");
04971   for (level = 0;  level < nlevels;  level++) {
04972     msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
04973     int bia = b.ia();
04974     int bib = b.ib();
04975     int bja = b.ja();
04976     int bjb = b.jb();
04977     int bka = b.ka();
04978     int bkb = b.kb();
04979     for (k = bka;  k <= bkb;  k++) {
04980       for (j = bja;  j <= bjb;  j++) {
04981         for (i = bia;  i <= bib;  i++) {
04982           int ia = b(i,j,k).nrange.ia();
04983           int ib = b(i,j,k).nrange.ib();
04984           int ja = b(i,j,k).nrange.ja();
04985           int jb = b(i,j,k).nrange.jb();
04986           int ka = b(i,j,k).nrange.ka();
04987           int kb = b(i,j,k).nrange.kb();
04988           printf("level=%d  id=%d %d %d  [%d..%d] x [%d..%d] x [%d..%d]"
04989               " --> %d\n",
04990               level, i, j, k, ia, ib, ja, jb, ka, kb,
04991               b(i,j,k).nrange.nn());
04992         }
04993       }
04994     }
04995   }
04996 #endif
04997   if (CkMyPe() == 0) {
04998     iout << iINFO << "MSM finished creating map for grid levels\n" << endi;
04999   }
05000 
05001   initialize2();
05002 } // ComputeMsmMgr::initialize()
05003 
05004 
05005 void ComputeMsmMgr::initialize2()
05006 {
05007   SimParameters *simParams = Node::Object()->simParameters;
05008   PatchMap *pm = PatchMap::Object();
05009   int numpatches = pm->numPatches();
05010   int i, j, k, n, level;
05011 
05012   // initialize grid of PatchDiagram
05013   // a = cutoff
05014   BigReal sysdima = lattice.a_r().unit() * lattice.a();
05015   BigReal sysdimb = lattice.b_r().unit() * lattice.b();
05016   BigReal sysdimc = lattice.c_r().unit() * lattice.c();
05017   BigReal patchdim = simParams->patchDimension;
05018   BigReal xmargin = 0.5 * (patchdim - a) / sysdima;
05019   BigReal ymargin = 0.5 * (patchdim - a) / sysdimb;
05020   BigReal zmargin = 0.5 * (patchdim - a) / sysdimc;
05021 #if 0
05022   // set min and max grid indices for patch covering
05023   // for non-periodic boundaries they conform to grid
05024   // periodic permits wrapping, so set to min/max for int
05025   int ia_min = (lattice.a_p() ? INT_MIN : map.gridrange[0].ia());
05026   int ib_max = (lattice.a_p() ? INT_MAX : map.gridrange[0].ib());
05027   int ja_min = (lattice.b_p() ? INT_MIN : map.gridrange[0].ja());
05028   int jb_max = (lattice.b_p() ? INT_MAX : map.gridrange[0].jb());
05029   int ka_min = (lattice.c_p() ? INT_MIN : map.gridrange[0].ka());
05030   int kb_max = (lattice.c_p() ? INT_MAX : map.gridrange[0].kb());
05031 #endif
05032   int pid;
05033   for (pid = 0;  pid < numpatches;  pid++) {
05034     // shortcut reference to this patch diagram
05035     msm::PatchDiagram& p = map.patchList[pid];
05036     p.reset();
05037     // find extent of patch including margin
05038     BigReal xmin = pm->min_a(pid) - xmargin;
05039     BigReal xmax = pm->max_a(pid) + xmargin;
05040     BigReal ymin = pm->min_b(pid) - ymargin;
05041     BigReal ymax = pm->max_b(pid) + ymargin;
05042     BigReal zmin = pm->min_c(pid) - zmargin;
05043     BigReal zmax = pm->max_c(pid) + zmargin;
05044     // find grid point covering of patch plus outer edge stencil
05045     int ia = int(floor((xmin - sglower.x) * shx_1)) - s_edge;
05046     int ib = int(floor((xmax - sglower.x) * shx_1)) + 1 + s_edge;
05047     int ja = int(floor((ymin - sglower.y) * shy_1)) - s_edge;
05048     int jb = int(floor((ymax - sglower.y) * shy_1)) + 1 + s_edge;
05049     int ka = int(floor((zmin - sglower.z) * shz_1)) - s_edge;
05050     int kb = int(floor((zmax - sglower.z) * shz_1)) + 1 + s_edge;
05051     // for edge patches along non-periodic boundaries
05052     // clamp subgrid to full grid boundaries
05053     if ( ! lattice.a_p() ) {  // non-periodic along lattice basis vector a
05054       int mi = pm->index_a(pid);
05055       if (mi == 0)                   ia = map.gridrange[0].ia();
05056       if (mi == pm->gridsize_a()-1)  ib = map.gridrange[0].ib();
05057     }
05058     if ( ! lattice.b_p() ) {  // non-periodic along lattice basis vector b
05059       int mj = pm->index_b(pid);
05060       if (mj == 0)                   ja = map.gridrange[0].ja();
05061       if (mj == pm->gridsize_b()-1)  jb = map.gridrange[0].jb();
05062     }
05063     if ( ! lattice.c_p() ) {  // non-periodic along lattice basis vector c
05064       int mk = pm->index_c(pid);
05065       if (mk == 0)                   ka = map.gridrange[0].ka();
05066       if (mk == pm->gridsize_c()-1)  kb = map.gridrange[0].kb();
05067     }
05068 #if 0
05069     // truncate subgrid covering to grid min/max
05070     // so that subgrid does not extend beyond full grid
05071     // works for both periodic and non-periodic boundary conditions
05072     if (ia < ia_min)  ia = ia_min;
05073     if (ib > ib_max)  ib = ib_max;
05074     if (ja < ja_min)  ja = ja_min;
05075     if (jb > jb_max)  jb = jb_max;
05076     if (ka < ka_min)  ka = ka_min;
05077     if (kb > kb_max)  kb = kb_max;
05078     // check for edge patch and extend subgrid to grid min/max
05079     // so that subgrid fully covers up to the edge of full grid
05080     int mi = pm->index_a(pid);
05081     int mj = pm->index_b(pid);
05082     int mk = pm->index_c(pid);
05083     int npi = pm->gridsize_a();
05084     int npj = pm->gridsize_b();
05085     int npk = pm->gridsize_c();
05086     if (mi == 0)      ia = ia_min;
05087     if (mi == npi-1)  ib = ib_max;
05088     if (mj == 0)      ja = ja_min;
05089     if (mj == npj-1)  jb = jb_max;
05090     if (mk == 0)      ka = ka_min;
05091     if (mk == npk-1)  kb = kb_max;
05092 #endif
05093 #if 0
05094     printf("patch %d:  grid covering:  [%d..%d] x [%d..%d] x [%d..%d]\n",
05095         pid, ia, ib, ja, jb, ka, kb);
05096     fflush(stdout);
05097 #endif
05098     // set the index range for this patch's surrounding grid points
05099     p.nrange.setbounds(ia,ib,ja,jb,ka,kb);
05100     // find lower and upper blocks of MSM h-grid
05101     msm::BlockIndex blower = map.blockOfGridIndex(msm::Ivec(ia,ja,ka),0);
05102     msm::BlockIndex bupper = map.blockOfGridIndex(msm::Ivec(ib,jb,kb),0);
05103     int maxarrlen = (bupper.n.i - blower.n.i + 1) *
05104       (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
05105     p.send.setmax(maxarrlen);  // allocate space for send array
05106     // loop over the blocks
05107 #if 0
05108     printf("blower: level=%d  n=%d %d %d   bupper: level=%d  n=%d %d %d\n",
05109         blower.level, blower.n.i, blower.n.j, blower.n.k,
05110         bupper.level, bupper.n.i, bupper.n.j, bupper.n.k);
05111     fflush(stdout);
05112 #endif
05113     for (int kk = blower.n.k;  kk <= bupper.n.k;  kk++) {
05114       for (int jj = blower.n.j;  jj <= bupper.n.j;  jj++) {
05115         for (int ii = blower.n.i;  ii <= bupper.n.i;  ii++) {
05116 #if 0
05117           printf("ii=%d  jj=%d  kk=%d\n", ii, jj, kk);
05118           fflush(stdout);
05119 #endif
05120           // determine actual block and range to send to
05121           msm::BlockSend bs;
05122           bs.nblock.n = msm::Ivec(ii,jj,kk);
05123           bs.nblock.level = 0;
05124           bs.nrange = map.clipBlockToIndexRange(bs.nblock, p.nrange);
05125           map.wrapBlockSend(bs);  // determine wrapping to true block index
05126           p.send.append(bs);  // append this block to the send array
05127           // increment counter for receive block
05128           map.blockLevel[0](bs.nblock_wrap.n).numRecvsCharge++;
05129           // initialize patch send back from this block
05130           msm::PatchSend ps;
05131           ps.nrange = bs.nrange_wrap;
05132           ps.nrange_unwrap = bs.nrange;
05133           ps.patchID = pid;
05134           map.blockLevel[0](bs.nblock_wrap.n).sendPatch.append(ps);
05135           // increment number of receives back to this patch
05136           p.numRecvs++;
05137         }
05138       }
05139     }
05140     // number of receives should be same as number of sends
05141     ASSERT(p.numRecvs == p.send.len() );
05142   }
05143 #ifdef DEBUG_MSM_VERBOSE
05144 if (CkMyPe() == 0) {
05145   printf("Done allocating map for patches\n");
05146   printf("Patch level decomposition:\n");
05147   for (pid = 0;  pid < numpatches;  pid++) {
05148     msm::PatchDiagram& p = map.patchList[pid];
05149     int ia = p.nrange.ia();
05150     int ib = p.nrange.ib();
05151     int ja = p.nrange.ja();
05152     int jb = p.nrange.jb();
05153     int ka = p.nrange.ka();
05154     int kb = p.nrange.kb();
05155     printf("patch id=%d  [%d..%d] x [%d..%d] x [%d..%d]\n",
05156         pid, ia, ib, ja, jb, ka, kb);
05157   }
05158 }
05159 #endif
05160   if (CkMyPe() == 0) {
05161     iout << iINFO << "MSM finished creating map for patches\n" << endi;
05162   }
05163 
05164   // initialize grid of BlockDiagram for each level
05165   int polydeg = PolyDegree[approx];
05166   numGridCutoff = 0;
05167   for (level = 0;  level < nlevels;  level++) {
05168     msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
05169     int bni = b.ni();
05170     int bnj = b.nj();
05171     int bnk = b.nk();
05172 #ifdef MSM_SKIP_BEYOND_SPHERE
05173     int gia, gib, gja, gjb, gka, gkb;
05174     if (approx == C1HERMITE) {
05175       gia = map.gc_c1hermite[level].ia();
05176       gib = map.gc_c1hermite[level].ib();
05177       gja = map.gc_c1hermite[level].ja();
05178       gjb = map.gc_c1hermite[level].jb();
05179       gka = map.gc_c1hermite[level].ka();
05180       gkb = map.gc_c1hermite[level].kb();
05181     }
05182     else {
05183       gia = map.gc[level].ia();
05184       gib = map.gc[level].ib();
05185       gja = map.gc[level].ja();
05186       gjb = map.gc[level].jb();
05187       gka = map.gc[level].ka();
05188       gkb = map.gc[level].kb();
05189     }
05190 #endif
05191 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS
05192     int bsx = map.bsx[level];
05193     int bsy = map.bsy[level];
05194     int bsz = map.bsz[level];
05195 #endif
05196 #ifdef MSM_FOLD_FACTOR
05197     if (map.foldfactor[level].active) {
05198       bsx *= map.foldfactor[level].numrep.i;
05199       bsy *= map.foldfactor[level].numrep.j;
05200       bsz *= map.foldfactor[level].numrep.k;
05201     }
05202 #endif
05203     for (k = 0;  k < bnk;  k++) {
05204       for (j = 0;  j < bnj;  j++) {
05205         for (i = 0;  i < bni;  i++) {
05206 
05207           // Grid cutoff calculation, sendAcross
05208           int ia = b(i,j,k).nrange.ia();
05209           int ib = b(i,j,k).nrange.ib();
05210           int ja = b(i,j,k).nrange.ja();
05211           int jb = b(i,j,k).nrange.jb();
05212           int ka = b(i,j,k).nrange.ka();
05213           int kb = b(i,j,k).nrange.kb();
05214           if (approx == C1HERMITE) {
05215             ia += map.gc_c1hermite[level].ia();
05216             ib += map.gc_c1hermite[level].ib();
05217             ja += map.gc_c1hermite[level].ja();
05218             jb += map.gc_c1hermite[level].jb();
05219             ka += map.gc_c1hermite[level].ka();
05220             kb += map.gc_c1hermite[level].kb();
05221           }
05222           else {
05223             ia += map.gc[level].ia();
05224             ib += map.gc[level].ib();
05225             ja += map.gc[level].ja();
05226             jb += map.gc[level].jb();
05227             ka += map.gc[level].ka();
05228             kb += map.gc[level].kb();
05229           }
05230           msm::Ivec na = map.clipIndexToLevel(msm::Ivec(ia,ja,ka), level);
05231           msm::Ivec nb = map.clipIndexToLevel(msm::Ivec(ib,jb,kb), level);
05232           b(i,j,k).nrangeCutoff.setbounds(na.i, nb.i, na.j, nb.j, na.k, nb.k);
05233           // determine sendAcross blocks
05234 #ifdef MSM_FOLD_FACTOR
05235           msm::BlockIndex blower = map.blockOfGridIndexFold(na, level);
05236           msm::BlockIndex bupper = map.blockOfGridIndexFold(nb, level);
05237 #else
05238           msm::BlockIndex blower = map.blockOfGridIndex(na, level);
05239           msm::BlockIndex bupper = map.blockOfGridIndex(nb, level);
05240 #endif
05241           int maxarrlen = (bupper.n.i - blower.n.i + 1) *
05242             (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
05243           b(i,j,k).sendAcross.setmax(maxarrlen);  // allocate send array
05244           b(i,j,k).indexGridCutoff.setmax(maxarrlen);  // alloc indexing
05245           b(i,j,k).recvGridCutoff.setmax(maxarrlen);  // alloc indexing
05246           // loop over sendAcross blocks
05247           int ii, jj, kk;
05248 #if 0
05249           {
05250             msm::IndexRange& bn = b(i,j,k).nrange;
05251             printf("ME %4d   [%d..%d] x [%d..%d] x [%d..%d]\n",
05252                 bn.nn(),
05253                 bn.ia(), bn.ib(),
05254                 bn.ja(), bn.jb(),
05255                 bn.ka(), bn.kb());
05256           }
05257 #endif
05258           for (kk = blower.n.k;  kk <= bupper.n.k;  kk++) {
05259             for (jj = blower.n.j;  jj <= bupper.n.j;  jj++) {
05260               for (ii = blower.n.i;  ii <= bupper.n.i;  ii++) {
05261 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS
05262                 // make sure that block (ii,jj,kk) interacts with (i,j,k)
05263                 int si = sign(ii-i);
05264                 int sj = sign(jj-j);
05265                 int sk = sign(kk-k);
05266                 int di = (ii-i)*bsx + si*(1-bsx);
05267                 int dj = (jj-j)*bsy + sj*(1-bsy);
05268                 int dk = (kk-k)*bsz + sk*(1-bsz);
05269                 Vector d = di*hu + dj*hv + dk*hw;
05270                 if (d.length2() >= 4*a*a) continue;
05271 #endif
05272                 // determine actual block and range to send to
05273                 msm::BlockSend bs;
05274                 bs.nblock.n = msm::Ivec(ii,jj,kk);
05275                 bs.nblock.level = level;
05276 #ifdef MSM_FOLD_FACTOR
05277                 bs.nrange = map.clipBlockToIndexRangeFold(bs.nblock,
05278                     b(i,j,k).nrangeCutoff);
05279                 map.wrapBlockSendFold(bs);  // wrap to true block index
05280 #else
05281                 bs.nrange = map.clipBlockToIndexRange(bs.nblock,
05282                     b(i,j,k).nrangeCutoff);
05283                 map.wrapBlockSend(bs);  // wrap to true block index
05284 #endif
05285 #ifdef MSM_SKIP_BEYOND_SPHERE
05286 #if 0
05287                 printf("send to volume %4d   [%d..%d] x [%d..%d] x [%d..%d]\n",
05288                     bs.nrange.nn(),
05289                     bs.nrange.ia(), bs.nrange.ib(),
05290                     bs.nrange.ja(), bs.nrange.jb(),
05291                     bs.nrange.ka(), bs.nrange.kb());
05292 #endif
05293                 msm::IndexRange& bm = b(i,j,k).nrange;
05294                 msm::IndexRange& bn = bs.nrange;
05295                 int qia = bm.ia();
05296                 int qib = bm.ib();
05297                 int qja = bm.ja();
05298                 int qjb = bm.jb();
05299                 int qka = bm.ka();
05300                 int qkb = bm.kb();
05301                 int inc_in = (bn.ni() > 1 ? bn.ni()-1 : 1);
05302                 int inc_jn = (bn.nj() > 1 ? bn.nj()-1 : 1);
05303                 int inc_kn = (bn.nk() > 1 ? bn.nk()-1 : 1);
05304                 // loop over corner points of potential grid
05305                 int iscalc = 0;
05306                 for (int kn = bn.ka();  kn <= bn.kb();  kn += inc_kn) {
05307                   for (int jn = bn.ja();  jn <= bn.jb();  jn += inc_jn) {
05308                     for (int in = bn.ia();  in <= bn.ib();  in += inc_in) {
05309                       // clip charges to weights
05310                       int mia = ( qia >= gia + in ? qia : gia + in );
05311                       int mib = ( qib <= gib + in ? qib : gib + in );
05312                       int mja = ( qja >= gja + jn ? qja : gja + jn );
05313                       int mjb = ( qjb <= gjb + jn ? qjb : gjb + jn );
05314                       int mka = ( qka >= gka + kn ? qka : gka + kn );
05315                       int mkb = ( qkb <= gkb + kn ? qkb : gkb + kn );
05316                       int inc_im = (mib-mia > 0 ? mib-mia : 1);
05317                       int inc_jm = (mjb-mja > 0 ? mjb-mja : 1);
05318                       int inc_km = (mkb-mka > 0 ? mkb-mka : 1);
05319 
05320                       // loop over corner points of charge grid
05321                       for (int km = mka;  km <= mkb;  km += inc_km) {
05322                         for (int jm = mja;  jm <= mjb;  jm += inc_jm) {
05323                           for (int im = mia;  im <= mib;  im += inc_im) {
05324 
05325                             Float g;
05326                             if (approx == C1HERMITE) {
05327                               g = map.gc_c1hermite[level](im-in,jm-jn,km-kn).melem[0];
05328                             }
05329                             else {
05330                               g = map.gc[level](im-in,jm-jn,km-kn);
05331                             }
05332                             iscalc |= (g != 0);
05333                           }
05334                         }
05335                       }
05336 
05337                     }
05338                   }
05339                 }
05340                 if ( ! iscalc) {
05341                   //printf("SKIPPING\n");  // XXX
05342                   continue;  // skip because overlap is beyond nonzero gc sphere
05343                 }
05344 #endif
05345                 b(i,j,k).sendAcross.append(bs);
05346                 b(i,j,k).indexGridCutoff.append(numGridCutoff);
05347                 // receiving block records this grid cutoff ID
05348                 b(bs.nblock_wrap.n).recvGridCutoff.append(numGridCutoff);
05349                 // increment counter for receive block
05350                 b(bs.nblock_wrap.n).numRecvsPotential++;
05351 
05352                 numGridCutoff++;  // one MsmGridCutoff for each send across
05353               }
05354             }
05355           } // end loop over sendAcross blocks
05356 
05357           // Restriction, sendUp
05358           if (level < nlevels-1) {
05359             int ia2, ib2, ja2, jb2, ka2, kb2;
05360             ia = b(i,j,k).nrange.ia();
05361             ib = b(i,j,k).nrange.ib();
05362             ja = b(i,j,k).nrange.ja();
05363             jb = b(i,j,k).nrange.jb();
05364             ka = b(i,j,k).nrange.ka();
05365             kb = b(i,j,k).nrange.kb();
05366             // determine expansion of h-grid onto 2h-grid
05367             if ( ia==ib && ((ia & 1)==0) ) {
05368               ia2 = ib2 = ia / 2;
05369             }
05370             else {
05371               ia2 = (ia / 2) - ((polydeg+1) / 2) + 1;
05372               ib2 = ((ib+1) / 2) + ((polydeg+1) / 2) - 1;
05373             }
05374             if ( ja==jb && ((ja & 1)==0) ) {
05375               ja2 = jb2 = ja / 2;
05376             }
05377             else {
05378               ja2 = (ja / 2) - ((polydeg+1) / 2) + 1;
05379               jb2 = ((jb+1) / 2) + ((polydeg+1) / 2) - 1;
05380             }
05381             if ( ka==kb && ((ka & 1)==0) ) {
05382               ka2 = kb2 = ka / 2;
05383             }
05384             else {
05385               ka2 = (ka / 2) - ((polydeg+1) / 2) + 1;
05386               kb2 = ((kb+1) / 2) + ((polydeg+1) / 2) - 1;
05387             }
05388             // clip to boundaries of 2h-grid
05389             msm::Ivec na2, nb2;
05390             na2 = map.clipIndexToLevel(msm::Ivec(ia2,ja2,ka2), level+1);
05391             nb2 = map.clipIndexToLevel(msm::Ivec(ib2,jb2,kb2), level+1);
05392             b(i,j,k).nrangeRestricted.setbounds(na2.i, nb2.i, na2.j, nb2.j,
05393                 na2.k, nb2.k);
05394             // determine sendUp blocks
05395             msm::BlockIndex blower = map.blockOfGridIndex(na2, level+1);
05396             msm::BlockIndex bupper = map.blockOfGridIndex(nb2, level+1);
05397             int maxarrlen = (bupper.n.i - blower.n.i + 1) *
05398               (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
05399             b(i,j,k).sendUp.setmax(maxarrlen);  // allocate send array
05400             // loop over sendUp blocks
05401             int ii, jj, kk;
05402             for (kk = blower.n.k;  kk <= bupper.n.k;  kk++) {
05403               for (jj = blower.n.j;  jj <= bupper.n.j;  jj++) {
05404                 for (ii = blower.n.i;  ii <= bupper.n.i;  ii++) {
05405                   // determine actual block and range to send to
05406                   msm::BlockSend bs;
05407                   bs.nblock.n = msm::Ivec(ii,jj,kk);
05408                   bs.nblock.level = level+1;
05409                   bs.nrange = map.clipBlockToIndexRange(bs.nblock,
05410                       b(i,j,k).nrangeRestricted);
05411                   map.wrapBlockSend(bs);  // wrap to true block index
05412                   b(i,j,k).sendUp.append(bs);
05413                   // increment counter for receive block
05414                   map.blockLevel[level+1](bs.nblock_wrap.n).numRecvsCharge++;
05415                 }
05416               }
05417             } // end loop over sendUp blocks
05418 
05419           } // end if restriction
05420 
05421           // Prolongation, sendDown
05422           if (level > 0) {
05423             int ia2 = b(i,j,k).nrange.ia();
05424             int ib2 = b(i,j,k).nrange.ib();
05425             int ja2 = b(i,j,k).nrange.ja();
05426             int jb2 = b(i,j,k).nrange.jb();
05427             int ka2 = b(i,j,k).nrange.ka();
05428             int kb2 = b(i,j,k).nrange.kb();
05429             // determine expansion of 2h-grid onto h-grid
05430             ia = 2*ia2 - polydeg;
05431             ib = 2*ib2 + polydeg;
05432             ja = 2*ja2 - polydeg;
05433             jb = 2*jb2 + polydeg;
05434             ka = 2*ka2 - polydeg;
05435             kb = 2*kb2 + polydeg;
05436             // clip to boundaries of h-grid
05437             msm::Ivec na, nb;
05438             na = map.clipIndexToLevel(msm::Ivec(ia,ja,ka), level-1);
05439             nb = map.clipIndexToLevel(msm::Ivec(ib,jb,kb), level-1);
05440             b(i,j,k).nrangeProlongated.setbounds(na.i, nb.i, na.j, nb.j,
05441                 na.k, nb.k);
05442             // determine sendDown blocks
05443             msm::BlockIndex blower = map.blockOfGridIndex(na, level-1);
05444             msm::BlockIndex bupper = map.blockOfGridIndex(nb, level-1);
05445             int maxarrlen = (bupper.n.i - blower.n.i + 1) *
05446               (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
05447             b(i,j,k).sendDown.setmax(maxarrlen);  // allocate send array
05448             // loop over sendDown blocks
05449             int ii, jj, kk;
05450             for (kk = blower.n.k;  kk <= bupper.n.k;  kk++) {
05451               for (jj = blower.n.j;  jj <= bupper.n.j;  jj++) {
05452                 for (ii = blower.n.i;  ii <= bupper.n.i;  ii++) {
05453                   // determine actual block and range to send to
05454                   msm::BlockSend bs;
05455                   bs.nblock.n = msm::Ivec(ii,jj,kk);
05456                   bs.nblock.level = level-1;
05457                   bs.nrange = map.clipBlockToIndexRange(bs.nblock,
05458                       b(i,j,k).nrangeProlongated);
05459                   map.wrapBlockSend(bs);  // wrap to true block index
05460                   b(i,j,k).sendDown.append(bs);
05461                   // increment counter for receive block
05462                   map.blockLevel[level-1](bs.nblock_wrap.n).numRecvsPotential++;
05463                 }
05464               }
05465             } // end loop over sendDown blocks
05466 
05467           } // end if prolongation
05468 
05469 #ifdef MSM_REDUCE_GRID
05470           // using a reduction decreases the number of messages
05471           // from MsmGridCutoff elements to just 1
05472           b(i,j,k).numRecvsPotential -= ( b(i,j,k).indexGridCutoff.len() - 1 );
05473 #endif
05474 
05475         }
05476       }
05477     } // end loop over block diagram
05478 
05479   } // end loop over levels
05480   // end of Map setup
05481 
05482   // XXX
05483   //
05484   // NO, WAIT!
05485   // More Map setup below for node mapping!
05486   //
05487   // XXX
05488 
05489   // allocate chare arrays
05490 
05491   if (1) {
05492     PatchMap *pm = PatchMap::Object();
05493     patchPtr.resize( pm->numPatches() );
05494     for (int i = 0;  i < pm->numPatches();  i++) {
05495       patchPtr[i] = NULL;
05496     }
05497 #ifdef DEBUG_MSM_VERBOSE
05498     printf("Allocating patchPtr array length %d\n", pm->numPatches());
05499 #endif
05500     if (CkMyPe() == 0) {
05501       iout << iINFO << "MSM has " << pm->numPatches()
05502                     << " interpolation / anterpolation objects"
05503                     << " (one per patch)\n" << endi;
05504     }
05505   }
05506 
05507 #ifdef MSM_NODE_MAPPING
05508   if (1) {
05509     // Node aware initial assignment of chares
05510     //
05511     // Create map object for each 3D chare array of MsmBlock and the
05512     // 1D chare array of MsmGridCutoff.  Design map to equally distribute
05513     // blocks across nodes, assigned to node PEs in round robin manner.  
05514     // Attempt to reduce internode communication bandwidth by assigning 
05515     // each MsmGridCutoff element to either its source node or its 
05516     // destination node, again assigned to node PEs in round robin manner.
05517 #if 0
05518     // for testing
05519 #if 0
05520     int numNodes = 16;
05521     int numPes = 512;
05522 #else
05523     int numNodes = 32;
05524     int numPes = 1024;
05525 #endif
05526 #else
05527     int numNodes = CkNumNodes();
05528     int numPes = CkNumPes();
05529 #endif
05530     int numPesPerNode = numPes / numNodes;
05531     int numBlocks = 0;  // find total number of blocks
05532     for (level = 0;  level < nlevels;  level++) {
05533       numBlocks += map.blockLevel[level].nn();
05534     }
05535 
05536     // final result is arrays for blocks and gcuts, each with pe number
05537     blockAssign.resize(numBlocks);
05538     gcutAssign.resize(numGridCutoff);
05539     //printf("XXX numBlocks = %d\n", numBlocks);
05540     //printf("XXX numGridCutoff = %d\n", numGridCutoff);
05541 
05542     msm::Array<float> blockWork(numBlocks);
05543     msm::Array<float> gcutWork(numGridCutoff);
05544 
05545     msm::Array<float> nodeWork(numNodes);
05546     nodeWork.reset(0);
05547 #ifdef MSM_NODE_MAPPING_STATS
05548     msm::Array<float> peWork(numPes);
05549     peWork.reset(0);
05550 #endif
05551 
05552     msm::PriorityQueue<WorkIndex> nodeQueue(numNodes);
05553     for (n = 0;  n < numNodes;  n++) {
05554       nodeQueue.insert(WorkIndex(0, n));
05555     }
05556 
05557     int bindex = 0;  // index for block array
05558     for (level = 0;  level < nlevels;  level++) {
05559       msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
05560       int bni = b.ni();
05561       int bnj = b.nj();
05562       int bnk = b.nk();
05563       for (k = 0;  k < bnk;  k++) { // for all blocks
05564         for (j = 0;  j < bnj;  j++) {
05565           for (i = 0;  i < bni;  i++) {
05566             WorkIndex wn;
05567             nodeQueue.remove(wn);
05568             float bw = calcBlockWork(b(i,j,k));
05569             blockAssign[bindex] = wn.index;
05570             nodeWork[wn.index] += bw;
05571             wn.work += bw;
05572             blockWork[bindex] = bw;
05573             nodeQueue.insert(wn);
05574             bindex++;
05575           }
05576         }
05577       } // end for all blocks
05578     } // end for all levels
05579 
05580 #if 0
05581     for (n = 0;  n < numBlocks;  n++) {
05582       WorkIndex wn;
05583       nodeQueue.remove(wn);
05584       float bw = calcBlockWork(n);
05585       blockAssign[n] = wn.index;
05586       nodeWork[wn.index] += bw;
05587       wn.work += bw;
05588       blockWork[n] = bw;
05589       nodeQueue.insert(wn);
05590     }
05591 #endif
05592 
05593     // assign grid cutoff objects to nodes (gcutAssign)
05594     // choose whichever of source or destination node has less work
05595     int gindex = 0;  // index for grid cutoff array
05596     for (level = 0;  level < nlevels;  level++) { // for all levels
05597       msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
05598       int bni = b.ni();
05599       int bnj = b.nj();
05600       int bnk = b.nk();
05601       for (k = 0;  k < bnk;  k++) { // for all blocks
05602         for (j = 0;  j < bnj;  j++) {
05603           for (i = 0;  i < bni;  i++) {
05604             int isrc = blockFlatIndex(level, i, j, k);
05605             int nsrc = blockAssign[isrc];  // node block isrc is assigned
05606             int numSendAcross = b(i,j,k).sendAcross.len();
05607             ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
05608             for (n = 0;  n < numSendAcross;  n++) {
05609               msm::BlockSend& bs = b(i,j,k).sendAcross[n];
05610               msm::BlockIndex& bn = bs.nblock_wrap;
05611               int idest = blockFlatIndex(level, bn.n.i, bn.n.j, bn.n.k);
05612               int ndest = blockAssign[idest];  // node block idest is assigned
05613               gcutWork[gindex] = calcGcutWork(bs);
05614               if (nodeWork[nsrc] <= nodeWork[ndest]) {
05615                 gcutAssign[gindex] = nsrc;
05616                 nodeWork[nsrc] += gcutWork[gindex];
05617               }
05618               else {
05619                 gcutAssign[gindex] = ndest;
05620                 nodeWork[ndest] += gcutWork[gindex];
05621               }
05622               gindex++;
05623             } // end for numSendAcross
05624           }
05625         }
05626       } // end for all blocks
05627     } // end for all levels
05628 
05629     msm::Array< msm::PriorityQueue<WorkIndex> > peQueue(numNodes);
05630     for (n = 0;  n < numNodes;  n++) {
05631       peQueue[n].init(numPesPerNode);
05632       for (int poff = 0;  poff < numPesPerNode;  poff++) {
05633         peQueue[n].insert(WorkIndex(0, n*numPesPerNode + poff));
05634       }
05635     }
05636 
05637     for (n = 0;  n < numBlocks;  n++) {
05638       WorkIndex wn;
05639       int node = blockAssign[n];
05640       peQueue[node].remove(wn);
05641       blockAssign[n] = wn.index;
05642       wn.work += blockWork[n];
05643       peQueue[node].insert(wn);
05644 #ifdef MSM_NODE_MAPPING_STATS
05645       peWork[wn.index] += blockWork[n];
05646 #endif
05647     }
05648 
05649     for (n = 0;  n < numGridCutoff;  n++) {
05650       WorkIndex wn;
05651       int node = gcutAssign[n];
05652       peQueue[node].remove(wn);
05653       gcutAssign[n] = wn.index;
05654       wn.work += gcutWork[n];
05655       peQueue[node].insert(wn);
05656 #ifdef MSM_NODE_MAPPING_STATS
05657       peWork[wn.index] += gcutWork[n];
05658 #endif
05659     }
05660 
05661 #ifdef MSM_NODE_MAPPING_STATS
05662     if (CkMyPe() == 0) {
05663       printf("Mapping of MSM work (showing scaled estimated work units):\n");
05664       for (n = 0;  n < numNodes;  n++) {
05665         printf("    node %d   work %8.3f\n", n, nodeWork[n]);
05666         for (int poff = 0;  poff < numPesPerNode;  poff++) {
05667           int p = n*numPesPerNode + poff;
05668           printf("        pe %d     work %8.3f\n", p, peWork[p]);
05669         }
05670       }
05671       //CkExit();
05672     }
05673 #endif
05674 
05675 #if 0
05676     int numBlocks = 0;  // find total number of blocks
05677     for (level = 0;  level < nlevels;  level++) {
05678       numBlocks += map.blockLevel[level].nn();
05679     }
05680 
05681     // final result is arrays for blocks and gcuts, each with pe number
05682     blockAssign.resize(numBlocks);
05683     gcutAssign.resize(numGridCutoff);
05684 
05685     nodecnt.resize(numNodes);
05686 
05687     // assign blocks to nodes
05688     // the following algorithm divides as evenly as possible the
05689     // blocks across the nodes
05690     int r = numBlocks % numNodes;
05691     int q = numBlocks / numNodes;
05692     int qp = q + 1;
05693     for (n = 0;  n < numNodes - r;  n++) {
05694       int moffset = n * q;
05695       for (int m = 0;  m < q;  m++) {
05696         blockAssign[moffset + m] = n;
05697       }
05698       nodecnt[n] = q;
05699     }
05700     for ( ;  n < numNodes;  n++) {
05701       int moffset = (numNodes - r)*q + (n - (numNodes - r))*qp;
05702       for (int m = 0;  m < qp;  m++) {
05703         blockAssign[moffset + m] = n;
05704       }
05705       nodecnt[n] = qp;
05706     }
05707 #if 0
05708     if (CkMyPe() == 0) {
05709       CkPrintf("%d objects to %d nodes\n", q, numNodes-r);
05710       if (r != 0) {
05711         CkPrintf("%d objects to %d nodes\n", qp, r);
05712       }
05713       CkPrintf("%d  =?  %d\n", (numNodes-r)*q + r*qp, numBlocks);
05714     }
05715 #endif
05716 
05717     // assign grid cutoff objects to nodes (gcutAssign)
05718     // choose whichever of source or destination node has less work
05719     int gindex = 0;  // index for grid cutoff array
05720     for (level = 0;  level < nlevels;  level++) { // for all levels
05721       msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
05722       int bni = b.ni();
05723       int bnj = b.nj();
05724       int bnk = b.nk();
05725       for (k = 0;  k < bnk;  k++) { // for all blocks
05726         for (j = 0;  j < bnj;  j++) {
05727           for (i = 0;  i < bni;  i++) {
05728             int isrc = blockFlatIndex(level, i, j, k);
05729             int nsrc = blockAssign[isrc];  // node block isrc is assigned
05730             int numSendAcross = b(i,j,k).sendAcross.len();
05731             ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
05732             for (n = 0;  n < numSendAcross;  n++) {
05733               msm::BlockIndex &bn = b(i,j,k).sendAcross[n].nblock_wrap;
05734               int idest = blockFlatIndex(level, bn.n.i, bn.n.j, bn.n.k);
05735               int ndest = blockAssign[idest];  // node block idest is assigned
05736               // assign this grid cutoff work to least subscribed node
05737               if (nodecnt[nsrc] <= nodecnt[ndest]) {
05738                 gcutAssign[gindex] = nsrc;
05739                 nodecnt[nsrc]++;
05740               }
05741               else {
05742                 gcutAssign[gindex] = ndest;
05743                 nodecnt[ndest]++;
05744               }
05745               gindex++;
05746             } // end for numSendAcross
05747           }
05748         }
05749       } // end for all blocks
05750     } // end for all levels
05751 
05752     // now change the node assignments into PE assignments
05753     // use round robin assignment to PEs within each node
05754     int ppn = numPes / numNodes;  // num PEs per node
05755     // reset nodecnt - this array will now store PE offset for that node
05756     for (n = 0;  n < numNodes;  n++)  nodecnt[n] = 0;
05757     for (n = 0;  n < numBlocks;  n++) {
05758       int node = blockAssign[n];
05759       blockAssign[n] = node * ppn + nodecnt[node];  // PE number within node
05760       nodecnt[node]++;  // increment to next PE
05761       if (nodecnt[node] >= ppn)  nodecnt[node] = 0;  // with wrap around
05762     }
05763     for (n = 0;  n < numGridCutoff;  n++) {
05764       int node = gcutAssign[n];
05765       gcutAssign[n] = node * ppn + nodecnt[node];  // PE number within node
05766       nodecnt[node]++;  // increment to next PE
05767       if (nodecnt[node] >= ppn)  nodecnt[node] = 0;  // with wrap around
05768     }
05769 
05770     // print mapping
05771 #if 0
05772     if (CkMyPe() == 0) {
05773       for (n = 0;  n < numBlocks;  n++) {
05774         CkPrintf("block %d:   node=%d  pe=%d\n",
05775             n, blockAssign[n]/ppn, blockAssign[n]);
05776       }
05777 #if 0
05778       for (n = 0;  n < numGridCutoff;  n++) {
05779         CkPrintf("grid cutoff %d:   node=%d  pe=%d\n",
05780             n, gcutAssign[n]/ppn, gcutAssign[n]);
05781       }
05782 #endif
05783     }
05784 #endif
05785 
05786 #endif // 0
05787 
05788   } // end node aware initial assignment of chares
05789 #endif // MSM_NODE_MAPPING
05790 
05791 } // ComputeMsmMgr::initialize2()
05792 
05793 
05794 void ComputeMsmMgr::initialize_create() {
        // Creates the MSM chare arrays and hands their proxies to the group.
        //
        // Everything except the optional debug print at the end runs on PE 0
        // only.  On PE 0 this:
        //   1. creates, for each grid level, a 3D chare array of MsmBlock (or
        //      MsmC1HermiteBlock when approx == C1HERMITE) with dimensions
        //      taken from map.blockLevel[level]; with MSM_NODE_MAPPING the
        //      array is created through CkArrayOptions with an MsmBlockMap;
        //   2. packs the per-level proxies into a message and passes it to
        //      msmProxy (broadcast);
        //   3. with MSM_GRID_CUTOFF_DECOMP, creates the 1D chare array of
        //      numGridCutoff grid cutoff elements, passes its proxy to
        //      msmProxy, and sends every element a setup message built from
        //      a source block index and one of its sendAcross entries.
        // Progress messages go to iout and are flushed once at the end.
05795   int i, j, k, n, level;
05796 
05797   if (CkMyPe() == 0) {
05798 
05799     // on PE 0, create 3D chare array of MsmBlock for each level;
05800     // broadcast this array of proxies to the rest of the group
05801     if (approx == C1HERMITE) {
05802       msmC1HermiteBlock.resize(nlevels);
05803     }
05804     else {
05805       msmBlock.resize(nlevels);
05806     }
05807     for (level = 0;  level < nlevels;  level++) {
05808       int ni = map.blockLevel[level].ni();
05809       int nj = map.blockLevel[level].nj();
05810       int nk = map.blockLevel[level].nk();
05811 #ifdef MSM_NODE_MAPPING
05812       CkPrintf("Using MsmBlockMap for level %d\n", level);
05813       CProxy_MsmBlockMap blockMap = CProxy_MsmBlockMap::ckNew(level);
05814       CkArrayOptions opts(ni, nj, nk);
05815       opts.setMap(blockMap);
05816       if (approx == C1HERMITE) {
05817         msmC1HermiteBlock[level] =
05818           CProxy_MsmC1HermiteBlock::ckNew(level, opts);
05819       }
05820       else {
05821         msmBlock[level] = CProxy_MsmBlock::ckNew(level, opts);
05822       }
05823 #else
05824       if (approx == C1HERMITE) {
05825         msmC1HermiteBlock[level] =
05826           CProxy_MsmC1HermiteBlock::ckNew(level, ni, nj, nk);
05827       }
05828       else {
05829         msmBlock[level] = CProxy_MsmBlock::ckNew(level, ni, nj, nk);
05830       }
05831 #endif
05832 #ifdef DEBUG_MSM_VERBOSE
05833       printf("Create MsmBlock[%d] 3D chare array ( %d x %d x %d )\n",
05834           level, ni, nj, nk);
05835 #endif
05836       char msg[128];
05837       int nijk = ni * nj * nk;
              // bounded formatting, matching the other message buffers in this
              // file, so the fixed-size buffer can never be overrun
05838       snprintf(msg, sizeof(msg), "MSM grid level %d decomposed into %d block%s"
05839           " ( %d x %d x %d )\n",
05840           level, nijk, (nijk==1 ? "" : "s"), ni, nj, nk);
05841       iout << iINFO << msg;
05842     }
05843     if (approx == C1HERMITE) {
05844       MsmC1HermiteBlockProxyMsg *msg = new MsmC1HermiteBlockProxyMsg;
05845       msg->put(msmC1HermiteBlock);
05846       msmProxy.recvMsmC1HermiteBlockProxy(msg);  // broadcast
05847     }
05848     else {
05849       MsmBlockProxyMsg *msg = new MsmBlockProxyMsg;
05850       msg->put(msmBlock);
05851       msmProxy.recvMsmBlockProxy(msg);  // broadcast
05852     }
05853 
05854 #ifdef MSM_GRID_CUTOFF_DECOMP
05855     // on PE 0, create 1D chare array of MsmGridCutoff
05856     // broadcast this array proxy to the rest of the group
05857 #ifdef MSM_NODE_MAPPING
05858     CkPrintf("Using MsmGridCutoffMap\n");
05859     CProxy_MsmGridCutoffMap gcutMap = CProxy_MsmGridCutoffMap::ckNew();
05860     CkArrayOptions optsgcut(numGridCutoff);
05861     optsgcut.setMap(gcutMap);
05862     if (approx == C1HERMITE) {
05863       msmC1HermiteGridCutoff = CProxy_MsmC1HermiteGridCutoff::ckNew(optsgcut);
05864     }
05865     else {
05866       msmGridCutoff = CProxy_MsmGridCutoff::ckNew(optsgcut);
05867     }
05868 #else
05869     if (approx == C1HERMITE) {
05870       msmC1HermiteGridCutoff =
05871         CProxy_MsmC1HermiteGridCutoff::ckNew(numGridCutoff);
05872     }
05873     else {
05874       msmGridCutoff = CProxy_MsmGridCutoff::ckNew(numGridCutoff);
05875     }
05876 #endif
05877     if (approx == C1HERMITE) {
05878       MsmC1HermiteGridCutoffProxyMsg *gcmsg =
05879         new MsmC1HermiteGridCutoffProxyMsg;
05880       gcmsg->put(&msmC1HermiteGridCutoff);
05881       msmProxy.recvMsmC1HermiteGridCutoffProxy(gcmsg);
05882     }
05883     else {
05884       MsmGridCutoffProxyMsg *gcmsg = new MsmGridCutoffProxyMsg;
05885       gcmsg->put(&msmGridCutoff);
05886       msmProxy.recvMsmGridCutoffProxy(gcmsg);
05887     }
05888 
05889     // XXX PE 0 initializes each MsmGridCutoff
05890     // one-to-many
05891     // for M length chare array, better for each PE to initialize M/P?
05892     for (level = 0;  level < nlevels;  level++) { // for all levels
05893       msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
05894       int bni = b.ni();
05895       int bnj = b.nj();
05896       int bnk = b.nk();
05897       for (k = 0;  k < bnk;  k++) { // for all blocks
05898         for (j = 0;  j < bnj;  j++) {
05899           for (i = 0;  i < bni;  i++) {
05900             // source for charges
05901             msm::BlockIndex bi = msm::BlockIndex(level, msm::Ivec(i,j,k));
05902             int numSendAcross = b(i,j,k).sendAcross.len();
05903             ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
05904             // for this source, loop over destinations for potentials
05905             for (n = 0;  n < numSendAcross;  n++) {
05906               msm::BlockSend &bs = b(i,j,k).sendAcross[n];
                      // indexGridCutoff[n] is the element of the 1D grid cutoff
                      // array that receives the n-th sendAcross entry
05907               int index = b(i,j,k).indexGridCutoff[n];
05908               MsmGridCutoffInitMsg *bsmsg = new MsmGridCutoffInitMsg(bi, bs);
05909               if (approx == C1HERMITE) {
05910                 msmC1HermiteGridCutoff[index].setup(bsmsg);
05911               }
05912               else {
05913                 msmGridCutoff[index].setup(bsmsg);
05914               }
05915             } // traverse sendAcross, indexGridCutoff arrays
05916 
05917           }
05918         }
05919       } // end for all blocks
05920 
05921     } // end for all levels
05922 
05923     iout << iINFO << "MSM grid cutoff calculation decomposed into "
05924       << numGridCutoff << " work objects\n";
05925 #endif
05926     iout << endi;
05927   }
05928 
05929 #ifdef DEBUG_MSM_VERBOSE
05930   printf("end of initialization\n");
05931 #endif
05932 } // ComputeMsmMgr::initialize_create()
05933 
05934 
05935 void ComputeMsmMgr::recvMsmBlockProxy(MsmBlockProxyMsg *msg) {
        // Reads the message contents into msmBlock via msg->get(), then
        // frees the message.
05936   msg->get(msmBlock);
05937   delete msg;
05938 }
05940 
05941 void ComputeMsmMgr::recvMsmGridCutoffProxy(MsmGridCutoffProxyMsg *msg) {
        // Reads the message contents into msmGridCutoff via msg->get(), then
        // frees the message.
05942   msg->get(&msmGridCutoff);
05943   delete msg;
05944 }
05946 
05947 void ComputeMsmMgr::recvMsmC1HermiteBlockProxy(MsmC1HermiteBlockProxyMsg *msg)
05948 {
        // Reads the message contents into msmC1HermiteBlock via msg->get(),
        // then frees the message.
05949   msg->get(msmC1HermiteBlock);
05950   delete msg;
05951 }
05954 
05955 void ComputeMsmMgr::recvMsmC1HermiteGridCutoffProxy(
05956     MsmC1HermiteGridCutoffProxyMsg *msg)
05957 {
        // Reads the message contents into msmC1HermiteGridCutoff via
        // msg->get(), then frees the message.
05958   msg->get(&msmC1HermiteGridCutoff);
05959   delete msg;
05960 }
05962 
05963 void ComputeMsmMgr::update(CkQdMsg *msg)
05964 {
        // Frees msg, then (on PE 0 only) calls setupSections() on the block
        // chare array of every level, using the C1 Hermite arrays when
        // approx == C1HERMITE and the MsmBlock arrays otherwise.
05965 #ifdef DEBUG_MSM_VERBOSE
05966   printf("ComputeMsmMgr:  update() PE %d\n", CkMyPe());
05967 #endif
05968   delete msg;
05969 
05970   // sections have to be set up AFTER initialization is finished
05971   if (CkMyPe() != 0) return;
05972   const bool useC1Hermite = (approx == C1HERMITE);
05973   for (int lev = 0;  lev < nlevels;  lev++) {
05974     if (useC1Hermite) msmC1HermiteBlock[lev].setupSections();
05975     else              msmBlock[lev].setupSections();
05976   }
05977 
05978   // XXX how do update for constant pressure simulation?
05979 }
05984 
05985 
05986 void ComputeMsmMgr::compute(msm::Array<int>& patchIDList)
05987 {
        // For every patch ID in patchIDList, starts anterpolation on that
        // patch's entry in patchPtr (the C1 Hermite variant when
        // approx == C1HERMITE).  Calls NAMD_die() if an entry is NULL.
05988 #ifdef DEBUG_MSM_VERBOSE
05989   printf("ComputeMsmMgr:  compute() PE=%d\n", CkMyPe());
05990 #endif
05991 
05992   const bool useC1Hermite = (approx == C1HERMITE);
05993   for (int idx = 0;  idx < patchIDList.len();  idx++) {
05994     const int pid = patchIDList[idx];
05995     msm::PatchData *patch = patchPtr[pid];
05996     if (patch == NULL) {
05997       char errmsg[100];
05998       snprintf(errmsg, sizeof(errmsg),
05999           "Expected MSM data for patch %d does not exist on PE %d",
06000           pid, CkMyPe());
06001       NAMD_die(errmsg);
06002     }
06003     // all else should follow from here
06004     if (useC1Hermite) patch->anterpolationC1Hermite();
06005     else              patch->anterpolation();
06006   }
06007 }
06012 
06013 
06014 void ComputeMsmMgr::addPotential(GridMsg *gm)
06015 {
        // Unpacks a grid message into the manager's scratch subgrid (the
        // C1 Hermite one when approx == C1HERMITE), frees the message, and
        // passes the subgrid to the patch whose ID came with the message.
        // Calls NAMD_die() if that patch has no entry in patchPtr.
06016   int pid;   // receive patch ID
06017   int pseq;  // third value unpacked from the message; not used here
06018   const bool useC1Hermite = (approx == C1HERMITE);
06019   if (useC1Hermite) gm->get(subgrid_c1hermite, pid, pseq);
06020   else              gm->get(subgrid, pid, pseq);
06021   delete gm;
06022 
06023   msm::PatchData *patch = patchPtr[pid];
06024   if (patch == NULL) {
06025     char errmsg[100];
06026     snprintf(errmsg, sizeof(errmsg), "Expecting patch %d to exist on PE %d",
06027         pid, CkMyPe());
06028     NAMD_die(errmsg);
06029   }
06030   if (useC1Hermite) patch->addPotentialC1Hermite(subgrid_c1hermite);
06031   else              patch->addPotential(subgrid);
06032 }
06038 
06039 
06040 void ComputeMsmMgr::doneCompute() {
        // Forwards completion to msmCompute (presumably the ComputeMsm that
        // registered itself through setCompute() -- confirm in the header).
06041   msmCompute->saveResults();
06042 }
06044 
06045 
06047 //
06048 //  ComputeMsm
06049 //  MSM compute object: starts and finishes the calculation;
06050 //  there is at most one compute object per PE
06051 //
06052 
06053 ComputeMsm::ComputeMsm(ComputeID c) : ComputeHomePatches(c)
06054 {
        // Registers this compute object with the local ComputeMsmMgr branch,
        // derives the charge scaling factor sqrt(COULOMB / dielectric) from
        // the simulation parameters, and obtains the reduction handle that
        // saveResults() and doWork() submit to.
06055   ComputeMsmMgr *localMgr = CProxy_ComputeMsmMgr::ckLocalBranch(
06056       CkpvAccess(BOCclass_group).computeMsmMgr);
06057   localMgr->setCompute(this);
06058   qscaling = sqrtf(COULOMB / Node::Object()->simParameters->dielectric);
06059   reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
06060 #ifdef DEBUG_MSM_VERBOSE
06061   printf("ComputeMsm:  (constructor) PE=%d\n", CkMyPe());
06062 #endif
06063 }
06064 
06065 ComputeMsm::~ComputeMsm() {
        // Nothing is released here; the body only emits a debug trace when
        // DEBUG_MSM_VERBOSE is defined.
06066 #ifdef DEBUG_MSM_VERBOSE
06067   printf("ComputeMsm:  (destructor) PE=%d\n", CkMyPe());
06068 #endif
06069 }
06072 
06073 void ComputeMsm::doWork()
06074 {
        // Starts one MSM evaluation for the home patches of this compute.
        //
        // Resets the local patch counters, then either
        //   - (no full electrostatics this step) opens and closes every
        //     position and force box untouched, submits the reduction and
        //     returns, or
        //   - copies position, scaled charge and atom ID of every atom of
        //     every local patch into that patch's msm::PatchData (creating
        //     the PatchData on first use) and passes the list of local patch
        //     IDs to myMgr->compute().
        // The step flags are read from patchList[0] only.
06075   // for each patch do stuff
06076 #ifdef DEBUG_MSM_VERBOSE
06077   printf("ComputeMsm:  doWork() PE=%d\n", CkMyPe());
06078 #endif
06079 
06080 #if 0
06081 #ifdef MSM_TIMING
06082   myMgr->initTiming();
06083 #endif
06084 #ifdef MSM_PROFILING
06085   myMgr->initProfiling();
06086 #endif
06087 #endif
06088 
06089   // patchList is inherited from ComputeHomePatches
06090   ResizeArrayIter<PatchElem> ap(patchList);
06091   numLocalPatches = patchList.size();
06092   cntLocalPatches = 0;
        // patchList[0] is dereferenced below, so the list must not be empty
06093   ASSERT(cntLocalPatches < numLocalPatches);
06094 
06095 #ifdef DEBUG_MSM_VERBOSE
06096   printf("patchList size = %d\n", patchList.size() );
06097 #endif
06098 
06099   // Skip computations if nothing to do.
06100   if ( ! patchList[0].p->flags.doFullElectrostatics ) {
06101     for (ap = ap.begin();  ap != ap.end();  ap++) {
06102       CompAtom *x = (*ap).positionBox->open();
06103       Results *r = (*ap).forceBox->open();
06104       (*ap).positionBox->close(&x);
06105       (*ap).forceBox->close(&r);
06106     }
06107     reduction->submit();
06108     return;
06109   }
        // loop-invariant: selects averaged positions instead of positions
06110   const bool useAvgPositions = patchList[0].p->flags.doMolly;
06111   // This is the patchPtr array for MSM; any local patch will be set up
06112   // with a non-NULL pointer to its supporting data structure.
06113   msm::PatchPtrArray& patchPtr = myMgr->patchPtrArray();
06114   // also store just a list of IDs for the local patches
06115   msm::Array<int> patchIDList(numLocalPatches);
06116   patchIDList.resize(0);  // to use append on pre-allocated array buffer
06117   int n;
06118   for (ap = ap.begin();  ap != ap.end();  ap++) {
06119     CompAtom *x = (*ap).positionBox->open();
06120     CompAtomExt *xExt = (*ap).p->getCompAtomExtInfo();
06121     if ( useAvgPositions ) {
06122       (*ap).positionBox->close(&x);
06123       x = (*ap).avgPositionBox->open();
06124     }
06125     int numAtoms = (*ap).p->getNumAtoms();
06126     int patchID = (*ap).patchID;
06127     patchIDList.append(patchID);
06128     if (patchPtr[patchID] == NULL) {
06129       // create PatchData if it doesn't exist for this patchID
06130       patchPtr[patchID] = new msm::PatchData(myMgr, patchID);
06131 #ifdef DEBUG_MSM_VERBOSE
06132       printf("Creating new PatchData:  patchID=%d  PE=%d\n",
06133           patchID, CkMyPe());
06134 #endif
06135     }
06136     msm::PatchData& patch = *(patchPtr[patchID]);
06137     patch.init(numAtoms);
06138     msm::AtomCoordArray& coord = patch.coordArray();
06139     ASSERT(coord.len() == numAtoms);
06140     for (n = 0;  n < numAtoms;  n++) {
06141       coord[n].position = x[n].position;
            // charges are pre-scaled by qscaling, set in the constructor
06142       coord[n].charge = qscaling * x[n].charge;
06143       coord[n].id = xExt[n].id;
06144     }
          // close whichever box x was last opened from
06145     if ( useAvgPositions ) {
06146       (*ap).avgPositionBox->close(&x);
06147     }
06148     else {
06149       (*ap).positionBox->close(&x);
06150     }
06151     patch.sequence = sequence();
06152   }
06153 
06154   myMgr->compute(patchIDList);
06155 }
06156 
06157 void ComputeMsm::saveResults()
06158 {
        // Counts one call per invocation; only the call that brings
        // cntLocalPatches up to numLocalPatches (both reset in doWork())
        // does the work below, every earlier call returns immediately.
        //
        // That final call adds each patch's MSM force into the slow force
        // array of the patch's force box, accumulates the patch energy and
        // the manager's virial into the reduction, and submits it.
        // Calls NAMD_die() if a local patch has no PatchData entry.
06159   if (++cntLocalPatches != numLocalPatches) return;
06160 
06161   // NAMD patches
06162   ResizeArrayIter<PatchElem> ap(patchList);
06163 #ifdef DEBUG_MSM
06164   for (ap = ap.begin();  ap != ap.end();  ap++) {
06165     int patchID = (*ap).patchID;
06166     ASSERT(myMgr->patchPtrArray()[patchID]->cntRecvs ==
06167         myMgr->mapData().patchList[patchID].numRecvs);
06168   }
06169 #endif
06170 
06171   // get results from ComputeMsmMgr
06172   msm::PatchPtrArray& patchPtr = myMgr->patchPtrArray();
06173 
06174 #ifdef DEBUG_MSM_VERBOSE
06175   printf("ComputeMsm:  saveResults() PE=%d\n", CkMyPe());
06176 #endif
06177   // store force updates
06178   // submit reductions
06179 
06180   // add in forces
06181   int n;
06182   for (ap = ap.begin(); ap != ap.end(); ap++) {
06183     Results *r = (*ap).forceBox->open();
06184     Force *f = r->f[Results::slow];
06185     int numAtoms = (*ap).p->getNumAtoms();
06186     int patchID = (*ap).patchID;
06187     if (patchPtr[patchID] == NULL) {
06188       char msg[100];
06189       snprintf(msg, sizeof(msg), "Expecting patch %d to exist on PE %d",
06190           patchID, CkMyPe());
06191       NAMD_die(msg);
06192     }
06193     msm::PatchData& patch = *(patchPtr[patchID]);
06194     ASSERT(numAtoms == patch.force.len() );
06195     for (n = 0;  n < numAtoms;  n++) {
06196       f[n] += patch.force[n];
06197     }
06198     (*ap).forceBox->close(&r);
06199 
06200     reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += patch.energy;
06201 //    reduction->item(REDUCTION_VIRIAL_SLOW_XX) += patch.virial[0][0];
06202 //    reduction->item(REDUCTION_VIRIAL_SLOW_XY) += patch.virial[0][1];
06203 //    reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += patch.virial[0][2];
06204 //    reduction->item(REDUCTION_VIRIAL_SLOW_YX) += patch.virial[1][0];
06205 //    reduction->item(REDUCTION_VIRIAL_SLOW_YY) += patch.virial[1][1];
06206 //    reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += patch.virial[1][2];
06207 //    reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += patch.virial[2][0];
06208 //    reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += patch.virial[2][1];
06209 //    reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += patch.virial[2][2];
          // NOTE(review): the manager-wide virial is added once per local
          // patch because it sits inside this loop -- confirm that
          // myMgr->virial is meant to be counted numLocalPatches times.
          // The six stored components fill the symmetric 3x3 tensor
          // (YX reuses VXY, ZX reuses VXZ, ZY reuses VYZ).
06210     Float *virial = myMgr->virial;
06211     reduction->item(REDUCTION_VIRIAL_SLOW_XX) += virial[ComputeMsmMgr::VXX];
06212     reduction->item(REDUCTION_VIRIAL_SLOW_XY) += virial[ComputeMsmMgr::VXY];
06213     reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += virial[ComputeMsmMgr::VXZ];
06214     reduction->item(REDUCTION_VIRIAL_SLOW_YX) += virial[ComputeMsmMgr::VXY];
06215     reduction->item(REDUCTION_VIRIAL_SLOW_YY) += virial[ComputeMsmMgr::VYY];
06216     reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += virial[ComputeMsmMgr::VYZ];
06217     reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += virial[ComputeMsmMgr::VXZ];
06218     reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += virial[ComputeMsmMgr::VYZ];
06219     reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += virial[ComputeMsmMgr::VZZ];
06220   }
06221   reduction->submit();
06222 }
06223 
06224 // method definitions for PatchData
06225 namespace msm {
06226 
06227   PatchData::PatchData(ComputeMsmMgr *pmgr, int pid) {
          // Binds this object to its manager, the manager's map data and the
          // map's patchList entry for pid, then sizes the charge and
          // potential grids to that entry's nrange.  The scratch subgrid is
          // sized to bsx[0] * bsy[0] * bsz[0] elements.  Only the grid set
          // matching the manager's approximation is initialized.
06228     patchID = pid;
06229     mgr = pmgr;
06230     map = &(mgr->mapData());
06231     //PatchMap *pm = PatchMap::Object();
06232     pd = &(map->patchList[pid]);
06233     if (mgr->approx != ComputeMsmMgr::C1HERMITE) {
06234       qh.init(pd->nrange);
06235       eh.init(pd->nrange);
06236       subgrid.resize(map->bsx[0] * map->bsy[0] * map->bsz[0]);
06237     }
06238     else {
06239       qh_c1hermite.init(pd->nrange);
06240       eh_c1hermite.init(pd->nrange);
06241       subgrid_c1hermite.resize(map->bsx[0] * map->bsy[0] * map->bsz[0]);
06242     }
06243 #ifdef MSM_TIMING
06244     mgr->addTiming();
06245 #endif
06246   }
06247 
06248   void PatchData::init(int natoms) {
          // Prepares the patch for a new evaluation with natoms atoms:
          // clears the receive counter and energy, resizes the coordinate and
          // force arrays, zeroes every force, and resets the charge and
          // potential grids that match the manager's approximation.
          // (disabled in the original: memset(virial, 0, 3*3*sizeof(BigReal)))
06249     cntRecvs = 0;
06250     energy = 0;
06251     coord.resize(natoms);
06252     force.resize(natoms);
06253     for (int a = 0;  a < natoms;  a++) { force[a] = 0; }
06254     if (mgr->approx != ComputeMsmMgr::C1HERMITE) {
06255       qh.reset(0);
06256       eh.reset(0);
06257     }
06258     else {
06259       qh_c1hermite.reset(0);
06260       eh_c1hermite.reset(0);
06261     }
06262   }
06264 
06265   void PatchData::anterpolation() {
          // Spreads the charge of every atom in coord onto the patch charge
          // grid qh, then calls sendCharge().
          //
          // For each atom with nonzero charge the position is converted to
          // scaled coordinates, a 1D stencil of s_size weights is evaluated
          // along each axis, and q * xphi[i] * yphi[j] * zphi[k] is added to
          // the s_size^3 grid points starting at (ilo, jlo, klo).
          // Calls NAMD_die() if that stencil is not entirely inside qh.
          // With MSM_COMM_ONLY defined the spreading is compiled out and only
          // sendCharge() runs.
06266 #ifdef DEBUG_MSM_GRID
06267     printf("patchID %d:  anterpolation\n", patchID);
06268 #endif
06269 
06270 #ifdef MSM_TIMING
06271     double startTime, stopTime;
06272     startTime = CkWallTimer();
06273 #endif
06274 #ifndef MSM_COMM_ONLY
          // per-axis stencil weights, filled by mgr->stencil_1d()
06275     Float xphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06276     Float yphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06277     Float zphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06278 
06279     const Double rs_edge = Double( mgr->s_edge );
          // number of stencil points per axis for the current approximation
06280     const int s_size = ComputeMsmMgr::PolyDegree[mgr->approx] + 1;
06281 
          // index bounds and extents of qh, hoisted out of the atom loop
06282     const int ia = qh.ia();
06283     const int ib = qh.ib();
06284     const int ja = qh.ja();
06285     const int jb = qh.jb();
06286     const int ka = qh.ka();
06287     const int kb = qh.kb();
06288     const int ni = qh.ni();
06289     const int nj = qh.nj();
06290     Float *qhbuffer = qh.data().buffer();
06291 
06292     // loop over atoms
06293     for (int n = 0;  n < coord.len();  n++) {
06294       Float q = coord[n].charge;
            // an uncharged atom contributes nothing to the grid
06295       if (0==q) continue;
06296 
06297       ScaledPosition s = mgr->lattice.scale(coord[n].position);
06298 
            // offset from sglower multiplied by shx_1/shy_1/shz_1
            // (presumably inverse grid spacings, giving a fractional grid
            // index -- confirm against the manager's definitions)
06299       BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
06300       BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
06301       BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
06302 
            // lowest grid index touched by the stencil along each axis
06303       BigReal xlo = floor(sx_hx) - rs_edge;
06304       BigReal ylo = floor(sy_hy) - rs_edge;
06305       BigReal zlo = floor(sz_hz) - rs_edge;
06306 
06307       // calculate Phi stencils along each dimension
06308       Float xdelta = Float(sx_hx - xlo);
06309       mgr->stencil_1d(xphi, xdelta);
06310       Float ydelta = Float(sy_hy - ylo);
06311       mgr->stencil_1d(yphi, ydelta);
06312       Float zdelta = Float(sz_hz - zlo);
06313       mgr->stencil_1d(zphi, zdelta);
06314 
06315       int ilo = int(xlo);
06316       int jlo = int(ylo);
06317       int klo = int(zlo);
06318 
06319       // test to see if stencil is within edges of grid
06320       int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
06321                        ja <= jlo && (jlo+(s_size-1)) <= jb &&
06322                        ka <= klo && (klo+(s_size-1)) <= kb );
06323 
06324       if ( ! iswithin ) {
06325 #if 0
06326         printf("PE %d:  atom %d:  pos= %g %g %g  patchID=%d\n",
06327             CkMyPe(), coord[n].id,
06328             coord[n].position.x, coord[n].position.y, coord[n].position.z,
06329             patchID);
06330         printf("PE %d:  atom subgrid [%d..%d] x [%d..%d] x [%d..%d]\n",
06331             CkMyPe(),
06332             ilo, ilo+s_size-1, jlo, jlo+s_size-1, klo, klo+s_size-1);
06333         printf("PE %d:  patch grid [%d..%d] x [%d..%d] x [%d..%d]\n",
06334             CkMyPe(),
06335             ia, ib, ja, jb, ka, kb);
06336 #endif
06337         char msg[100];
06338         snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
06339             coord[n].id);
06340         NAMD_die(msg);
06341       }
06342 
06343       // determine charge on cube of grid points around atom
            // flat offset into qhbuffer is ((k-ka)*nj + (j-ja))*ni + (i-ia),
            // built up incrementally across the three nested loops
06344       for (int k = 0;  k < s_size;  k++) {
06345         int koff = ((k+klo) - ka) * nj;
06346         Float ck = zphi[k] * q;
06347         for (int j = 0;  j < s_size;  j++) {
06348           int jkoff = (koff + (j+jlo) - ja) * ni;
06349           Float cjk = yphi[j] * ck;
06350           for (int i = 0;  i < s_size;  i++) {
06351             int ijkoff = jkoff + (i+ilo) - ia;
06352             qhbuffer[ijkoff] += xphi[i] * cjk;
06353           }
06354         }
06355       }
06356 
06357     } // end loop over atoms
06358 #endif // !MSM_COMM_ONLY
06359 #ifdef MSM_TIMING
06360     stopTime = CkWallTimer();
06361     mgr->msmTiming[MsmTimer::ANTERP] += stopTime - startTime;
06362 #endif
06363 
          // hand the accumulated grid charges on to the level-0 blocks
06364     sendCharge();
06365   }
06366 
06367   void PatchData::sendCharge() {
          // Sends this patch's charge grid qh to the MSM blocks listed in
          // pd->send: for each entry one GridMsg is built from the matching
          // sub-range of qh and passed to addCharge() on the target block.
          // With MSM_TIMING defined, the time spent packing each message is
          // added to the COMM timer; the addCharge() call itself is made
          // after the timed region ends.
06368 #ifdef MSM_TIMING
06369     double startTime, stopTime;
06370 #endif
          // constant offset added to MSM_PRIORITY for every message below
06371     int priority = 1;
06372     // buffer portions of grid to send to Blocks on level 0
06373     // allocate the largest buffer space we'll need
06374     //Grid<BigReal> subgrid;
06375     //subgrid.resize(map->bsx[0] * map->bsy[0] * map->bsz[0]);
          // NOTE(review): the two commented-out lines above are a disabled
          // local buffer; the loop uses the subgrid member instead.
06376     for (int n = 0;  n < pd->send.len();  n++) {
06377 #ifdef MSM_TIMING
06378       startTime = CkWallTimer();
06379 #endif
06380       // initialize the proper subgrid indexing range
06381       subgrid.init( pd->send[n].nrange );
06382       // extract the values from the larger grid into the subgrid
06383       qh.extract(subgrid);
06384       // translate the subgrid indexing range to match the MSM block
06385       subgrid.updateLower( pd->send[n].nrange_wrap.lower() );
06386       // add the subgrid charges into the block
            // (bindex names the destination block: its level and n.i/n.j/n.k)
06387       BlockIndex& bindex = pd->send[n].nblock_wrap;
06388       // place subgrid into message
            // payload size in bytes: one Float per subgrid element
06389       int msgsz = subgrid.data().len() * sizeof(Float);
            // NOTE(review): the second allocation argument is presumably the
            // priority field size consumed by SET_PRIORITY - confirm against
            // the Charm++ message allocation rules.
06390       GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
06391       SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
06392       gm->put(subgrid, bindex.level, sequence);
06393 #ifdef MSM_TIMING
06394       stopTime = CkWallTimer();
06395       mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
06396 #endif
            // hand the message to the block element selected by bindex
06397       mgr->msmBlock[bindex.level](
06398           bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
06399     }
06400   }
06401 
06402   void PatchData::addPotential(const Grid<Float>& epart) {
          // Adds one partial potential grid (epart) into eh and counts the
          // contribution. When the count reaches pd->numRecvs, interpolation()
          // is called from here. With MSM_TIMING defined, the time of the
          // grid sum is added to the COMM timer.
06403 #ifdef MSM_TIMING
06404     double startTime, stopTime;
06405     startTime = CkWallTimer();
06406 #endif
06407     eh += epart;
06408 #ifdef MSM_TIMING
06409     stopTime = CkWallTimer();
06410     mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
06411 #endif
          // NOTE(review): cntRecvs is only incremented here; it is presumably
          // reset elsewhere before the next step - confirm.
06412     if (++cntRecvs == pd->numRecvs) {
06413       interpolation();
06414     }
06415   }
06416 
06417   void PatchData::interpolation() {
          // Computes, for every charged atom in coord, a force and an energy
          // contribution from the potential grid eh, using a stencil of
          // s_size points per dimension, then calls mgr->doneCompute().
          // Results are accumulated into force[n] and energy.
          // When MSM_COMM_ONLY is defined the whole computation is compiled
          // out and only the timing/doneCompute calls remain.
06418 #ifdef DEBUG_MSM_GRID
06419     printf("patchID %d:  interpolation\n", patchID);
06420 #endif
06421 
06422 #ifdef MSM_TIMING
06423     double startTime, stopTime;
06424     startTime = CkWallTimer();
06425 #endif
06426 #ifndef MSM_COMM_ONLY
          // running sum of q*q over charged atoms; scaled by mgr->gzero and
          // subtracted from energy after the atom loop
06427     BigReal energy_self = 0;
06428 
          // per-dimension stencil weights (phi) and their derivatives (dphi),
          // filled by mgr->d_stencil_1d() for each atom
06429     Float xphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06430     Float yphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06431     Float zphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06432     Float dxphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06433     Float dyphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06434     Float dzphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
06435 
          // rs_edge is subtracted from the floored grid coordinate to get the
          // lowest stencil index; s_size is the stencil width per dimension
06436     const Double rs_edge = Double( mgr->s_edge );
06437     const int s_size = ComputeMsmMgr::PolyDegree[mgr->approx] + 1;
06438 
06439     const Float hx_1 = Float(mgr->hxlen_1);  // real space inverse grid spacing
06440     const Float hy_1 = Float(mgr->hylen_1);
06441     const Float hz_1 = Float(mgr->hzlen_1);
06442 
          // index bounds and dimensions of eh; ib, jb, kb are referenced only
          // by the disabled (#if 0) bounds test further down
06443     const int ia = eh.ia();
06444     const int ib = eh.ib();
06445     const int ja = eh.ja();
06446     const int jb = eh.jb();
06447     const int ka = eh.ka();
06448     const int kb = eh.kb();
06449     const int ni = eh.ni();
06450     const int nj = eh.nj();
06451     Float *ehbuffer = eh.data().buffer();
06452 
06453     // loop over atoms
06454     for (int n = 0;  n < coord.len();  n++) {
06455       Float q = coord[n].charge;
            // uncharged atoms get no force or energy contribution
06456       if (0==q) continue;
06457 
06458       ScaledPosition s = mgr->lattice.scale(coord[n].position);
06459 
            // offset from mgr->sglower multiplied by mgr->sh*_1; the floor of
            // each value is used below as a grid index
06460       BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
06461       BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
06462       BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
06463 
            // lowest grid index touched by the stencil in each dimension
06464       BigReal xlo = floor(sx_hx) - rs_edge;
06465       BigReal ylo = floor(sy_hy) - rs_edge;
06466       BigReal zlo = floor(sz_hz) - rs_edge;
06467 
06468       // calculate Phi stencils along each dimension
06469       Float xdelta = Float(sx_hx - xlo);
06470       mgr->d_stencil_1d(dxphi, xphi, xdelta, hx_1);
06471       Float ydelta = Float(sy_hy - ylo);
06472       mgr->d_stencil_1d(dyphi, yphi, ydelta, hy_1);
06473       Float zdelta = Float(sz_hz - zlo);
06474       mgr->d_stencil_1d(dzphi, zphi, zdelta, hz_1);
06475 
06476       int ilo = int(xlo);
06477       int jlo = int(ylo);
06478       int klo = int(zlo);
06479 
            // The bounds test below is compiled out, so the ehbuffer indexing
            // in the stencil loop is unchecked in this function.
06480 #if 0
06481       // XXX don't need to test twice!
06482 
06483       // test to see if stencil is within edges of grid
06484       int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
06485                        ja <= jlo && (jlo+(s_size-1)) <= jb &&
06486                        ka <= klo && (klo+(s_size-1)) <= kb );
06487 
06488       if ( ! iswithin ) {
06489         char msg[100];
06490         snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
06491             coord[n].id);
06492         NAMD_die(msg);
06493       }
06494 #endif
06495 
06496       // determine force on atom from surrounding potential grid points
06497       //Force f = 0;
06498       //BigReal e = 0;
            // partial sums are kept in Float
06499       Float fx=0, fy=0, fz=0, e=0;
06500       for (int k = 0;  k < s_size;  k++) {
              // flattened offset: ((k-ka)*nj + (j-ja))*ni + (i-ia)
06501         int koff = ((k+klo) - ka) * nj;
06502         for (int j = 0;  j < s_size;  j++) {
06503           int jkoff = (koff + (j+jlo) - ja) * ni;
                // y-z weight products: cx pairs with dxphi (and with xphi for
                // the energy), cy carries the y derivative, cz the z derivative
06504           Float cx = yphi[j] * zphi[k];
06505           Float cy = dyphi[j] * zphi[k];
06506           Float cz = yphi[j] * dzphi[k];
06507           for (int i = 0;  i < s_size;  i++) {
06508             int ijkoff = jkoff + (i+ilo) - ia;
06509             Float ec = ehbuffer[ijkoff];
06510             fx += ec * dxphi[i] * cx;
06511             fy += ec * xphi[i] * cy;
06512             fz += ec * xphi[i] * cz;
06513             e += ec * xphi[i] * cx;
06514           }
06515         }
06516       }
06517 
            // disabled variant that combines fx, fy, fz through the mgr->sr*
            // coefficients; the active code below uses fx, fy, fz directly
06518 #if 0
06519       force[n].x -= q * (mgr->srx_x * fx + mgr->srx_y * fy + mgr->srx_z * fz);
06520       force[n].y -= q * (mgr->sry_x * fx + mgr->sry_y * fy + mgr->sry_z * fz);
06521       force[n].z -= q * (mgr->srz_x * fx + mgr->srz_y * fy + mgr->srz_z * fz);
06522 #endif
06523       force[n].x -= q * fx;
06524       force[n].y -= q * fy;
06525       force[n].z -= q * fz;
06526       energy += q * e;
06527       energy_self += q * q;
06528 
06529     } // end loop over atoms
06530 
          // subtract gzero * sum(q*q), then halve the accumulated energy
06531     energy_self *= mgr->gzero;
06532     energy -= energy_self;
06533     energy *= 0.5;
06534 #endif // !MSM_COMM_ONLY
06535 #ifdef MSM_TIMING
06536     stopTime = CkWallTimer();
06537     mgr->msmTiming[MsmTimer::INTERP] += stopTime - startTime;
06538     mgr->doneTiming();
06539 #endif
06540     mgr->doneCompute();
06541   }
06542 
06543   void PatchData::anterpolationC1Hermite() {
          // Spreads the charge of every charged atom in coord onto the
          // 2x2x2 grid points of qh_c1hermite around it, updating all eight
          // velem components of each point, then calls sendChargeC1Hermite().
          // An atom whose 2-point stencil falls outside the grid bounds
          // terminates the run through NAMD_die().
          // When MSM_COMM_ONLY is defined the spreading is compiled out.
06544 #ifdef DEBUG_MSM_GRID
06545     printf("patchID %d:  anterpolationC1Hermite\n", patchID);
06546 #endif
06547 
06548 #ifdef MSM_TIMING
06549     double startTime, stopTime;
06550     startTime = CkWallTimer();
06551 #endif
06552 #ifndef MSM_COMM_ONLY
          // two-point stencil weights per dimension, filled by
          // mgr->stencil_1d_c1hermite() for each atom
06553     Float xphi[2], xpsi[2];
06554     Float yphi[2], ypsi[2];
06555     Float zphi[2], zpsi[2];
06556 
06557     const Float hx = Float(mgr->hxlen);  // real space grid spacing
06558     const Float hy = Float(mgr->hylen);
06559     const Float hz = Float(mgr->hzlen);
06560 
          // index bounds and dimensions of the charge grid
06561     const int ia = qh_c1hermite.ia();
06562     const int ib = qh_c1hermite.ib();
06563     const int ja = qh_c1hermite.ja();
06564     const int jb = qh_c1hermite.jb();
06565     const int ka = qh_c1hermite.ka();
06566     const int kb = qh_c1hermite.kb();
06567     const int ni = qh_c1hermite.ni();
06568     const int nj = qh_c1hermite.nj();
06569     C1Vector *qhbuffer = qh_c1hermite.data().buffer();
06570 
06571     // loop over atoms
06572     for (int n = 0;  n < coord.len();  n++) {
06573       Float q = coord[n].charge;
            // uncharged atoms contribute nothing to the grid
06574       if (0==q) continue;
06575 
06576       ScaledPosition s = mgr->lattice.scale(coord[n].position);
06577 
            // offset from mgr->sglower multiplied by mgr->sh*_1; the floor of
            // each value is used below as a grid index
06578       BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
06579       BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
06580       BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
06581 
06582       BigReal xlo = floor(sx_hx);
06583       BigReal ylo = floor(sy_hy);
06584       BigReal zlo = floor(sz_hz);
06585 
06586       // calculate Phi stencils along each dimension
06587       Float xdelta = Float(sx_hx - xlo);
06588       mgr->stencil_1d_c1hermite(xphi, xpsi, xdelta, hx);
06589       Float ydelta = Float(sy_hy - ylo);
06590       mgr->stencil_1d_c1hermite(yphi, ypsi, ydelta, hy);
06591       Float zdelta = Float(sz_hz - zlo);
06592       mgr->stencil_1d_c1hermite(zphi, zpsi, zdelta, hz);
06593 
06594       int ilo = int(xlo);
06595       int jlo = int(ylo);
06596       int klo = int(zlo);
06597 
06598       // test to see if stencil is within edges of grid
            // (strict '<' on the upper bound: the loops below also touch
            // index lo+1 in each dimension)
06599       int iswithin = ( ia <= ilo && ilo < ib &&
06600                        ja <= jlo && jlo < jb &&
06601                        ka <= klo && klo < kb );
06602 
06603       if ( ! iswithin ) {
06604         char msg[100];
06605         snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
06606             coord[n].id);
06607         NAMD_die(msg);
06608       }
06609 
06610       // determine charge on cube of grid points around atom
06611       for (int k = 0;  k < 2;  k++) {
              // flattened offset: ((k-ka)*nj + (j-ja))*ni + (i-ia)
06612         int koff = ((k+klo) - ka) * nj;
06613         Float c_zphi = zphi[k] * q;
06614         Float c_zpsi = zpsi[k] * q;
06615         for (int j = 0;  j < 2;  j++) {
06616           int jkoff = (koff + (j+jlo) - ja) * ni;
06617           Float c_yphi_zphi = yphi[j] * c_zphi;
06618           Float c_ypsi_zphi = ypsi[j] * c_zphi;
06619           Float c_yphi_zpsi = yphi[j] * c_zpsi;
06620           Float c_ypsi_zpsi = ypsi[j] * c_zpsi;
06621           for (int i = 0;  i < 2;  i++) {
06622             int ijkoff = jkoff + (i+ilo) - ia;
                  // component Dxyz uses the psi weight in each dimension
                  // whose digit is 1 and the phi weight where it is 0
06623             qhbuffer[ijkoff].velem[D000] += xphi[i] * c_yphi_zphi;
06624             qhbuffer[ijkoff].velem[D100] += xpsi[i] * c_yphi_zphi;
06625             qhbuffer[ijkoff].velem[D010] += xphi[i] * c_ypsi_zphi;
06626             qhbuffer[ijkoff].velem[D001] += xphi[i] * c_yphi_zpsi;
06627             qhbuffer[ijkoff].velem[D110] += xpsi[i] * c_ypsi_zphi;
06628             qhbuffer[ijkoff].velem[D101] += xpsi[i] * c_yphi_zpsi;
06629             qhbuffer[ijkoff].velem[D011] += xphi[i] * c_ypsi_zpsi;
06630             qhbuffer[ijkoff].velem[D111] += xpsi[i] * c_ypsi_zpsi;
06631           }
06632         }
06633       }
06634 
06635     } // end loop over atoms
06636 
06637 #endif // !MSM_COMM_ONLY
06638 #ifdef MSM_TIMING
06639     stopTime = CkWallTimer();
06640     mgr->msmTiming[MsmTimer::ANTERP] += stopTime - startTime;
06641 #endif
06642 
06643     sendChargeC1Hermite();
06644   }
06645 
06646   void PatchData::sendChargeC1Hermite() {
          // Sends this patch's C1 Hermite charge grid qh_c1hermite to the
          // blocks listed in pd->send: for each entry one GridMsg is built
          // from the matching sub-range and passed to addCharge() on the
          // target element of mgr->msmC1HermiteBlock.
          // With MSM_TIMING defined, the packing time is added to the COMM
          // timer; the addCharge() call is made after the timed region ends.
06647 #ifdef MSM_TIMING
06648     double startTime, stopTime;
06649 #endif
          // constant offset added to MSM_PRIORITY for every message below
06650     int priority = 1;
06651     // buffer portions of grid to send to Blocks on level 0
06652     for (int n = 0;  n < pd->send.len();  n++) {
06653 #ifdef MSM_TIMING
06654       startTime = CkWallTimer();
06655 #endif
06656       // initialize the proper subgrid indexing range
06657       subgrid_c1hermite.init( pd->send[n].nrange );
06658       // extract the values from the larger grid into the subgrid
06659       qh_c1hermite.extract(subgrid_c1hermite);
06660       // translate the subgrid indexing range to match the MSM block
06661       subgrid_c1hermite.updateLower( pd->send[n].nrange_wrap.lower() );
06662       // add the subgrid charges into the block
            // (bindex names the destination block: its level and n.i/n.j/n.k)
06663       BlockIndex& bindex = pd->send[n].nblock_wrap;
06664       // place subgrid into message
            // payload size in bytes: one C1Vector per subgrid element
06665       int msgsz = subgrid_c1hermite.data().len() * sizeof(C1Vector);
            // NOTE(review): the second allocation argument is presumably the
            // priority field size consumed by SET_PRIORITY - confirm against
            // the Charm++ message allocation rules.
06666       GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
06667       SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
06668       gm->put(subgrid_c1hermite, bindex.level, sequence);
06669 #ifdef MSM_TIMING
06670       stopTime = CkWallTimer();
06671       mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
06672 #endif
            // hand the message to the block element selected by bindex
06673       mgr->msmC1HermiteBlock[bindex.level](
06674           bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
06675     }
06676   }
06677 
06678   void PatchData::addPotentialC1Hermite(const Grid<C1Vector>& epart) {
          // Adds one partial C1 Hermite potential grid (epart) into
          // eh_c1hermite and counts the contribution. When the count reaches
          // pd->numRecvs, interpolationC1Hermite() is called from here.
          // With MSM_TIMING defined, the time of the grid sum is added to
          // the COMM timer.
06679 #ifdef MSM_TIMING
06680     double startTime, stopTime;
06681     startTime = CkWallTimer();
06682 #endif
06683     eh_c1hermite += epart;
06684 #ifdef MSM_TIMING
06685     stopTime = CkWallTimer();
06686     mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
06687 #endif
          // NOTE(review): cntRecvs is only incremented here; it is presumably
          // reset elsewhere before the next step - confirm.
06688     if (++cntRecvs == pd->numRecvs) {
06689       interpolationC1Hermite();
06690     }
06691   }
06692 
06693   void PatchData::interpolationC1Hermite() {
          // Computes, for every charged atom in coord, a force and an energy
          // contribution from the 2x2x2 grid points of eh_c1hermite around
          // it, combining all eight velem components of each point, then
          // calls mgr->doneCompute(). Results are accumulated into force[n]
          // and energy.
          // When MSM_COMM_ONLY is defined the whole computation is compiled
          // out and only the timing/doneCompute calls remain.
06694 #ifdef DEBUG_MSM_GRID
          // NOTE(review): this debug message prints "interpolation", not the
          // name of this function, so it cannot be told apart from the one
          // in interpolation().
06695     printf("patchID %d:  interpolation\n", patchID);
06696 #endif
06697 
06698 #ifdef MSM_TIMING
06699     double startTime, stopTime;
06700     startTime = CkWallTimer();
06701 #endif
06702 #ifndef MSM_COMM_ONLY
          // running sum of q*q over charged atoms; scaled by mgr->gzero and
          // subtracted from energy after the atom loop
06703     BigReal energy_self = 0;
06704 
          // two-point stencil weights (phi, psi) and their derivatives
          // (dphi, dpsi) per dimension, filled by
          // mgr->d_stencil_1d_c1hermite() for each atom
06705     Float xphi[2], dxphi[2], xpsi[2], dxpsi[2];
06706     Float yphi[2], dyphi[2], ypsi[2], dypsi[2];
06707     Float zphi[2], dzphi[2], zpsi[2], dzpsi[2];
06708 
06709     const Float hx = Float(mgr->hxlen);      // real space grid spacing
06710     const Float hy = Float(mgr->hylen);
06711     const Float hz = Float(mgr->hzlen);
06712 
06713     const Float hx_1 = Float(mgr->hxlen_1);  // real space inverse grid spacing
06714     const Float hy_1 = Float(mgr->hylen_1);
06715     const Float hz_1 = Float(mgr->hzlen_1);
06716 
          // index bounds and dimensions of the potential grid; ib, jb, kb are
          // referenced only by the disabled (#if 0) bounds test further down
06717     const int ia = eh_c1hermite.ia();
06718     const int ib = eh_c1hermite.ib();
06719     const int ja = eh_c1hermite.ja();
06720     const int jb = eh_c1hermite.jb();
06721     const int ka = eh_c1hermite.ka();
06722     const int kb = eh_c1hermite.kb();
06723     const int ni = eh_c1hermite.ni();
06724     const int nj = eh_c1hermite.nj();
06725     C1Vector *ehbuffer = eh_c1hermite.data().buffer();
06726 
06727     // loop over atoms
06728     for (int n = 0;  n < coord.len();  n++) {
06729       Float q = coord[n].charge;
            // uncharged atoms get no force or energy contribution
06730       if (0==q) continue;
06731 
06732       ScaledPosition s = mgr->lattice.scale(coord[n].position);
06733 
            // offset from mgr->sglower multiplied by mgr->sh*_1; the floor of
            // each value is used below as a grid index
06734       BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
06735       BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
06736       BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
06737 
06738       BigReal xlo = floor(sx_hx);
06739       BigReal ylo = floor(sy_hy);
06740       BigReal zlo = floor(sz_hz);
06741 
06742       // calculate Phi stencils along each dimension
06743       Float xdelta = Float(sx_hx - xlo);
06744       mgr->d_stencil_1d_c1hermite(dxphi, xphi, dxpsi, xpsi,
06745           xdelta, hx, hx_1);
06746       Float ydelta = Float(sy_hy - ylo);
06747       mgr->d_stencil_1d_c1hermite(dyphi, yphi, dypsi, ypsi,
06748           ydelta, hy, hy_1);
06749       Float zdelta = Float(sz_hz - zlo);
06750       mgr->d_stencil_1d_c1hermite(dzphi, zphi, dzpsi, zpsi,
06751           zdelta, hz, hz_1);
06752 
06753       int ilo = int(xlo);
06754       int jlo = int(ylo);
06755       int klo = int(zlo);
06756 
            // The bounds test below is compiled out, so the ehbuffer indexing
            // in the stencil loop is unchecked in this function.
06757 #if 0
06758       // XXX don't need to test twice!
06759 
06760       // test to see if stencil is within edges of grid
06761       int iswithin = ( ia <= ilo && ilo < ib &&
06762                        ja <= jlo && jlo < jb &&
06763                        ka <= klo && klo < kb );
06764 
06765       if ( ! iswithin ) {
06766         char msg[100];
06767         snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
06768             coord[n].id);
06769         NAMD_die(msg);
06770       }
06771 #endif
06772 
06773       // determine force on atom from surrounding potential grid points
            // partial sums are kept in Float
06774       Float fx=0, fy=0, fz=0, e=0;
06775       for (int k = 0;  k < 2;  k++) {
              // flattened offset: ((k-ka)*nj + (j-ja))*ni + (i-ia)
06776         int koff = ((k+klo) - ka) * nj;
06777         for (int j = 0;  j < 2;  j++) {
06778           int jkoff = (koff + (j+jlo) - ja) * ni;
                // y-z weight products shared by the inner loop: plain
                // weights (used for fx and e), z-derivative weights (fz)
                // and y-derivative weights (fy)
06779           Float c_yphi_zphi = yphi[j] * zphi[k];
06780           Float c_ypsi_zphi = ypsi[j] * zphi[k];
06781           Float c_yphi_zpsi = yphi[j] * zpsi[k];
06782           Float c_ypsi_zpsi = ypsi[j] * zpsi[k];
06783           Float c_yphi_dzphi = yphi[j] * dzphi[k];
06784           Float c_ypsi_dzphi = ypsi[j] * dzphi[k];
06785           Float c_yphi_dzpsi = yphi[j] * dzpsi[k];
06786           Float c_ypsi_dzpsi = ypsi[j] * dzpsi[k];
06787           Float c_dyphi_zphi = dyphi[j] * zphi[k];
06788           Float c_dypsi_zphi = dypsi[j] * zphi[k];
06789           Float c_dyphi_zpsi = dyphi[j] * zpsi[k];
06790           Float c_dypsi_zpsi = dypsi[j] * zpsi[k];
06791           for (int i = 0;  i < 2;  i++) {
06792             int ijkoff = jkoff + (i+ilo) - ia;
                  // each sum pairs the x phi-type weight with components
                  // D0yz and the x psi-type weight with components D1yz;
                  // fx uses the x derivative weights (dxphi, dxpsi)
06793             fx += dxphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D000]
06794                 + c_ypsi_zphi * ehbuffer[ijkoff].velem[D010]
06795                 + c_yphi_zpsi * ehbuffer[ijkoff].velem[D001]
06796                 + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D011])
06797               + dxpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D100]
06798                   + c_ypsi_zphi * ehbuffer[ijkoff].velem[D110]
06799                   + c_yphi_zpsi * ehbuffer[ijkoff].velem[D101]
06800                   + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D111]);
06801             fy += xphi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].velem[D000]
06802                 + c_dypsi_zphi * ehbuffer[ijkoff].velem[D010]
06803                 + c_dyphi_zpsi * ehbuffer[ijkoff].velem[D001]
06804                 + c_dypsi_zpsi * ehbuffer[ijkoff].velem[D011])
06805               + xpsi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].velem[D100]
06806                   + c_dypsi_zphi * ehbuffer[ijkoff].velem[D110]
06807                   + c_dyphi_zpsi * ehbuffer[ijkoff].velem[D101]
06808                   + c_dypsi_zpsi * ehbuffer[ijkoff].velem[D111]);
06809             fz += xphi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].velem[D000]
06810                 + c_ypsi_dzphi * ehbuffer[ijkoff].velem[D010]
06811                 + c_yphi_dzpsi * ehbuffer[ijkoff].velem[D001]
06812                 + c_ypsi_dzpsi * ehbuffer[ijkoff].velem[D011])
06813               + xpsi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].velem[D100]
06814                   + c_ypsi_dzphi * ehbuffer[ijkoff].velem[D110]
06815                   + c_yphi_dzpsi * ehbuffer[ijkoff].velem[D101]
06816                   + c_ypsi_dzpsi * ehbuffer[ijkoff].velem[D111]);
06817             e += xphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D000]
06818                 + c_ypsi_zphi * ehbuffer[ijkoff].velem[D010]
06819                 + c_yphi_zpsi * ehbuffer[ijkoff].velem[D001]
06820                 + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D011])
06821               + xpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D100]
06822                   + c_ypsi_zphi * ehbuffer[ijkoff].velem[D110]
06823                   + c_yphi_zpsi * ehbuffer[ijkoff].velem[D101]
06824                   + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D111]);
06825           }
06826         }
06827       }
06828 
            // disabled variant that combines fx, fy, fz through the mgr->sr*
            // coefficients; the active code below uses fx, fy, fz directly
06829 #if 0
06830       force[n].x -= q * (mgr->srx_x * fx + mgr->srx_y * fy + mgr->srx_z * fz);
06831       force[n].y -= q * (mgr->sry_x * fx + mgr->sry_y * fy + mgr->sry_z * fz);
06832       force[n].z -= q * (mgr->srz_x * fx + mgr->srz_y * fy + mgr->srz_z * fz);
06833 #endif
06834       force[n].x -= q * fx;
06835       force[n].y -= q * fy;
06836       force[n].z -= q * fz;
06837       energy += q * e;
06838       energy_self += q * q;
06839 
06840     } // end loop over atoms
06841 
          // subtract gzero * sum(q*q), then halve the accumulated energy
06842     energy_self *= mgr->gzero;
06843     energy -= energy_self;
06844     energy *= 0.5;
06845 #endif // !MSM_COMM_ONLY
06846 #ifdef MSM_TIMING
06847     stopTime = CkWallTimer();
06848     mgr->msmTiming[MsmTimer::INTERP] += stopTime - startTime;
06849     mgr->doneTiming();
06850 #endif
06851     mgr->doneCompute();
06852   }
06853 
06854 } // namespace msm
06855 
06856 
06857 #include "ComputeMsmMgr.def.h"

Generated on Tue Sep 19 01:17:11 2017 for NAMD by  doxygen 1.4.7