NAMD
ComputeMsm.C
#include "InfoStream.h"
#include "Node.h"
#include "PDB.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "AtomMap.h"
#include "ComputeMsm.h"
#include "PatchMgr.h"
#include "Molecule.h"
#include "ReductionMgr.h"
#include "ComputeMgr.h"
#include "ComputeMgr.decl.h"
#include "Debug.h"
#include "SimParameters.h"
#include "WorkDistrib.h"
#include "Priorities.h"
#include "varsizemsg.h"
//#include "ckmulticast.h"
#include <stdio.h>
#include "MsmMap.h"

// MSM (multilevel summation method)
// has O(N) algorithmic complexity

// use multicast reduction of grids from sections of MsmGridCutoff
#define MSM_REDUCE_GRID
//#undef MSM_REDUCE_GRID

// use the decomposition of grid cutoff to create more work units
#define MSM_GRID_CUTOFF_DECOMP
//#undef MSM_GRID_CUTOFF_DECOMP

// skip over pairs of blocks that do not actually interact
#define MSM_SKIP_TOO_DISTANT_BLOCKS
//#undef MSM_SKIP_TOO_DISTANT_BLOCKS

// skip over pairs of blocks whose overlap is beyond nonzero gc sphere
// this search is more expensive than MSM_SKIP_TOO_DISTANT_BLOCKS
// and does not eliminate many block pairs
#define MSM_SKIP_BEYOND_SPHERE
//#undef MSM_SKIP_BEYOND_SPHERE

// node aware mapping of chare arrays
#define MSM_NODE_MAPPING
//#undef MSM_NODE_MAPPING

#define MSM_NODE_MAPPING_STATS
#undef MSM_NODE_MAPPING_STATS

// top of hierarchy calculates smaller blocks of charge to
// unfolded image blocks of potential, up to the desired block size,
// then sums the unfolded images of potential back into the
// actual potential block, thereby greatly reducing the number of
// block pairs that would otherwise be scheduled
#define MSM_FOLD_FACTOR
//#undef MSM_FOLD_FACTOR

// report timings for compute routines
// for diagnostic purposes only
#define MSM_TIMING
#undef MSM_TIMING

// report profiling for compute routines
// for diagnostic purposes only
#define MSM_PROFILING
#undef MSM_PROFILING

// use fixed size grid message
// XXX probably does not work anymore
#define MSM_FIXED_SIZE_GRID_MSG
#undef MSM_FIXED_SIZE_GRID_MSG

// turn off computation
// for diagnostic purposes only
//#define MSM_COMM_ONLY

// print diagnostics for memory alignment (for grid cutoff calculation)
// for diagnostic purposes only
#define DEBUG_MEMORY_ALIGNMENT
#undef DEBUG_MEMORY_ALIGNMENT


//
// This is the main message that gets passed between compute chares.
// It is used to bundle blocks of charge (sendUp and send to MsmGridCutoff)
// and blocks of potential (sendAcross, sendDown, and sendPatch).
//
// Higher priority has a numerically lower value.
//
// The priorities are set as follows:
//
// sendUp priority = level + 1
//
// (send to MsmGridCutoff) and sendAcross priority
//   = nlevels + 2*(nlevels - level) - 1
//
// sendDown and sendPatch priority
//   = nlevels + 2*(nlevels - level)
//
// This puts the priority on going up the hierarchy before going across
// and puts the priority on finishing the top levels and down before
// finishing the lower levels.
//
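// Illustrative sketch (not part of the original source): the priority
// formulas above written out as helper functions.  For nlevels = 3 the
// resulting schedule is sendUp = 1, 2, 3 for levels 0, 1, 2; then
// across = 4 and down = 5 at top level 2; then across = 6 and down = 7
// at level 1; and finally across = 8 and down = 9 at level 0.
#if 0
inline int sendUpPriority(int level) { return level + 1; }
inline int sendAcrossPriority(int nlevels, int level) {
  return nlevels + 2*(nlevels - level) - 1;
}
inline int sendDownPriority(int nlevels, int level) {
  return nlevels + 2*(nlevels - level);
}
#endif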
class GridMsg : public CkMcastBaseMsg, public CMessage_GridMsg {
  public:
    char *gdata;
    int idnum;
    int nlower_i;
    int nlower_j;
    int nlower_k;
    int nextent_i;
    int nextent_j;
    int nextent_k;
    int nbytes;
    int seqnum;  // sequence number is used for message priority

    // put a grid into an allocated message to be sent
    template <class T>
    void put(const msm::Grid<T>& g, int id, int seq) {
      idnum = id;
      nlower_i = g.lower().i;
      nlower_j = g.lower().j;
      nlower_k = g.lower().k;
      nextent_i = g.extent().i;
      nextent_j = g.extent().j;
      nextent_k = g.extent().k;
      nbytes = g.data().len()*sizeof(T);
      seqnum = seq;
      memcpy(gdata, g.data().buffer(), nbytes);
    }

    // get the grid from a received message
    template <class T>
    void get(msm::Grid<T>& g, int& id, int& seq) {
      id = idnum;
      // restore the grid index range from the message fields
      g.set(nlower_i, nextent_i, nlower_j, nextent_j, nlower_k, nextent_k);
      seq = seqnum;
      ASSERT(g.data().len()*sizeof(T) == nbytes);
      memcpy(g.data().buffer(), gdata, nbytes);
    }
};
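
// Illustrative sketch (not part of the original source): allocating and
// packing a GridMsg.  The varsize buffer gdata is sized in the Charm++
// operator new call, and priority bits are reserved so that seq can later
// be encoded as the message priority.  The helper name is hypothetical.
#if 0
template <class T>
GridMsg *packGrid(const msm::Grid<T>& qh, int id, int seq) {
  int nbytes = qh.data().len()*sizeof(T);
  GridMsg *gm = new(nbytes, sizeof(int)*8) GridMsg;  // varsize allocation
  gm->put(qh, id, seq);  // copies index range and grid data into message
  return gm;
}
#endif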


class MsmBlockProxyMsg : public CMessage_MsmBlockProxyMsg {
  public:
    enum { maxlevels = 32 };
    char msmBlockProxyData[maxlevels*sizeof(CProxy_MsmBlock)];
    int nlevels;

    // put an array into an allocated message to be sent
    void put(const msm::Array<CProxy_MsmBlock>& a) {
      nlevels = a.len();
      if (nlevels > maxlevels) {
        NAMD_die("Exceeded maximum number of MSM levels\n");
      }
      memcpy(msmBlockProxyData, a.buffer(), nlevels*sizeof(CProxy_MsmBlock));
    }

    // get the array from a received message
    void get(msm::Array<CProxy_MsmBlock>& a) {
      a.resize(nlevels);
      memcpy(a.buffer(), msmBlockProxyData, nlevels*sizeof(CProxy_MsmBlock));
    }
};


class MsmC1HermiteBlockProxyMsg : public CMessage_MsmC1HermiteBlockProxyMsg {
  public:
    enum { maxlevels = 32 };
    char msmBlockProxyData[maxlevels*sizeof(CProxy_MsmC1HermiteBlock)];
    int nlevels;

    // put an array into an allocated message to be sent
    void put(const msm::Array<CProxy_MsmC1HermiteBlock>& a) {
      nlevels = a.len();
      if (nlevels > maxlevels) {
        NAMD_die("Exceeded maximum number of MSM levels\n");
      }
      memcpy(msmBlockProxyData, a.buffer(),
          nlevels*sizeof(CProxy_MsmC1HermiteBlock));
    }

    // get the array from a received message
    void get(msm::Array<CProxy_MsmC1HermiteBlock>& a) {
      a.resize(nlevels);
      memcpy(a.buffer(), msmBlockProxyData,
          nlevels*sizeof(CProxy_MsmC1HermiteBlock));
    }
};


class MsmGridCutoffProxyMsg : public CMessage_MsmGridCutoffProxyMsg {
  public:
    char msmGridCutoffProxyData[sizeof(CProxy_MsmGridCutoff)];

    // put proxy into an allocated message to be sent
    void put(const CProxy_MsmGridCutoff *p) {
      memcpy(msmGridCutoffProxyData, p, sizeof(CProxy_MsmGridCutoff));
    }

    // get the proxy from a received message
    void get(CProxy_MsmGridCutoff *p) {
      memcpy(p, msmGridCutoffProxyData, sizeof(CProxy_MsmGridCutoff));
    }
};


class MsmC1HermiteGridCutoffProxyMsg :
  public CMessage_MsmC1HermiteGridCutoffProxyMsg
{
  public:
    char msmGridCutoffProxyData[sizeof(CProxy_MsmC1HermiteGridCutoff)];

    // put proxy into an allocated message to be sent
    void put(const CProxy_MsmC1HermiteGridCutoff *p) {
      memcpy(msmGridCutoffProxyData, p,
          sizeof(CProxy_MsmC1HermiteGridCutoff));
    }

    // get the proxy from a received message
    void get(CProxy_MsmC1HermiteGridCutoff *p) {
      memcpy(p, msmGridCutoffProxyData,
          sizeof(CProxy_MsmC1HermiteGridCutoff));
    }
};


class MsmGridCutoffInitMsg : public CMessage_MsmGridCutoffInitMsg {
  public:
    msm::BlockIndex qhBlockIndex;  // charge block index
    msm::BlockSend ehBlockSend;    // potential block sending address
    MsmGridCutoffInitMsg(const msm::BlockIndex& i, const msm::BlockSend& b)
      : qhBlockIndex(i), ehBlockSend(b) { }
};


class MsmGridCutoffSetupMsg :
  public CkMcastBaseMsg, public CMessage_MsmGridCutoffSetupMsg
{
  public:
    char msmBlockElementProxyData[sizeof(CProxyElement_MsmBlock)];

    // put proxy into an allocated message to be sent
    void put(
        const CProxyElement_MsmBlock *q //,
        ) {
      memcpy(msmBlockElementProxyData, q, sizeof(CProxyElement_MsmBlock));
    }

    // get the proxy from a received message
    void get(
        CProxyElement_MsmBlock *q //,
        ) {
      memcpy(q, msmBlockElementProxyData, sizeof(CProxyElement_MsmBlock));
    }
};


class MsmC1HermiteGridCutoffSetupMsg :
  public CkMcastBaseMsg, public CMessage_MsmC1HermiteGridCutoffSetupMsg
{
  public:
    char msmBlockElementProxyData[sizeof(CProxyElement_MsmC1HermiteBlock)];

    // put proxy into an allocated message to be sent
    void put(
        const CProxyElement_MsmC1HermiteBlock *q //,
        ) {
      memcpy(msmBlockElementProxyData, q,
          sizeof(CProxyElement_MsmC1HermiteBlock));
    }

    // get the proxy from a received message
    void get(
        CProxyElement_MsmC1HermiteBlock *q //,
        ) {
      memcpy(q, msmBlockElementProxyData,
          sizeof(CProxyElement_MsmC1HermiteBlock));
    }
};


// Used only when MSM_TIMING is defined
class MsmTimer : public CBase_MsmTimer {
  public:
    enum { ANTERP=0, INTERP, RESTRICT, PROLONGATE, GRIDCUTOFF, COMM, MAX };

    MsmTimer() {
      for (int i = 0;  i < MAX;  i++)  timing[i] = 0;
    }
    void done(double tm[], int n) {
      for (int i = 0;  i < MAX;  i++)  timing[i] = tm[i];
      print();
    }
    void print() {
      CkPrintf("MSM timings:\n");
      CkPrintf("  anterpolation  %8.6f sec\n", timing[ANTERP]);
      CkPrintf("  interpolation  %8.6f sec\n", timing[INTERP]);
      CkPrintf("  restriction    %8.6f sec\n", timing[RESTRICT]);
      CkPrintf("  prolongation   %8.6f sec\n", timing[PROLONGATE]);
      CkPrintf("  grid cutoff    %8.6f sec\n", timing[GRIDCUTOFF]);
      CkPrintf("  communication  %8.6f sec\n", timing[COMM]);
    }

    double timing[MAX];
};


// Used only when MSM_PROFILING is defined
class MsmProfiler : public CBase_MsmProfiler {
  public:
    enum { MAX = MSM_MAX_BLOCK_SIZE+1 };

    MsmProfiler() {
      for (int i = 0;  i < MAX;  i++)  xloopcnt[i] = 0;
    }
    void done(int lc[], int n) {
      for (int i = 0;  i < MAX;  i++)  xloopcnt[i] = lc[i];
      print();
    }
    void print() {
      int sum = 0;
      for (int i = 0;  i < MAX;  i++)  sum += xloopcnt[i];
      CkPrintf("MSM profiling:\n");
      CkPrintf("  total executions of inner loop: %d\n", sum);
      for (int i = 0;  i < MAX;  i++) {
        CkPrintf("  executing %d times: %d  (%5.2f%%)\n",
            i, xloopcnt[i], 100*double(xloopcnt[i])/sum);
      }
    }

    int xloopcnt[MAX];
};


// used with PriorityQueue
// when determining work mapped to node or PE
struct WorkIndex {
  float work;
  int index;
  WorkIndex() : work(0), index(0) { }
  WorkIndex(float w, int i) : work(w), index(i) { }
  int operator<=(const WorkIndex& wn) {
    return (work <= wn.work);
  }
};

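// Illustrative sketch (not part of the original source, and using
// std::priority_queue rather than the PriorityQueue class referenced
// above): greedy static load balancing with WorkIndex, where each work
// unit goes to the currently least loaded PE.
#if 0
#include <queue>
#include <vector>

struct MoreWork {
  bool operator()(const WorkIndex& a, const WorkIndex& b) const {
    return a.work > b.work;  // makes the top element the least loaded
  }
};

void assignWork(const std::vector<float>& unitWork, int numPes,
    std::vector<int>& assign) {
  std::priority_queue<WorkIndex, std::vector<WorkIndex>, MoreWork> pq;
  for (int pe = 0;  pe < numPes;  pe++)  pq.push(WorkIndex(0, pe));
  for (int n = 0;  n < (int)unitWork.size();  n++) {
    WorkIndex wi = pq.top();  pq.pop();
    assign[n] = wi.index;      // least loaded PE receives unit n
    wi.work += unitWork[n];
    pq.push(wi);
  }
}
#endif
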

//
// ComputeMsmMgr
// chare group containing MSM parameters and constants;
// one chare object per PE
//

class ComputeMsmMgr : public CBase_ComputeMsmMgr {
  friend struct msm::PatchData;
  friend class MsmBlock;
  //friend class MsmGridCutoff;
  friend class MsmBlockMap;
  friend class MsmGridCutoffMap;

public:
  ComputeMsmMgr();                 // entry
  ~ComputeMsmMgr();

  void initialize(MsmInitMsg *);   // entry with message
  void initialize_create();        // entry no message
private:
  void initialize2();              // split in two
public:

  void recvMsmBlockProxy(MsmBlockProxyMsg *);            // entry with message
  void recvMsmGridCutoffProxy(MsmGridCutoffProxyMsg *);  // entry with message

  void recvMsmC1HermiteBlockProxy(MsmC1HermiteBlockProxyMsg *);
    // entry with message
  void recvMsmC1HermiteGridCutoffProxy(MsmC1HermiteGridCutoffProxyMsg *);
    // entry with message

  void update(CkQdMsg *);          // entry with message

  void compute(msm::Array<int>& patchIDList);
    // called by local ComputeMsm object

  void addPotential(GridMsg *);    // entry with message
  void doneCompute();              // called by each local patch

#ifdef MSM_TIMING
  void initTiming() {
    for (int i = 0;  i < MsmTimer::MAX;  i++)  msmTiming[i] = 0;
    cntTiming = 0;
  }
  // every local object being timed should call this during initialization
  void addTiming() {
    numTiming++;
  }
  // object calls before being migrated
  void subtractTiming() {
    numTiming--;
  }
  void doneTiming() {
    if (++cntTiming >= numTiming) {
      CkCallback cb(CkReductionTarget(MsmTimer, done), msmTimer);
      contribute(MsmTimer::MAX*sizeof(double), msmTiming,
          CkReduction::sum_double, cb);
      initTiming();
    }
  }
#endif

#ifdef MSM_PROFILING
  void initProfiling() {
    for (int i = 0;  i < MsmProfiler::MAX;  i++)  xLoopCnt[i] = 0;
    cntProfiling = 0;
  }
  // every local object being profiled should call this during initialization
  void addProfiling() {
    numProfiling++;
  }
  // object calls before being migrated
  void subtractProfiling() {
    numProfiling--;
  }
  void doneProfiling() {
    if (++cntProfiling >= numProfiling) {
      CkCallback cb(CkReductionTarget(MsmProfiler, done), msmProfiler);
      contribute(MsmProfiler::MAX*sizeof(int), xLoopCnt,
          CkReduction::sum_int, cb);
      initProfiling();  // reset accumulators for next visit
    }
  }
#endif

  void setCompute(ComputeMsm *c) { msmCompute = c;  c->setMgr(this); }  // local

  msm::Map& mapData() { return map; }

  int numLevels() const { return nlevels; }

  // sign(n) = -1 if n < 0, 0 if n == 0, or 1 if n > 0
  static inline int sign(int n) {
    return (n < 0 ? -1 : (n > 0 ? 1 : 0));
  }

//private:
  void setup_hgrid_1d(BigReal len, BigReal& hh, int& nn,
      int& ia, int& ib, int isperiodic);
  void setup_periodic_blocksize(int& bsize, int n);

  CProxy_ComputeMsmMgr msmProxy;
  ComputeMsm *msmCompute;

  msm::Array<CProxy_MsmBlock> msmBlock;
  msm::Array<CProxy_MsmC1HermiteBlock> msmC1HermiteBlock;

  CProxy_MsmGridCutoff msmGridCutoff;
  CProxy_MsmC1HermiteGridCutoff msmC1HermiteGridCutoff;
  int numGridCutoff;  // length of msmGridCutoff chare array

  msm::Map map;

  // find patch by patchID
  // array is length number of patches, initialized to NULL
  // allocate PatchData for only those patches on this PE
  msm::PatchPtrArray patchPtr;

  // allocate subgrid used for receiving message data in addPotential()
  // and sending on to PatchData::addPotential()
  msm::Grid<Float> subgrid;
  msm::Grid<C1Vector> subgrid_c1hermite;

#ifdef MSM_NODE_MAPPING
  msm::Array<int> blockAssign;
  msm::Array<int> gcutAssign;
  //msm::Array<int> nodecnt;
  int blockFlatIndex(int level, int i, int j, int k) {
    int n = 0;
    for (int l = 0;  l < level;  l++) {
      n += map.blockLevel[l].nn();
    }
    return (n + map.blockLevel[level].flatindex(i,j,k));
  }
  // XXX ratio of work for MsmBlock to MsmGridCutoff?
  float calcBlockWork(const msm::BlockDiagram& b) {
    const float scalingFactor = 3;
    const int volumeFullBlock = map.bsx[0] * map.bsy[0] * map.bsz[0];
    msm::Ivec gn;
    if (approx == C1HERMITE) {
      gn = map.gc_c1hermite[0].extent();
    }
    else {
      gn = map.gc[0].extent();
    }
    const int volumeFullCutoff = (map.bsx[0] + gn.i - 1) *
      (map.bsy[0] + gn.j - 1) * (map.bsz[0] + gn.k - 1);
    msm::Ivec n = b.nrange.extent();
    int volumeBlock = n.i * n.j * n.k;
    msm::Ivec nc = b.nrangeCutoff.extent();
    int volumeCutoff = nc.i * nc.j * nc.k;
    return( scalingFactor * (float(volumeBlock) / volumeFullBlock) *
        (float(volumeCutoff) / volumeFullCutoff) );
  }
  float calcGcutWork(const msm::BlockSend& bs) {
    const int volumeFullBlock = map.bsx[0] * map.bsy[0] * map.bsz[0];
    msm::Ivec n = bs.nrange_wrap.extent();
    int volumeBlock = n.i * n.j * n.k;
    return( float(volumeBlock) / volumeFullBlock );
  }
#endif
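
  // Worked example (illustrative, not from the original source): with a
  // level-0 block size of bsx = bsy = bsz = 8 and a cutoff stencil extent
  // of gn = (9,9,9),
  //   volumeFullBlock  = 8*8*8    = 512
  //   volumeFullCutoff = 16*16*16 = 4096
  // so calcBlockWork() returns scalingFactor * 1 * 1 = 3 for a full
  // interior block, and proportionally less for the smaller blocks that
  // occur on coarser levels and at non-periodic boundaries.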

  // sum local virial factors
  msm::Grid<Float> gvsum;
  int numVirialContrib;
  int cntVirialContrib;
  enum { VXX=0, VXY, VXZ, VYY, VYZ, VZZ, VMAX };
  Float virial[VMAX];

  void initVirialContrib() {
    gvsum.reset(0);
    cntVirialContrib = 0;
  }
  void addVirialContrib() {
    numVirialContrib++;
  }
  void subtractVirialContrib() {
    numVirialContrib--;
  }
  void doneVirialContrib() {
    if (++cntVirialContrib >= numVirialContrib) {
      // reduce all gvsum contributions into virial tensor
      for (int n = 0;  n < VMAX;  n++) { virial[n] = 0; }
      int ia = gvsum.ia();
      int ib = gvsum.ib();
      int ja = gvsum.ja();
      int jb = gvsum.jb();
      int ka = gvsum.ka();
      int kb = gvsum.kb();
      for (int k = ka;  k <= kb;  k++) {
        for (int j = ja;  j <= jb;  j++) {
          for (int i = ia;  i <= ib;  i++) {
            Float cu = Float(i);
            Float cv = Float(j);
            Float cw = Float(k);
            Float c = gvsum(i,j,k);
            Float vx = cu*hufx + cv*hvfx + cw*hwfx;
            Float vy = cu*hufy + cv*hvfy + cw*hwfy;
            Float vz = cu*hufz + cv*hvfz + cw*hwfz;
            virial[VXX] -= c * vx * vx;
            virial[VXY] -= c * vx * vy;
            virial[VXZ] -= c * vx * vz;
            virial[VYY] -= c * vy * vy;
            virial[VYZ] -= c * vy * vz;
            virial[VZZ] -= c * vz * vz;
          }
        }
      }
      initVirialContrib();
    }
  }

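  // (note) In doneVirialContrib() above, (vx,vy,vz) is the Cartesian
  // position of grid point (i,j,k), r = i*hu + j*hv + k*hw, so the loop
  // accumulates the virial tensor  V_ab = - sum_{ijk} c(i,j,k) r_a r_b
  // from the reduced grid of virial factors.
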
#ifdef MSM_TIMING
  CProxy_MsmTimer msmTimer;
  double msmTiming[MsmTimer::MAX];
  int numTiming;  // total number of objects being timed
  int cntTiming;  // count the objects as they provide timing results
  CkCallback *cbTiming;
#endif

#ifdef MSM_PROFILING
  CProxy_MsmProfiler msmProfiler;
  int xLoopCnt[MsmProfiler::MAX];
  int numProfiling;  // total number of objects being profiled
  int cntProfiling;  // count the objects as they provide profiling results
  CkCallback *cbProfiling;
#endif

  Vector c, u, v, w;     // rescaled center and lattice vectors
  Vector ru, rv, rw;     // row vectors to transform to unit space
  int ispu, ispv, ispw;  // is periodic along u, v, w?

  Lattice lattice;            // keep local copy of lattice
  ScaledPosition smin;        // keep min values for non-periodic dimensions
  ScaledPosition smax;        // keep max values for non-periodic dimensions
  BigReal gridspacing;        // preferred grid spacing
  BigReal padding;            // padding for non-periodic boundaries
  BigReal gridScalingFactor;  // scaling for Hermite interpolation
  BigReal a;                  // cutoff distance
  BigReal hxlen, hylen, hzlen;        // first level grid spacings along basis vectors
  BigReal hxlen_1, hylen_1, hzlen_1;  // inverses of grid spacings
  Vector hu, hv, hw;          // first level grid spacing vectors
  int nhx, nhy, nhz;          // number of h spacings that cover cell
  int approx;                 // ID for approximation
  int split;                  // ID for splitting
  int nlevels;                // number of grid levels
  int dispersion;             // calculating dispersion forces?
  BigReal gzero;              // self energy factor from splitting

  Vector sglower;  // lower corner of grid in scaled space
                   // corresponds to index (0,0,0)

  BigReal shx, shy, shz;  // grid spacings in scaled space
  Vector sx_shx;  // row vector to transform interpolated force x
  Vector sy_shy;  // row vector to transform interpolated force y
  Vector sz_shz;  // row vector to transform interpolated force z
  Float srx_x, srx_y, srx_z;  // float version of sx_shx
  Float sry_x, sry_y, sry_z;  // float version of sy_shy
  Float srz_x, srz_y, srz_z;  // float version of sz_shz

  int s_edge;
  int omega;

  // single precision grid spacing vectors, used by the virial reduction
  // (declarations reconstructed; elided from this listing)
  Float hufx, hufy, hufz, hvfx, hvfy, hvfz, hwfx, hwfy, hwfz;

  enum {
    // (enum value names reconstructed; elided from this listing)

    // Approximation formulas with up to degree 9 polynomials.
    MAX_POLY_DEGREE = 9,

    // Max stencil length for polynomial approximation.
    MAX_NSTENCIL_SIZE = 2*MAX_POLY_DEGREE + 1,

    // Max stencil length when skipping zeros
    // (almost half entries are zero for interpolating polynomials).
    MAX_NSTENCIL_SKIP_ZERO = MAX_POLY_DEGREE + 2,

    // Number of scalar approximation formulas.
    NUM_APPROX_FORMS = (NONIC4 - CUBIC) + 1
  };

  // Degree of polynomial basis function Phi.
  static const int PolyDegree[NUM_APPROX];

  // The stencil array lengths below.
  static const int Nstencil[NUM_APPROX];

  // Index offsets from the stencil-centered grid element, to get
  // to the correct contributing grid element.
  static const int IndexOffset[NUM_APPROX][MAX_NSTENCIL_SIZE];

  // The grid transfer stencils for the non-factored restriction and
  // prolongation procedures.
  static const Float PhiStencil[NUM_APPROX_FORMS][MAX_NSTENCIL_SIZE];

  // Calculate the smoothing function and its derivative:
  // g(R) and (d/dR)g(R), where R = r/a.
  // Use double precision for calculating the MSM constant weights
  // and coefficients.  The resulting coefficients to be used in
  // the repeatedly called algorithm are stored in single precision.
  static void splitting(BigReal& g, BigReal& dg, BigReal r_a, int _split) {
    BigReal s = r_a * r_a;  // s = (r/a)^2, assuming 0 <= s <= 1
    switch (_split) {
      case TAYLOR2:
        g = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
        dg = (2*r_a)*(-1./2 + (s-1)*(3./4));
        break;
      case TAYLOR3:
        g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
        dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16)));
        break;
      case TAYLOR4:
        g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                + (s-1)*(35./128))));
        dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
                + (s-1)*(35./32))));
        break;
      case TAYLOR5:
        g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                + (s-1)*(35./128 + (s-1)*(-63./256)))));
        dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
                + (s-1)*(35./32 + (s-1)*(-315./256)))));
        break;
      case TAYLOR6:
        g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                + (s-1)*(35./128 + (s-1)*(-63./256
                    + (s-1)*(231./1024))))));
        dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
                + (s-1)*(35./32 + (s-1)*(-315./256
                    + (s-1)*(693./512))))));
        break;
      case TAYLOR7:
        g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                + (s-1)*(35./128 + (s-1)*(-63./256
                    + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
        dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
                + (s-1)*(35./32 + (s-1)*(-315./256
                    + (s-1)*(693./512 + (s-1)*(-3003./2048)))))));
        break;
      case TAYLOR8:
        g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                + (s-1)*(35./128 + (s-1)*(-63./256
                    + (s-1)*(231./1024 + (s-1)*(-429./2048
                        + (s-1)*(6435./32768))))))));
        dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
                + (s-1)*(35./32 + (s-1)*(-315./256
                    + (s-1)*(693./512 + (s-1)*(-3003./2048
                        + (s-1)*(6435./4096))))))));
        break;
      case TAYLOR2_DISP:
        g = 1 + (s-1)*(-3 + (s-1)*(6));
        dg = (2*r_a)*(-3 + (s-1)*(12));
        break;
      case TAYLOR3_DISP:
        g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10)));
        dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30)));
        break;
      case TAYLOR4_DISP:
        g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10 + (s-1)*(15))));
        dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30 + (s-1)*(60))));
        break;
      case TAYLOR5_DISP:
        g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
                + (s-1)*(15 + (s-1)*(-21)))));
        dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
                + (s-1)*(60 + (s-1)*(-105)))));
        break;
      case TAYLOR6_DISP:
        g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
                + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28))))));
        dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
                + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168))))));
        break;
      case TAYLOR7_DISP:
        g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
                + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
                    + (s-1)*(-36)))))));
        dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
                + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
                    + (s-1)*(-252)))))));
        break;
      case TAYLOR8_DISP:
        g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
                + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
                    + (s-1)*(-36 + (s-1)*(45))))))));
        dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
                + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
                    + (s-1)*(-252 + (s-1)*(360))))))));
        break;
      default:
        NAMD_die("Unknown MSM splitting.");
    } // switch
  } // splitting()
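
  // Illustrative check (not part of the original source): each splitting
  // is a truncated Taylor expansion of the kernel in s = (r/a)^2 about
  // s = 1, so at r = a the smoothed part matches the kernel in value and
  // slope: g(1) = 1 with (d/dR)g = -1 for the electrostatic (1/r)
  // splittings and -6 for the dispersion (1/r^6) splittings.
#if 0
  static void checkSplittingContinuity(int _split) {
    BigReal g, dg;
    splitting(g, dg, 1.0, _split);  // evaluate at r/a = 1
    // electrostatic TAYLOR*:      g == 1, dg == -1
    // dispersion TAYLOR*_DISP:    g == 1, dg == -6
  }
#endif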

  void stencil_1d(Float phi[], Float t) {
    switch (approx) {
      case CUBIC:
        phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
        t--;
        phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
        t--;
        phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
        t--;
        phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
        break;
      case QUINTIC:
        phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
        t--;
        phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
            + t * (0.375f - (5.f/24)*t));
        t--;
        phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
        t--;
        phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
        t--;
        phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
            - t * (0.375f + (5.f/24)*t));
        t--;
        phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
        break;
      case QUINTIC2:
        phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
        t--;
        phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
        t--;
        phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
        t--;
        phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
        t--;
        phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
        t--;
        phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
        break;
      case SEPTIC:
        phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
        t--;
        phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
        t--;
        phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
        t--;
        phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
        t--;
        phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
        t--;
        phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
        t--;
        phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
        t--;
        phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
        break;
      case SEPTIC3:
        phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
                + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
                      + t*(-89.f/720)))))));
        t--;
        phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
                + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
                      + t*(623.f/720)))))));
        t--;
        phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
                + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
                      + t*(-623.f/240)))))));
        t--;
        phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
                + t*((-727.f/48) + t*(623.f/144)))));
        t--;
        phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
                + t*((-727.f/48) + t*(-623.f/144)))));
        t--;
        phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
                + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
                      + t*(623.f/240)))))));
        t--;
        phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
                + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
                      + t*(-623.f/720)))))));
        t--;
        phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
                + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
                      + t*(89.f/720)))))));
        break;
      case NONIC:
        phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
          (t-2)*(t-1);
        t--;
        phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
          (-8+t*(-35+9*t));
        t--;
        phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
          (-14+t*(-25+9*t));
        t--;
        phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
          (-6+t*(-5+3*t));
        t--;
        phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
          (-20+t*(-5+9*t));
        t--;
        phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
          (-20+t*(5+9*t));
        t--;
        phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
          (-6+t*(5+3*t));
        t--;
        phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
          (-14+t*(25+9*t));
        t--;
        phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
          (-8+t*(35+9*t));
        t--;
        phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
          (t+7)*(t+8);
        break;
      case NONIC4:
        { // begin grouping to define local variables
          double Tphi[10];
          double T = t;
          Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
                +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
                      +T*(129391./192+T*(-259715./4032+T*(28909./8064
                            +T*(-3569./40320)))))))));
          T--;
          Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
                +T*(-19648027./384+T*(9469163./640+T*(-545977./192
                      +T*(156927./448+T*(-28909./1152
                            +T*(3569./4480)))))))));
          T--;
          Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
                +T*(3548009./96+T*(-2422263./160+T*(197255./48
                      +T*(-19959./28+T*(144545./2016
                            +T*(-3569./1120)))))))));
          T--;
          Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
                +T*(-576499./96+T*(686147./160+T*(-96277./48
                      +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
          T--;
          Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
                  +T*(6337./96+T*(-2745./32+T*(28909./576
                        +T*(-3569./320)))))));
          T--;
          Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
                  +T*(6337./96+T*(2745./32+T*(28909./576
                        +T*(3569./320)))))));
          T--;
          Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
                +T*(-576499./96+T*(-686147./160+T*(-96277./48
                      +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
          T--;
          Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
                +T*(3548009./96+T*(2422263./160+T*(197255./48
                      +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
          T--;
          Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
                +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
                      +T*(-156927./448+T*(-28909./1152
                            +T*(-3569./4480)))))))));
          T--;
          Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
                +T*(17306975./288+T*(7761805./384+T*(2895587./640
                      +T*(129391./192+T*(259715./4032+T*(28909./8064
                            +T*(3569./40320)))))))));
          for (int i = 0;  i < 10;  i++) {
            phi[i] = Float(Tphi[i]);
          }
        } // end grouping to define local variables
        break;
      default:
        NAMD_die("Unknown MSM approximation.");
    } // switch
  } // stencil_1d()

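  // Illustrative check (not part of the original source): the
  // interpolating basis functions form a partition of unity, so the
  // stencil weights returned by stencil_1d() sum to 1 for any fractional
  // offset.  For CUBIC the convention is t = 1 + frac with frac in [0,1);
  // frac = 0.5 gives phi = {-1/16, 9/16, 9/16, -1/16}, which sums to 1.
#if 0
  void checkPartitionOfUnity(Float frac) {  // assumes approx == CUBIC
    Float phi[4];
    stencil_1d(phi, 1 + frac);
    Float sum = phi[0] + phi[1] + phi[2] + phi[3];
    // sum == 1 up to roundoff
  }
#endif
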
  void d_stencil_1d(Float dphi[], Float phi[], Float t, Float h_1) {
    switch (approx) {
      case CUBIC:
        phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
        dphi[0] = (1.5f * t - 2) * (2 - t) * h_1;
        t--;
        phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
        dphi[1] = (-5 + 4.5f * t) * t * h_1;
        t--;
        phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
        dphi[2] = (-5 - 4.5f * t) * t * h_1;
        t--;
        phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
        dphi[3] = (1.5f * t + 2) * (2 + t) * h_1;
        break;
      case QUINTIC:
        phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
        dphi[0] = ((-1.f/24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t))
              + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1;
        t--;
        phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
            + t * (0.375f - (5.f/24)*t));
        dphi[1] = (-((1.f/6) + t * (0.375f - (5.f/24)*t)) *
            (11 + t * (-12 + 3*t)) + (1-t) * (2-t) * (3-t) *
            (0.375f - (5.f/12)*t)) * h_1;
        t--;
        phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
        dphi[2] = (-(0.5f + t * (0.25f - (5.f/12)*t)) * (1 + t * (4 - 3*t))
            + (1-t*t) * (2-t) * (0.25f - (5.f/6)*t)) * h_1;
        t--;
        phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
        dphi[3] = ((0.5f + t * (-0.25f - (5.f/12)*t)) * (1 + t * (-4 - 3*t))
            - (1-t*t) * (2+t) * (0.25f + (5.f/6)*t)) * h_1;
        t--;
        phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
            - t * (0.375f + (5.f/24)*t));
        dphi[4] = (((1.f/6) + t * (-0.375f - (5.f/24)*t)) *
            (11 + t * (12 + 3*t)) - (1+t) * (2+t) * (3+t) *
            (0.375f + (5.f/12)*t)) * h_1;
        t--;
        phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
        dphi[5] = ((1.f/24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t))
              + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1;
        break;
      case QUINTIC2:
        phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
        dphi[0] = ((1.f/24) * (3-t) * (3-t) * ((3-t)*(5*t-8)
              - 3*(t-2)*(5*t-8) + 5*(t-2)*(3-t))) * h_1;
        t--;
        phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
        dphi[1] = ((-1.f/24) * ((2-t)*(-48+t*(153+t*(-114+t*25)))
              - (t-1)* (-48+t*(153+t*(-114+t*25)))
              + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1;
        t--;
        phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
        dphi[2] = ((1.f/12) * (-(12+t*(12+t*(-3+t*(-38+t*25))))
              + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1;
        t--;
        phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
        dphi[3] = ((1.f/12) * ((12+t*(-12+t*(-3+t*(38+t*25))))
              + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1;
        t--;
        phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
        dphi[4] = ((-1.f/24) * ((2+t)*(48+t*(153+t*(114+t*25)))
              + (t+1)* (48+t*(153+t*(114+t*25)))
              + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1;
        t--;
        phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
        dphi[5] = ((1.f/24) * (3+t) * (3+t) * ((3+t)*(5*t+8)
              + 3*(t+2)*(5*t+8) + 5*(t+2)*(3+t))) * h_1;
        break;
      case SEPTIC:
        phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
        dphi[0] = (-1.f/720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807
                  +t*(-122+t*7))))) * h_1;
        t--;
        phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
        dphi[1] = (1.f/720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445
                    +t*(-750+t*49)))))) * h_1;
        t--;
        phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
        dphi[2] = (-1.f/240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365
                    +t*(-450+t*49)))))) * h_1;
        t--;
        phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
        dphi[3] = (1.f/144)*t*(-560+t*(84+t*(644+t*(-175
                  +t*(-150+t*49))))) * h_1;
        t--;
        phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
        dphi[4] = (-1.f/144)*t*(560+t*(84+t*(-644+t*(-175
                  +t*(150+t*49))))) * h_1;
        t--;
        phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
        dphi[5] = (1.f/240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365
                    +t*(450+t*49)))))) * h_1;
        t--;
        phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
        dphi[6] = (-1.f/720)*(756+t*(9940+t*(17724+t*(12740+t*(4445
                    +t*(750+t*49)))))) * h_1;
        t--;
        phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
        dphi[7] = (1.f/720)*(t+4)*(1944+t*(3644+t*(2512+t*(807
                  +t*(122+t*7))))) * h_1;
        break;
      case SEPTIC3:
        phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
                + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
                      + t*(-89.f/720)))))));
        dphi[0] = ((-7456.f/5) + t*((117572.f/45) + t*(-1899
                + t*((26383.f/36) + t*((-22807.f/144) + t*((727.f/40)
                      + t*(-623.f/720))))))) * h_1;
        t--;
        phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
                + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
                      + t*(623.f/720)))))));
        dphi[1] = ((25949.f/20) + t*((-117131.f/36) + t*((6741.f/2)
                + t*((-66437.f/36) + t*((81109.f/144) + t*((-727.f/8)
                      + t*(4361.f/720))))))) * h_1;
        t--;
        phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
                + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
                      + t*(-623.f/240)))))));
        dphi[2] = ((-8617.f/60) + t*((12873.f/20) + t*((-2373.f/2)
                + t*((4557.f/4) + t*((-9583.f/16) + t*((6543.f/40)
                      + t*(-4361.f/240))))))) * h_1;
        t--;
        phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
                + t*((-727.f/48) + t*(623.f/144)))));
        dphi[3] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((12845.f/144)
                  + t*((-727.f/8) + t*(4361.f/144)))))) * h_1;
        t--;
        phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
                + t*((-727.f/48) + t*(-623.f/144)))));
        dphi[4] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((-12845.f/144)
                  + t*((-727.f/8) + t*(-4361.f/144)))))) * h_1;
        t--;
        phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
                + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
                      + t*(623.f/240)))))));
        dphi[5] = ((8617.f/60) + t*((12873.f/20) + t*((2373.f/2)
                + t*((4557.f/4) + t*((9583.f/16) + t*((6543.f/40)
                      + t*(4361.f/240))))))) * h_1;
        t--;
        phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
                + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
                      + t*(-623.f/720)))))));
        dphi[6] = ((-25949.f/20) + t*((-117131.f/36) + t*((-6741.f/2)
                + t*((-66437.f/36) + t*((-81109.f/144) + t*((-727.f/8)
                      + t*(-4361.f/720))))))) * h_1;
        t--;
        phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
                + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
                      + t*(89.f/720)))))));
        dphi[7] = ((7456.f/5) + t*((117572.f/45) + t*(1899
                + t*((26383.f/36) + t*((22807.f/144) + t*((727.f/40)
                      + t*(623.f/720))))))) * h_1;
        break;
      case NONIC:
        phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
          (t-2)*(t-1);
        dphi[0] = (-1.f/40320)*(t-5)*(-117648+t*(256552+t*(-221416
                +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1;
        t--;
        phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
          (-8+t*(-35+9*t));
        dphi[1] = (1.f/40320)*(71856+t*(-795368+t*(1569240+t*(-1357692
                  +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1;
        t--;
        phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
          (-14+t*(-25+9*t));
        dphi[2] = (1.f/10080)*(3384+t*(-69080+t*(55026
                +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640
                          +t*(-81)))))))))*h_1;
        t--;
        phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
          (-6+t*(-5+3*t));
        dphi[3] = (1.f/1440)*(72+t*(-6344+t*(2070
                +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1;
        t--;
        phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
          (-20+t*(-5+9*t));
        dphi[4] = (-1.f/2880)*t*(10792+t*(-972+t*(-12516
                +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1;
        t--;
        phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
          (-20+t*(5+9*t));
        dphi[5] = (1.f/2880)*t*(-10792+t*(-972+t*(12516
                +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1;
        t--;
        phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
          (-6+t*(5+3*t));
        dphi[6] = (1.f/1440)*(-72+t*(-6344+t*(-2070
                +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1;
        t--;
        phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
          (-14+t*(25+9*t));
        dphi[7] = (1.f/10080)*(-3384+t*(-69080+t*(-55026
                +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1;
        t--;
        phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
          (-8+t*(35+9*t));
        dphi[8] = (-1.f/40320)*(71856+t*(795368+t*(1569240
                +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296
                          +t*81))))))))*h_1;
        t--;
        phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
          (t+7)*(t+8);
        dphi[9] = (1.f/40320)*(t+5)*(117648+t*(256552+t*(221416
                +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1;
        break;
      case NONIC4:
        { // begin grouping to define local variables
          double Tphi[10], Tdphi[10];
          double T = t;
          Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
                +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
                      +T*(129391./192+T*(-259715./4032+T*(28909./8064
                            +T*(-3569./40320)))))))));
          Tdphi[0] = (-64188125./504+T*(231125375./1008
                +T*(-17306975./96+T*(7761805./96+T*(-2895587./128
                      +T*(129391./32+T*(-259715./576+T*(28909./1008
                            +T*(-3569./4480))))))))) * h_1;
          T--;
          Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
                +T*(-19648027./384+T*(9469163./640+T*(-545977./192
                      +T*(156927./448+T*(-28909./1152
                            +T*(3569./4480)))))))));
          Tdphi[1] = (8314091./56+T*(-49901303./144+T*(11290587./32
                +T*(-19648027./96+T*(9469163./128+T*(-545977./32
                      +T*(156927./64+T*(-28909./144
                            +T*(32121./4480))))))))) * h_1;
          T--;
          Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
                +T*(3548009./96+T*(-2422263./160+T*(197255./48
                      +T*(-19959./28+T*(144545./2016
                            +T*(-3569./1120)))))))));
          Tdphi[2] = (-1038011./28+T*(31157515./252+T*(-2870007./16
                +T*(3548009./24+T*(-2422263./32+T*(197255./8
                      +T*(-19959./4+T*(144545./252
                            +T*(-32121./1120))))))))) * h_1;
          T--;
          Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
                +T*(-576499./96+T*(686147./160+T*(-96277./48
                      +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
          Tdphi[3] = (12757./12+T*(-230123./36+T*(264481./16
                +T*(-576499./24+T*(686147./32+T*(-96277./8
                      +T*(99547./24+T*(-28909./36
                            +T*(10707./160))))))))) * h_1;
          T--;
          Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
                  +T*(6337./96+T*(-2745./32+T*(28909./576
                        +T*(-3569./320)))))));
          Tdphi[4] = T*(-205./72+T*T*(91./48+T*(-6181./64
                  +T*(6337./16+T*(-19215./32+T*(28909./72
                        +T*(-32121./320))))))) * h_1;
          T--;
          Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
                  +T*(6337./96+T*(2745./32+T*(28909./576
                        +T*(3569./320)))))));
          Tdphi[5] = T*(-205./72+T*T*(91./48+T*(6181./64
                  +T*(6337./16+T*(19215./32+T*(28909./72
                        +T*(32121./320))))))) * h_1;
          T--;
          Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
                +T*(-576499./96+T*(-686147./160+T*(-96277./48
                      +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
          Tdphi[6] = (-12757./12+T*(-230123./36+T*(-264481./16
                +T*(-576499./24+T*(-686147./32+T*(-96277./8
                      +T*(-99547./24+T*(-28909./36
                            +T*(-10707./160))))))))) * h_1;
          T--;
          Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
                +T*(3548009./96+T*(2422263./160+T*(197255./48
                      +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
          Tdphi[7] = (1038011./28+T*(31157515./252+T*(2870007./16
                +T*(3548009./24+T*(2422263./32+T*(197255./8
                      +T*(19959./4+T*(144545./252
                            +T*(32121./1120))))))))) * h_1;
          T--;
          Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
                +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
                      +T*(-156927./448+T*(-28909./1152
                            +T*(-3569./4480)))))))));
          Tdphi[8] = (-8314091./56+T*(-49901303./144+T*(-11290587./32
                +T*(-19648027./96+T*(-9469163./128+T*(-545977./32
                      +T*(-156927./64+T*(-28909./144
                            +T*(-32121./4480))))))))) * h_1;
          T--;
          Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
                +T*(17306975./288+T*(7761805./384+T*(2895587./640
                      +T*(129391./192+T*(259715./4032+T*(28909./8064
                            +T*(3569./40320)))))))));
          Tdphi[9] = (64188125./504+T*(231125375./1008
                +T*(17306975./96+T*(7761805./96+T*(2895587./128
                      +T*(129391./32+T*(259715./576+T*(28909./1008
                            +T*(3569./4480))))))))) * h_1;
          for (int i = 0;  i < 10;  i++) {
            phi[i] = Float(Tphi[i]);
            dphi[i] = Float(Tdphi[i]);
          }
        } // end grouping to define local variables
        break;
      default:
        NAMD_die("Unknown MSM approximation.");
    } // switch
  } // d_stencil_1d()

  void stencil_1d_c1hermite(Float phi[], Float psi[], Float t, Float h) {
    phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
    psi[0] = h * t * (1 - t) * (1 - t);
    t--;
    phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
    psi[1] = h * t * (1 + t) * (1 + t);
  }

  void d_stencil_1d_c1hermite(
      Float dphi[], Float phi[], Float dpsi[], Float psi[],
      Float t, Float h, Float h_1) {
    phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
    dphi[0] = -6 * t * (1 - t) * h_1;
    psi[0] = h * t * (1 - t) * (1 - t);
    dpsi[0] = (1 - t) * (1 - 3*t);
    t--;
    phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
    dphi[1] = -6 * t * (1 + t) * h_1;
    psi[1] = h * t * (1 + t) * (1 + t);
    dpsi[1] = (1 + t) * (1 + 3*t);
  }

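  // Illustrative sketch (not part of the original source): the two basis
  // pairs above are the standard C1 cubic Hermite basis on an interval of
  // width h, so with endpoint values f0,f1 and derivatives d0,d1 the
  // interpolant at fraction t = x/h in [0,1] is:
#if 0
  Float hermiteInterp(Float f0, Float f1, Float d0, Float d1,
      Float t, Float h) {
    Float phi[2], psi[2];
    stencil_1d_c1hermite(phi, psi, t, h);
    // at t = 0: phi = {1,0}, psi = {0,0}; at t = 1: phi = {0,1}, psi = {0,0}
    return f0*phi[0] + f1*phi[1] + d0*psi[0] + d1*psi[1];
  }
#endif
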
  static void ndsplitting(BigReal pg[], BigReal s, int n, int _split) {
    int k = 0;
    if (k == n) return;
    if (s <= 1) {
      // compute derivatives of smoothed part
      switch (_split) {
        case TAYLOR2:
          pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
          if (k == n) break;
          pg[k++] = -1./2 + (s-1)*(3./4);
          if (k == n) break;
          pg[k++] = 3./4;
          break;
        case TAYLOR3:
          pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
          if (k == n) break;
          pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16));
          if (k == n) break;
          pg[k++] = 3./4 + (s-1)*(-15./8);
          if (k == n) break;
          pg[k++] = -15./8;
          break;
        case TAYLOR4:
          pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                  + (s-1)*(35./128))));
          if (k == n) break;
          pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32)));
          if (k == n) break;
          pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32));
          if (k == n) break;
          pg[k++] = -15./8 + (s-1)*(105./16);
          if (k == n) break;
          pg[k++] = 105./16;
          break;
        case TAYLOR5:
          pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                  + (s-1)*(35./128 + (s-1)*(-63./256)))));
          if (k == n) break;
          pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
                  + (s-1)*(-315./256))));
          if (k == n) break;
          pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64)));
          if (k == n) break;
          pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64));
          if (k == n) break;
          pg[k++] = 105./16 + (s-1)*(-945./32);
          if (k == n) break;
          pg[k++] = -945./32;
          break;
        case TAYLOR6:
          pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                  + (s-1)*(35./128 + (s-1)*(-63./256 + (s-1)*(231./1024))))));
          if (k == n) break;
          pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
                  + (s-1)*(-315./256 + (s-1)*(693./512)))));
          if (k == n) break;
          pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
                  + (s-1)*(3465./512))));
          if (k == n) break;
          pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64
                + (s-1)*(3465./128)));
          if (k == n) break;
          pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128));
          if (k == n) break;
          pg[k++] = -945./32 + (s-1)*(10395./64);
          if (k == n) break;
          pg[k++] = 10395./64;
          break;
        case TAYLOR7:
          pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                  + (s-1)*(35./128 + (s-1)*(-63./256
                      + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
          if (k == n) break;
          pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
                  + (s-1)*(-315./256 + (s-1)*(693./512
                      + (s-1)*(-3003./2048))))));
          if (k == n) break;
          pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
                  + (s-1)*(3465./512 + (s-1)*(-9009./1024)))));
          if (k == n) break;
          pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
                  + (s-1)*(-45045./1024))));
          if (k == n) break;
          pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
                + (s-1)*(-45045./256)));
          if (k == n) break;
          pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256));
          if (k == n) break;
          pg[k++] = 10395./64 + (s-1)*(-135135./128);
          if (k == n) break;
          pg[k++] = -135135./128;
          break;
        case TAYLOR8:
          pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
                  + (s-1)*(35./128 + (s-1)*(-63./256
                      + (s-1)*(231./1024 + (s-1)*(-429./2048
                          + (s-1)*(6435./32768))))))));
          if (k == n) break;
          pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
                  + (s-1)*(-315./256 + (s-1)*(693./512
                      + (s-1)*(-3003./2048 + (s-1)*(6435./4096)))))));
          if (k == n) break;
          pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
                  + (s-1)*(3465./512 + (s-1)*(-9009./1024
                      + (s-1)*(45045./4096))))));
          if (k == n) break;
          pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
                  + (s-1)*(-45045./1024 + (s-1)*(135135./2048)))));
          if (k == n) break;
          pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
                + (s-1)*(-45045./256 + (s-1)*(675675./2048))));
          if (k == n) break;
          pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256
                + (s-1)*(675675./512)));
          if (k == n) break;
          pg[k++] = 10395./64 + (s-1)*(-135135./128 + (s-1)*(2027025./512));
          if (k == n) break;
          pg[k++] = -135135./128 + (s-1)*(2027025./256);
          if (k == n) break;
          pg[k++] = 2027025./256;
          break;
        default:
          NAMD_die("Unknown MSM splitting.");
      }
    } // if (s <= 1)
    else { // (s > 1)
      // compute derivatives of s^(-1/2)
      const BigReal s_1 = 1./s;
      BigReal s_p = sqrt(s_1);
      BigReal p = -0.5;
      BigReal _c = 1;
      pg[k++] = _c * s_p;
      while (k < n) {
        s_p *= s_1;
        _c *= p;
        p -= 1;
        pg[k++] = _c * s_p;
      }
    } // else (s > 1)
    // higher derivatives are zero
    while (k < n) pg[k++] = 0;
  } // ndsplitting()

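  // Illustrative check (not part of the original source): for r <= a,
  // ndsplitting() returns the derivatives (d/ds)^k g(s) with s = (r/a)^2,
  // so its first two entries reproduce splitting() through the chain rule
  // (d/dR)g = 2R * (d/ds)g, for the electrostatic TAYLOR splittings:
#if 0
  static void checkNdSplitting(BigReal r_a, int _split) {
    BigReal g, dg, pg[2];
    splitting(g, dg, r_a, _split);
    ndsplitting(pg, r_a*r_a, 2, _split);
    // pg[0] == g  and  2*r_a*pg[1] == dg, up to roundoff
  }
#endif
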

  static void gc_c1hermite_elem_accum(C1Matrix& matrix, BigReal _c,
      Vector rv, BigReal _a, int _split) {
    const BigReal a_1 = 1./_a;
    const BigReal a_2 = a_1 * a_1;
    const BigReal s = (rv * rv) * a_2;
    const BigReal dx = -2 * rv.x * a_2;  // ds/dx
    const BigReal dy = -2 * rv.y * a_2;  // ds/dy
    const BigReal dz = -2 * rv.z * a_2;  // ds/dz
    const BigReal dd = 2 * a_2;  // d^2s/dx^2 = d^2s/dy^2 = d^2s/dz^2
    BigReal tmp;
    enum { nderiv = C1_VECTOR_SIZE-1 };
    BigReal p[nderiv];
    Float *g = matrix.melem;

    // multiply entire matrix by this coefficient
    _c = _c * a_1;

    // compute derivatives (d/ds)^k of splitting g(s), s=r^2
    ndsplitting(p, s, nderiv, _split);

    // weight 0
    tmp = _c * p[0];
    g[C1INDEX(D000,D000)] += tmp;

    // weight 1
    tmp = _c * p[1] * dx;
    g[C1INDEX(D100,D000)] += tmp;
    g[C1INDEX(D000,D100)] -= tmp;

    tmp = _c * p[1] * dy;
    g[C1INDEX(D010,D000)] += tmp;
    g[C1INDEX(D000,D010)] -= tmp;

    tmp = _c * p[1] * dz;
    g[C1INDEX(D001,D000)] += tmp;
    g[C1INDEX(D000,D001)] -= tmp;

    // C1 splitting returns here

    // weight 2
    tmp = _c * p[2] * dx * dy;
    g[C1INDEX(D110,D000)] += tmp;
    g[C1INDEX(D000,D110)] += tmp;
    g[C1INDEX(D100,D010)] -= tmp;
    g[C1INDEX(D010,D100)] -= tmp;

    tmp = _c * p[2] * dx * dz;
    g[C1INDEX(D101,D000)] += tmp;
    g[C1INDEX(D000,D101)] += tmp;
    g[C1INDEX(D100,D001)] -= tmp;
    g[C1INDEX(D001,D100)] -= tmp;

    tmp = _c * p[2] * dy * dz;
    g[C1INDEX(D011,D000)] += tmp;
    g[C1INDEX(D000,D011)] += tmp;
    g[C1INDEX(D010,D001)] -= tmp;
    g[C1INDEX(D001,D010)] -= tmp;

    tmp = _c * (p[2] * dx*dx + p[1] * dd);
    g[C1INDEX(D100,D100)] -= tmp;
    tmp = _c * (p[2] * dy*dy + p[1] * dd);
    g[C1INDEX(D010,D010)] -= tmp;
    tmp = _c * (p[2] * dz*dz + p[1] * dd);
    g[C1INDEX(D001,D001)] -= tmp;

    // C2 splitting returns here
    if (_split == TAYLOR2) return;

    // weight 3
    tmp = _c * p[3] * dx * dy * dz;
    g[C1INDEX(D111,D000)] += tmp;
    g[C1INDEX(D110,D001)] -= tmp;
    g[C1INDEX(D101,D010)] -= tmp;
    g[C1INDEX(D011,D100)] -= tmp;
    g[C1INDEX(D100,D011)] += tmp;
    g[C1INDEX(D010,D101)] += tmp;
    g[C1INDEX(D001,D110)] += tmp;
    g[C1INDEX(D000,D111)] -= tmp;

    tmp = _c * (p[3] * dx*dx * dy + p[2] * dd * dy);
    g[C1INDEX(D110,D100)] -= tmp;
    g[C1INDEX(D100,D110)] += tmp;

    tmp = _c * (p[3] * dx*dx * dz + p[2] * dd * dz);
    g[C1INDEX(D101,D100)] -= tmp;
    g[C1INDEX(D100,D101)] += tmp;

    tmp = _c * (p[3] * dy*dy * dx + p[2] * dd * dx);
    g[C1INDEX(D110,D010)] -= tmp;
    g[C1INDEX(D010,D110)] += tmp;

    tmp = _c * (p[3] * dy*dy * dz + p[2] * dd * dz);
    g[C1INDEX(D011,D010)] -= tmp;
    g[C1INDEX(D010,D011)] += tmp;

    tmp = _c * (p[3] * dz*dz * dx + p[2] * dd * dx);
    g[C1INDEX(D101,D001)] -= tmp;
    g[C1INDEX(D001,D101)] += tmp;

    tmp = _c * (p[3] * dz*dz * dy + p[2] * dd * dy);
    g[C1INDEX(D011,D001)] -= tmp;
    g[C1INDEX(D001,D011)] += tmp;

    // C3 splitting returns here
    if (_split == TAYLOR3) return;

    // weight 4
    tmp = _c * (p[4] * dx*dx * dy * dz + p[3] * dd * dy * dz);
    g[C1INDEX(D111,D100)] -= tmp;
    g[C1INDEX(D100,D111)] -= tmp;
    g[C1INDEX(D110,D101)] += tmp;
    g[C1INDEX(D101,D110)] += tmp;

    tmp = _c * (p[4] * dy*dy * dx * dz + p[3] * dd * dx * dz);
    g[C1INDEX(D111,D010)] -= tmp;
    g[C1INDEX(D010,D111)] -= tmp;
    g[C1INDEX(D110,D011)] += tmp;
    g[C1INDEX(D011,D110)] += tmp;

    tmp = _c * (p[4] * dz*dz * dx * dy + p[3] * dd * dx * dy);
    g[C1INDEX(D111,D001)] -= tmp;
    g[C1INDEX(D001,D111)] -= tmp;
    g[C1INDEX(D101,D011)] += tmp;
    g[C1INDEX(D011,D101)] += tmp;

    tmp = _c * (p[4] * dx*dx * dy*dy + p[3] * dx*dx * dd
        + p[3] * dd * dy*dy + p[2] * dd * dd);
    g[C1INDEX(D110,D110)] += tmp;
    tmp = _c * (p[4] * dx*dx * dz*dz + p[3] * dx*dx * dd
        + p[3] * dd * dz*dz + p[2] * dd * dd);
    g[C1INDEX(D101,D101)] += tmp;
    tmp = _c * (p[4] * dy*dy * dz*dz + p[3] * dy*dy * dd
        + p[3] * dd * dz*dz + p[2] * dd * dd);
    g[C1INDEX(D011,D011)] += tmp;

    // C4 splitting returns here
    if (_split == TAYLOR4) return;

    // weight 5
    tmp = _c * (p[5] * dx*dx * dy*dy * dz + p[4] * dx*dx * dd * dz
        + p[4] * dd * dy*dy * dz + p[3] * dd * dd * dz);
    g[C1INDEX(D111,D110)] += tmp;
    g[C1INDEX(D110,D111)] -= tmp;

    tmp = _c * (p[5] * dx*dx * dz*dz * dy + p[4] * dx*dx * dd * dy
        + p[4] * dd * dz*dz * dy + p[3] * dd * dd * dy);
    g[C1INDEX(D111,D101)] += tmp;
    g[C1INDEX(D101,D111)] -= tmp;

    tmp = _c * (p[5] * dy*dy * dz*dz * dx + p[4] * dy*dy * dd * dx
        + p[4] * dd * dz*dz * dx + p[3] * dd * dd * dx);
    g[C1INDEX(D111,D011)] += tmp;
    g[C1INDEX(D011,D111)] -= tmp;

    // C5 splitting returns here
    if (_split == TAYLOR5) return;

    // weight 6
    tmp = _c * (p[6] * dx*dx * dy*dy * dz*dz + p[5] * dx*dx * dy*dy * dd
        + p[5] * dx*dx * dd * dz*dz + p[5] * dd * dy*dy * dz*dz
        + p[4] * dx*dx * dd * dd + p[4] * dd * dy*dy * dd
        + p[4] * dd * dd * dz*dz + p[3] * dd * dd * dd);
    g[C1INDEX(D111,D111)] -= tmp;

    // calculate full matrix for C6 or higher splitting

  } // gc_c1hermite_elem_accum()


}; // ComputeMsmMgr


// Degree of polynomial basis function Phi.
// For the purpose of finding the stencil width, Hermite interpolation
// sets this value to 1.
const int ComputeMsmMgr::PolyDegree[NUM_APPROX] = {
  3, 5, 5, 7, 7, 9, 9, 1,
};

// The stencil array lengths below.
const int ComputeMsmMgr::Nstencil[NUM_APPROX] = {
  5, 7, 7, 9, 9, 11, 11, 3,
};

// Index offsets from the stencil-centered grid element, to get
// to the correct contributing grid element.
const int
ComputeMsmMgr::IndexOffset[NUM_APPROX][MAX_NSTENCIL_SIZE] = {
  // cubic
  {-3, -1, 0, 1, 3},

  // quintic C1
  {-5, -3, -1, 0, 1, 3, 5},

  // quintic C2 (same as quintic C1)
  {-5, -3, -1, 0, 1, 3, 5},

  // septic C1
  {-7, -5, -3, -1, 0, 1, 3, 5, 7},

  // septic C3 (same as septic C1)
  {-7, -5, -3, -1, 0, 1, 3, 5, 7},

  // nonic C1
  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},

  // nonic C4 (same as nonic C1)
  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},

  // C1 Hermite
  {-1, 0, 1},
};

// The grid transfer stencils for the non-factored restriction and
// prolongation procedures.
const Float
ComputeMsmMgr::PhiStencil[NUM_APPROX_FORMS][MAX_NSTENCIL_SIZE] = {
  // cubic
  {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},

  // quintic C1
  {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},

  // quintic C2 (same as quintic C1)
  {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},

  // septic C1
  { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
    -245.f/2048, 49.f/2048, -5.f/2048 },

  // septic C3 (same as septic C1)
  { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
    -245.f/2048, 49.f/2048, -5.f/2048 },

  // nonic C1
  { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
    19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
    -405.f/65536, 35.f/65536 },

  // nonic C4 (same as nonic C1)
  { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
    19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
    -405.f/65536, 35.f/65536 },
};

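// Illustrative note (not part of the original source): each PhiStencil
// row samples the basis function at half-grid offsets, Phi(k/2).  For
// the cubic basis Phi(1/2) = 9/16 and Phi(3/2) = -1/16, while Phi(1) = 0;
// these zeros at the nonzero whole-grid offsets are why the IndexOffset
// rows above skip the even entries (e.g. -2 and 2 for cubic).
#if 0
static Float cubicPhi(Float x) {  // cubic master basis function
  Float r = (x < 0 ? -x : x);
  if (r >= 2) return 0;
  return (r <= 1 ? (1-r)*(1+r-1.5f*r*r) : 0.5f*(1-r)*(2-r)*(2-r));
}
// cubicPhi(0.5f) == 9.f/16, cubicPhi(1.5f) == -1.f/16, cubicPhi(1) == 0
#endif
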
1656 // Designates PE assignment for static load balancing of
1657 // MsmBlock-related arrays
1658 class MsmBlockMap : public CkArrayMap {
1659  private:
1660  ComputeMsmMgr *mgrLocal;
1661  int *penum;
1662  int level;
1663  public:
1664  MsmBlockMap(int lvl) {
1665  mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
1666  CkpvAccess(BOCclass_group).computeMsmMgr);
1667 #ifdef MSM_NODE_MAPPING
1668  penum = mgrLocal->blockAssign.buffer();
1669 #else
1670  penum = 0;
1671 #endif
1672  level = lvl;
1673  }
1674  MsmBlockMap(CkMigrateMessage *m) { }
1675  int registerArray(CkArrayIndex& numElements, CkArrayID aid) {
1676  return 0;
1677  }
1678  int procNum(int /*arrayHdl*/, const CkArrayIndex &idx) {
1679  int *pn = (int *)idx.data();
1680 #ifdef MSM_NODE_MAPPING
1681  int n = mgrLocal->blockFlatIndex(level, pn[0], pn[1], pn[2]);
1682  return penum[n];
1683 #else
1684  return 0;
1685 #endif
1686  }
1687 };
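
// Example (added sketch): the lookup that procNum() above relies on.
// blockFlatIndex() collapses (level, i, j, k) to a linear offset into the
// precomputed blockAssign array; the flattening shown here is hypothetical,
// with ni and nj standing for the block counts along x and y at that level
// and levelOffset for the start of that level's segment of the array.
static inline int exampleFlatBlockIndex(int levelOffset, int ni, int nj,
    int i, int j, int k) {
  return levelOffset + (k * nj + j) * ni + i;  // row-major, i varies fastest
}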
1688 
1689 
1690 // Designates PE assignment for static load balancing of
1691 // MsmGridCutoff-related arrays
1692 class MsmGridCutoffMap : public CkArrayMap {
1693  private:
1694  int *penum;
1695  public:
1696  MsmGridCutoffMap() {
1697  ComputeMsmMgr *mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
1698  CkpvAccess(BOCclass_group).computeMsmMgr);
1699 #ifdef MSM_NODE_MAPPING
1700  penum = mgrLocal->gcutAssign.buffer();
1701 #else
1702  penum = 0;
1703 #endif
1704  }
1705  int registerArray(CkArrayIndex& numElements, CkArrayID aid) {
1706  return 0;
1707  }
1708  int procNum(int /*arrayHdl*/, const CkArrayIndex &idx) {
1709 #if 1
1710  int n = *((int *)idx.data());
1711 #ifdef MSM_NODE_MAPPING
1712  return penum[n];
1713 #else
1714  return 0;
1715 #endif
1716 #else
1717  return 0; // XXX to test load balancing
1718 #endif
1719  }
1720 };
1721 
1722 
1723 namespace msm {
1724 
1725  //
1726  // PatchData
1727  //
1728  // Performs anterpolation and interpolation algorithms.
1729  //
1730  // Surround each NAMD patch with enough grid points to perform
1731  // anterpolation and interpolation without having to do any
1732  // grid wrapping. This does not give a partitioning of the
1733  // MSM finest level grid --- rather, the edges of adjacent
1734  // PatchData grids will overlap or contain image points along
1735  // the periodic boundaries.
1736  //
1737 
1738  struct PatchData {
1739  ComputeMsmMgr *mgr;
1740  msm::Map *map;
1741  int cntRecvs;
1743  msm::Grid<Float> qh; // charge grid covering this patch
1744  msm::Grid<Float> eh; // potential grid covering this patch
1745  msm::Grid<C1Vector> qh_c1hermite; // charge grid for C1 Hermite
1746  msm::Grid<C1Vector> eh_c1hermite; // potential grid for C1 Hermite
1747  msm::Grid<Float> subgrid; // for receiving blocks of potential
1748  msm::Grid<C1Vector> subgrid_c1hermite;
1749  AtomCoordArray coord; // positions and charges of the patch atoms
1750  ForceArray force; // force contribution for each patch atom
1751  //BigReal virial[3][3];
1753  int patchID;
1754  int sequence; // from Compute object for message priority
1755 
1756  AtomCoordArray& coordArray() { return coord; }
1757  ForceArray& forceArray() { return force; }
1758 
1759  PatchData(ComputeMsmMgr *pmgr, int pid);
1760  void init(int natoms);
1761 
1762  void anterpolation();
1763  void sendCharge();
1764  void addPotential(const Grid<Float>& epart);
1765  void interpolation();
1766 
1767  void anterpolationC1Hermite();
1768  void sendChargeC1Hermite();
1769  void addPotentialC1Hermite(const Grid<C1Vector>& epart);
1770  void interpolationC1Hermite();
1771  };
1772 
1773 } // namespace msm
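
// Example (added sketch): why PatchData's grid must extend beyond the patch.
// In 1D, anterpolating a charge at position x with a degree-p basis spreads
// it over the p+1 nearest grid points, so a patch spanning [xmin..xmax]
// needs grid indices from roughly floor(xmin/h) - p/2 up to
// floor(xmax/h) + p/2 + 1; adjacent patches' index ranges therefore overlap,
// as the comment above describes. The arithmetic here is illustrative only.
static void examplePatchCover(double xmin, double xmax, double h, int p,
    int *ilo, int *ihi) {
  // integer truncation stands in for floor(), assuming nonnegative coords
  *ilo = (int)(xmin / h) - p/2;      // leftmost grid point touched
  *ihi = (int)(xmax / h) + p/2 + 1;  // rightmost grid point touched
}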
1774 
1775 
1777 //
1778 // MsmGridCutoff
1779 //
1780 // Performs grid cutoff part of the computation.
1781 //
1782 // The grid cutoff part is the most computationally intensive part
1783 // of MSM. The templated MsmGridCutoffKernel class takes Vtype
1784 // for charge and potential data (generalizes to vector for Hermite
1785 // interpolation) and takes Mtype for the pre-computed grid coefficient
1786 // weights (generalizes to matrix for Hermite interpolation).
1787 //
1788 
1789 template <class Vtype, class Mtype>
1790 class MsmGridCutoffKernel {
1791  public:
1792  ComputeMsmMgr *mgrLocal; // for quick access to data
1793  msm::Map *map; // for quick access to map data
1794  msm::BlockIndex qhblockIndex; // source of charges
1795  msm::BlockSend ehblockSend; // destination for potentials
1796  int eia, eib, eja, ejb, eka, ekb, eni, enj, enk; // for "fold factor"
1797  int isfold; // for "fold factor"
1798  int priority; // message priority for sending potentials
1799  int sequence; // sequence number for message priority
1800  msm::Grid<Vtype> ehfold; // for "fold factor"
1801  const msm::Grid<Mtype> *pgc; // pre-computed grid coefficient weights
1802  const msm::Grid<Mtype> *pgvc; // grid virial contribution weights
1803  msm::Grid<Vtype> qh; // charge block
1804  msm::Grid<Vtype> eh; // potential block
1805 
1806  MsmGridCutoffKernel() { init(); }
1807 
1808  void init() {
1809  isfold = 0;
1810  mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
1811  CkpvAccess(BOCclass_group).computeMsmMgr);
1812  map = &(mgrLocal->mapData());
1813  mgrLocal->addVirialContrib();
1814 #ifdef MSM_TIMING
1815  mgrLocal->addTiming();
1816 #endif
1817 #ifdef MSM_PROFILING
1818  mgrLocal->addProfiling();
1819 #endif
1820  }
1821 
1822 #ifdef MSM_MIGRATION
1823  void pup(PUP::er& p) {
1824 #ifdef MSM_TIMING
1825  mgrLocal->subtractTiming();
1826 #endif
1827 #ifdef MSM_PROFILING
1828  mgrLocal->subtractProfiling();
1829 #endif
1830  p | qhblockIndex;
1831  p | ehblockSend;
1832  p | eia, p | eib, p | eja, p | ejb, p | eka, p | ekb;
1833  p | eni, p | enj, p | enk;
1834  p | isfold;
1835  }
1836 #endif // MSM_MIGRATION
1837 
1838  void setup(MsmGridCutoffInitMsg *bmsg) {
1839  qhblockIndex = bmsg->qhBlockIndex;
1840  ehblockSend = bmsg->ehBlockSend;
1841  delete bmsg;
1842 
1843  // set message priority
1844  priority = mgrLocal->nlevels
1845  + 2*(mgrLocal->nlevels - ehblockSend.nblock_wrap.level) - 1;
1846  // allocate qh buffer
1847  qh.init(map->blockLevel[qhblockIndex.level](qhblockIndex.n).nrange);
1848  // allocate eh buffer
1849  eh.init(ehblockSend.nrange);
1850  // preprocess "fold factor" if active for this level
1851  if (map->foldfactor[qhblockIndex.level].active) {
1852  // allocate ehfold buffer
1853  ehfold = eh;
1854  // set index range of potentials
1855  eia = eh.ia();
1856  eib = eh.ib();
1857  eja = eh.ja();
1858  ejb = eh.jb();
1859  eka = eh.ka();
1860  ekb = eh.kb();
1861  eni = eh.ni();
1862  enj = eh.nj();
1863  enk = eh.nk();
1864  if (map->blockLevel[qhblockIndex.level].nn() == 1) {
1865  if (map->ispx) { eia = qh.ia(); eib = qh.ib(); eni = qh.ni(); }
1866  if (map->ispy) { eja = qh.ja(); ejb = qh.jb(); enj = qh.nj(); }
1867  if (map->ispz) { eka = qh.ka(); ekb = qh.kb(); enk = qh.nk(); }
1868  }
1869  else {
1870  // find destination block index
1871  int level = qhblockIndex.level;
1872  msm::BlockIndex bn = map->blockOfGridIndex(
1873  ehblockSend.nrange_wrap.lower(), level);
1874  map->wrapBlockIndex(bn);
1875  if (map->ispx) {
1876  eia = bn.n.i * map->bsx[level];
1877  eib = eia + qh.ni() - 1;
1878  eni = qh.ni();
1879  }
1880  if (map->ispy) {
1881  eja = bn.n.j * map->bsy[level];
1882  ejb = eja + qh.nj() - 1;
1883  enj = qh.nj();
1884  }
1885  if (map->ispz) {
1886  eka = bn.n.k * map->bsz[level];
1887  ekb = eka + qh.nk() - 1;
1888  enk = qh.nk();
1889  }
1890  }
1891  isfold = 1;
1892  } // if fold factor
1893  } // setup()
1894 
1895  void setupWeights(
1896  const msm::Grid<Mtype> *ptrgc,
1897  const msm::Grid<Mtype> *ptrgvc
1898  ) {
1899  pgc = ptrgc;
1900  pgvc = ptrgvc;
1901  } // setupWeights()
1902 
1903 
1904  void compute(GridMsg *gmsg) {
1905 #ifdef MSM_TIMING
1906  double startTime, stopTime;
1907  startTime = CkWallTimer();
1908 #endif
1909  //
1910  // receive block of charges
1911  //
1912  int pid;
1913  // qh is resized only the first time, memory allocation persists
1914  gmsg->get(qh, pid, sequence);
1915  delete gmsg;
1916 #ifdef MSM_TIMING
1917  stopTime = CkWallTimer();
1918  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
1919 #endif
1920 
1921  //
1922  // grid cutoff calculation
1923  // this charge block -> this potential block
1924  //
1925 
1926 #ifdef MSM_TIMING
1927  startTime = stopTime;
1928 #endif
1929  // resets indexing on block
1930  eh.init(ehblockSend.nrange); // (always have to re-init nrange for eh)
1931  eh.reset(0);
1932  // index range of weights
1933  int gia = pgc->ia();
1934  int gib = pgc->ib();
1935  int gja = pgc->ja();
1936  int gjb = pgc->jb();
1937  int gka = pgc->ka();
1938  int gkb = pgc->kb();
1939  int gni = pgc->ni();
1940  int gnj = pgc->nj();
1941  // index range of charge grid
1942  int qia = qh.ia();
1943  int qib = qh.ib();
1944  int qja = qh.ja();
1945  int qjb = qh.jb();
1946  int qka = qh.ka();
1947  int qkb = qh.kb();
1948  int qni = qh.ni();
1949  int qnj = qh.nj();
1950  // index range of potentials
1951  int ia = eh.ia();
1952  int ib = eh.ib();
1953  int ja = eh.ja();
1954  int jb = eh.jb();
1955  int ka = eh.ka();
1956  int kb = eh.kb();
1957 
1958  int index = 0;
1959 
1960  // access buffers directly
1961  const Mtype *gcbuffer = pgc->data().buffer();
1962  //const Mtype *gvcbuffer = pgvc->data().buffer();
1963  const Vtype *qhbuffer = qh.data().buffer();
1964  Vtype *ehbuffer = eh.data().buffer();
1965  //Vtype *gvsumbuffer = mgrLocal->gvsum.data().buffer();
1966 
1967 #ifndef MSM_COMM_ONLY
1968  // loop over potentials
1969  for (int k = ka; k <= kb; k++) {
1970  // clip charges to weights along k
1971  int mka = ( qka >= gka + k ? qka : gka + k );
1972  int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
1973 
1974  for (int j = ja; j <= jb; j++) {
1975  // clip charges to weights along j
1976  int mja = ( qja >= gja + j ? qja : gja + j );
1977  int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
1978 
1979  for (int i = ia; i <= ib; i++) {
1980  // clip charges to weights along i
1981  int mia = ( qia >= gia + i ? qia : gia + i );
1982  int mib = ( qib <= gib + i ? qib : gib + i );
1983 
1984  // accumulate sum to this eh point
1985  Vtype ehsum = 0;
1986 
1987 #if 0
1988  // loop over charge grid
1989  for (int qk = mka; qk <= mkb; qk++) {
1990  int qkoff = (qk - qka) * qnj;
1991  int gkoff = ((qk-k) - gka) * gnj;
1992 
1993  for (int qj = mja; qj <= mjb; qj++) {
1994  int qjkoff = (qkoff + qj - qja) * qni;
1995  int gjkoff = (gkoff + (qj-j) - gja) * gni;
1996 
1997 // help the vectorizer make reasonable decisions
1998 #if defined(__INTEL_COMPILER)
1999 #pragma vector always
2000 #endif
2001  for (int qi = mia; qi <= mib; qi++) {
2002  int qijkoff = qjkoff + qi - qia;
2003  int gijkoff = gjkoff + (qi-i) - gia;
2004 
2005  ehsum += gcbuffer[gijkoff] * qhbuffer[qijkoff];
2006  }
2007  }
2008  } // end loop over charge grid
2009 #else
2010 
2011 #if 0
2012  // loop over charge grid
2013  int nn = mib - mia + 1;
2014  for (int qk = mka; qk <= mkb; qk++) {
2015  int qkoff = (qk - qka) * qnj;
2016  int gkoff = ((qk-k) - gka) * gnj;
2017 
2018  for (int qj = mja; qj <= mjb; qj++) {
2019  int qjkoff = (qkoff + qj - qja) * qni;
2020  int gjkoff = (gkoff + (qj-j) - gja) * gni;
2021 
2022  const Float *qbuf = qhbuffer + (qjkoff - qia + mia);
2023  const Float *gbuf = gcbuffer + (gjkoff - i - gia + mia);
2024 #ifdef MSM_PROFILING
2025  mgrLocal->xLoopCnt[nn]++;
2026 #endif
2027 // help the vectorizer make reasonable decisions
2028 #if defined(__INTEL_COMPILER)
2029 #pragma vector always
2030 #endif
2031  for (int ii = 0; ii < nn; ii++) {
2032  ehsum += gbuf[ii] * qbuf[ii];
2033  }
2034  }
2035  } // end loop over charge grid
2036 #else
2037  // loop over charge grid
2038  int nn = mib - mia + 1;
2039  if (nn == 8) { // hard coded inner loop = 8
2040  int qnji = qnj * qni;
2041  int qkoff = -qka*qnji - qja*qni - qia + mia;
2042  int gnji = gnj * gni;
2043  int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
2044 
2045  for (int qk = mka; qk <= mkb; qk++) {
2046  int qjkoff = qkoff + qk*qnji;
2047  int gjkoff = gkoff + qk*gnji;
2048 
2049  for (int qj = mja; qj <= mjb; qj++) {
2050  const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
2051  const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
2052  //const Mtype *gvcbuf = gvcbuffer + (gjkoff + qj*gni);
2053  //Vtype *gvsumbuf = gvsumbuffer + (gjkoff + qj*gni);
2054 #ifdef MSM_PROFILING
2055  mgrLocal->xLoopCnt[nn]++;
2056 #endif
2057 // help the vectorizer make reasonable decisions
2058 #if defined(__INTEL_COMPILER)
2059 #pragma vector always
2060 #endif
2061  for (int ii = 0; ii < 8; ii++) {
2062  ehsum += gbuf[ii] * qbuf[ii];
2063  //gvsumbuf[ii] += qbuf[ii] * qbuf[ii] * gvcbuf[ii];
2064  }
2065  }
2066  } // end loop over charge grid
2067  }
2068  else { // variable length inner loop < 8
2069  int qnji = qnj * qni;
2070  int qkoff = -qka*qnji - qja*qni - qia + mia;
2071  int gnji = gnj * gni;
2072  int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
2073 
2074  for (int qk = mka; qk <= mkb; qk++) {
2075  int qjkoff = qkoff + qk*qnji;
2076  int gjkoff = gkoff + qk*gnji;
2077 
2078  for (int qj = mja; qj <= mjb; qj++) {
2079  const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
2080  const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
2081  //const Mtype *gvcbuf = gvcbuffer + (gjkoff + qj*gni);
2082  //Vtype *gvsumbuf = gvsumbuffer + (gjkoff + qj*gni);
2083 #ifdef MSM_PROFILING
2084  mgrLocal->xLoopCnt[nn]++;
2085 #endif
2086 // help the vectorizer make reasonable decisions
2087 #if defined(__INTEL_COMPILER)
2088 #pragma vector always
2089 #endif
2090  for (int ii = 0; ii < nn; ii++) {
2091  ehsum += gbuf[ii] * qbuf[ii];
2092  //gvsumbuf[ii] += qbuf[ii] * qbuf[ii] * gvcbuf[ii];
2093  }
2094  }
2095  } // end loop over charge grid
2096  }
2097 #endif // 0
2098 
2099 #endif // 0
2100 
2101  ehbuffer[index] = ehsum;
2102  index++;
2103  }
2104  }
2105  } // end loop over potentials
2106 #endif // !MSM_COMM_ONLY
2107 
2108 #ifdef MSM_PROFILING
2109  mgrLocal->doneProfiling();
2110 #endif
2111 
2112  //
2113  // send block of potentials
2114  //
2115 
2116 #ifdef MSM_FOLD_FACTOR
2117  // if "fold factor" is active for this level,
2118  // need to sum unfolded potential grid back into periodic grid
2119  if (isfold) {
2120  // copy unfolded grid
2121  ehfold = eh;
2122  // reset eh indexing to correctly folded size
2123  eh.set(eia, eni, eja, enj, eka, enk);
2124  eh.reset(0);
2125 #ifdef DEBUG_MSM_GRID
2126  printf("level=%d ehfold: [%d..%d] x [%d..%d] x [%d..%d] "
2127  "(%d x %d x %d)\n"
2128  " eh: [%d..%d] x [%d..%d] x [%d..%d] "
2129  "(%d x %d x %d)\n"
2130  " eh lower: %d %d %d\n",
2131  qhblockIndex.level,
2132  ehfold.ia(), ehfold.ib(),
2133  ehfold.ja(), ehfold.jb(),
2134  ehfold.ka(), ehfold.kb(),
2135  ehfold.ni(), ehfold.nj(), ehfold.nk(),
2136  eh.ia(), eh.ib(),
2137  eh.ja(), eh.jb(),
2138  eh.ka(), eh.kb(),
2139  eh.ni(), eh.nj(), eh.nk(),
2140  ehblockSend.nrange_wrap.lower().i,
2141  ehblockSend.nrange_wrap.lower().j,
2142  ehblockSend.nrange_wrap.lower().k
2143  );
2144 #endif
2145  const Vtype *ehfoldbuf = ehfold.data().buffer();
2146  Vtype *ehbuf = eh.data().buffer();
2147  // now we "fold" eh by calculating the
2148  // wrap around sum of ehfold into correctly sized eh
2149  int index = 0;
2150  for (int k = ka; k <= kb; k++) {
2151  int kk = k;
2152  if (kk < eka) do { kk += enk; } while (kk < eka);
2153  else if (kk > ekb) do { kk -= enk; } while (kk > ekb);
2154  int koff = (kk - eka) * enj;
2155  for (int j = ja; j <= jb; j++) {
2156  int jj = j;
2157  if (jj < eja) do { jj += enj; } while (jj < eja);
2158  else if (jj > ejb) do { jj -= enj; } while (jj > ejb);
2159  int jkoff = (koff + (jj - eja)) * eni;
2160  for (int i = ia; i <= ib; i++, index++) {
2161  int ii = i;
2162  if (ii < eia) do { ii += eni; } while (ii < eia);
2163  else if (ii > eib) do { ii -= eni; } while (ii > eib);
2164  int ijkoff = jkoff + (ii - eia);
2165  ehbuf[ijkoff] += ehfoldbuf[index];
2166  }
2167  }
2168  }
2169  }
2170  else {
2171  // shift grid index range to its true (wrapped) values
2172  eh.updateLower( ehblockSend.nrange_wrap.lower() );
2173  }
2174 #else // !MSM_FOLD_FACTOR
2175  // shift grid index range to its true (wrapped) values
2176  eh.updateLower( ehblockSend.nrange_wrap.lower() );
2177 #endif // MSM_FOLD_FACTOR
2178 
2179 #ifdef MSM_TIMING
2180  stopTime = CkWallTimer();
2181  mgrLocal->msmTiming[MsmTimer::GRIDCUTOFF] += stopTime - startTime;
2182 #endif
2183  } // compute()
2184 
2185 };
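
// Example (added sketch): the index clipping used throughout compute()
// above, reduced to 1D. The potential at index k sums charges q[m]
// weighted by gc[m-k], where the weights have support [gka..gkb] and the
// charge block spans [qka..qkb]; intersecting the two ranges yields the
// loop bounds mka..mkb, exactly as the mka/mkb (and mja/mia) lines compute.
static Float cutoff1D(const Float *q, int qka, int qkb,
    const Float *gc, int gka, int gkb, int k) {
  int mka = ( qka >= gka + k ? qka : gka + k );  // max(qka, gka + k)
  int mkb = ( qkb <= gkb + k ? qkb : gkb + k );  // min(qkb, gkb + k)
  Float esum = 0;
  for (int m = mka;  m <= mkb;  m++) {
    esum += gc[(m-k) - gka] * q[m - qka];  // buffers start at range lower bound
  }
  return esum;
}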
2186 
2187 
2188 //
2189 // MsmGridCutoff wraps kernel template for approximations
2190 // that involve only function values (e.g., CUBIC, QUINTIC).
2191 // Elements of 1D chare array.
2192 //
2193 class MsmGridCutoff :
2194  public CBase_MsmGridCutoff,
2195  public MsmGridCutoffKernel<Float,Float>
2196 {
2197  public:
2198  CProxyElement_MsmBlock msmBlockElementProxy; // root of reduction
2199  CkSectionInfo cookie; // need to save cookie for section reduction
2200 #ifdef MSM_REDUCE_GRID
2201  msm::Grid<Float> ehfull; // need full-size block for section reduction
2202 #endif // MSM_REDUCE_GRID
2203 
2204  MsmGridCutoff() { }
2205 
2206  MsmGridCutoff(CkMigrateMessage *m)
2207 #if ! defined(MSM_MIGRATION)
2208  { }
2209 #else // MSM_MIGRATION
2210  : CBase_MsmGridCutoff(m) {
2211 #ifdef DEBUG_MSM_MIGRATE
2212  printf("MsmGridCutoff element %d migrated to processor %d\n",
2213  thisIndex, CkMyPe());
2214 #endif
2215  init();
2216  // access type dependent constants from map
2217  setupWeights(
2218  &(map->gc[ehblockSend.nblock_wrap.level]),
2219  &(map->gvc[ehblockSend.nblock_wrap.level])
2220  );
2221  }
2222 
2223  virtual void pup(PUP::er& p) {
2224 #ifdef DEBUG_MSM_MIGRATE
2225  printf("MsmGridCutoff element %d pupped on processor %d\n",
2226  thisIndex, CkMyPe());
2227 #endif
2228  CBase_MsmGridCutoff::pup(p); // pack our superclass
2229  MsmGridCutoffKernel<Float,Float>::pup(p); // pack kernel state
2230  }
2231 #endif // MSM_MIGRATION
2232 
2233  void init() {
2234  MsmGridCutoffKernel<Float,Float>::init();
2235  }
2236 
2237  void setup(MsmGridCutoffInitMsg *bmsg) {
2238  // base class consumes this init proxy message
2239  MsmGridCutoffKernel<Float,Float>::setup(bmsg);
2240  // access type dependent constants from map
2241  setupWeights(
2242  &(map->gc[ehblockSend.nblock_wrap.level]),
2243  &(map->gvc[ehblockSend.nblock_wrap.level])
2244  );
2245 #ifdef MSM_REDUCE_GRID
2246  // allocate full buffer space needed for section reduction
2247  int level = ehblockSend.nblock_wrap.level;
2248  int i = ehblockSend.nblock_wrap.n.i;
2249  int j = ehblockSend.nblock_wrap.n.j;
2250  int k = ehblockSend.nblock_wrap.n.k;
2251  ehfull.init( map->blockLevel[level](i,j,k).nrange );
2252 #endif // MSM_REDUCE_GRID
2253 #ifdef DEBUG_MSM_GRID
2254  printf("MsmGridCutoff[%d]: setup()"
2255  " send to level=%d block=(%d,%d,%d)\n",
2256  thisIndex, ehblockSend.nblock_wrap.level,
2257  ehblockSend.nblock_wrap.n.i,
2258  ehblockSend.nblock_wrap.n.j,
2259  ehblockSend.nblock_wrap.n.k);
2260 #endif
2261  }
2262 
2263  void setupSections(MsmGridCutoffSetupMsg *msg) {
2264 #ifdef DEBUG_MSM_GRID
2265  CkPrintf("MSM GRID CUTOFF %d setup section on PE %d\n",
2266  thisIndex, CkMyPe());
2267 #endif
2268  CkGetSectionInfo(cookie, msg); // init the cookie
2269  msg->get(&msmBlockElementProxy); // get proxy to MsmBlock
2270  delete msg;
2271  }
2272 
2273  void compute(GridMsg *gmsg) {
2274 #ifdef DEBUG_MSM_GRID
2275  printf("MsmGridCutoff %d: compute()\n", thisIndex);
2276 #endif
2277  // base class consumes this grid message
2278  MsmGridCutoffKernel<Float,Float>::compute(gmsg);
2279 
2280 #ifdef MSM_TIMING
2281  double startTime, stopTime;
2282  startTime = CkWallTimer();
2283 #endif
2284 #ifdef MSM_REDUCE_GRID
2285 
2286  // perform section reduction over potential grids
2287  CProxy_CkMulticastMgr mcastProxy =
2288  CkpvAccess(BOCclass_group).multicastMgr;
2289  CkMulticastMgr *mcastPtr =
2290  CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
2291  CkCallback cb(CkIndex_MsmBlock::sumReducedPotential(NULL),
2292  msmBlockElementProxy);
2293  // sum into "full" sized buffer needed for contribute
2294  ehfull.reset(0);
2295  ehfull += eh;
2296  mcastPtr->contribute(
2297  ehfull.nn() * sizeof(Float), ehfull.data().buffer(),
2298  CkReduction::sum_float, cookie, cb);
2299 
2300 #else
2301  // place eh into message
2302  const msm::BlockIndex& bindex = ehblockSend.nblock_wrap;
2303  int msgsz = eh.data().len() * sizeof(Float);
2304  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
2305  SET_PRIORITY(gm, sequence, priority);
2306  gm->put(eh, bindex.level, sequence);
2307  // lookup in ComputeMsmMgr proxy array by level
2308  mgrLocal->msmBlock[bindex.level](
2309  bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
2310 
2311 #endif // MSM_REDUCE_GRID
2312 
2313 #ifdef MSM_TIMING
2314  stopTime = CkWallTimer();
2315  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
2316  mgrLocal->doneTiming();
2317 #endif
2318  } // compute()
2319 
2320 }; // MsmGridCutoff
2321 
2322 
2323 //
2324 // MsmC1HermiteGridCutoff wraps kernel template for
2325 // C1 Hermite approximation. Elements of 1D chare array.
2326 //
2327 class MsmC1HermiteGridCutoff :
2328  public CBase_MsmC1HermiteGridCutoff,
2329  public MsmGridCutoffKernel<C1Vector,C1Matrix>
2330 {
2331  public:
2332  CProxyElement_MsmC1HermiteBlock msmBlockElementProxy; // root of reduction
2333  CkSectionInfo cookie; // need to save cookie for section reduction
2334 #ifdef MSM_REDUCE_GRID
2335  msm::Grid<C1Vector> ehfull; // need full-size block for section reduction
2336 #endif // MSM_REDUCE_GRID
2337 
2338  MsmC1HermiteGridCutoff() { }
2339 
2340  MsmC1HermiteGridCutoff(CkMigrateMessage *m)
2341 #if ! defined(MSM_MIGRATION)
2342  { }
2343 #else // MSM_MIGRATION
2344  : CBase_MsmC1HermiteGridCutoff(m) {
2345 #ifdef DEBUG_MSM_MIGRATE
2346  printf("MsmC1HermiteGridCutoff element %d migrated to processor %d\n",
2347  thisIndex, CkMyPe());
2348 #endif
2349  init();
2350  // access type dependent constants from map
2351  setupWeights(
2352  &(map->gc_c1hermite[ehblockSend.nblock_wrap.level]),
2353  NULL
2354  );
2355  }
2356 
2357  virtual void pup(PUP::er& p) {
2358 #ifdef DEBUG_MSM_MIGRATE
2359  printf("MsmC1HermiteGridCutoff element %d pupped on processor %d\n",
2360  thisIndex, CkMyPe());
2361 #endif
2362  CBase_MsmC1HermiteGridCutoff::pup(p); // pack our superclass
2363  MsmGridCutoffKernel<C1Vector,C1Matrix>::pup(p); // pack kernel state
2364  }
2365 #endif // MSM_MIGRATION
2366 
2367  void init() {
2368  MsmGridCutoffKernel<C1Vector,C1Matrix>::init();
2369  }
2370 
2371  void setup(MsmGridCutoffInitMsg *bmsg) {
2372  // base class consumes this init proxy message
2373  MsmGridCutoffKernel<C1Vector,C1Matrix>::setup(bmsg);
2374  // access type dependent constants from map
2375  setupWeights(
2376  &(map->gc_c1hermite[ehblockSend.nblock_wrap.level]),
2377  NULL
2378  );
2379 #ifdef DEBUG_MSM_GRID
2380  printf("MsmC1HermiteGridCutoff[%d]: setup()"
2381  " send to level=%d block=(%d,%d,%d)\n",
2382  thisIndex, ehblockSend.nblock_wrap.level,
2383  ehblockSend.nblock_wrap.n.i,
2384  ehblockSend.nblock_wrap.n.j,
2385  ehblockSend.nblock_wrap.n.k);
2386 #endif
2387 #ifdef MSM_REDUCE_GRID
2388  // allocate full buffer space needed for section reduction
2389  int level = ehblockSend.nblock_wrap.level;
2390  int i = ehblockSend.nblock_wrap.n.i;
2391  int j = ehblockSend.nblock_wrap.n.j;
2392  int k = ehblockSend.nblock_wrap.n.k;
2393  ehfull.init( map->blockLevel[level](i,j,k).nrange );
2394 #endif // MSM_REDUCE_GRID
2395  }
2396 
2397  void setupSections(MsmC1HermiteGridCutoffSetupMsg *msg) {
2398 #ifdef DEBUG_MSM_GRID
2399  CkPrintf("MSM C1 HERMITE GRID CUTOFF %d setup section on PE %d\n",
2400  thisIndex, CkMyPe());
2401 #endif
2402  CkGetSectionInfo(cookie, msg); // init the cookie
2403  msg->get(&msmBlockElementProxy); // get proxy to MsmC1HermiteBlock
2404  delete msg;
2405  }
2406 
2407  void compute(GridMsg *gmsg) {
2408 #ifdef DEBUG_MSM_GRID
2409  printf("MsmC1HermiteGridCutoff %d: compute()\n", thisIndex);
2410 #endif
2411 #if 0
2412  // base class consumes this grid message
2413  MsmGridCutoffKernel<C1Vector,C1Matrix>::compute(gmsg);
2414 #else
2415  compute_specialized(gmsg);
2416 #endif
2417 
2418 #ifdef MSM_TIMING
2419  double startTime, stopTime;
2420  startTime = CkWallTimer();
2421 #endif
2422 #ifdef MSM_REDUCE_GRID
2423 
2424  // perform section reduction over potential grids
2425  CProxy_CkMulticastMgr mcastProxy =
2426  CkpvAccess(BOCclass_group).multicastMgr;
2427  CkMulticastMgr *mcastPtr =
2428  CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
2429  CkCallback cb(CkIndex_MsmC1HermiteBlock::sumReducedPotential(NULL),
2430  msmBlockElementProxy);
2431  // sum into "full" sized buffer needed for contribute
2432  ehfull.reset(0);
2433  ehfull += eh;
2434  mcastPtr->contribute(
2435  ehfull.nn() * sizeof(C1Vector), ehfull.data().buffer(),
2436  CkReduction::sum_float, cookie, cb);
2437 
2438 #else
2439  // place eh into message
2440  const msm::BlockIndex& bindex = ehblockSend.nblock_wrap;
2441  int msgsz = eh.data().len() * sizeof(C1Vector);
2442  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
2443  SET_PRIORITY(gm, sequence, priority);
2444  gm->put(eh, bindex.level, sequence);
2445  // lookup in ComputeMsmMgr proxy array by level
2446  mgrLocal->msmC1HermiteBlock[bindex.level](
2447  bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
2448 
2449 #endif // MSM_REDUCE_GRID
2450 
2451 #ifdef MSM_TIMING
2452  stopTime = CkWallTimer();
2453  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
2454  mgrLocal->doneTiming();
2455 #endif
2456  } // compute()
2457 
2458  // try to improve performance of the major computational part
2459  void compute_specialized(GridMsg *gmsg);
2460 
2461 }; // MsmC1HermiteGridCutoff
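
// Example (added sketch): the hand-expanded matrix-vector accumulation that
// compute_specialized() below performs, isolated with stand-in types. VEC
// stands for C1_VECTOR_SIZE; a single running index km walks the row-major
// matrix entries while im walks the input vector, matching the km/jm/im
// loop nest in the real routine. The real routine additionally skips the
// multiply entirely when the first word of the matrix is zero.
namespace {
  enum { VEC = 8 };  // assumed size; the real constant is C1_VECTOR_SIZE
  struct ExVec { Float velem[VEC]; };
  struct ExMat { Float melem[VEC*VEC]; };
  inline void exampleAccumMatVec(ExVec& ehsum, const ExMat& g, const ExVec& q) {
    for (int km = 0, jm = 0;  jm < VEC;  jm++) {
      for (int im = 0;  im < VEC;  im++, km++) {
        ehsum.velem[jm] += g.melem[km] * q.velem[im];
      }
    }
  }
}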
2462 
2463 void MsmC1HermiteGridCutoff::compute_specialized(GridMsg *gmsg) {
2464 #ifdef MSM_TIMING
2465  double startTime, stopTime;
2466  startTime = CkWallTimer();
2467 #endif
2468  //
2469  // receive block of charges
2470  //
2471  int pid;
2472  // qh is resized only the first time, memory allocation persists
2473  gmsg->get(qh, pid, sequence);
2474  delete gmsg;
2475 #ifdef MSM_TIMING
2476  stopTime = CkWallTimer();
2477  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
2478 #endif
2479 
2480  //
2481  // grid cutoff calculation
2482  // this charge block -> this potential block
2483  //
2484 
2485 #ifdef MSM_TIMING
2486  startTime = stopTime;
2487 #endif
2488  // resets indexing on block
2489  eh.init(ehblockSend.nrange); // (always have to re-init nrange for eh)
2490  eh.reset(0);
2491  // index range of weights
2492  int gia = pgc->ia();
2493  int gib = pgc->ib();
2494  int gja = pgc->ja();
2495  int gjb = pgc->jb();
2496  int gka = pgc->ka();
2497  int gkb = pgc->kb();
2498  int gni = pgc->ni();
2499  int gnj = pgc->nj();
2500  // index range of charge grid
2501  int qia = qh.ia();
2502  int qib = qh.ib();
2503  int qja = qh.ja();
2504  int qjb = qh.jb();
2505  int qka = qh.ka();
2506  int qkb = qh.kb();
2507  int qni = qh.ni();
2508  int qnj = qh.nj();
2509  // index range of potentials
2510  int ia = eh.ia();
2511  int ib = eh.ib();
2512  int ja = eh.ja();
2513  int jb = eh.jb();
2514  int ka = eh.ka();
2515  int kb = eh.kb();
2516 
2517  int index = 0;
2518 
2519  // access buffers directly
2520  const C1Matrix *gcbuffer = pgc->data().buffer();
2521  const C1Vector *qhbuffer = qh.data().buffer();
2522  C1Vector *ehbuffer = eh.data().buffer();
2523 #ifdef DEBUG_MEMORY_ALIGNMENT
2524  printf("gcbuffer mem: addr=%p div32=%lu mod32=%lu\n",
2525  gcbuffer,
2526  (unsigned long)(gcbuffer)/32,
2527  (unsigned long)(gcbuffer)%32);
2528  printf("qhbuffer mem: addr=%p div32=%lu mod32=%lu\n",
2529  qhbuffer,
2530  (unsigned long)(qhbuffer)/32,
2531  (unsigned long)(qhbuffer)%32);
2532  printf("ehbuffer mem: addr=%p div32=%lu mod32=%lu\n",
2533  ehbuffer,
2534  (unsigned long)(ehbuffer)/32,
2535  (unsigned long)(ehbuffer)%32);
2536 #endif
2537 
2538 #ifndef MSM_COMM_ONLY
2539  // loop over potentials
2540  for (int k = ka; k <= kb; k++) {
2541  // clip charges to weights along k
2542  int mka = ( qka >= gka + k ? qka : gka + k );
2543  int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
2544 
2545  for (int j = ja; j <= jb; j++) {
2546  // clip charges to weights along j
2547  int mja = ( qja >= gja + j ? qja : gja + j );
2548  int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
2549 
2550  for (int i = ia; i <= ib; i++) {
2551  // clip charges to weights along i
2552  int mia = ( qia >= gia + i ? qia : gia + i );
2553  int mib = ( qib <= gib + i ? qib : gib + i );
2554 
2555  // accumulate sum to this eh point
2556  C1Vector ehsum = 0;
2557 
2558  // loop over charge grid
2559  int nn = mib - mia + 1;
2560 
2561  {
2562  int qnji = qnj * qni;
2563  int qkoff = -qka*qnji - qja*qni - qia + mia;
2564  int gnji = gnj * gni;
2565  int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
2566 
2567  for (int qk = mka; qk <= mkb; qk++) {
2568  int qjkoff = qkoff + qk*qnji;
2569  int gjkoff = gkoff + qk*gnji;
2570 
2571  for (int qj = mja; qj <= mjb; qj++) {
2572  const C1Vector *qbuf = qhbuffer + (qjkoff + qj*qni);
2573  const C1Matrix *gbuf = gcbuffer + (gjkoff + qj*gni);
2574 #ifdef MSM_PROFILING
2575  mgrLocal->xLoopCnt[nn]++;
2576 #endif
2577 // help the vectorizer make reasonable decisions
2578 #if defined(__INTEL_COMPILER)
2579 #pragma vector always
2580 #endif
2581  for (int ii = 0; ii < nn; ii++) {
2582 
2583 #if 0
2584  ehsum += gbuf[ii] * qbuf[ii];
2585 #else
2586  // skip matvec when matrix is 0
2587  // first matrix element tells us if this is the case
2588  if ( *((int *)(gbuf)) != 0) {
2589 
2590  // expand matrix-vector multiply
2591 #if defined(__INTEL_COMPILER)
2592 #pragma vector always
2593 #endif
2594  for (int km=0, jm=0; jm < C1_VECTOR_SIZE; jm++) {
2595  for (int im=0; im < C1_VECTOR_SIZE; im++, km++) {
2596  ehsum.velem[jm] += gbuf->melem[km] * qbuf->velem[im];
2597  }
2598  }
2599  } // if
2600  gbuf++;
2601  qbuf++;
2602 #endif
2603  }
2604  }
2605  } // end loop over charge grid
2606 
2607  }
2608 
2609  ehbuffer[index] = ehsum;
2610  index++;
2611  }
2612  }
2613  } // end loop over potentials
2614 #endif // !MSM_COMM_ONLY
2615 
2616 #ifdef MSM_PROFILING
2617  mgrLocal->doneProfiling();
2618 #endif
2619 
2620  //
2621  // send block of potentials
2622  //
2623 
2624 #ifdef MSM_FOLD_FACTOR
2625  // if "fold factor" is active for this level,
2626  // need to sum unfolded potential grid back into periodic grid
2627  if (isfold) {
2628  // copy unfolded grid
2629  ehfold = eh;
2630  // reset eh indexing to correctly folded size
2631  eh.set(eia, eni, eja, enj, eka, enk);
2632  eh.reset(0);
2633 #ifdef DEBUG_MSM_GRID
2634  printf("level=%d ehfold: [%d..%d] x [%d..%d] x [%d..%d] "
2635  "(%d x %d x %d)\n"
2636  " eh: [%d..%d] x [%d..%d] x [%d..%d] "
2637  "(%d x %d x %d)\n"
2638  " eh lower: %d %d %d\n",
2639  qhblockIndex.level,
2640  ehfold.ia(), ehfold.ib(),
2641  ehfold.ja(), ehfold.jb(),
2642  ehfold.ka(), ehfold.kb(),
2643  ehfold.ni(), ehfold.nj(), ehfold.nk(),
2644  eh.ia(), eh.ib(),
2645  eh.ja(), eh.jb(),
2646  eh.ka(), eh.kb(),
2647  eh.ni(), eh.nj(), eh.nk(),
2648  ehblockSend.nrange_wrap.lower().i,
2649  ehblockSend.nrange_wrap.lower().j,
2650  ehblockSend.nrange_wrap.lower().k
2651  );
2652 #endif
2653  const C1Vector *ehfoldbuf = ehfold.data().buffer();
2654  C1Vector *ehbuf = eh.data().buffer();
2655  // now we "fold" eh by calculating the
2656  // wrap around sum of ehfold into correctly sized eh
2657  int index = 0;
2658  for (int k = ka; k <= kb; k++) {
2659  int kk = k;
2660  if (kk < eka) do { kk += enk; } while (kk < eka);
2661  else if (kk > ekb) do { kk -= enk; } while (kk > ekb);
2662  int koff = (kk - eka) * enj;
2663  for (int j = ja; j <= jb; j++) {
2664  int jj = j;
2665  if (jj < eja) do { jj += enj; } while (jj < eja);
2666  else if (jj > ejb) do { jj -= enj; } while (jj > ejb);
2667  int jkoff = (koff + (jj - eja)) * eni;
2668  for (int i = ia; i <= ib; i++, index++) {
2669  int ii = i;
2670  if (ii < eia) do { ii += eni; } while (ii < eia);
2671  else if (ii > eib) do { ii -= eni; } while (ii > eib);
2672  int ijkoff = jkoff + (ii - eia);
2673  ehbuf[ijkoff] += ehfoldbuf[index];
2674  }
2675  }
2676  }
2677  }
2678  else {
2679  // shift grid index range to its true (wrapped) values
2680  eh.updateLower( ehblockSend.nrange_wrap.lower() );
2681  }
2682 #else // !MSM_FOLD_FACTOR
2683  // shift grid index range to its true (wrapped) values
2684  eh.updateLower( ehblockSend.nrange_wrap.lower() );
2685 #endif // MSM_FOLD_FACTOR
2686 
2687 #ifdef MSM_TIMING
2688  stopTime = CkWallTimer();
2689  mgrLocal->msmTiming[MsmTimer::GRIDCUTOFF] += stopTime - startTime;
2690 #endif
2691 } // MsmC1HermiteGridCutoff::compute_specialized()
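
// Example (added sketch): the "fold factor" wrap-around sum from the two
// compute routines above, reduced to 1D. Each unfolded index i is shifted
// by whole periods until it lands in the folded range [a..b], and its value
// accumulates there; the 3D code applies this shift independently along
// i, j, and k.
static void fold1D(const Float *unfolded, int ia, int ib,
    Float *folded, int a, int b) {
  int n = b - a + 1;  // period of the folded grid
  for (int i = ia;  i <= ib;  i++) {
    int ii = i;
    while (ii < a) ii += n;
    while (ii > b) ii -= n;
    folded[ii - a] += unfolded[i - ia];
  }
}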
2692 
2693 // MsmGridCutoff
2694 //
2696 
2697 
2699 //
2700 // MsmBlock
2701 //
2702 // Performs restriction and prolongation.
2703 //
2704 // Each level of the MSM grid hierarchy is partitioned into MsmBlocks,
2705 // holding both charge and potential grid blocks.
2706 //
2707 // The MsmBlockKernel provides templated routines for the MSM
2708 // restriction and prolongation algorithms. Overall is very small
2709 // part of computational work (less than 2% total for C1 Hermite,
2710 // less than 4% total for cubic).
2711 // XXX Could be made faster with factored restriction and prolongation
2712 // algorithms --- especially important for higher order or for
2713 // generalizing to coarser grid spacing that is not 2h.
2714 // XXX Haven't yet determined factorization for C1 Hermite.
2715 //
2716 // The classes that inherit from MsmBlockKernel provide
2717 // 3D chare array elements for each level with significant management:
2718 // - receive and sum charges from below
2719 // (either PatchData or lower level MsmBlock)
2720 // - calculate restriction to 2h grid
2721 // - send up (if not on highest level)
2722 // - section broadcast to MsmGridCutoff
2723 // - receive and sum potentials from above and from
2724 // section reduction of MsmGridCutoff
2725 // - calculate prolongation to (1/2)h grid and send down,
2726 // OR send to PatchData
2727 //
2728 // XXX Grid cutoff calculation below is now replaced with
2729 // MsmGridCutoff to provide enough parallel work units.
2730 //
2731 
2732 template <class Vtype, class Mtype>
2733 class MsmBlockKernel {
2734  public:
2735  CProxy_ComputeMsmMgr mgrProxy;
2736  ComputeMsmMgr *mgrLocal; // for quick access to data
2737  msm::Map *map; // for quick access to map data
2738  msm::BlockDiagram *bd; // for quick access to this block's diagram
2739  msm::Grid<Vtype> qh; // charge grid
2740  msm::Grid<Vtype> eh; // potential grid
2741 #ifndef MSM_GRID_CUTOFF_DECOMP
2742  const msm::Grid<Mtype> *gcWeights;
2743  msm::Grid<Vtype> ehCutoff;
2744 #endif
2745  const msm::Grid<Mtype> *resStencil; // restriction stencil
2746  const msm::Grid<Mtype> *proStencil; // prolongation stencil
2747  msm::Grid<Vtype> qhRestricted; // restricted charge grid
2748  msm::Grid<Vtype> ehProlongated; // prolongated potential grid
2749  msm::Grid<Vtype> subgrid; // for buffering portions of grid to send
2750  int cntRecvsCharge; // count of charge messages received
2751  int cntRecvsPotential; // count of potential messages received
2752 
2753  msm::BlockIndex blockIndex; // level and 3D index of this block
2754 
2755  int sequence; // from incoming message for message priority
2756 
2757  MsmBlockKernel(const msm::BlockIndex& bindex);
2758  MsmBlockKernel(CkMigrateMessage *m) { }
2759 
2760  void init();
2761 
2762 #ifndef MSM_GRID_CUTOFF_DECOMP
2763  void setupStencils(
2764  const msm::Grid<Mtype> *res,
2765  const msm::Grid<Mtype> *pro,
2766  const msm::Grid<Mtype> *gc
2767  )
2768  {
2769  resStencil = res;
2770  proStencil = pro;
2771  gcWeights = gc;
2772  }
2773 #else
2774  void setupStencils(
2775  const msm::Grid<Mtype> *res,
2776  const msm::Grid<Mtype> *pro
2777  )
2778  {
2779  resStencil = res;
2780  proStencil = pro;
2781  }
2782 #endif
2783 
2784  void restrictionKernel();
2785 #ifndef MSM_GRID_CUTOFF_DECOMP
2786  void gridCutoffKernel();
2787 #endif
2788  void prolongationKernel();
2789 
2790 }; // class MsmBlockKernel<Vtype,Mtype>
2791 
2792 template <class Vtype, class Mtype>
2793 MsmBlockKernel<Vtype,Mtype>::MsmBlockKernel(const msm::BlockIndex& bindex) {
2794  blockIndex = bindex;
2795  mgrProxy = CProxy_ComputeMsmMgr(CkpvAccess(BOCclass_group).computeMsmMgr);
2796  mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
2797  CkpvAccess(BOCclass_group).computeMsmMgr);
2798  map = &(mgrLocal->mapData());
2799  bd = &(map->blockLevel[blockIndex.level](blockIndex.n));
2800  qh.init( bd->nrange );
2801  eh.init( bd->nrange );
2802 #ifndef MSM_GRID_CUTOFF_DECOMP
2803  ehCutoff.init( bd->nrangeCutoff );
2804 #endif
2805  qhRestricted.init( bd->nrangeRestricted );
2806  ehProlongated.init( bd->nrangeProlongated );
2807 #ifdef DEBUG_MSM_GRID
2808  printf("MsmBlockKernel level=%d, n=%d %d %d: constructor\n",
2809  blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2810 #endif
2811 #ifdef MSM_TIMING
2812  mgrLocal->addTiming();
2813 #endif
2814  init();
2815 } // MsmBlockKernel<Vtype,Mtype>::MsmBlockKernel()
2816 
2817 
2818 template <class Vtype, class Mtype>
2819 void MsmBlockKernel<Vtype,Mtype>::init() {
2820  qh.reset(0);
2821  eh.reset(0);
2822 #ifndef MSM_GRID_CUTOFF_DECOMP
2823  ehCutoff.reset(0);
2824 #endif
2825  qhRestricted.reset(0);
2826  ehProlongated.reset(0);
2827  cntRecvsCharge = 0;
2828  cntRecvsPotential = 0;
2829 } // MsmBlockKernel<Vtype,Mtype>::init()
2830 
2831 
2832 template <class Vtype, class Mtype>
2833 void MsmBlockKernel<Vtype,Mtype>::restrictionKernel()
2834 {
2835 #ifdef DEBUG_MSM_GRID
2836  printf("MsmBlockKernel level=%d, id=%d %d %d: restriction\n",
2837  blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2838 #endif
2839 
2840 #ifdef MSM_TIMING
2841  double startTime, stopTime;
2842  startTime = CkWallTimer();
2843 #endif
2844 
2845 #ifndef MSM_COMM_ONLY
2846  // stencil data for approximating charge on restricted grid
2847  const int approx = mgrLocal->approx;
2848  const int nstencil = ComputeMsmMgr::Nstencil[approx];
2849  const int *offset = ComputeMsmMgr::IndexOffset[approx];
2850  const msm::Grid<Mtype>& res = *resStencil;
2851 
2852  // index range for h grid charges
2853  int ia1 = qh.ia();
2854  int ib1 = qh.ib();
2855  int ja1 = qh.ja();
2856  int jb1 = qh.jb();
2857  int ka1 = qh.ka();
2858  int kb1 = qh.kb();
2859 
2860  // index range for restricted (2h) grid charges
2861  int ia2 = qhRestricted.ia();
2862  int ib2 = qhRestricted.ib();
2863  int ja2 = qhRestricted.ja();
2864  int jb2 = qhRestricted.jb();
2865  int ka2 = qhRestricted.ka();
2866  int kb2 = qhRestricted.kb();
2867 
2868  // reset grid
2869  qhRestricted.reset(0);
2870 
2871  // loop over restricted (2h) grid
2872  for (int k2 = ka2; k2 <= kb2; k2++) {
2873  int k1 = 2 * k2;
2874  for (int j2 = ja2; j2 <= jb2; j2++) {
2875  int j1 = 2 * j2;
2876  for (int i2 = ia2; i2 <= ib2; i2++) {
2877  int i1 = 2 * i2;
2878 
2879  // loop over stencils on h grid
2880  Vtype& q2hsum = qhRestricted(i2,j2,k2);
2881 
2882  for (int k = 0; k < nstencil; k++) {
2883  int kn = k1 + offset[k];
2884  if (kn < ka1) continue;
2885  else if (kn > kb1) break;
2886 
2887  for (int j = 0; j < nstencil; j++) {
2888  int jn = j1 + offset[j];
2889  if (jn < ja1) continue;
2890  else if (jn > jb1) break;
2891 
2892  for (int i = 0; i < nstencil; i++) {
2893  int in = i1 + offset[i];
2894  if (in < ia1) continue;
2895  else if (in > ib1) break;
2896 
2897  q2hsum += res(i,j,k) * qh(in,jn,kn);
2898  }
2899  }
2900  } // end loop over stencils on h grid
2901 
2902  }
2903  }
2904  } // end loop over restricted (2h) grid
2905 #else
2906  qhRestricted.reset(0);
2907 #endif // !MSM_COMM_ONLY
2908 
2909 #ifdef MSM_TIMING
2910  stopTime = CkWallTimer();
2911  mgrLocal->msmTiming[MsmTimer::RESTRICT] += stopTime - startTime;
2912 #endif
2913 } // MsmBlockKernel<Vtype,Mtype>::restrictionKernel()
2914 
2915 
2916 #ifndef MSM_GRID_CUTOFF_DECOMP
2917 template <class Vtype, class Mtype>
2918 void MsmBlockKernel<Vtype,Mtype>::gridCutoffKernel()
2919 {
2920 #ifdef DEBUG_MSM_GRID
2921  printf("MsmBlockKernel level=%d, id=%d %d %d: grid cutoff\n",
2922  blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2923 #endif
2924 #ifdef MSM_TIMING
2925  double startTime, stopTime;
2926  startTime = CkWallTimer();
2927 #endif
2928 #ifndef MSM_COMM_ONLY
2929  // need grid of weights for this level
2930  msm::Grid<Mtype>& gc = *gcWeights;
2931  // index range of weights
2932  int gia = gc.ia();
2933  int gib = gc.ib();
2934  int gja = gc.ja();
2935  int gjb = gc.jb();
2936  int gka = gc.ka();
2937  int gkb = gc.kb();
2938  // index range of charge grid
2939  int qia = qh.ia();
2940  int qib = qh.ib();
2941  int qja = qh.ja();
2942  int qjb = qh.jb();
2943  int qka = qh.ka();
2944  int qkb = qh.kb();
2945  // index range of potentials
2946  int ia = ehCutoff.ia();
2947  int ib = ehCutoff.ib();
2948  int ja = ehCutoff.ja();
2949  int jb = ehCutoff.jb();
2950  int ka = ehCutoff.ka();
2951  int kb = ehCutoff.kb();
2952  // reset grid
2953  ehCutoff.reset(0);
2954  // loop over potentials
2955  for (int k = ka; k <= kb; k++) {
2956  for (int j = ja; j <= jb; j++) {
2957  for (int i = ia; i <= ib; i++) {
2958  // clip charges to weights
2959  int mia = ( qia >= gia + i ? qia : gia + i );
2960  int mib = ( qib <= gib + i ? qib : gib + i );
2961  int mja = ( qja >= gja + j ? qja : gja + j );
2962  int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
2963  int mka = ( qka >= gka + k ? qka : gka + k );
2964  int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
2965  // accumulate sum to this eh point
2966  Vtype& ehsum = ehCutoff(i,j,k);
2967  // loop over smaller charge grid
2968  for (int qk = mka; qk <= mkb; qk++) {
2969  for (int qj = mja; qj <= mjb; qj++) {
2970  for (int qi = mia; qi <= mib; qi++) {
2971  ehsum += gc(qi-i, qj-j, qk-k) * qh(qi,qj,qk);
2972  }
2973  }
2974  } // end loop over smaller charge grid
2975 
2976  }
2977  }
2978  } // end loop over potentials
2979 #else
2980  ehCutoff.reset(0);
2981 #endif // !MSM_COMM_ONLY
2982 #ifdef MSM_TIMING
2983  stopTime = CkWallTimer();
2984  mgrLocal->msmTiming[MsmTimer::GRIDCUTOFF] += stopTime - startTime;
2985 #endif
2986 } // MsmBlockKernel<Vtype,Mtype>::gridCutoffKernel()
2987 #endif // MSM_GRID_CUTOFF_DECOMP
2988 
2989 
2990 template <class Vtype, class Mtype>
2991 void MsmBlockKernel<Vtype,Mtype>::prolongationKernel()
2992 {
2993 #ifdef DEBUG_MSM_GRID
2994  printf("MsmBlockKernel level=%d, id=%d %d %d: prolongation\n",
2995  blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
2996 #endif
2997 
2998 #ifdef MSM_TIMING
2999  double startTime, stopTime;
3000  startTime = CkWallTimer();
3001 #endif
3002 #ifndef MSM_COMM_ONLY
3003  // stencil data for approximating potential on prolongated grid
3004  const int approx = mgrLocal->approx;
3005  const int nstencil = ComputeMsmMgr::Nstencil[approx];
3006  const int *offset = ComputeMsmMgr::IndexOffset[approx];
3007  const msm::Grid<Mtype>& pro = *proStencil;
3008 
3009  // index range for prolongated h grid potentials
3010  int ia1 = ehProlongated.ia();
3011  int ib1 = ehProlongated.ib();
3012  int ja1 = ehProlongated.ja();
3013  int jb1 = ehProlongated.jb();
3014  int ka1 = ehProlongated.ka();
3015  int kb1 = ehProlongated.kb();
3016 
3017  // index range for 2h grid potentials
3018  int ia2 = eh.ia();
3019  int ib2 = eh.ib();
3020  int ja2 = eh.ja();
3021  int jb2 = eh.jb();
3022  int ka2 = eh.ka();
3023  int kb2 = eh.kb();
3024 
3025  // loop over 2h grid
3026  for (int k2 = ka2; k2 <= kb2; k2++) {
3027  int k1 = 2 * k2;
3028  for (int j2 = ja2; j2 <= jb2; j2++) {
3029  int j1 = 2 * j2;
3030  for (int i2 = ia2; i2 <= ib2; i2++) {
3031  int i1 = 2 * i2;
3032 
3033  // loop over stencils on prolongated h grid
3034  for (int k = 0; k < nstencil; k++) {
3035  int kn = k1 + offset[k];
3036  if (kn < ka1) continue;
3037  else if (kn > kb1) break;
3038 
3039  for (int j = 0; j < nstencil; j++) {
3040  int jn = j1 + offset[j];
3041  if (jn < ja1) continue;
3042  else if (jn > jb1) break;
3043 
3044  for (int i = 0; i < nstencil; i++) {
3045  int in = i1 + offset[i];
3046  if (in < ia1) continue;
3047  else if (in > ib1) break;
3048 
3049  ehProlongated(in,jn,kn) += pro(i,j,k) * eh(i2,j2,k2);
3050  }
3051  }
3052  } // end loop over stencils on prolongated h grid
3053 
3054  }
3055  }
3056  } // end loop over 2h grid
3057 #else
3058  ehProlongated.reset(0);
3059 #endif // !MSM_COMM_ONLY
3060 #ifdef MSM_TIMING
3061  stopTime = CkWallTimer();
3062  mgrLocal->msmTiming[MsmTimer::PROLONGATE] += stopTime - startTime;
3063 #endif
3064 } // MsmBlockKernel<Vtype,Mtype>::prolongationKernel()
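
// Example (added sketch): the scatter pattern of prolongationKernel() in 1D.
// Prolongation is the transpose of restriction: each coarse (2h) potential
// is pushed out to fine (h) grid points 2*i2 + offset using the same
// stencil weights as restriction (the code above passes map->grespro for
// both stencils).
static void prolongate1D_cubic(const Float *e2h, int n2, Float *eh1, int n1) {
  static const int offset[5] = { -3, -1, 0, 1, 3 };
  static const Float phi[5] = { -1.f/16, 9.f/16, 1, 9.f/16, -1.f/16 };
  for (int i2 = 0;  i2 < n2;  i2++) {
    int i1 = 2 * i2;
    for (int s = 0;  s < 5;  s++) {
      int in = i1 + offset[s];
      if (in < 0 || in >= n1) continue;  // clip stencil to the grid
      eh1[in] += phi[s] * e2h[i2];
    }
  }
}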
3065 
3066 
3067 //
3068 // MsmBlock handles grids of function values only
3069 // (for cubic, quintic, etc., approximation)
3070 //
3071 class MsmBlock :
3072  public CBase_MsmBlock,
3073  public MsmBlockKernel<Float,Float>
3074 {
3075  public:
3076  CProxySection_MsmGridCutoff msmGridCutoffBroadcast;
3077  CProxySection_MsmGridCutoff msmGridCutoffReduction;
3078 
3079  MsmBlock(int level) :
3080  MsmBlockKernel<Float,Float>(
3081  msm::BlockIndex(level,
3082  msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
3083  )
3084  {
3085 #ifndef MSM_GRID_CUTOFF_DECOMP
3086  setupStencils(&(map->grespro), &(map->grespro), &(map->gc[level]));
3087 #else
3088  setupStencils(&(map->grespro), &(map->grespro));
3089 #endif
3090  }
3091  MsmBlock(CkMigrateMessage *m) : MsmBlockKernel<Float,Float>(m) { }
3092 
3093  void setupSections();
3094 
3095  void sumReducedPotential(CkReductionMsg *msg) {
3096 #ifdef MSM_TIMING
3097  double startTime, stopTime;
3098  startTime = CkWallTimer();
3099 #endif
3100  msm::Grid<Float> ehfull;
3101  ehfull.init( msm::IndexRange(eh) );
3102  memcpy(ehfull.data().buffer(), msg->getData(), msg->getSize());
3103  delete msg;
3104  int priority = mgrLocal->nlevels
3105  + 2*(mgrLocal->nlevels - blockIndex.level)-1;
3106  int msgsz = ehfull.data().len() * sizeof(Float);
3107  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3108  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3109  gm->put(ehfull, blockIndex.level, sequence); // send my level
3110 #ifdef MSM_TIMING
3111  stopTime = CkWallTimer();
3112  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3113 #endif
3114  addPotential(gm);
3115  }
3116 
3117  void addCharge(GridMsg *); // entry
3118 
3119  void restriction() {
3120  restrictionKernel();
3121  sendUpCharge();
3122  }
3123  void sendUpCharge();
3124  void gridCutoff();
3125 #ifndef MSM_GRID_CUTOFF_DECOMP
3126  void sendAcrossPotential();
3127 #endif
3128 
3129  void addPotential(GridMsg *); // entry
3130 
3131  void prolongation() {
3132  prolongationKernel();
3133  sendDownPotential();
3134  }
3135  void sendDownPotential();
3136  void sendPatch();
3137 }; // class MsmBlock
3138 
3139 
3140 void MsmBlock::setupSections()
3141 {
3142 #ifdef DEBUG_MSM_GRID
3143  CkPrintf("LEVEL %d MSM BLOCK (%d,%d,%d): "
3144  "creating broadcast section on PE %d\n",
3145  blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
3146 #endif
3147  std::vector<CkArrayIndex1D> elems;
3148  elems.reserve(bd->indexGridCutoff.len());
3149  for (int n = 0; n < bd->indexGridCutoff.len(); n++) {
3150  elems.emplace_back(bd->indexGridCutoff[n]);
3151  }
3152  msmGridCutoffBroadcast = CProxySection_MsmGridCutoff::ckNew(
3153  mgrLocal->msmGridCutoff, elems.data(), elems.size()
3154  );
3155  CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
3156  CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
3157  msmGridCutoffBroadcast.ckSectionDelegate(mcastPtr);
3158 
3159 #ifdef DEBUG_MSM_GRID
3160  char s[1024];
3161  sprintf(s, "LEVEL %d MSM BLOCK (%d,%d,%d): "
3162  "creating reduction section on PE %d\n",
3163  blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
3164 #endif
3165  std::vector<CkArrayIndex1D> elems2;
3166  elems2.reserve(bd->recvGridCutoff.len());
3167 #ifdef DEBUG_MSM_GRID
3168  strcat(s, "receiving from MsmGridCutoff ID:");
3169 #endif
3170  for (int n = 0; n < bd->recvGridCutoff.len(); n++) {
3171 #ifdef DEBUG_MSM_GRID
3172  char t[20];
3173  sprintf(t, " %d", bd->recvGridCutoff[n]);
3174  strcat(s, t);
3175 #endif
3176  elems2.emplace_back(bd->recvGridCutoff[n]);
3177  }
3178 #ifdef DEBUG_MSM_GRID
3179  strcat(s, "\n");
3180  CkPrintf(s);
3181 #endif
3182  msmGridCutoffReduction = CProxySection_MsmGridCutoff::ckNew(
3183  mgrLocal->msmGridCutoff, elems2.data(), elems2.size()
3184  );
3185  msmGridCutoffReduction.ckSectionDelegate(mcastPtr);
3186  MsmGridCutoffSetupMsg *msg = new MsmGridCutoffSetupMsg;
3187  CProxyElement_MsmBlock thisElementProxy = thisProxy(thisIndex);
3188  msg->put(&thisElementProxy);
3189 
3190  msmGridCutoffReduction.setupSections(msg); // broadcast to entire section
3191 
3192  /* XXX alternatively, setup default reduction client
3193  *
3194  mcastPtr->setReductionClient(msmGridCutoffReduction,
3195  new CkCallback(CkIndex_MsmBlock::myReductionEntry(NULL),
3196  thisElementProxy));
3197  *
3198  */
3199 }
3200 
3201 
3202 void MsmBlock::addCharge(GridMsg *gm)
3203 {
3204 #ifdef MSM_TIMING
3205  double startTime, stopTime;
3206  startTime = CkWallTimer();
3207 #endif
3208  int pid;
3209  gm->get(subgrid, pid, sequence);
3210  delete gm;
3211  qh += subgrid;
3212 #ifdef MSM_TIMING
3213  stopTime = CkWallTimer();
3214  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3215 #endif
3216  if (++cntRecvsCharge == bd->numRecvsCharge) {
3217  int nlevels = mgrLocal->numLevels();
3218  if (blockIndex.level < nlevels-1) {
3219  restriction();
3220  }
3221  gridCutoff();
3222  }
3223 } // MsmBlock::addCharge()
3224 
3225 
3226 void MsmBlock::sendUpCharge()
3227 {
3228 #ifdef MSM_TIMING
3229  double startTime, stopTime;
3230  startTime = CkWallTimer();
3231 #endif
3232  int lnext = blockIndex.level + 1;
3233  // buffer portions of grid to send to Blocks on next level
3234  for (int n = 0; n < bd->sendUp.len(); n++) {
3235  // initialize the proper subgrid indexing range
3236  subgrid.init( bd->sendUp[n].nrange );
3237  // extract the values from the larger grid into the subgrid
3238  qhRestricted.extract(subgrid);
3239  // translate the subgrid indexing range to match the MSM block
3240  subgrid.updateLower( bd->sendUp[n].nrange_wrap.lower() );
3241  // add the subgrid charges into the block
3242  msm::BlockIndex& bindex = bd->sendUp[n].nblock_wrap;
3243  ASSERT(bindex.level == lnext);
3244  // place subgrid into message
3245  // SET MESSAGE PRIORITY
3246  int msgsz = subgrid.nn() * sizeof(Float);
3247  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3248  SET_PRIORITY(gm, sequence, MSM_PRIORITY + lnext);
3249  gm->put(subgrid, blockIndex.level, sequence); // send my level
3250  // lookup in ComputeMsmMgr proxy array by level
3251  mgrLocal->msmBlock[lnext](
3252  bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
3253  } // for
3254 #ifdef MSM_TIMING
3255  stopTime = CkWallTimer();
3256  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3257 #endif
3258 } // MsmBlock::sendUpCharge()
3259 
3260 
3261 void MsmBlock::gridCutoff()
3262 {
3263 #ifdef DEBUG_MSM_GRID
3264  printf("MsmBlock level=%d, id=%d %d %d: grid cutoff\n",
3265  blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
3266 #endif
3267 #ifndef MSM_GRID_CUTOFF_DECOMP
3268  gridCutoffKernel();
3269  sendAcrossPotential();
3270 #else // MSM_GRID_CUTOFF_DECOMP
3271 
3272  // send charge block to MsmGridCutoff compute objects
3273 #ifdef MSM_TIMING
3274  double startTime, stopTime;
3275  startTime = CkWallTimer();
3276 #endif
3277  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
3278  int msgsz = qh.data().len() * sizeof(Float);
3279  int len = bd->indexGridCutoff.len();
3280 
3281 #if 0
3282  // send charge message to each MsmGridCutoff compute element in list
3283  for (int n = 0; n < len; n++) {
3284 #ifdef MSM_TIMING
3285  startTime = CkWallTimer();
3286 #endif
3287  int index = bd->indexGridCutoff[n];
3288  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3289  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3290  gm->put(qh, blockIndex.level, sequence); // send my level
3291 #ifdef MSM_TIMING
3292  stopTime = CkWallTimer();
3293  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3294 #endif
3295  mgrLocal->msmGridCutoff[index].compute(gm);
3296  }
3297 #else
3298 
3299  // broadcast charge message to section
3300  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3301  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3302  gm->put(qh, blockIndex.level, sequence); // send my level
3303  msmGridCutoffBroadcast.compute(gm);
3304 #ifdef MSM_TIMING
3305  stopTime = CkWallTimer();
3306  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3307 #endif
3308 
3309 #endif // 0
3310 
3311 #endif // MSM_GRID_CUTOFF_DECOMP
3312 
3313 } // MsmBlock::gridCutoff()
3314 
3315 
3316 #ifndef MSM_GRID_CUTOFF_DECOMP
3317 void MsmBlock::sendAcrossPotential()
3318 {
3319 #ifdef MSM_TIMING
3320  double startTime, stopTime;
3321  startTime = CkWallTimer();
3322 #endif
3323  int lnext = blockIndex.level;
3324  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
3325  // buffer portions of grid to send to Blocks on this level
3326  for (int n = 0; n < bd->sendAcross.len(); n++) {
3327  // initialize the proper subgrid indexing range
3328  subgrid.init( bd->sendAcross[n].nrange );
3329  // extract the values from the larger grid into the subgrid
3330  ehCutoff.extract(subgrid);
3331  // translate the subgrid indexing range to match the MSM block
3332  subgrid.updateLower( bd->sendAcross[n].nrange_wrap.lower() );
3333  // add the subgrid potentials into the block
3334  msm::BlockIndex& bindex = bd->sendAcross[n].nblock_wrap;
3335  ASSERT(bindex.level == lnext);
3336  // place subgrid into message
3337  int msgsz = subgrid.nn() * sizeof(Float);
3338  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3339  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3340  gm->put(subgrid, blockIndex.level, sequence); // send my level
3341  // lookup in ComputeMsmMgr proxy array by level
3342  mgrLocal->msmBlock[lnext](
3343  bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
3344  } // for
3345 #ifdef MSM_TIMING
3346  stopTime = CkWallTimer();
3347  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3348 #endif
3349 } // MsmBlock::sendAcrossPotential()
3350 #endif
3351 
3352 
3353 void MsmBlock::addPotential(GridMsg *gm)
3354 {
3355 #ifdef MSM_TIMING
3356  double startTime, stopTime;
3357  startTime = CkWallTimer();
3358 #endif
3359  int pid;
3360  int pseq;
3361  gm->get(subgrid, pid, pseq); // receive sender's level
3362  delete gm;
3363  eh += subgrid;
3364 #ifdef MSM_TIMING
3365  stopTime = CkWallTimer();
3366  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3367 #endif
3368  if (++cntRecvsPotential == bd->numRecvsPotential) {
3369  if (blockIndex.level > 0) {
3370  prolongation();
3371  }
3372  else {
3373  sendPatch();
3374  }
3375  }
3376 } // MsmBlock::addPotential()
3377 
3378 
3379 void MsmBlock::sendDownPotential()
3380 {
3381 #ifdef MSM_TIMING
3382  double startTime, stopTime;
3383  startTime = CkWallTimer();
3384 #endif
3385  int lnext = blockIndex.level - 1;
3386  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
3387  // buffer portions of grid to send to Blocks on next level
3388  for (int n = 0; n < bd->sendDown.len(); n++) {
3389  // initialize the proper subgrid indexing range
3390  subgrid.init( bd->sendDown[n].nrange );
3391  // extract the values from the larger grid into the subgrid
3392  ehProlongated.extract(subgrid);
3393  // translate the subgrid indexing range to match the MSM block
3394  subgrid.updateLower( bd->sendDown[n].nrange_wrap.lower() );
3395  // add the subgrid potentials into the block
3396  msm::BlockIndex& bindex = bd->sendDown[n].nblock_wrap;
3397  ASSERT(bindex.level == lnext);
3398  // place subgrid into message
3399  int msgsz = subgrid.nn() * sizeof(Float);
3400  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3401  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3402  gm->put(subgrid, blockIndex.level, sequence); // send my level
3403  // lookup in ComputeMsmMgr proxy array by level
3404  mgrLocal->msmBlock[lnext](
3405  bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
3406  } // for
3407 #ifdef MSM_TIMING
3408  stopTime = CkWallTimer();
3409  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3410  mgrLocal->doneTiming();
3411 #endif
3412  init(); // reinitialize for next computation
3413 } // MsmBlock::sendDownPotential()
3414 
3415 
3416 void MsmBlock::sendPatch()
3417 {
3418 #ifdef MSM_TIMING
3419  double startTime, stopTime;
3420  startTime = CkWallTimer();
3421 #endif
3422  int lnext = blockIndex.level;
3423  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
3424  ASSERT(lnext == 0);
3425  // buffer portions of grid to send to patches
3426  for (int n = 0; n < bd->sendPatch.len(); n++) {
3427  // initialize the proper subgrid indexing range
3428  subgrid.init( bd->sendPatch[n].nrange );
3429  // extract the values from the larger grid into the subgrid
3430  eh.extract(subgrid);
3431  // translate the subgrid indexing range to match the MSM block
3432  subgrid.updateLower( bd->sendPatch[n].nrange_unwrap.lower() );
3433  // deliver the subgrid potentials to the patch, need its patch ID
3434  int pid = bd->sendPatch[n].patchID;
3435  // place subgrid into message
3436  int msgsz = subgrid.nn() * sizeof(Float);
3437  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3438  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3439  gm->put(subgrid, pid, sequence); // send patch ID
3440  // lookup which PE has this patch
3441  PatchMap *pm = PatchMap::Object();
3442  int pe = pm->node(pid);
3443  mgrProxy[pe].addPotential(gm);
3444  }
3445 #ifdef MSM_TIMING
3446  stopTime = CkWallTimer();
3447  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3448  mgrLocal->doneTiming();
3449 #endif
3450  init(); // reinitialize for next computation
3451 } // MsmBlock::sendPatch()
3452 
3453 
3454 //
3455 // MsmC1HermiteBlock handles grids of vector elements
3456 // for C1 Hermite approximation
3457 //
3458 class MsmC1HermiteBlock :
3459  public CBase_MsmC1HermiteBlock,
3460  public MsmBlockKernel<C1Vector,C1Matrix>
3461 {
3462  public:
3463  CProxySection_MsmC1HermiteGridCutoff msmGridCutoffBroadcast;
3464  CProxySection_MsmC1HermiteGridCutoff msmGridCutoffReduction;
3465 
3466  MsmC1HermiteBlock(int level) :
3467  MsmBlockKernel<C1Vector,C1Matrix>(
3468  msm::BlockIndex(level,
3469  msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
3470  )
3471  {
3472  int isfirstlevel = (level == 0);
3473  int istoplevel = (level == map->gridrange.len()-1);
3474  const msm::Grid<C1Matrix> *res =
3475  (istoplevel ? NULL : &(map->gres_c1hermite[level]));
3476  const msm::Grid<C1Matrix> *pro =
3477  (isfirstlevel ? NULL : &(map->gpro_c1hermite[level-1]));
3478 #ifndef MSM_GRID_CUTOFF_DECOMP
3479  const msm::Grid<C1Matrix> *gc = &(map->gc_c1hermite[level]);
3480  setupStencils(res, pro, gc);
3481 #else
3482  setupStencils(res, pro);
3483 #endif
3484  }
3485  MsmC1HermiteBlock(CkMigrateMessage *m) :
3486  MsmBlockKernel<C1Vector,C1Matrix>(m) { }
3487 
3488  void setupSections();
3489 
3490  void sumReducedPotential(CkReductionMsg *msg) {
3491 #ifdef MSM_TIMING
3492  double startTime, stopTime;
3493  startTime = CkWallTimer();
3494 #endif
3495  msm::Grid<C1Vector> ehfull;
3496  ehfull.init( msm::IndexRange(eh) );
3497  memcpy(ehfull.data().buffer(), msg->getData(), msg->getSize());
3498  delete msg;
3499  int priority = mgrLocal->nlevels
3500  + 2*(mgrLocal->nlevels - blockIndex.level)-1;
3501  int msgsz = ehfull.data().len() * sizeof(C1Vector);
3502  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3503  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3504  gm->put(ehfull, blockIndex.level, sequence); // send my level
3505 #ifdef MSM_TIMING
3506  stopTime = CkWallTimer();
3507  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3508 #endif
3509  addPotential(gm);
3510  }
3511 
3512  void addCharge(GridMsg *); // entry
3513 
3514  void restriction() {
3515  restrictionKernel();
3516  sendUpCharge();
3517  }
3518  void sendUpCharge();
3519  void gridCutoff();
3520 #ifndef MSM_GRID_CUTOFF_DECOMP
3521  void sendAcrossPotential();
3522 #endif
3523 
3524  void addPotential(GridMsg *); // entry
3525 
3526  void prolongation() {
3527  prolongationKernel();
3528  sendDownPotential();
3529  }
3530  void sendDownPotential();
3531  void sendPatch();
3532 }; // class MsmC1HermiteBlock
3533 
3534 
3535 void MsmC1HermiteBlock::setupSections()
3536 {
3537 #ifdef DEBUG_MSM_GRID
3538  CkPrintf("LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d): "
3539  "creating broadcast section on PE %d\n",
3540  blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
3541 #endif
3542  std::vector<CkArrayIndex1D> elems;
3543  elems.reserve(bd->indexGridCutoff.len());
3544  for (int n = 0; n < bd->indexGridCutoff.len(); n++) {
3545  elems.emplace_back(bd->indexGridCutoff[n]);
3546  }
3547  msmGridCutoffBroadcast = CProxySection_MsmC1HermiteGridCutoff::ckNew(
3548  mgrLocal->msmC1HermiteGridCutoff, elems.data(), elems.size()
3549  );
3550  CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
3551  CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
3552  msmGridCutoffBroadcast.ckSectionDelegate(mcastPtr);
3553 
3554 #ifdef DEBUG_MSM_GRID
3555  char s[1024];
3556  snprintf(s, sizeof(s), "LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d): "
3557  "creating reduction section on PE %d\n",
3558  blockIndex.level, thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
3559 #endif
3560  std::vector<CkArrayIndex1D> elems2;
3561  elems2.reserve(bd->recvGridCutoff.len());
3562 #ifdef DEBUG_MSM_GRID
3563  strcat(s, "receiving from MsmC1HermiteGridCutoff ID:");
3564 #endif
3565  for (int n = 0; n < bd->recvGridCutoff.len(); n++) {
3566 #ifdef DEBUG_MSM_GRID
3567  char t[20];
3568  sprintf(t, " %d", bd->recvGridCutoff[n]);
3569  strcat(s, t);
3570 #endif
3571  elems2.emplace_back(bd->recvGridCutoff[n]);
3572  }
3573 #ifdef DEBUG_MSM_GRID
3574  strcat(s, "\n");
3575  CkPrintf("%s", s);
3576 #endif
3577  msmGridCutoffReduction = CProxySection_MsmC1HermiteGridCutoff::ckNew(
3578  mgrLocal->msmC1HermiteGridCutoff, elems2.data(), elems2.size()
3579  );
3580  msmGridCutoffReduction.ckSectionDelegate(mcastPtr);
3581  MsmC1HermiteGridCutoffSetupMsg *msg = new MsmC1HermiteGridCutoffSetupMsg;
3582  CProxyElement_MsmC1HermiteBlock thisElementProxy = thisProxy(thisIndex);
3583  msg->put(&thisElementProxy);
3584 
3585  msmGridCutoffReduction.setupSections(msg); // broadcast to entire section
3586 
3587  /* XXX alternatively, setup default reduction client
3588  *
3589  mcastPtr->setReductionClient(msmGridCutoffReduction,
3590  new CkCallback(CkIndex_MsmC1HermiteBlock::myReductionEntry(NULL),
3591  thisElementProxy));
3592  *
3593  */
3594 } // MsmC1HermiteBlock::setupSections()
3595 
3596 
3597 void MsmC1HermiteBlock::addCharge(GridMsg *gm)
3598 {
3599 #ifdef MSM_TIMING
3600  double startTime, stopTime;
3601  startTime = CkWallTimer();
3602 #endif
3603  int pid;
3604  gm->get(subgrid, pid, sequence);
3605  delete gm;
3606  qh += subgrid;
3607 #ifdef MSM_TIMING
3608  stopTime = CkWallTimer();
3609  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3610 #endif
3611  if (++cntRecvsCharge == bd->numRecvsCharge) {
3612  int nlevels = mgrLocal->numLevels();
3613  if (blockIndex.level < nlevels-1) {
3614  restriction();
3615  }
3616  gridCutoff();
3617  }
3618 } // MsmC1HermiteBlock::addCharge()
3619 
3620 
3621 void MsmC1HermiteBlock::sendUpCharge()
3622 {
3623 #ifdef MSM_TIMING
3624  double startTime, stopTime;
3625  startTime = CkWallTimer();
3626 #endif
3627  int lnext = blockIndex.level + 1;
3628  // buffer portions of grid to send to Blocks on next level
3629  for (int n = 0; n < bd->sendUp.len(); n++) {
3630  // initialize the proper subgrid indexing range
3631  subgrid.init( bd->sendUp[n].nrange );
3632  // extract the values from the larger grid into the subgrid
3633  qhRestricted.extract(subgrid);
3634  // translate the subgrid indexing range to match the MSM block
3635  subgrid.updateLower( bd->sendUp[n].nrange_wrap.lower() );
3636  // add the subgrid charges into the block
3637  msm::BlockIndex& bindex = bd->sendUp[n].nblock_wrap;
3638  ASSERT(bindex.level == lnext);
3639  // place subgrid into message
3640  // SET MESSAGE PRIORITY
3641  int msgsz = subgrid.nn() * sizeof(C1Vector);
3642  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3643  SET_PRIORITY(gm, sequence, MSM_PRIORITY + lnext);
3644  gm->put(subgrid, blockIndex.level, sequence); // send my level
3645  // lookup in ComputeMsmMgr proxy array by level
3646  mgrLocal->msmC1HermiteBlock[lnext](
3647  bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
3648  } // for
3649 #ifdef MSM_TIMING
3650  stopTime = CkWallTimer();
3651  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3652 #endif
3653 } // MsmC1HermiteBlock::sendUpCharge()
3654 
3655 
3656 void MsmC1HermiteBlock::gridCutoff()
3657 {
3658 #ifdef DEBUG_MSM_GRID
3659  printf("MsmC1HermiteBlock level=%d, id=%d %d %d: grid cutoff\n",
3660  blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
3661 #endif
3662 #ifndef MSM_GRID_CUTOFF_DECOMP
3663  gridCutoffKernel();
3664  sendAcrossPotential();
3665 #else // MSM_GRID_CUTOFF_DECOMP
3666 
3667  // send charge block to MsmGridCutoff compute objects
3668 #ifdef MSM_TIMING
3669  double startTime, stopTime;
3670  startTime = CkWallTimer();
3671 #endif
3672  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
3673  int msgsz = qh.data().len() * sizeof(C1Vector);
3674  int len = bd->indexGridCutoff.len();
3675 
3676 #if 0
3677  // send charge message to each MsmGridCutoff compute element in list
3678  for (int n = 0; n < len; n++) {
3679 #ifdef MSM_TIMING
3680  startTime = CkWallTimer();
3681 #endif
3682  int index = bd->indexGridCutoff[n];
3683  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3684  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3685  gm->put(qh, blockIndex.level, sequence); // send my level
3686 #ifdef MSM_TIMING
3687  stopTime = CkWallTimer();
3688  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3689 #endif
3690  mgrLocal->msmGridCutoff[index].compute(gm);
3691  }
3692 #else
3693 
3694  // broadcast charge message to section
3695  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3696  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3697  gm->put(qh, blockIndex.level, sequence); // send my level
3698  msmGridCutoffBroadcast.compute(gm);
3699 #ifdef MSM_TIMING
3700  stopTime = CkWallTimer();
3701  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3702 #endif
3703 
3704 #endif // 0
3705 
3706 #endif // MSM_GRID_CUTOFF_DECOMP
3707 
3708 } // MsmC1HermiteBlock::gridCutoff()
3709 
3710 
3711 #ifndef MSM_GRID_CUTOFF_DECOMP
3712 void MsmC1HermiteBlock::sendAcrossPotential()
3713 {
3714 #ifdef MSM_TIMING
3715  double startTime, stopTime;
3716  startTime = CkWallTimer();
3717 #endif
3718  int lnext = blockIndex.level;
3719  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level)-1;
3720  // buffer portions of grid to send to Blocks on this level
3721  for (int n = 0; n < bd->sendAcross.len(); n++) {
3722  // initialize the proper subgrid indexing range
3723  subgrid.init( bd->sendAcross[n].nrange );
3724  // extract the values from the larger grid into the subgrid
3725  ehCutoff.extract(subgrid);
3726  // translate the subgrid indexing range to match the MSM block
3727  subgrid.updateLower( bd->sendAcross[n].nrange_wrap.lower() );
3728  // add the subgrid charges into the block
3729  msm::BlockIndex& bindex = bd->sendAcross[n].nblock_wrap;
3730  ASSERT(bindex.level == lnext);
3731  // place subgrid into message
3732  int msgsz = subgrid.nn() * sizeof(C1Vector);
3733  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3734  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3735  gm->put(subgrid, blockIndex.level, sequence); // send my level
3736  // lookup in ComputeMsmMgr proxy array by level
3737  mgrLocal->msmC1HermiteBlock[lnext](
3738  bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
3739  } // for
3740 #ifdef MSM_TIMING
3741  stopTime = CkWallTimer();
3742  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3743 #endif
3744 } // MsmC1HermiteBlock::sendAcrossPotential()
3745 #endif
3746 
3747 
3748 void MsmC1HermiteBlock::addPotential(GridMsg *gm)
3749 {
3750 #ifdef MSM_TIMING
3751  double startTime, stopTime;
3752  startTime = CkWallTimer();
3753 #endif
3754  int pid;
3755  int pseq;
3756  gm->get(subgrid, pid, pseq); // receive sender's level
3757  delete gm;
3758  eh += subgrid;
3759 #ifdef MSM_TIMING
3760  stopTime = CkWallTimer();
3761  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3762 #endif
3763  if (++cntRecvsPotential == bd->numRecvsPotential) {
3764  if (blockIndex.level > 0) {
3765  prolongation();
3766  }
3767  else {
3768  sendPatch();
3769  }
3770  }
3771 } // MsmC1HermiteBlock::addPotential()
3772 
3773 
3774 void MsmC1HermiteBlock::sendDownPotential()
3775 {
3776 #ifdef MSM_TIMING
3777  double startTime, stopTime;
3778  startTime = CkWallTimer();
3779 #endif
3780  int lnext = blockIndex.level - 1;
3781  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
3782  // buffer portions of grid to send to Blocks on next level
3783  for (int n = 0; n < bd->sendDown.len(); n++) {
3784  // initialize the proper subgrid indexing range
3785  subgrid.init( bd->sendDown[n].nrange );
3786  // extract the values from the larger grid into the subgrid
3787  ehProlongated.extract(subgrid);
3788  // translate the subgrid indexing range to match the MSM block
3789  subgrid.updateLower( bd->sendDown[n].nrange_wrap.lower() );
3790  // add the subgrid charges into the block
3791  msm::BlockIndex& bindex = bd->sendDown[n].nblock_wrap;
3792  ASSERT(bindex.level == lnext);
3793  // place subgrid into message
3794  int msgsz = subgrid.nn() * sizeof(C1Vector);
3795  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3796  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3797  gm->put(subgrid, blockIndex.level, sequence); // send my level
3798  // lookup in ComputeMsmMgr proxy array by level
3799  mgrLocal->msmC1HermiteBlock[lnext](
3800  bindex.n.i, bindex.n.j, bindex.n.k).addPotential(gm);
3801  } // for
3802 #ifdef MSM_TIMING
3803  stopTime = CkWallTimer();
3804  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3805  mgrLocal->doneTiming();
3806 #endif
3807  init(); // reinitialize for next computation
3808 } // MsmC1HermiteBlock::sendDownPotential()
3809 
3810 
3811 void MsmC1HermiteBlock::sendPatch()
3812 {
3813 #ifdef MSM_TIMING
3814  double startTime, stopTime;
3815  startTime = CkWallTimer();
3816 #endif
3817  int lnext = blockIndex.level;
3818  int priority = mgrLocal->nlevels + 2*(mgrLocal->nlevels - blockIndex.level);
3819  ASSERT(lnext == 0);
3820  // buffer portions of grid to send to Blocks on next level
3821  for (int n = 0; n < bd->sendPatch.len(); n++) {
3822  // initialize the proper subgrid indexing range
3823  subgrid.init( bd->sendPatch[n].nrange );
3824  // extract the values from the larger grid into the subgrid
3825  eh.extract(subgrid);
3826  // translate the subgrid indexing range to match the MSM block
3827  subgrid.updateLower( bd->sendPatch[n].nrange_unwrap.lower() );
3828  // add the subgrid charges into the block, need its patch ID
3829  int pid = bd->sendPatch[n].patchID;
3830  // place subgrid into message
3831  int msgsz = subgrid.nn() * sizeof(C1Vector);
3832  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
3833  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
3834  gm->put(subgrid, pid, sequence); // send patch ID
3835  // lookup which PE has this patch
3836  PatchMap *pm = PatchMap::Object();
3837  int pe = pm->node(pid);
3838  mgrProxy[pe].addPotential(gm);
3839  }
3840 #ifdef MSM_TIMING
3841  stopTime = CkWallTimer();
3842  mgrLocal->msmTiming[MsmTimer::COMM] += stopTime - startTime;
3843  mgrLocal->doneTiming();
3844 #endif
3845  init(); // reinitialize for next computation
3846 } // MsmC1HermiteBlock::sendPatch()
3847 
3848 
3849 // MsmBlock
3850 //
3852 
3853 
3854 ComputeMsmMgr::ComputeMsmMgr() :
3855  msmProxy(thisgroup), msmCompute(0)
3856 {
3857 #ifdef DEBUG_MSM_VERBOSE
3858  printf("ComputeMsmMgr: (constructor) PE %d\n", CkMyPe());
3859 #endif
3860  CkpvAccess(BOCclass_group).computeMsmMgr = thisgroup;
3861 
3862 #ifdef MSM_TIMING
3863  if (CkMyPe() == 0) {
3864  msmTimer = CProxy_MsmTimer::ckNew();
3865  }
3866  initTiming();
3867 #endif
3868 #ifdef MSM_PROFILING
3869  if (CkMyPe() == 0) {
3870  msmProfiler = CProxy_MsmProfiler::ckNew();
3871  }
3872  initProfiling();
3873 #endif
3874 }
3875 
3876 ComputeMsmMgr::~ComputeMsmMgr()
3877 {
3878 #ifdef DEBUG_MSM_VERBOSE
3879  printf("ComputeMsmMgr: (destructor) PE %d\n", CkMyPe());
3880 #endif
3881  // free memory?
3882 }
3883 
3884 
3885 //
3886 // Given the basis vector length "len" and the user-requested grid
3887 // spacing: if using periodic boundary conditions along this basis
3888 // vector, h is chosen close to the desired spacing such that
3889 // nn = 2^k or 3*2^k.  For non-periodic boundaries, h is set to the
3890 // desired grid spacing exactly, and ia and ib are padded by half the
3891 // interpolating stencil width.
3892 //
3893 void ComputeMsmMgr::setup_hgrid_1d(BigReal len, BigReal& hh, int& nn,
3894  int& ia, int& ib, int isperiodic)
3895 {
3896  ASSERT(gridspacing > 0);
3897  if (isperiodic) {
3898  const BigReal hmin = (4./5) * gridspacing;
3899  const BigReal hmax = 1.5 * hmin;
3900  hh = len;
3901  nn = 1; // start with one grid point across length
3902  while (hh >= hmax) {
3903  hh *= 0.5; // halve spacing and double grid points
3904  nn <<= 1;
3905  }
3906  if (hh < hmin) {
3907  if (nn < 4) {
3908  NAMD_die("Basis vector is too short or MSM grid spacing is too large");
3909  }
3910  hh *= (4./3); // scale hh by 4/3 and nn by 3/4
3911  nn >>= 2;
3912  nn *= 3;
3913  }
3914  // now we have: hmin <= h < hmax,
3915  // where nn is a power of 2 times no more than one power of 3
3916  ia = 0;
3917  ib = nn-1;
3918  }
3919  else {
3920  hh = gridspacing;
3921  // Instead of "nn = (int) ceil(len / hh);"
3922  // len is divisible by hh, up to roundoff error, so round to closest nn
3923  nn = (int) floor(len/hh + 0.5);
3924  ia = -s_edge;
3925  ib = nn + s_edge;
3926  }
3927 } // ComputeMsmMgr::setup_hgrid_1d()
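// Worked example of the periodic case above (for illustration only):
// len = 96 A with gridspacing = 2.5 A gives hmin = 2.0 and hmax = 3.0.
// Repeated halving from h = 96 reaches h = 1.5 with nn = 64, which is
// below hmin, so the 4/3 correction yields h = 2.0 with nn = 48 = 3*2^4,
// satisfying hmin <= h < hmax with nn a power of 2 times at most one
// power of 3.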
3928 
3929 
3930 // make sure that block sizes divide evenly into periodic dimensions
3931 // call only for periodic dimensions
3932 void ComputeMsmMgr::setup_periodic_blocksize(int& bsize, int n)
3933 {
3934  if (n % bsize != 0) {
3935  // n is either 2^k or 3*2^k
3936  int newbsize = 1;
3937  if (n % 3 == 0) newbsize = 3;
3938  while (newbsize < bsize && newbsize < n) newbsize *= 2;
3939  if (bsize < newbsize) newbsize /= 2;
3940  if (n % newbsize != 0) {
3941  NAMD_die("MSM grid size for periodic dimensions must be "
3942  "a power of 2 times at most one power of 3");
3943  }
3944  bsize = newbsize;
3945  }
3946  return;
3947 }
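// Worked example (illustration): n = 48 = 3*2^4 with requested bsize = 10.
// Doubling from newbsize = 3 gives 12, which exceeds the request, so it
// is halved back to 6; since 48 % 6 == 0, the block size is adjusted
// from 10 down to 6, a divisor of n.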
3948 
3949 
3950 //
3951 // This is the major routine that sets everything up for MSM based on
3952 // 1. cell basis vectors and/or max and min coordinates plus padding
3953 // 2. cutoff and MSM-related parameters from SimParameters
3954 // Includes determining grid spacings along periodic dimensions,
3955 // determining grid dimensions and number of levels for system,
3956 // then calculating all needed coefficients for grid cutoff part
3957 // and grid transfer parts (restriction and prolongation).
3958 //
3959 // Then sets up Map for parallel decomposition based on
3960 // MSM block size parameters from SimParameters.
3961 //
3962 // Then determines chare array element placement of MsmBlock and
3963 // MsmGridCutoff arrays based on number of PEs and number of nodes.
3964 //
3965 // Then allocates (on PE 0) MsmBlock (3D chare arrays, one per level)
3966 // and MsmGridCutoff (one 1D chare array for all block-block interactions)
3967 // and then broadcasts array proxies across group.
3968 //
3969 void ComputeMsmMgr::initialize(MsmInitMsg *msg)
3970 {
3971 #ifdef DEBUG_MSM_VERBOSE
3972  printf("ComputeMsmMgr: initialize() PE %d\n", CkMyPe());
3973 #endif
3974 
3975  smin = msg->smin;
3976  smax = msg->smax;
3977  delete msg;
3978 
3979 #if 0
3980  printf("PE%d: initializing MSM\n", CkMyPe());
3981 #endif
3982 
3983  SimParameters *simParams = Node::Object()->simParameters;
3984 
3985  // get required sim params, check validity
3986  lattice = simParams->lattice;
3987 
3988  // set user-defined extent of system
3989  Vector rmin(simParams->MSMxmin, simParams->MSMymin, simParams->MSMzmin);
3990  Vector rmax(simParams->MSMxmax, simParams->MSMymax, simParams->MSMzmax);
3991  Vector sdmin = lattice.scale(rmin);
3992  Vector sdmax = lattice.scale(rmax);
3993  // swap coordinates between min and max to correct for possible rotation
3994  if (sdmin.x > sdmax.x) { double t=sdmin.x; sdmin.x=sdmax.x; sdmax.x=t; }
3995  if (sdmin.y > sdmax.y) { double t=sdmin.y; sdmin.y=sdmax.y; sdmax.y=t; }
3996  if (sdmin.z > sdmax.z) { double t=sdmin.z; sdmin.z=sdmax.z; sdmax.z=t; }
3997  // extend smin, smax by user-defined extent, where appropriate
3998  if ( ! lattice.a_p() && (sdmin.x != 0 || sdmax.x != 0)) {
3999  if (sdmin.x < smin.x) {
4000  smin.x = sdmin.x;
4001  if (CkMyPe() == 0) {
4002  iout << iINFO << "MSM extending minimum X to "
4003  << simParams->MSMxmin << " A\n" << endi;
4004  }
4005  }
4006  if (sdmax.x > smax.x) {
4007  smax.x = sdmax.x;
4008  if (CkMyPe() == 0) {
4009  iout << iINFO << "MSM extending maximum X to "
4010  << simParams->MSMxmax << " A\n" << endi;
4011  }
4012  }
4013  }
4014  if ( ! lattice.b_p() && (sdmin.y != 0 || sdmax.y != 0)) {
4015  if (sdmin.y < smin.y) {
4016  smin.y = sdmin.y;
4017  if (CkMyPe() == 0) {
4018  iout << iINFO << "MSM extending minimum Y to "
4019  << simParams->MSMymin << " A\n" << endi;
4020  }
4021  }
4022  if (sdmax.y > smax.y) {
4023  smax.y = sdmax.y;
4024  if (CkMyPe() == 0) {
4025  iout << iINFO << "MSM extending maximum Y to "
4026  << simParams->MSMymax << " A\n" << endi;
4027  }
4028  }
4029  }
4030  if ( ! lattice.c_p() && (sdmin.z != 0 || sdmax.z != 0)) {
4031  if (sdmin.z < smin.z) {
4032  smin.z = sdmin.z;
4033  if (CkMyPe() == 0) {
4034  iout << iINFO << "MSM extending minimum Z to "
4035  << simParams->MSMzmin << " A\n" << endi;
4036  }
4037  }
4038  if (sdmax.z > smax.z) {
4039  smax.z = sdmax.z;
4040  if (CkMyPe() == 0) {
4041  iout << iINFO << "MSM extending maximum Z to "
4042  << simParams->MSMzmax << " A\n" << endi;
4043  }
4044  }
4045  }
4046 
4047 #ifdef DEBUG_MSM_VERBOSE
4048  printf("smin = %g %g %g smax = %g %g %g\n",
4049  smin.x, smin.y, smin.z, smax.x, smax.y, smax.z);
4050 #endif
4051 
4052  approx = simParams->MSMApprox;
4053  if (approx < 0 || approx >= NUM_APPROX) {
4054  NAMD_die("MSM: unknown approximation requested (MSMApprox)");
4055  }
4056 
4057  split = simParams->MSMSplit;
4058  if (split < 0 || split >= NUM_SPLIT) {
4059  NAMD_die("MSM: unknown splitting requested (MSMSplit)");
4060  }
4061 
4062  if (CkMyPe() == 0) {
4063  const char *approx_str, *split_str;
4064  switch (approx) {
4065  case CUBIC: approx_str = "C1 cubic"; break;
4066  case QUINTIC: approx_str = "C1 quintic"; break;
4067  case QUINTIC2: approx_str = "C2 quintic"; break;
4068  case SEPTIC: approx_str = "C1 septic"; break;
4069  case SEPTIC3: approx_str = "C3 septic"; break;
4070  case NONIC: approx_str = "C1 nonic"; break;
4071  case NONIC4: approx_str = "C4 nonic"; break;
4072  case C1HERMITE: approx_str = "C1 Hermite"; break;
4073  default: approx_str = "unknown"; break;
4074  }
4075  switch (split) {
4076  case TAYLOR2: split_str = "C2 Taylor"; break;
4077  case TAYLOR3: split_str = "C3 Taylor"; break;
4078  case TAYLOR4: split_str = "C4 Taylor"; break;
4079  case TAYLOR5: split_str = "C5 Taylor"; break;
4080  case TAYLOR6: split_str = "C6 Taylor"; break;
4081  case TAYLOR7: split_str = "C7 Taylor"; break;
4082  case TAYLOR8: split_str = "C8 Taylor"; break;
4083  default: split_str = "unknown"; break;
4084  }
4085  iout << iINFO << "MSM using "
4086  << approx_str << " interpolation\n";
4087  iout << iINFO << "MSM using "
4088  << split_str << " splitting function\n";
4089  }
4090 
4091  a = simParams->cutoff;
4092 
4093  if (approx == C1HERMITE) {
4094  gridScalingFactor = 2;
4095  }
4096  else {
4097  gridScalingFactor = 1;
4098  }
4099 
4100  gridspacing = gridScalingFactor * simParams->MSMGridSpacing;
4101  if (gridspacing <= 0) {
4102  NAMD_die("MSM: grid spacing must be greater than 0 (MSMGridSpacing)");
4103  }
4104  else if (gridspacing >= a) {
4105  NAMD_die("MSM: grid spacing must be less than cutoff (MSMGridSpacing)");
4106  }
4107 
4108  padding = gridScalingFactor * simParams->MSMPadding;
4109  if (padding < 0) {
4110  NAMD_die("MSM: padding must be non-negative (MSMPadding)");
4111  }
4112 
4113  // set maximum number of levels (default 0 adapts levels to system)
4114  nlevels = simParams->MSMLevels;
4115 
4116  // XXX dispersion unused for now
4117  dispersion = 0;
4118  if ( ! dispersion && split >= TAYLOR2_DISP) {
4119  NAMD_die("MSM: requested splitting for long-range dispersion "
4120  "(not implemented)");
4121  }
4122 
4123  // set block sizes for grid decomposition
4124  int bsx = simParams->MSMBlockSizeX / int(gridScalingFactor);
4125  int bsy = simParams->MSMBlockSizeY / int(gridScalingFactor);
4126  int bsz = simParams->MSMBlockSizeZ / int(gridScalingFactor);
4127  if (bsx <= 0 || bsy <= 0 || bsz <= 0) {
4128  NAMD_die("MSM: invalid block size requested (MSMBlockSize[XYZ])");
4129  }
4130 #ifdef MSM_FIXED_SIZE_GRID_MSG
4131  else if (bsx * bsy * bsz > MSM_MAX_BLOCK_VOLUME) {
4132  NAMD_die("MSM: requested block size (MSMBlockSize[XYZ]) too big");
4133  }
4134 #endif
4135  if (CkMyPe() == 0) {
4136  iout << iINFO << "MSM block size decomposition along X is "
4137  << bsx << " grid points\n";
4138  iout << iINFO << "MSM block size decomposition along Y is "
4139  << bsy << " grid points\n";
4140  iout << iINFO << "MSM block size decomposition along Z is "
4141  << bsz << " grid points\n";
4142  }
4143 
4144  s_edge = (PolyDegree[approx] - 1) / 2; // stencil edge size
4145  omega = 2 * PolyDegree[approx]; // smallest non-periodic grid length
4146 
4147  BigReal xlen, ylen, zlen;
4148  Vector sgmin, sgmax; // grid min and max, in scaled coordinates
4149  int ispx = lattice.a_p();
4150  int ispy = lattice.b_p();
4151  int ispz = lattice.c_p();
4152  int ispany = (ispx || ispy || ispz); // is there any periodicity?
4153 
4154  if (ispx) { // periodic along basis vector
4155  xlen = lattice.a().length();
4156  sgmax.x = 0.5;
4157  sgmin.x = -0.5;
4158  }
4159  else { // non-periodic
4160  sgmax.x = smax.x + padding; // pad the edges
4161  sgmin.x = smin.x - padding;
4162  ASSERT(gridspacing > 0);
4163  // restrict center to be on a grid point
4164  BigReal mupper = ceil(sgmax.x / (2*gridspacing));
4165  BigReal mlower = floor(sgmin.x / (2*gridspacing));
4166  sgmax.x = 2*gridspacing*mupper;
4167  sgmin.x = 2*gridspacing*mlower;
4168  xlen = sgmax.x - sgmin.x;
4169  }
4170 #ifdef DEBUG_MSM_VERBOSE
4171  printf("xlen = %g sgmin.x = %g sgmax.x = %g\n", xlen, sgmin.x, sgmax.x);
4172 #endif
4173 
4174  if (ispy) { // periodic along basis vector
4175  ylen = lattice.b().length();
4176  sgmax.y = 0.5;
4177  sgmin.y = -0.5;
4178  }
4179  else { // non-periodic
4180  sgmax.y = smax.y + padding; // pad the edges
4181  sgmin.y = smin.y - padding;
4182  ASSERT(gridspacing > 0);
4183  // restrict center to be on a grid point
4184  BigReal mupper = ceil(sgmax.y / (2*gridspacing));
4185  BigReal mlower = floor(sgmin.y / (2*gridspacing));
4186  sgmax.y = 2*gridspacing*mupper;
4187  sgmin.y = 2*gridspacing*mlower;
4188  ylen = sgmax.y - sgmin.y;
4189  }
4190 #ifdef DEBUG_MSM_VERBOSE
4191  printf("ylen = %g sgmin.y = %g sgmax.y = %g\n", ylen, sgmin.y, sgmax.y);
4192 #endif
4193 
4194  if (ispz) { // periodic along basis vector
4195  zlen = lattice.c().length();
4196  sgmax.z = 0.5;
4197  sgmin.z = -0.5;
4198  }
4199  else { // non-periodic
4200  sgmax.z = smax.z + padding; // pad the edges
4201  sgmin.z = smin.z - padding;
4202  ASSERT(gridspacing > 0);
4203  // restrict center to be on a grid point
4204  BigReal mupper = ceil(sgmax.z / (2*gridspacing));
4205  BigReal mlower = floor(sgmin.z / (2*gridspacing));
4206  sgmax.z = 2*gridspacing*mupper;
4207  sgmin.z = 2*gridspacing*mlower;
4208  zlen = sgmax.z - sgmin.z;
4209  }
4210 #ifdef DEBUG_MSM_VERBOSE
4211  printf("zlen = %g sgmin.z = %g sgmax.z = %g\n", zlen, sgmin.z, sgmax.z);
4212 #endif
4213  sglower = sgmin;
4214 
4215  int ia, ib, ja, jb, ka, kb;
4216  setup_hgrid_1d(xlen, hxlen, nhx, ia, ib, ispx);
4217  setup_hgrid_1d(ylen, hylen, nhy, ja, jb, ispy);
4218  setup_hgrid_1d(zlen, hzlen, nhz, ka, kb, ispz);
4219  hxlen_1 = 1 / hxlen;
4220  hylen_1 = 1 / hylen;
4221  hzlen_1 = 1 / hzlen;
4222  if (CkMyPe() == 0) {
4223  if (ispx || ispy || ispz) {
4224  iout << iINFO << "MSM grid spacing along X is "<< hxlen << " A\n";
4225  iout << iINFO << "MSM grid spacing along Y is "<< hylen << " A\n";
4226  iout << iINFO << "MSM grid spacing along Z is "<< hzlen << " A\n";
4227  }
4228  else {
4229  iout << iINFO << "MSM grid spacing is " << gridspacing << " A\n";
4230  }
4231  if ( ! ispx || ! ispy || ! ispz ) {
4232  iout << iINFO<<"MSM non-periodic padding is "<< padding << " A\n";
4233  }
4234  }
4235 
4236  int ni = ib - ia + 1;
4237  int nj = jb - ja + 1;
4238  int nk = kb - ka + 1;
4239  int n;
4240 
4241 #if 0
4242  // reserve temp space for factored grid transfer operation
4243  n = (nk > omega ? nk : omega); // row along z-dimension
4244  lzd.resize(n);
4245  n *= (nj > omega ? nj : omega); // plane along yz-dimensions
4246  lyzd.resize(n);
4247 #endif
4248 
4249  int lastnelems = 1;
4250  int smallestnbox = 1;
4251  int isclamped = 0;
4252  int maxlevels = nlevels; // user-defined number of levels
4253 
4254 #ifdef DEBUG_MSM_VERBOSE
4255  printf("maxlevels = %d\n", maxlevels);
4256 #endif
4257  if (nlevels <= 0) { // instead we set number of levels
4258  n = ni;
4259  if (n < nj) n = nj;
4260  if (n < nk) n = nk;
4261  for (maxlevels = 1; n > 0; n >>= 1) maxlevels++;
4262  if (ispany == 0) { // no periodicity
4263  // use rule of thumb 3/4 diameter of grid cutoff sphere
4264  int ngci = (int) ceil(3*a / hxlen) - 1;
4265  int ngcj = (int) ceil(3*a / hylen) - 1;
4266  int ngck = (int) ceil(3*a / hzlen) - 1;
4267  int omega3 = omega * omega * omega;
4268  int nhalf = (int) sqrt((double)ni * nj * nk);
4269  lastnelems = (nhalf > omega3 ? nhalf : omega3);
4270  smallestnbox = ngci * ngcj * ngck; // smaller grids don't reduce work
4271  isclamped = 1;
4272  }
4273  }
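// For illustration: the loop above yields maxlevels = floor(log2(n)) + 2,
// e.g. a largest grid dimension of n = 48 gives maxlevels = 7; for
// nonperiodic systems the clamping heuristics below can then stop
// level generation earlier.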
4274 #ifdef DEBUG_MSM_VERBOSE
4275  printf("maxlevels = %d\n", maxlevels);
4276 #endif
4277 
4278  // allocate space for storing grid dimensions for each level
4279  map.gridrange.resize(maxlevels);
4280 
4281  // set periodicity flags
4282  map.ispx = ispx;
4283  map.ispy = ispy;
4284  map.ispz = ispz;
4285 
4286  int level = 0;
4287  int done = 0;
4288  int alldone = 0;
4289  do {
4290  map.gridrange[level].setbounds(ia, ib, ja, jb, ka, kb);
4291 
4292  // Msm index?
4293 
4294  if (++level == nlevels) done |= 0x07; // user limit on levels
4295 
4296  if (isclamped) {
4297  int nelems = ni * nj * nk;
4298  if (nelems <= lastnelems) done |= 0x07;
4299  if (nelems <= smallestnbox) done |= 0x07;
4300  }
4301 
4302  alldone = (done == 0x07); // make sure all dimensions are done
4303 
4304  if (ispx) {
4305  ni >>= 1;
4306  ib = ni-1;
4307  if (ni & 1) done |= 0x07; // == 3 or 1
4308  else if (ni == 2) done |= 0x01; // can do one more
4309  }
4310  else {
4311  ia = -((-ia+1)/2) - s_edge;
4312  ib = (ib+1)/2 + s_edge;
4313  ni = ib - ia + 1;
4314  if (ni <= omega) done |= 0x01; // can do more restrictions
4315  }
4316 
4317  if (ispy) {
4318  nj >>= 1;
4319  jb = nj-1;
4320  if (nj & 1) done |= 0x07; // == 3 or 1
4321  else if (nj == 2) done |= 0x02; // can do one more
4322  }
4323  else {
4324  ja = -((-ja+1)/2) - s_edge;
4325  jb = (jb+1)/2 + s_edge;
4326  nj = jb - ja + 1;
4327  if (nj <= omega) done |= 0x02; // can do more restrictions
4328  }
4329 
4330  if (ispz) {
4331  nk >>= 1;
4332  kb = nk-1;
4333  if (nk & 1) done |= 0x07; // == 3 or 1
4334  else if (nk == 2) done |= 0x04; // can do one more
4335  }
4336  else {
4337  ka = -((-ka+1)/2) - s_edge;
4338  kb = (kb+1)/2 + s_edge;
4339  nk = kb - ka + 1;
4340  if (nk <= omega) done |= 0x04; // can do more restrictions
4341  }
4342  } while ( ! alldone );
4343  nlevels = level;
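// Illustrative trace of the loop above for one fully periodic dimension
// (assuming all three dimensions behave alike and no user level limit):
// ni = 48 records per-level sizes 48, 24, 12, 6, 3; once a size becomes
// odd (3 or 1) no further halving is possible, giving nlevels = 5.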
4344 
4345 // for periodic boundaries, don't visit the top level (all 0);
4346 // the top level is visited only when all boundaries are nonperiodic
4347  int toplevel = (ispany ? nlevels : nlevels - 1);
4348 
4349  // resize down to the actual number of levels (does not change alloc)
4350  map.gridrange.resize(nlevels);
4351 
4352  // print out some information about MSM
4353  if (CkMyPe() == 0) {
4354  iout << iINFO << "MSM using " << nlevels << " levels\n";
4355  for (n = 0; n < nlevels; n++) {
4356  char s[100];
4357  snprintf(s, sizeof(s), " level %d: "
4358  "[%d..%d] x [%d..%d] x [%d..%d]\n", n,
4359  map.gridrange[n].ia(), map.gridrange[n].ib(),
4360  map.gridrange[n].ja(), map.gridrange[n].jb(),
4361  map.gridrange[n].ka(), map.gridrange[n].kb());
4362  iout << iINFO << s;
4363  }
4364  iout << endi;
4365  }
4366 
4367  // find grid spacing basis vectors
4368  hu = hxlen * lattice.a().unit();
4369  hv = hylen * lattice.b().unit();
4370  hw = hzlen * lattice.c().unit();
4371  hufx = Float(hu.x);
4372  hufy = Float(hu.y);
4373  hufz = Float(hu.z);
4374  hvfx = Float(hv.x);
4375  hvfy = Float(hv.y);
4376  hvfz = Float(hv.z);
4377  hwfx = Float(hw.x);
4378  hwfy = Float(hw.y);
4379  hwfz = Float(hw.z);
4380 
4381  ru = lattice.a_r();
4382  rv = lattice.b_r();
4383  rw = lattice.c_r();
4384 
4385  // determine grid spacings in scaled space
4386  shx = ru * hu;
4387  shy = rv * hv;
4388  shz = rw * hw;
4389  shx_1 = 1 / shx;
4390  shy_1 = 1 / shy;
4391  shz_1 = 1 / shz;
4392 
4393  // row vectors to transform interpolated force back to real space
4394  // XXX Is not needed.
4395  sx_shx = shx_1 * Vector(ru.x, rv.x, rw.x);
4396  sy_shy = shy_1 * Vector(ru.y, rv.y, rw.y);
4397  sz_shz = shz_1 * Vector(ru.z, rv.z, rw.z);
4398  srx_x = Float(sx_shx.x);
4399  srx_y = Float(sx_shx.y);
4400  srx_z = Float(sx_shx.z);
4401  sry_x = Float(sy_shy.x);
4402  sry_y = Float(sy_shy.y);
4403  sry_z = Float(sy_shy.z);
4404  srz_x = Float(sz_shz.x);
4405  srz_y = Float(sz_shz.y);
4406  srz_z = Float(sz_shz.z);
4407 
4408  Vector pu = cross(hv, hw);
4409  BigReal s = (hu * pu) / (pu * pu);
4410  pu *= s; // pu is orthogonal projection of hu onto hv CROSS hw
4411 
4412  Vector pv = cross(hw, hu);
4413  s = (hv * pv) / (pv * pv);
4414  pv *= s; // pv is orthogonal projection of hv onto hw CROSS hu
4415 
4416  Vector pw = cross(hu, hv);
4417  s = (hw * pw) / (pw * pw);
4418  pw *= s; // pw is orthogonal projection of hw onto hu CROSS hv
4419 
4420  // radii for parallelepiped of weights enclosing grid cutoff sphere
4421  ni = (int) ceil(2*a / pu.length() ) - 1;
4422  nj = (int) ceil(2*a / pv.length() ) - 1;
4423  nk = (int) ceil(2*a / pw.length() ) - 1;
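// For illustration: in an orthogonal cell the projections pu, pv, pw
// reduce to hu, hv, hw themselves, so with spacing 2.5 A and cutoff
// a = 12 A each radius is ceil(24/2.5) - 1 = 9, i.e. the level-0
// weights span [-9..9] along each dimension.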
4424 
4425  Float scaling = 1;
4426  Float scaling_factor = 0.5f;
4427  BigReal a_1 = 1/a;
4428  BigReal a_p = a_1;
4429  if (dispersion) {
4430  a_p = a_p * a_p * a_p; // = 1/a^3
4431  a_p = a_p * a_p; // = 1/a^6
4432  scaling_factor = 1.f/64; // = 1/2^6
4433  }
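// Note on scaling_factor (illustrative): each level doubles the
// effective cutoff, and for the 1/r kernel 1/(2a) = (1/2)(1/a), so
// successive levels scale by 1/2; for the 1/r^6 dispersion kernel
// 1/(2a)^6 = (1/64)(1/a^6), hence the 1/2^6 above.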
4434  int i, j, k;
4435  if (approx < C1HERMITE) {
4436  // resize gc and gvc constants for number of levels
4437  map.gc.resize(nlevels);
4438  map.gvc.resize(nlevels);
4439 
4440  for (level = 0; level < toplevel; level++) {
4441  map.gc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
4442  map.gvc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
4443 
4444  for (k = -nk; k <= nk; k++) {
4445  for (j = -nj; j <= nj; j++) {
4446  for (i = -ni; i <= ni; i++) {
4447  if (level == 0) {
4448  BigReal s, t, gs=0, gt=0, g=0, dgs=0, dgt=0, dg=0;
4449  BigReal vlen = (i*hu + j*hv + k*hw).length();
4450  s = vlen * a_1;
4451  t = 0.5 * s;
4452  if (t >= 1) {
4453  g = 0;
4454  dg = 0;
4455  }
4456  else {
4457  splitting(gt, dgt, t, split);
4458  if (s >= 1) {
4459  BigReal s_1 = 1/s;
4460  if (dispersion) {
4461  gs = s_1 * s_1 * s_1; // = 1/s^3
4462  gs = gs * gs; // = 1/s^6
4463  dgs = -6 * gs * s_1;
4464  }
4465  else {
4466  gs = s_1;
4467  dgs = -gs * s_1;
4468  }
4469  }
4470  else {
4471  splitting(gs, dgs, s, split);
4472  }
4473  g = (gs - scaling_factor * gt) * a_p;
4474  BigReal c=0;
4475  if (i || j || k) {
4476  c = a_p * a_1 / vlen;
4477  }
4478  dg = 0.5 * (dgs - 0.5*scaling_factor * dgt) * c;
4479 
4480  // Msm index?
4481 
4482  }
4483  map.gc[0](i,j,k) = Float(g);
4484  map.gvc[0](i,j,k) = Float(dg);
4485  } // if level 0
4486  else {
4487  map.gc[level](i,j,k) = scaling * map.gc[0](i,j,k);
4488  map.gvc[level](i,j,k) = scaling * map.gvc[0](i,j,k);
4489  }
4490 
4491  } // for i
4492  } // for j
4493  } // for k
4494  scaling *= scaling_factor;
4495 
4496  } // for level
4497 
4498  // for summed virial factors
4499  gvsum.setbounds(-ni, ni, -nj, nj, -nk, nk);
4500  // make sure final virial sum is initialized to 0
4501  for (i = 0; i < VMAX; i++) { virial[i] = 0; }
4502 
4503  if (toplevel < nlevels) {
4504  // nonperiodic along all basis vector directions
4505  // calculate top level weights where all grid points
4506  // interact with each other
4507  ni = map.gridrange[toplevel].ni();
4508  nj = map.gridrange[toplevel].nj();
4509  nk = map.gridrange[toplevel].nk();
4510  map.gc[toplevel].setbounds(-ni, ni, -nj, nj, -nk, nk);
4511 
4512  // Msm index?
4513 
4514  for (k = -nk; k <= nk; k++) {
4515  for (j = -nj; j <= nj; j++) {
4516  for (i = -ni; i <= ni; i++) {
4517  BigReal s, gs, d;
4518  BigReal vlen = (i*hu + j*hv + k*hw).length();
4519  s = vlen * a_1;
4520  if (s >= 1) {
4521  BigReal s_1 = 1/s;
4522  if (dispersion) {
4523  gs = s_1 * s_1 * s_1; // = 1/s^3
4524  gs = gs * gs; // = 1/s^6
4525  }
4526  else {
4527  gs = s_1;
4528  }
4529  }
4530  else {
4531  splitting(gs, d, s, split);
4532  }
4533  map.gc[toplevel](i,j,k) = scaling * Float(gs * a_p);
4534  } // for i
4535  } // for j
4536  } // for k
4537  } // if toplevel
4538 
4539  // generate grespro stencil
4540  const int nstencil = Nstencil[approx];
4541  const Float *phi = PhiStencil[approx];
4542  map.grespro.set(0, nstencil, 0, nstencil, 0, nstencil);
4543  for (k = 0; k < nstencil; k++) {
4544  for (j = 0; j < nstencil; j++) {
4545  for (i = 0; i < nstencil; i++) {
4546  map.grespro(i,j,k) = phi[i] * phi[j] * phi[k];
4547  }
4548  }
4549  }
4550 
4551  } // end if approx < C1HERMITE
4552  else {
4553  // C1HERMITE
4554  // resize gc_c1hermite constants for number of levels
4555  map.gc_c1hermite.resize(nlevels);
4556  scaling = 1;
4557 
4558  for (level = 0; level < toplevel; level++) {
4559 
4560  Vector hmu = scaling * hu;
4561  Vector hmv = scaling * hv;
4562  Vector hmw = scaling * hw;
4563  BigReal am = scaling * a;
4564 
4565  map.gc_c1hermite[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
4566 
4567  for (k = -nk; k <= nk; k++) {
4568  for (j = -nj; j <= nj; j++) {
4569  for (i = -ni; i <= ni; i++) {
4570  C1Matrix& m = map.gc_c1hermite[level](i,j,k);
4571  Vector rv = i*hmu + j*hmv + k*hmw;
4572  BigReal r2 = rv * rv;
4573  m.set(0);
4574  if (r2 < 4*am*am) {
4575  // accumulate D( g_{a}(0,r) ) term for this level
4576  gc_c1hermite_elem_accum(m, 1, rv, am, split);
4577  // accumulate D( -g_{2a}(0,r) ) term for this level
4578  gc_c1hermite_elem_accum(m, -1, rv, 2*am, split);
4579  } // if within cutoff
4580  }
4581  }
4582  } // end loop over gc_c1hermite elements for this level
4583  scaling *= 2; // double grid spacing and cutoff at each iteration
4584 
4585  } // end loop over levels
4586 
4587  if (toplevel < nlevels) {
4588  Vector hmu = scaling * hu;
4589  Vector hmv = scaling * hv;
4590  Vector hmw = scaling * hw;
4591  BigReal am = scaling * a;
4592 
4593  // nonperiodic along all basis vector directions
4594  // calculate top level weights where all grid points
4595  // interact with each other
4596  ni = map.gridrange[toplevel].ni();
4597  nj = map.gridrange[toplevel].nj();
4598  nk = map.gridrange[toplevel].nk();
4599  map.gc_c1hermite[toplevel].setbounds(-ni, ni, -nj, nj, -nk, nk);
4600 
4601  for (k = -nk; k <= nk; k++) {
4602  for (j = -nj; j <= nj; j++) {
4603  for (i = -ni; i <= ni; i++) {
4604  C1Matrix& m = map.gc_c1hermite[level](i,j,k);
4605  Vector rv = i*hmu + j*hmv + k*hmw;
4606  m.set(0);
4607  // accumulate D( g_{a}(0,r) ) term for this level
4608  gc_c1hermite_elem_accum(m, 1, rv, am, split);
4609  }
4610  }
4611  } // end loop over gc_c1hermite elements for top level
4612 
4613  } // end if top level
4614 
4615  // C1 Hermite restriction and prolongation stencils
4616  map.gres_c1hermite.resize(nlevels-1);
4617  map.gpro_c1hermite.resize(nlevels-1);
4618 
4619  enum {
4620  ND = 3, // stencil diameter
4621  NR = ND/2 // stencil radius
4622  };
4623 
4624  // the master basis functions PHI0 and PHI1 for the 3-point stencil
4625  // and their derivatives DPHI0 and DPHI1
4626  const double PHI0[ND] = { 0.5, 1, 0.5 };
4627  const double DPHI0[ND] = { 1.5, 0, -1.5 };
4628  const double PHI1[ND] = { -0.125, 0, 0.125 };
4629  const double DPHI1[ND] = { -0.25, 1, -0.25 };
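// Consistency note: the values above are samples of the C1 cubic
// Hermite basis functions
//   phi0(t) = (1 - |t|)^2 (1 + 2|t|),   phi1(t) = t (1 - |t|)^2,
// and of their derivatives, taken at t = -1/2, 0, +1/2 (fine-grid
// points in coarse-grid units); e.g. phi0(1/2) = 0.5 and
// phi1(1/2) = 0.125 reproduce the PHI0[] and PHI1[] entries.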
4630 
4631  // for intermediate calculations
4632  double xphi0_base_array[ND];
4633  double dxphi0_base_array[ND];
4634  double yphi0_base_array[ND];
4635  double dyphi0_base_array[ND];
4636  double zphi0_base_array[ND];
4637  double dzphi0_base_array[ND];
4638  double xphi1_base_array[ND];
4639  double dxphi1_base_array[ND];
4640  double yphi1_base_array[ND];
4641  double dyphi1_base_array[ND];
4642  double zphi1_base_array[ND];
4643  double dzphi1_base_array[ND];
4644  // will point to center of stencil arrays
4645  double *xphi0, *dxphi0, *xphi1, *dxphi1;
4646  double *yphi0, *dyphi0, *yphi1, *dyphi1;
4647  double *zphi0, *dzphi0, *zphi1, *dzphi1;
4648 
4649  for (n = 0; n < ND; n++) {
4650  xphi0_base_array[n] = PHI0[n];
4651  dxphi0_base_array[n] = hxlen_1 * DPHI0[n]; // scale by grid spacing
4652  xphi1_base_array[n] = hxlen * PHI1[n]; // scale by grid spacing
4653  dxphi1_base_array[n] = DPHI1[n];
4654  yphi0_base_array[n] = PHI0[n];
4655  dyphi0_base_array[n] = hylen_1 * DPHI0[n]; // scale by grid spacing
4656  yphi1_base_array[n] = hylen * PHI1[n]; // scale by grid spacing
4657  dyphi1_base_array[n] = DPHI1[n];
4658  zphi0_base_array[n] = PHI0[n];
4659  dzphi0_base_array[n] = hzlen_1 * DPHI0[n]; // scale by grid spacing
4660  zphi1_base_array[n] = hzlen * PHI1[n]; // scale by grid spacing
4661  dzphi1_base_array[n] = DPHI1[n];
4662  }
4663  xphi0 = xphi0_base_array + NR; // point into center of arrays
4664  dxphi0 = dxphi0_base_array + NR;
4665  xphi1 = xphi1_base_array + NR;
4666  dxphi1 = dxphi1_base_array + NR;
4667  yphi0 = yphi0_base_array + NR;
4668  dyphi0 = dyphi0_base_array + NR;
4669  yphi1 = yphi1_base_array + NR;
4670  dyphi1 = dyphi1_base_array + NR;
4671  zphi0 = zphi0_base_array + NR;
4672  dzphi0 = dzphi0_base_array + NR;
4673  zphi1 = zphi1_base_array + NR;
4674  dzphi1 = dzphi1_base_array + NR;
4675 
4676  for (level = 0; level < nlevels-1; level++) {
4677  // allocate space for restriction and prolongation stencils
4678  map.gres_c1hermite[level].set(0, ND, 0, ND, 0, ND);
4679  map.gpro_c1hermite[level].set(0, ND, 0, ND, 0, ND);
4680 
4681  // scale up to next level grid spacing
4682  //
4683  // have to determine for each dimension whether or not
4684  // a periodic grid spacing has increased
4685  // (equivalent to if there are fewer grid points)
4686  for (n = -NR; n <= NR; n++) {
4687  if ( ! ispx ||
4688  map.gridrange[level+1].ni() < map.gridrange[level].ni() ) {
4689  dxphi0[n] *= 0.5;
4690  xphi1[n] *= 2;
4691  }
4692  if ( ! ispy ||
4693  map.gridrange[level+1].nj() < map.gridrange[level].nj() ) {
4694  dyphi0[n] *= 0.5;
4695  yphi1[n] *= 2;
4696  }
4697  if ( ! ispz ||
4698  map.gridrange[level+1].nk() < map.gridrange[level].nk() ) {
4699  dzphi0[n] *= 0.5;
4700  zphi1[n] *= 2;
4701  }
4702  }
4703 
4704  // loop over restriction stencil matrices
4705  // calculate from partial derivatives
4706  for (k = -NR; k <= NR; k++) {
4707  for (j = -NR; j <= NR; j++) {
4708  for (i = -NR; i <= NR; i++) {
4709  Float *t = map.gres_c1hermite[level](i+NR,j+NR,k+NR).melem;
4710 
4711  t[C1INDEX(D000,D000)] = xphi0[i] * yphi0[j] * zphi0[k];
4712  t[C1INDEX(D000,D100)] = dxphi0[i] * yphi0[j] * zphi0[k];
4713  t[C1INDEX(D000,D010)] = xphi0[i] * dyphi0[j] * zphi0[k];
4714  t[C1INDEX(D000,D001)] = xphi0[i] * yphi0[j] * dzphi0[k];
4715  t[C1INDEX(D000,D110)] = dxphi0[i] * dyphi0[j] * zphi0[k];
4716  t[C1INDEX(D000,D101)] = dxphi0[i] * yphi0[j] * dzphi0[k];
4717  t[C1INDEX(D000,D011)] = xphi0[i] * dyphi0[j] * dzphi0[k];
4718  t[C1INDEX(D000,D111)] = dxphi0[i] * dyphi0[j] * dzphi0[k];
4719 
4720  t[C1INDEX(D100,D000)] = xphi1[i] * yphi0[j] * zphi0[k];
4721  t[C1INDEX(D100,D100)] = dxphi1[i] * yphi0[j] * zphi0[k];
4722  t[C1INDEX(D100,D010)] = xphi1[i] * dyphi0[j] * zphi0[k];
4723  t[C1INDEX(D100,D001)] = xphi1[i] * yphi0[j] * dzphi0[k];
4724  t[C1INDEX(D100,D110)] = dxphi1[i] * dyphi0[j] * zphi0[k];
4725  t[C1INDEX(D100,D101)] = dxphi1[i] * yphi0[j] * dzphi0[k];
4726  t[C1INDEX(D100,D011)] = xphi1[i] * dyphi0[j] * dzphi0[k];
4727  t[C1INDEX(D100,D111)] = dxphi1[i] * dyphi0[j] * dzphi0[k];
4728 
4729  t[C1INDEX(D010,D000)] = xphi0[i] * yphi1[j] * zphi0[k];
4730  t[C1INDEX(D010,D100)] = dxphi0[i] * yphi1[j] * zphi0[k];
4731  t[C1INDEX(D010,D010)] = xphi0[i] * dyphi1[j] * zphi0[k];
4732  t[C1INDEX(D010,D001)] = xphi0[i] * yphi1[j] * dzphi0[k];
4733  t[C1INDEX(D010,D110)] = dxphi0[i] * dyphi1[j] * zphi0[k];
4734  t[C1INDEX(D010,D101)] = dxphi0[i] * yphi1[j] * dzphi0[k];
4735  t[C1INDEX(D010,D011)] = xphi0[i] * dyphi1[j] * dzphi0[k];
4736  t[C1INDEX(D010,D111)] = dxphi0[i] * dyphi1[j] * dzphi0[k];
4737 
4738  t[C1INDEX(D001,D000)] = xphi0[i] * yphi0[j] * zphi1[k];
4739  t[C1INDEX(D001,D100)] = dxphi0[i] * yphi0[j] * zphi1[k];
4740  t[C1INDEX(D001,D010)] = xphi0[i] * dyphi0[j] * zphi1[k];
4741  t[C1INDEX(D001,D001)] = xphi0[i] * yphi0[j] * dzphi1[k];
4742  t[C1INDEX(D001,D110)] = dxphi0[i] * dyphi0[j] * zphi1[k];
4743  t[C1INDEX(D001,D101)] = dxphi0[i] * yphi0[j] * dzphi1[k];
4744  t[C1INDEX(D001,D011)] = xphi0[i] * dyphi0[j] * dzphi1[k];
4745  t[C1INDEX(D001,D111)] = dxphi0[i] * dyphi0[j] * dzphi1[k];
4746 
4747  t[C1INDEX(D110,D000)] = xphi1[i] * yphi1[j] * zphi0[k];
4748  t[C1INDEX(D110,D100)] = dxphi1[i] * yphi1[j] * zphi0[k];
4749  t[C1INDEX(D110,D010)] = xphi1[i] * dyphi1[j] * zphi0[k];
4750  t[C1INDEX(D110,D001)] = xphi1[i] * yphi1[j] * dzphi0[k];
4751  t[C1INDEX(D110,D110)] = dxphi1[i] * dyphi1[j] * zphi0[k];
4752  t[C1INDEX(D110,D101)] = dxphi1[i] * yphi1[j] * dzphi0[k];
4753  t[C1INDEX(D110,D011)] = xphi1[i] * dyphi1[j] * dzphi0[k];
4754  t[C1INDEX(D110,D111)] = dxphi1[i] * dyphi1[j] * dzphi0[k];
4755 
4756  t[C1INDEX(D101,D000)] = xphi1[i] * yphi0[j] * zphi1[k];
4757  t[C1INDEX(D101,D100)] = dxphi1[i] * yphi0[j] * zphi1[k];
4758  t[C1INDEX(D101,D010)] = xphi1[i] * dyphi0[j] * zphi1[k];
4759  t[C1INDEX(D101,D001)] = xphi1[i] * yphi0[j] * dzphi1[k];
4760  t[C1INDEX(D101,D110)] = dxphi1[i] * dyphi0[j] * zphi1[k];
4761  t[C1INDEX(D101,D101)] = dxphi1[i] * yphi0[j] * dzphi1[k];
4762  t[C1INDEX(D101,D011)] = xphi1[i] * dyphi0[j] * dzphi1[k];
4763  t[C1INDEX(D101,D111)] = dxphi1[i] * dyphi0[j] * dzphi1[k];
4764 
4765  t[C1INDEX(D011,D000)] = xphi0[i] * yphi1[j] * zphi1[k];
4766  t[C1INDEX(D011,D100)] = dxphi0[i] * yphi1[j] * zphi1[k];
4767  t[C1INDEX(D011,D010)] = xphi0[i] * dyphi1[j] * zphi1[k];
4768  t[C1INDEX(D011,D001)] = xphi0[i] * yphi1[j] * dzphi1[k];
4769  t[C1INDEX(D011,D110)] = dxphi0[i] * dyphi1[j] * zphi1[k];
4770  t[C1INDEX(D011,D101)] = dxphi0[i] * yphi1[j] * dzphi1[k];
4771  t[C1INDEX(D011,D011)] = xphi0[i] * dyphi1[j] * dzphi1[k];
4772  t[C1INDEX(D011,D111)] = dxphi0[i] * dyphi1[j] * dzphi1[k];
4773 
4774  t[C1INDEX(D111,D000)] = xphi1[i] * yphi1[j] * zphi1[k];
4775  t[C1INDEX(D111,D100)] = dxphi1[i] * yphi1[j] * zphi1[k];
4776  t[C1INDEX(D111,D010)] = xphi1[i] * dyphi1[j] * zphi1[k];
4777  t[C1INDEX(D111,D001)] = xphi1[i] * yphi1[j] * dzphi1[k];
4778  t[C1INDEX(D111,D110)] = dxphi1[i] * dyphi1[j] * zphi1[k];
4779  t[C1INDEX(D111,D101)] = dxphi1[i] * yphi1[j] * dzphi1[k];
4780  t[C1INDEX(D111,D011)] = xphi1[i] * dyphi1[j] * dzphi1[k];
4781  t[C1INDEX(D111,D111)] = dxphi1[i] * dyphi1[j] * dzphi1[k];
4782  }
4783  }
4784  } // end loops over restriction stencil matrices
4785 
4786  // loop over prolongation stencil matrices
4787  // prolongation stencil matrices are the transpose of restriction
4788  for (k = -NR; k <= NR; k++) {
4789  for (j = -NR; j <= NR; j++) {
4790  for (i = -NR; i <= NR; i++) {
4791  Float *t = map.gres_c1hermite[level](i+NR,j+NR,k+NR).melem;
4792  Float *tt = map.gpro_c1hermite[level](i+NR,j+NR,k+NR).melem;
4793 
4794  tt[C1INDEX(D000,D000)] = t[C1INDEX(D000,D000)];
4795  tt[C1INDEX(D000,D100)] = t[C1INDEX(D100,D000)];
4796  tt[C1INDEX(D000,D010)] = t[C1INDEX(D010,D000)];
4797  tt[C1INDEX(D000,D001)] = t[C1INDEX(D001,D000)];
4798  tt[C1INDEX(D000,D110)] = t[C1INDEX(D110,D000)];
4799  tt[C1INDEX(D000,D101)] = t[C1INDEX(D101,D000)];
4800  tt[C1INDEX(D000,D011)] = t[C1INDEX(D011,D000)];
4801  tt[C1INDEX(D000,D111)] = t[C1INDEX(D111,D000)];
4802 
4803  tt[C1INDEX(D100,D000)] = t[C1INDEX(D000,D100)];
4804  tt[C1INDEX(D100,D100)] = t[C1INDEX(D100,D100)];
4805  tt[C1INDEX(D100,D010)] = t[C1INDEX(D010,D100)];
4806  tt[C1INDEX(D100,D001)] = t[C1INDEX(D001,D100)];
4807  tt[C1INDEX(D100,D110)] = t[C1INDEX(D110,D100)];
4808  tt[C1INDEX(D100,D101)] = t[C1INDEX(D101,D100)];
4809  tt[C1INDEX(D100,D011)] = t[C1INDEX(D011,D100)];
4810  tt[C1INDEX(D100,D111)] = t[C1INDEX(D111,D100)];
4811 
4812  tt[C1INDEX(D010,D000)] = t[C1INDEX(D000,D010)];
4813  tt[C1INDEX(D010,D100)] = t[C1INDEX(D100,D010)];
4814  tt[C1INDEX(D010,D010)] = t[C1INDEX(D010,D010)];
4815  tt[C1INDEX(D010,D001)] = t[C1INDEX(D001,D010)];
4816  tt[C1INDEX(D010,D110)] = t[C1INDEX(D110,D010)];
4817  tt[C1INDEX(D010,D101)] = t[C1INDEX(D101,D010)];
4818  tt[C1INDEX(D010,D011)] = t[C1INDEX(D011,D010)];
4819  tt[C1INDEX(D010,D111)] = t[C1INDEX(D111,D010)];
4820 
4821  tt[C1INDEX(D001,D000)] = t[C1INDEX(D000,D001)];
4822  tt[C1INDEX(D001,D100)] = t[C1INDEX(D100,D001)];
4823  tt[C1INDEX(D001,D010)] = t[C1INDEX(D010,D001)];
4824  tt[C1INDEX(D001,D001)] = t[C1INDEX(D001,D001)];
4825  tt[C1INDEX(D001,D110)] = t[C1INDEX(D110,D001)];
4826  tt[C1INDEX(D001,D101)] = t[C1INDEX(D101,D001)];
4827  tt[C1INDEX(D001,D011)] = t[C1INDEX(D011,D001)];
4828  tt[C1INDEX(D001,D111)] = t[C1INDEX(D111,D001)];
4829 
4830  tt[C1INDEX(D110,D000)] = t[C1INDEX(D000,D110)];
4831  tt[C1INDEX(D110,D100)] = t[C1INDEX(D100,D110)];
4832  tt[C1INDEX(D110,D010)] = t[C1INDEX(D010,D110)];
4833  tt[C1INDEX(D110,D001)] = t[C1INDEX(D001,D110)];
4834  tt[C1INDEX(D110,D110)] = t[C1INDEX(D110,D110)];
4835  tt[C1INDEX(D110,D101)] = t[C1INDEX(D101,D110)];
4836  tt[C1INDEX(D110,D011)] = t[C1INDEX(D011,D110)];
4837  tt[C1INDEX(D110,D111)] = t[C1INDEX(D111,D110)];
4838 
4839  tt[C1INDEX(D101,D000)] = t[C1INDEX(D000,D101)];
4840  tt[C1INDEX(D101,D100)] = t[C1INDEX(D100,D101)];
4841  tt[C1INDEX(D101,D010)] = t[C1INDEX(D010,D101)];
4842  tt[C1INDEX(D101,D001)] = t[C1INDEX(D001,D101)];
4843  tt[C1INDEX(D101,D110)] = t[C1INDEX(D110,D101)];
4844  tt[C1INDEX(D101,D101)] = t[C1INDEX(D101,D101)];
4845  tt[C1INDEX(D101,D011)] = t[C1INDEX(D011,D101)];
4846  tt[C1INDEX(D101,D111)] = t[C1INDEX(D111,D101)];
4847 
4848  tt[C1INDEX(D011,D000)] = t[C1INDEX(D000,D011)];
4849  tt[C1INDEX(D011,D100)] = t[C1INDEX(D100,D011)];
4850  tt[C1INDEX(D011,D010)] = t[C1INDEX(D010,D011)];
4851  tt[C1INDEX(D011,D001)] = t[C1INDEX(D001,D011)];
4852  tt[C1INDEX(D011,D110)] = t[C1INDEX(D110,D011)];
4853  tt[C1INDEX(D011,D101)] = t[C1INDEX(D101,D011)];
4854  tt[C1INDEX(D011,D011)] = t[C1INDEX(D011,D011)];
4855  tt[C1INDEX(D011,D111)] = t[C1INDEX(D111,D011)];
4856 
4857  tt[C1INDEX(D111,D000)] = t[C1INDEX(D000,D111)];
4858  tt[C1INDEX(D111,D100)] = t[C1INDEX(D100,D111)];
4859  tt[C1INDEX(D111,D010)] = t[C1INDEX(D010,D111)];
4860  tt[C1INDEX(D111,D001)] = t[C1INDEX(D001,D111)];
4861  tt[C1INDEX(D111,D110)] = t[C1INDEX(D110,D111)];
4862  tt[C1INDEX(D111,D101)] = t[C1INDEX(D101,D111)];
4863  tt[C1INDEX(D111,D011)] = t[C1INDEX(D011,D111)];
4864  tt[C1INDEX(D111,D111)] = t[C1INDEX(D111,D111)];
4865  }
4866  }
4867  } // end loops over prolongation stencil matrices
4868 
4869  } // end loop over levels
4870 
4871  } // end if C1HERMITE
4872 
4873  // calculate self energy factor for splitting
4874  BigReal gs=0, d=0;
4875  splitting(gs, d, 0, split);
4876  gzero = gs * a_p;
4877 
4878  if (CkMyPe() == 0) {
4879  iout << iINFO << "MSM finished calculating stencils\n" << endi;
4880  }
4881 
4882  // allocate map for patches
4883  PatchMap *pm = PatchMap::Object();
4884  int numpatches = pm->numPatches();
4885  map.patchList.resize(numpatches);
4886 #ifdef DEBUG_MSM_VERBOSE
4887  printf("numPatches = %d\n", numpatches);
4888 #endif
4889 
4890  // allocate map for blocks for each grid level
4891  map.blockLevel.resize(nlevels);
4892  map.bsx.resize(nlevels);
4893  map.bsy.resize(nlevels);
4894  map.bsz.resize(nlevels);
4895 #ifdef MSM_FOLD_FACTOR
4896  map.foldfactor.resize(nlevels);
4897 #endif
4898  for (level = 0; level < nlevels; level++) {
4899  msm::IndexRange& g = map.gridrange[level];
4900  msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
4901  int gia = g.ia();
4902  int gni = g.ni();
4903  int gja = g.ja();
4904  int gnj = g.nj();
4905  int gka = g.ka();
4906  int gnk = g.nk();
4907  map.bsx[level] = bsx;
4908  map.bsy[level] = bsy;
4909  map.bsz[level] = bsz;
4910  if (/* map.bsx[level] < gni ||
4911  map.bsy[level] < gnj ||
4912  map.bsz[level] < gnk */ 1) {
4913  // make sure that block sizes divide evenly into periodic dimensions
4914  if (ispx) setup_periodic_blocksize(map.bsx[level], gni);
4915  if (ispy) setup_periodic_blocksize(map.bsy[level], gnj);
4916  if (ispz) setup_periodic_blocksize(map.bsz[level], gnk);
4917 #ifdef MSM_DEBUG_VERBOSE
4918  if (CkMyPe() == 0) {
4919  printf("level = %d\n map.bs* = %d %d %d gn* = %d %d %d\n",
4920  level, map.bsx[level], map.bsy[level], map.bsz[level],gni,gnj,gnk);
4921  }
4922 #endif
4923  // subdivide grid into multiple blocks
4924  // == ceil(gni / bsx), etc.
4925  int bni = (gni / map.bsx[level]) + (gni % map.bsx[level] != 0);
4926  int bnj = (gnj / map.bsy[level]) + (gnj % map.bsy[level] != 0);
4927  int bnk = (gnk / map.bsz[level]) + (gnk % map.bsz[level] != 0);
4928 #ifdef MSM_FOLD_FACTOR
4929  if (/* level > 2 && */ (bni == 1 || bnj == 1 || bnk == 1)) {
4930  map.foldfactor[level].set(bsx / gni, bsy / gnj, bsz / gnk);
4931 #if 0
4932  if (CkMyPe() == 0) {
4933  printf("Setting MSM FoldFactor level %d: %d %d %d\n",
4934  level, bsx / gni, bsy / gnj, bsz / gnk);
4935  }
4936 #endif
4937  }
4938 #endif
4939  b.set(0, bni, 0, bnj, 0, bnk);
4940  for (k = 0; k < bnk; k++) {
4941  for (j = 0; j < bnj; j++) {
4942  for (i = 0; i < bni; i++) {
4943  b(i,j,k).reset();
4944  int ia = gia + i*map.bsx[level];
4945  int ib = ia + map.bsx[level] - 1;
4946  int ja = gja + j*map.bsy[level];
4947  int jb = ja + map.bsy[level] - 1;
4948  int ka = gka + k*map.bsz[level];
4949  int kb = ka + map.bsz[level] - 1;
4950  if (ib >= gia + gni) ib = gia + gni - 1;
4951  if (jb >= gja + gnj) jb = gja + gnj - 1;
4952  if (kb >= gka + gnk) kb = gka + gnk - 1;
4953  b(i,j,k).nrange.setbounds(ia, ib, ja, jb, ka, kb);
4954  }
4955  }
4956  }
4957  }
4958  /*
4959  else {
4960  // make entire grid into single block
4961  b.set(0, 1, 0, 1, 0, 1);
4962  b(0,0,0).reset();
4963  b(0,0,0).nrange.set(gia, gni, gja, gnj, gka, gnk);
4964  // set correct block dimensions
4965  map.bsx[level] = gni;
4966  map.bsy[level] = gnj;
4967  map.bsz[level] = gnk;
4968  }
4969  */
4970  }
4971  //CkExit();
4972 #ifdef DEBUG_MSM_VERBOSE
4973  printf("Done allocating map for grid levels\n");
4974  printf("Grid level decomposition:\n");
4975  for (level = 0; level < nlevels; level++) {
4976  msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
4977  int bia = b.ia();
4978  int bib = b.ib();
4979  int bja = b.ja();
4980  int bjb = b.jb();
4981  int bka = b.ka();
4982  int bkb = b.kb();
4983  for (k = bka; k <= bkb; k++) {
4984  for (j = bja; j <= bjb; j++) {
4985  for (i = bia; i <= bib; i++) {
4986  int ia = b(i,j,k).nrange.ia();
4987  int ib = b(i,j,k).nrange.ib();
4988  int ja = b(i,j,k).nrange.ja();
4989  int jb = b(i,j,k).nrange.jb();
4990  int ka = b(i,j,k).nrange.ka();
4991  int kb = b(i,j,k).nrange.kb();
4992  printf("level=%d id=%d %d %d [%d..%d] x [%d..%d] x [%d..%d]"
4993  " --> %d\n",
4994  level, i, j, k, ia, ib, ja, jb, ka, kb,
4995  b(i,j,k).nrange.nn());
4996  }
4997  }
4998  }
4999  }
5000 #endif
5001  if (CkMyPe() == 0) {
5002  iout << iINFO << "MSM finished creating map for grid levels\n" << endi;
5003  }
5004 
5005  initialize2();
5006 } // ComputeMsmMgr::initialize()
5007 
5008 
5009 void ComputeMsmMgr::initialize2()
5010 {
5011  SimParameters *simParams = Node::Object()->simParameters;
5012  PatchMap *pm = PatchMap::Object();
5013  int numpatches = pm->numPatches();
5014  int i, j, k, n, level;
5015 
5016  // initialize grid of PatchDiagram
5017  // a = cutoff
5018  BigReal sysdima = lattice.a_r().unit() * lattice.a();
5019  BigReal sysdimb = lattice.b_r().unit() * lattice.b();
5020  BigReal sysdimc = lattice.c_r().unit() * lattice.c();
5021  BigReal patchdim = simParams->patchDimension;
5022  BigReal xmargin = 0.5 * (patchdim - a) / sysdima;
5023  BigReal ymargin = 0.5 * (patchdim - a) / sysdimb;
5024  BigReal zmargin = 0.5 * (patchdim - a) / sysdimc;
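// (For reference: pm->min_a()/max_a() below are in scaled coordinates,
// so the real-space half-margin 0.5*(patchdim - a) is divided by the
// system dimension along each basis vector to express the patch
// extension in the same scaled units.)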
5025 #if 0
5026  // set min and max grid indices for patch covering
5027  // for non-periodic boundaries they conform to grid
5028  // periodic permits wrapping, so set to min/max for int
5029  int ia_min = (lattice.a_p() ? INT_MIN : map.gridrange[0].ia());
5030  int ib_max = (lattice.a_p() ? INT_MAX : map.gridrange[0].ib());
5031  int ja_min = (lattice.b_p() ? INT_MIN : map.gridrange[0].ja());
5032  int jb_max = (lattice.b_p() ? INT_MAX : map.gridrange[0].jb());
5033  int ka_min = (lattice.c_p() ? INT_MIN : map.gridrange[0].ka());
5034  int kb_max = (lattice.c_p() ? INT_MAX : map.gridrange[0].kb());
5035 #endif
5036  int pid;
5037  for (pid = 0; pid < numpatches; pid++) {
5038  // shortcut reference to this patch diagram
5039  msm::PatchDiagram& p = map.patchList[pid];
5040  p.reset();
5041  // find extent of patch including margin
5042  BigReal xmin = pm->min_a(pid) - xmargin;
5043  BigReal xmax = pm->max_a(pid) + xmargin;
5044  BigReal ymin = pm->min_b(pid) - ymargin;
5045  BigReal ymax = pm->max_b(pid) + ymargin;
5046  BigReal zmin = pm->min_c(pid) - zmargin;
5047  BigReal zmax = pm->max_c(pid) + zmargin;
5048  // find grid point covering of patch plus outer edge stencil
5049  int ia = int(floor((xmin - sglower.x) * shx_1)) - s_edge;
5050  int ib = int(floor((xmax - sglower.x) * shx_1)) + 1 + s_edge;
5051  int ja = int(floor((ymin - sglower.y) * shy_1)) - s_edge;
5052  int jb = int(floor((ymax - sglower.y) * shy_1)) + 1 + s_edge;
5053  int ka = int(floor((zmin - sglower.z) * shz_1)) - s_edge;
5054  int kb = int(floor((zmax - sglower.z) * shz_1)) + 1 + s_edge;
5055  // for edge patches along non-periodic boundaries
5056  // clamp subgrid to full grid boundaries
5057  if ( ! lattice.a_p() ) { // non-periodic along lattice basis vector a
5058  int mi = pm->index_a(pid);
5059  if (mi == 0) ia = map.gridrange[0].ia();
5060  if (mi == pm->gridsize_a()-1) ib = map.gridrange[0].ib();
5061  }
5062  if ( ! lattice.b_p() ) { // non-periodic along lattice basis vector b
5063  int mj = pm->index_b(pid);
5064  if (mj == 0) ja = map.gridrange[0].ja();
5065  if (mj == pm->gridsize_b()-1) jb = map.gridrange[0].jb();
5066  }
5067  if ( ! lattice.c_p() ) { // non-periodic along lattice basis vector c
5068  int mk = pm->index_c(pid);
5069  if (mk == 0) ka = map.gridrange[0].ka();
5070  if (mk == pm->gridsize_c()-1) kb = map.gridrange[0].kb();
5071  }
5072 #if 0
5073  // truncate subgrid covering to grid min/max
5074  // so that subgrid does not extend beyond full grid
5075  // works for both periodic and non-periodic boundary conditions
5076  if (ia < ia_min) ia = ia_min;
5077  if (ib > ib_max) ib = ib_max;
5078  if (ja < ja_min) ja = ja_min;
5079  if (jb > jb_max) jb = jb_max;
5080  if (ka < ka_min) ka = ka_min;
5081  if (kb > kb_max) kb = kb_max;
5082  // check for edge patch and extend subgrid to grid min/max
5083  // so that subgrid fully covers up to the edge of full grid
5084  int mi = pm->index_a(pid);
5085  int mj = pm->index_b(pid);
5086  int mk = pm->index_c(pid);
5087  int npi = pm->gridsize_a();
5088  int npj = pm->gridsize_b();
5089  int npk = pm->gridsize_c();
5090  if (mi == 0) ia = ia_min;
5091  if (mi == npi-1) ib = ib_max;
5092  if (mj == 0) ja = ja_min;
5093  if (mj == npj-1) jb = jb_max;
5094  if (mk == 0) ka = ka_min;
5095  if (mk == npk-1) kb = kb_max;
5096 #endif
5097 #if 0
5098  printf("patch %d: grid covering: [%d..%d] x [%d..%d] x [%d..%d]\n",
5099  pid, ia, ib, ja, jb, ka, kb);
5100  fflush(stdout);
5101 #endif
5102  // set the index range for this patch's surrounding grid points
5103  p.nrange.setbounds(ia,ib,ja,jb,ka,kb);
5104  // find lower and upper blocks of MSM h-grid
5105  msm::BlockIndex blower = map.blockOfGridIndex(msm::Ivec(ia,ja,ka),0);
5106  msm::BlockIndex bupper = map.blockOfGridIndex(msm::Ivec(ib,jb,kb),0);
5107  int maxarrlen = (bupper.n.i - blower.n.i + 1) *
5108  (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
5109  p.send.setmax(maxarrlen); // allocate space for send array
5110  // loop over the blocks
5111 #if 0
5112  printf("blower: level=%d n=%d %d %d bupper: level=%d n=%d %d %d\n",
5113  blower.level, blower.n.i, blower.n.j, blower.n.k,
5114  bupper.level, bupper.n.i, bupper.n.j, bupper.n.k);
5115  fflush(stdout);
5116 #endif
5117  for (int kk = blower.n.k; kk <= bupper.n.k; kk++) {
5118  for (int jj = blower.n.j; jj <= bupper.n.j; jj++) {
5119  for (int ii = blower.n.i; ii <= bupper.n.i; ii++) {
5120 #if 0
5121  printf("ii=%d jj=%d kk=%d\n", ii, jj, kk);
5122  fflush(stdout);
5123 #endif
5124  // determine actual block and range to send to
5125  msm::BlockSend bs;
5126  bs.nblock.n = msm::Ivec(ii,jj,kk);
5127  bs.nblock.level = 0;
5129  map.wrapBlockSend(bs); // determine wrapping to true block index
5130  p.send.append(bs); // append this block to the send array
5131  // increment counter for receive block
5132  map.blockLevel[0](bs.nblock_wrap.n).numRecvsCharge++;
5133  // initialize patch send back from this block
5134  msm::PatchSend ps;
5135  ps.nrange = bs.nrange_wrap;
5136  ps.nrange_unwrap = bs.nrange;
5137  ps.patchID = pid;
5138  map.blockLevel[0](bs.nblock_wrap.n).sendPatch.append(ps);
5139  // increment number of receives back to this patch
5140  p.numRecvs++;
5141  }
5142  }
5143  }
5144  // number of receives should be same as number of sends
5145  ASSERT(p.numRecvs == p.send.len() );
5146  }
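// Sketch: the per-patch setup above guarantees one potential message back
// for every charge message sent, hence the assertion just before this
// point.  The grid-index-to-block mapping done by blockOfGridIndex() and
// wrapBlockSend() (MsmMap.h) amounts to a floor division by the block
// size; a minimal illustration, assuming blocks of size blockSize tiling
// the level grid from index 0 (hypothetical helper, not NAMD API):
#if 0
static inline int blockCoord(int gridIndex, int blockSize) {
  // floor division, correct also for negative (unwrapped) grid indices
  return (gridIndex >= 0) ? gridIndex / blockSize
                          : -((-gridIndex + blockSize - 1) / blockSize);
}
// e.g. blockSize = 8:  blockCoord(7,8) == 0,  blockCoord(-1,8) == -1
#endif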
5147 #ifdef DEBUG_MSM_VERBOSE
5148 if (CkMyPe() == 0) {
5149  printf("Done allocating map for patches\n");
5150  printf("Patch level decomposition:\n");
5151  for (pid = 0; pid < numpatches; pid++) {
5152  msm::PatchDiagram& p = map.patchList[pid];
5153  int ia = p.nrange.ia();
5154  int ib = p.nrange.ib();
5155  int ja = p.nrange.ja();
5156  int jb = p.nrange.jb();
5157  int ka = p.nrange.ka();
5158  int kb = p.nrange.kb();
5159  printf("patch id=%d [%d..%d] x [%d..%d] x [%d..%d]\n",
5160  pid, ia, ib, ja, jb, ka, kb);
5161  }
5162 }
5163 #endif
5164  if (CkMyPe() == 0) {
5165  iout << iINFO << "MSM finished creating map for patches\n" << endi;
5166  }
5167 
5168  // initialize grid of BlockDiagram for each level
5169  int polydeg = PolyDegree[approx];
5170  numGridCutoff = 0;
5171  for (level = 0; level < nlevels; level++) {
5172  msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
5173  int bni = b.ni();
5174  int bnj = b.nj();
5175  int bnk = b.nk();
5176 #ifdef MSM_SKIP_BEYOND_SPHERE
5177  int gia, gib, gja, gjb, gka, gkb;
5178  if (approx == C1HERMITE) {
5179  gia = map.gc_c1hermite[level].ia();
5180  gib = map.gc_c1hermite[level].ib();
5181  gja = map.gc_c1hermite[level].ja();
5182  gjb = map.gc_c1hermite[level].jb();
5183  gka = map.gc_c1hermite[level].ka();
5184  gkb = map.gc_c1hermite[level].kb();
5185  }
5186  else {
5187  gia = map.gc[level].ia();
5188  gib = map.gc[level].ib();
5189  gja = map.gc[level].ja();
5190  gjb = map.gc[level].jb();
5191  gka = map.gc[level].ka();
5192  gkb = map.gc[level].kb();
5193  }
5194 #endif
5195 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS
5196  int bsx = map.bsx[level];
5197  int bsy = map.bsy[level];
5198  int bsz = map.bsz[level];
5199 #endif
5200 #ifdef MSM_FOLD_FACTOR
5201  if (map.foldfactor[level].active) {
5202  bsx *= map.foldfactor[level].numrep.i;
5203  bsy *= map.foldfactor[level].numrep.j;
5204  bsz *= map.foldfactor[level].numrep.k;
5205  }
5206 #endif
5207  for (k = 0; k < bnk; k++) {
5208  for (j = 0; j < bnj; j++) {
5209  for (i = 0; i < bni; i++) {
5210 
5211  // Grid cutoff calculation, sendAcross
5212  int ia = b(i,j,k).nrange.ia();
5213  int ib = b(i,j,k).nrange.ib();
5214  int ja = b(i,j,k).nrange.ja();
5215  int jb = b(i,j,k).nrange.jb();
5216  int ka = b(i,j,k).nrange.ka();
5217  int kb = b(i,j,k).nrange.kb();
5218  if (approx == C1HERMITE) {
5219  ia += map.gc_c1hermite[level].ia();
5220  ib += map.gc_c1hermite[level].ib();
5221  ja += map.gc_c1hermite[level].ja();
5222  jb += map.gc_c1hermite[level].jb();
5223  ka += map.gc_c1hermite[level].ka();
5224  kb += map.gc_c1hermite[level].kb();
5225  }
5226  else {
5227  ia += map.gc[level].ia();
5228  ib += map.gc[level].ib();
5229  ja += map.gc[level].ja();
5230  jb += map.gc[level].jb();
5231  ka += map.gc[level].ka();
5232  kb += map.gc[level].kb();
5233  }
5234  msm::Ivec na = map.clipIndexToLevel(msm::Ivec(ia,ja,ka), level);
5235  msm::Ivec nb = map.clipIndexToLevel(msm::Ivec(ib,jb,kb), level);
5236  b(i,j,k).nrangeCutoff.setbounds(na.i, nb.i, na.j, nb.j, na.k, nb.k);
5237  // determine sendAcross blocks
5238 #ifdef MSM_FOLD_FACTOR
5239  msm::BlockIndex blower = map.blockOfGridIndexFold(na, level);
5240  msm::BlockIndex bupper = map.blockOfGridIndexFold(nb, level);
5241 #else
5242  msm::BlockIndex blower = map.blockOfGridIndex(na, level);
5243  msm::BlockIndex bupper = map.blockOfGridIndex(nb, level);
5244 #endif
5245  int maxarrlen = (bupper.n.i - blower.n.i + 1) *
5246  (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
5247  b(i,j,k).sendAcross.setmax(maxarrlen); // allocate send array
5248  b(i,j,k).indexGridCutoff.setmax(maxarrlen); // alloc indexing
5249  b(i,j,k).recvGridCutoff.setmax(maxarrlen); // alloc indexing
5250  // loop over sendAcross blocks
5251  int ii, jj, kk;
5252 #if 0
5253  {
5254  msm::IndexRange& bn = b(i,j,k).nrange;
5255  printf("ME %4d [%d..%d] x [%d..%d] x [%d..%d]\n",
5256  bn.nn(),
5257  bn.ia(), bn.ib(),
5258  bn.ja(), bn.jb(),
5259  bn.ka(), bn.kb());
5260  }
5261 #endif
5262  for (kk = blower.n.k; kk <= bupper.n.k; kk++) {
5263  for (jj = blower.n.j; jj <= bupper.n.j; jj++) {
5264  for (ii = blower.n.i; ii <= bupper.n.i; ii++) {
5265 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS
5266  // make sure that block (ii,jj,kk) interacts with (i,j,k)
5267  int si = sign(ii-i);
5268  int sj = sign(jj-j);
5269  int sk = sign(kk-k);
5270  int di = (ii-i)*bsx + si*(1-bsx);
5271  int dj = (jj-j)*bsy + sj*(1-bsy);
5272  int dk = (kk-k)*bsz + sk*(1-bsz);
5273  Vector d = di*hu + dj*hv + dk*hw;
5274  if (d.length2() >= 4*a*a) continue;
5275 #endif
5276  // determine actual block and range to send to
5277  msm::BlockSend bs;
5278  bs.nblock.n = msm::Ivec(ii,jj,kk);
5279  bs.nblock.level = level;
5280 #ifdef MSM_FOLD_FACTOR
5281  bs.nrange = map.clipBlockToIndexRangeFold(bs.nblock,
5282  b(i,j,k).nrangeCutoff);
5283  map.wrapBlockSendFold(bs); // wrap to true block index
5284 #else
5285  bs.nrange = map.clipBlockToIndexRange(bs.nblock,
5286  b(i,j,k).nrangeCutoff);
5287  map.wrapBlockSend(bs); // wrap to true block index
5288 #endif
5289 #ifdef MSM_SKIP_BEYOND_SPHERE
5290 #if 0
5291  printf("send to volume %4d [%d..%d] x [%d..%d] x [%d..%d]\n",
5292  bs.nrange.nn(),
5293  bs.nrange.ia(), bs.nrange.ib(),
5294  bs.nrange.ja(), bs.nrange.jb(),
5295  bs.nrange.ka(), bs.nrange.kb());
5296 #endif
5297  msm::IndexRange& bm = b(i,j,k).nrange;
5298  msm::IndexRange& bn = bs.nrange;
5299  int qia = bm.ia();
5300  int qib = bm.ib();
5301  int qja = bm.ja();
5302  int qjb = bm.jb();
5303  int qka = bm.ka();
5304  int qkb = bm.kb();
5305  int inc_in = (bn.ni() > 1 ? bn.ni()-1 : 1);
5306  int inc_jn = (bn.nj() > 1 ? bn.nj()-1 : 1);
5307  int inc_kn = (bn.nk() > 1 ? bn.nk()-1 : 1);
5308  // loop over corner points of potential grid
5309  int iscalc = 0;
5310  for (int kn = bn.ka(); kn <= bn.kb(); kn += inc_kn) {
5311  for (int jn = bn.ja(); jn <= bn.jb(); jn += inc_jn) {
5312  for (int in = bn.ia(); in <= bn.ib(); in += inc_in) {
5313  // clip charges to weights
5314  int mia = ( qia >= gia + in ? qia : gia + in );
5315  int mib = ( qib <= gib + in ? qib : gib + in );
5316  int mja = ( qja >= gja + jn ? qja : gja + jn );
5317  int mjb = ( qjb <= gjb + jn ? qjb : gjb + jn );
5318  int mka = ( qka >= gka + kn ? qka : gka + kn );
5319  int mkb = ( qkb <= gkb + kn ? qkb : gkb + kn );
5320  int inc_im = (mib-mia > 0 ? mib-mia : 1);
5321  int inc_jm = (mjb-mja > 0 ? mjb-mja : 1);
5322  int inc_km = (mkb-mka > 0 ? mkb-mka : 1);
5323 
5324  // loop over corner points of charge grid
5325  for (int km = mka; km <= mkb; km += inc_km) {
5326  for (int jm = mja; jm <= mjb; jm += inc_jm) {
5327  for (int im = mia; im <= mib; im += inc_im) {
5328 
5329  Float g;
5330  if (approx == C1HERMITE) {
5331  g = map.gc_c1hermite[level](im-in,jm-jn,km-kn).melem[0];
5332  }
5333  else {
5334  g = map.gc[level](im-in,jm-jn,km-kn);
5335  }
5336  iscalc |= (g != 0);
5337  }
5338  }
5339  }
5340 
5341  }
5342  }
5343  }
5344  if ( ! iscalc) {
5345  //printf("SKIPPING\n"); // XXX
5346  continue; // skip because overlap is beyond nonzero gc sphere
5347  }
5348 #endif
5349  b(i,j,k).sendAcross.append(bs);
5350  b(i,j,k).indexGridCutoff.append(numGridCutoff);
5351  // receiving block records this grid cutoff ID
5352  b(bs.nblock_wrap.n).recvGridCutoff.append(numGridCutoff);
5353  // increment counter for receive block
5354  b(bs.nblock_wrap.n).numRecvsPotential++;
5355 
5356  numGridCutoff++; // one MsmGridCutoff for each send across
5357  }
5358  }
5359  } // end loop over sendAcross blocks
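// Sketch: the two block-pair culling tests used in the loop above,
// restated as a single standalone predicate (illustration only; it uses
// the same sign(), Vector, h-basis vectors hu/hv/hw, and cutoff a as the
// enclosing code, and omits the MSM_FOLD_FACTOR block-size scaling):
#if 0
static bool blocksMayInteract(int i, int j, int k, int ii, int jj, int kk,
                              int bsx, int bsy, int bsz,
                              const Vector& hu, const Vector& hv,
                              const Vector& hw, BigReal a) {
  // minimum separation, in grid points, between the nearest grid points
  // of block (i,j,k) and block (ii,jj,kk)
  int si = sign(ii-i), sj = sign(jj-j), sk = sign(kk-k);
  int di = (ii-i)*bsx + si*(1-bsx);
  int dj = (jj-j)*bsy + sj*(1-bsy);
  int dk = (kk-k)*bsz + sk*(1-bsz);
  Vector d = di*hu + dj*hv + dk*hw;
  return (d.length2() < 4*a*a);  // grid cutoff has support |r| < 2a
}
#endif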
5360 
5361  // Restriction, sendUp
5362  if (level < nlevels-1) {
5363  int ia2, ib2, ja2, jb2, ka2, kb2;
5364  ia = b(i,j,k).nrange.ia();
5365  ib = b(i,j,k).nrange.ib();
5366  ja = b(i,j,k).nrange.ja();
5367  jb = b(i,j,k).nrange.jb();
5368  ka = b(i,j,k).nrange.ka();
5369  kb = b(i,j,k).nrange.kb();
5370  // determine expansion of h-grid onto 2h-grid
5371  if ( ia==ib && ((ia & 1)==0) ) {
5372  ia2 = ib2 = ia / 2;
5373  }
5374  else {
5375  ia2 = (ia / 2) - ((polydeg+1) / 2) + 1;
5376  ib2 = ((ib+1) / 2) + ((polydeg+1) / 2) - 1;
5377  }
5378  if ( ja==jb && ((ja & 1)==0) ) {
5379  ja2 = jb2 = ja / 2;
5380  }
5381  else {
5382  ja2 = (ja / 2) - ((polydeg+1) / 2) + 1;
5383  jb2 = ((jb+1) / 2) + ((polydeg+1) / 2) - 1;
5384  }
5385  if ( ka==kb && ((ka & 1)==0) ) {
5386  ka2 = kb2 = ka / 2;
5387  }
5388  else {
5389  ka2 = (ka / 2) - ((polydeg+1) / 2) + 1;
5390  kb2 = ((kb+1) / 2) + ((polydeg+1) / 2) - 1;
5391  }
5392  // clip to boundaries of 2h-grid
5393  msm::Ivec na2, nb2;
5394  na2 = map.clipIndexToLevel(msm::Ivec(ia2,ja2,ka2), level+1);
5395  nb2 = map.clipIndexToLevel(msm::Ivec(ib2,jb2,kb2), level+1);
5396  b(i,j,k).nrangeRestricted.setbounds(na2.i, nb2.i, na2.j, nb2.j,
5397  na2.k, nb2.k);
5398  // determine sendUp blocks
5399  msm::BlockIndex blower = map.blockOfGridIndex(na2, level+1);
5400  msm::BlockIndex bupper = map.blockOfGridIndex(nb2, level+1);
5401  int maxarrlen = (bupper.n.i - blower.n.i + 1) *
5402  (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
5403  b(i,j,k).sendUp.setmax(maxarrlen); // allocate send array
5404  // loop over sendUp blocks
5405  int ii, jj, kk;
5406  for (kk = blower.n.k; kk <= bupper.n.k; kk++) {
5407  for (jj = blower.n.j; jj <= bupper.n.j; jj++) {
5408  for (ii = blower.n.i; ii <= bupper.n.i; ii++) {
5409  // determine actual block and range to send to
5410  msm::BlockSend bs;
5411  bs.nblock.n = msm::Ivec(ii,jj,kk);
5412  bs.nblock.level = level+1;
5413  bs.nrange = map.clipBlockToIndexRange(bs.nblock,
5414  b(i,j,k).nrangeRestricted);
5415  map.wrapBlockSend(bs); // wrap to true block index
5416  b(i,j,k).sendUp.append(bs);
5417  // increment counter for receive block
5418  map.blockLevel[level+1](bs.nblock_wrap.n).numRecvsCharge++;
5419  }
5420  }
5421  } // end loop over sendUp blocks
5422 
5423  } // end if restriction
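// Worked example for the restriction expansion above: the stencil has
// (polydeg+1) points per dimension, so with polydeg = 3 and a level-h
// block spanning i in [5..12],
//   ia2 = (5/2) - ((3+1)/2) + 1 = 2 - 2 + 1 = 1
//   ib2 = ((12+1)/2) + ((3+1)/2) - 1 = 6 + 2 - 1 = 7
// i.e. the block's charges restrict onto 2h-grid points i2 in [1..7],
// which is then clipped to the level+1 grid and split into sendUp blocks.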
5424 
5425  // Prolongation, sendDown
5426  if (level > 0) {
5427  int ia2 = b(i,j,k).nrange.ia();
5428  int ib2 = b(i,j,k).nrange.ib();
5429  int ja2 = b(i,j,k).nrange.ja();
5430  int jb2 = b(i,j,k).nrange.jb();
5431  int ka2 = b(i,j,k).nrange.ka();
5432  int kb2 = b(i,j,k).nrange.kb();
5433  // determine expansion of 2h-grid onto h-grid
5434  ia = 2*ia2 - polydeg;
5435  ib = 2*ib2 + polydeg;
5436  ja = 2*ja2 - polydeg;
5437  jb = 2*jb2 + polydeg;
5438  ka = 2*ka2 - polydeg;
5439  kb = 2*kb2 + polydeg;
5440  // clip to boundaries of h-grid
5441  msm::Ivec na, nb;
5442  na = map.clipIndexToLevel(msm::Ivec(ia,ja,ka), level-1);
5443  nb = map.clipIndexToLevel(msm::Ivec(ib,jb,kb), level-1);
5444  b(i,j,k).nrangeProlongated.setbounds(na.i, nb.i, na.j, nb.j,
5445  na.k, nb.k);
5446  // determine sendDown blocks
5447  msm::BlockIndex blower = map.blockOfGridIndex(na, level-1);
5448  msm::BlockIndex bupper = map.blockOfGridIndex(nb, level-1);
5449  int maxarrlen = (bupper.n.i - blower.n.i + 1) *
5450  (bupper.n.j - blower.n.j + 1) * (bupper.n.k - blower.n.k + 1);
5451  b(i,j,k).sendDown.setmax(maxarrlen); // allocate send array
5452  // loop over sendDown blocks
5453  int ii, jj, kk;
5454  for (kk = blower.n.k; kk <= bupper.n.k; kk++) {
5455  for (jj = blower.n.j; jj <= bupper.n.j; jj++) {
5456  for (ii = blower.n.i; ii <= bupper.n.i; ii++) {
5457  // determine actual block and range to send to
5458  msm::BlockSend bs;
5459  bs.nblock.n = msm::Ivec(ii,jj,kk);
5460  bs.nblock.level = level-1;
5461  bs.nrange = map.clipBlockToIndexRange(bs.nblock,
5462  b(i,j,k).nrangeProlongated);
5463  map.wrapBlockSend(bs); // wrap to true block index
5464  b(i,j,k).sendDown.append(bs);
5465  // increment counter for receive block
5466  map.blockLevel[level-1](bs.nblock_wrap.n).numRecvsPotential++;
5467  }
5468  }
5469  } // end loop over sendDown blocks
5470 
5471  } // end if prolongation
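// Prolongation is the adjoint of restriction, so the index expansion runs
// the other way.  Continuing the example above (polydeg = 3, a 2h-block
// spanning i2 in [1..7]),
//   ia = 2*1 - 3 = -1,  ib = 2*7 + 3 = 17
// again clipped to the h-grid before being split into sendDown blocks;
// the h-range [5..12] of the restriction example lies inside [-1..17],
// as it must for the two passes to be transposes of each other.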
5472 
5473 #ifdef MSM_REDUCE_GRID
5474  // using a reduction decreases the number of messages
5475  // from MsmGridCutoff elements to just 1
5476  b(i,j,k).numRecvsPotential -= ( b(i,j,k).indexGridCutoff.len() - 1 );
5477 #endif
5478 
5479  }
5480  }
5481  } // end loop over block diagram
5482 
5483  } // end loop over levels
5484  // end of Map setup
5485 
5486  // XXX
5487  //
5488  // NO, WAIT!
5489  // More Map setup below for node mapping!
5490  //
5491  // XXX
5492 
5493  // allocate chare arrays
5494 
5495  if (1) {
5496  PatchMap *pm = PatchMap::Object();
5497  patchPtr.resize( pm->numPatches() );
5498  for (int i = 0; i < pm->numPatches(); i++) {
5499  patchPtr[i] = NULL;
5500  }
5501 #ifdef DEBUG_MSM_VERBOSE
5502  printf("Allocating patchPtr array length %d\n", pm->numPatches());
5503 #endif
5504  if (CkMyPe() == 0) {
5505  iout << iINFO << "MSM has " << pm->numPatches()
5506  << " interpolation / anterpolation objects"
5507  << " (one per patch)\n" << endi;
5508  }
5509  }
5510 
5511 #ifdef MSM_NODE_MAPPING
5512  if (1) {
5513  // Node aware initial assignment of chares
5514  //
5515  // Create map object for each 3D chare array of MsmBlock and the
5516  // 1D chare array of MsmGridCutoff. Design map to equally distribute
5517  // blocks across nodes, assigned to node PEs in round robin manner.
5518  // Attempt to reduce internode communication bandwidth by assigning
5519  // each MsmGridCutoff element to either its source node or its
5520  // destination node, again assigned to node PEs in round robin manner.
5521 #if 0
5522  // for testing
5523 #if 0
5524  int numNodes = 16;
5525  int numPes = 512;
5526 #else
5527  int numNodes = 32;
5528  int numPes = 1024;
5529 #endif
5530 #else
5531  int numNodes = CkNumNodes();
5532  int numPes = CkNumPes();
5533 #endif
5534  int numPesPerNode = numPes / numNodes;
5535  int numBlocks = 0; // find total number of blocks
5536  for (level = 0; level < nlevels; level++) {
5537  numBlocks += map.blockLevel[level].nn();
5538  }
5539 
5540  // final result is arrays for blocks and gcuts, each with pe number
5541  blockAssign.resize(numBlocks);
5542  gcutAssign.resize(numGridCutoff);
5543  //printf("XXX numBlocks = %d\n", numBlocks);
5544  //printf("XXX numGridCutoff = %d\n", numGridCutoff);
5545 
5546  msm::Array<float> blockWork(numBlocks);
5547  msm::Array<float> gcutWork(numGridCutoff);
5548 
5549  msm::Array<float> nodeWork(numNodes);
5550  nodeWork.reset(0);
5551 #ifdef MSM_NODE_MAPPING_STATS
5552  msm::Array<float> peWork(numPes);
5553  peWork.reset(0);
5554 #endif
5555 
5556  msm::PriorityQueue<WorkIndex> nodeQueue(numNodes);
5557  for (n = 0; n < numNodes; n++) {
5558  nodeQueue.insert(WorkIndex(0, n));
5559  }
5560 
5561  int bindex = 0; // index for block array
5562  for (level = 0; level < nlevels; level++) {
5563  msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
5564  int bni = b.ni();
5565  int bnj = b.nj();
5566  int bnk = b.nk();
5567  for (k = 0; k < bnk; k++) { // for all blocks
5568  for (j = 0; j < bnj; j++) {
5569  for (i = 0; i < bni; i++) {
5570  WorkIndex wn;
5571  nodeQueue.remove(wn);
5572  float bw = calcBlockWork(b(i,j,k));
5573  blockAssign[bindex] = wn.index;
5574  nodeWork[wn.index] += bw;
5575  wn.work += bw;
5576  blockWork[bindex] = bw;
5577  nodeQueue.insert(wn);
5578  bindex++;
5579  }
5580  }
5581  } // end for all blocks
5582  } // end for all levels
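// Sketch: the loop above is a greedy "least-loaded first" assignment.
// Pop the node with the smallest accumulated work from a min-queue, give
// it the next block, push it back with its work updated.  A standalone
// STL equivalent (illustration only; msm::PriorityQueue plays the role
// of std::priority_queue in the real code):
#if 0
#include <queue>
#include <utility>
#include <vector>
#include <functional>
std::vector<int> greedyAssign(const std::vector<float>& work, int numNodes)
{
  typedef std::pair<float,int> WI;          // (accumulated work, node)
  std::priority_queue<WI, std::vector<WI>, std::greater<WI> > minq;
  for (int n = 0; n < numNodes; n++) minq.push(WI(0.f, n));
  std::vector<int> assign(work.size());
  for (int b = 0; b < int(work.size()); b++) {
    WI wn = minq.top(); minq.pop();         // least-loaded node so far
    assign[b] = wn.second;
    wn.first += work[b];
    minq.push(wn);
  }
  return assign;
}
#endif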
5583 
5584 #if 0
5585  for (n = 0; n < numBlocks; n++) {
5586  WorkIndex wn;
5587  nodeQueue.remove(wn);
5588  float bw = calcBlockWork(n);
5589  blockAssign[n] = wn.index;
5590  nodeWork[wn.index] += bw;
5591  wn.work += bw;
5592  blockWork[n] = bw;
5593  nodeQueue.insert(wn);
5594  }
5595 #endif
5596 
5597  // assign grid cutoff objects to nodes (gcutAssign)
5598  // choose whichever of source or destination node has less work
5599  int gindex = 0; // index for grid cutoff array
5600  for (level = 0; level < nlevels; level++) { // for all levels
5601  msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
5602  int bni = b.ni();
5603  int bnj = b.nj();
5604  int bnk = b.nk();
5605  for (k = 0; k < bnk; k++) { // for all blocks
5606  for (j = 0; j < bnj; j++) {
5607  for (i = 0; i < bni; i++) {
5608  int isrc = blockFlatIndex(level, i, j, k);
5609  int nsrc = blockAssign[isrc]; // node block isrc is assigned
5610  int numSendAcross = b(i,j,k).sendAcross.len();
5611  ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
5612  for (n = 0; n < numSendAcross; n++) {
5613  msm::BlockSend& bs = b(i,j,k).sendAcross[n];
5614  msm::BlockIndex& bn = bs.nblock_wrap;
5615  int idest = blockFlatIndex(level, bn.n.i, bn.n.j, bn.n.k);
5616  int ndest = blockAssign[idest]; // node block idest is assigned
5617  gcutWork[gindex] = calcGcutWork(bs);
5618  if (nodeWork[nsrc] <= nodeWork[ndest]) {
5619  gcutAssign[gindex] = nsrc;
5620  nodeWork[nsrc] += gcutWork[gindex];
5621  }
5622  else {
5623  gcutAssign[gindex] = ndest;
5624  nodeWork[ndest] += gcutWork[gindex];
5625  }
5626  gindex++;
5627  } // end for numSendAcross
5628  }
5629  }
5630  } // end for all blocks
5631  } // end for all levels
5632 
5633  msm::Array< msm::PriorityQueue<WorkIndex> > peQueue(numNodes);
5634  for (n = 0; n < numNodes; n++) {
5635  peQueue[n].init(numPesPerNode);
5636  for (int poff = 0; poff < numPesPerNode; poff++) {
5637  peQueue[n].insert(WorkIndex(0, n*numPesPerNode + poff));
5638  }
5639  }
5640 
5641  for (n = 0; n < numBlocks; n++) {
5642  WorkIndex wn;
5643  int node = blockAssign[n];
5644  peQueue[node].remove(wn);
5645  blockAssign[n] = wn.index;
5646  wn.work += blockWork[n];
5647  peQueue[node].insert(wn);
5648 #ifdef MSM_NODE_MAPPING_STATS
5649  peWork[wn.index] += blockWork[n];
5650 #endif
5651  }
5652 
5653  for (n = 0; n < numGridCutoff; n++) {
5654  WorkIndex wn;
5655  int node = gcutAssign[n];
5656  peQueue[node].remove(wn);
5657  gcutAssign[n] = wn.index;
5658  wn.work += gcutWork[n];
5659  peQueue[node].insert(wn);
5660 #ifdef MSM_NODE_MAPPING_STATS
5661  peWork[wn.index] += gcutWork[n];
5662 #endif
5663  }
5664 
5665 #ifdef MSM_NODE_MAPPING_STATS
5666  if (CkMyPe() == 0) {
5667  printf("Mapping of MSM work (showing scaled estimated work units):\n");
5668  for (n = 0; n < numNodes; n++) {
5669  printf(" node %d work %8.3f\n", n, nodeWork[n]);
5670  for (int poff = 0; poff < numPesPerNode; poff++) {
5671  int p = n*numPesPerNode + poff;
5672  printf(" pe %d work %8.3f\n", p, peWork[p]);
5673  }
5674  }
5675  //CkExit();
5676  }
5677 #endif
5678 
5679 #if 0
5680  int numBlocks = 0; // find total number of blocks
5681  for (level = 0; level < nlevels; level++) {
5682  numBlocks += map.blockLevel[level].nn();
5683  }
5684 
5685  // final result is arrays for blocks and gcuts, each with pe number
5686  blockAssign.resize(numBlocks);
5687  gcutAssign.resize(numGridCutoff);
5688 
5689  nodecnt.resize(numNodes);
5690 
5691  // assign blocks to nodes
5692  // the following algorithm divides as evenly as possible the
5693  // blocks across the nodes
5694  int r = numBlocks % numNodes;
5695  int q = numBlocks / numNodes;
5696  int qp = q + 1;
5697  for (n = 0; n < numNodes - r; n++) {
5698  int moffset = n * q;
5699  for (int m = 0; m < q; m++) {
5700  blockAssign[moffset + m] = n;
5701  }
5702  nodecnt[n] = q;
5703  }
5704  for ( ; n < numNodes; n++) {
5705  int moffset = (numNodes - r)*q + (n - (numNodes - r))*qp;
5706  for (int m = 0; m < qp; m++) {
5707  blockAssign[moffset + m] = n;
5708  }
5709  nodecnt[n] = qp;
5710  }
5711 #if 0
5712  if (CkMyPe() == 0) {
5713  CkPrintf("%d objects to %d nodes\n", q, numNodes-r);
5714  if (r != 0) {
5715  CkPrintf("%d objects to %d nodes\n", qp, r);
5716  }
5717  CkPrintf("%d =? %d\n", (numNodes-r)*q + r*qp, numBlocks);
5718  }
5719 #endif
5720 
5721  // assign grid cutoff objects to nodes (gcutAssign)
5722  // choose whichever of source or destination node has less work
5723  int gindex = 0; // index for grid cutoff array
5724  for (level = 0; level < nlevels; level++) { // for all levels
5725  msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
5726  int bni = b.ni();
5727  int bnj = b.nj();
5728  int bnk = b.nk();
5729  for (k = 0; k < bnk; k++) { // for all blocks
5730  for (j = 0; j < bnj; j++) {
5731  for (i = 0; i < bni; i++) {
5732  int isrc = blockFlatIndex(level, i, j, k);
5733  int nsrc = blockAssign[isrc]; // node block isrc is assigned
5734  int numSendAcross = b(i,j,k).sendAcross.len();
5735  ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
5736  for (n = 0; n < numSendAcross; n++) {
5737  msm::BlockIndex &bn = b(i,j,k).sendAcross[n].nblock_wrap;
5738  int idest = blockFlatIndex(level, bn.n.i, bn.n.j, bn.n.k);
5739  int ndest = blockAssign[idest]; // node block idest is assigned
5740  // assign this grid cutoff work to least subscribed node
5741  if (nodecnt[nsrc] <= nodecnt[ndest]) {
5742  gcutAssign[gindex] = nsrc;
5743  nodecnt[nsrc]++;
5744  }
5745  else {
5746  gcutAssign[gindex] = ndest;
5747  nodecnt[ndest]++;
5748  }
5749  gindex++;
5750  } // end for numSendAcross
5751  }
5752  }
5753  } // end for all blocks
5754  } // end for all levels
5755 
5756  // now change the node assignments into PE assignments
5757  // use round robin assignment to PEs within each node
5758  int ppn = numPes / numNodes; // num PEs per node
5759  // reset nodecnt - this array will now store PE offset for that node
5760  for (n = 0; n < numNodes; n++) nodecnt[n] = 0;
5761  for (n = 0; n < numBlocks; n++) {
5762  int node = blockAssign[n];
5763  blockAssign[n] = node * ppn + nodecnt[node]; // PE number within node
5764  nodecnt[node]++; // increment to next PE
5765  if (nodecnt[node] >= ppn) nodecnt[node] = 0; // with wrap around
5766  }
5767  for (n = 0; n < numGridCutoff; n++) {
5768  int node = gcutAssign[n];
5769  gcutAssign[n] = node * ppn + nodecnt[node]; // PE number within node
5770  nodecnt[node]++; // increment to next PE
5771  if (nodecnt[node] >= ppn) nodecnt[node] = 0; // with wrap around
5772  }
5773 
5774  // print mapping
5775 #if 0
5776  if (CkMyPe() == 0) {
5777  for (n = 0; n < numBlocks; n++) {
5778  CkPrintf("block %d: node=%d pe=%d\n",
5779  n, blockAssign[n]/ppn, blockAssign[n]);
5780  }
5781 #if 0
5782  for (n = 0; n < numGridCutoff; n++) {
5783  CkPrintf("grid cutoff %d: node=%d pe=%d\n",
5784  n, gcutAssign[n]/ppn, gcutAssign[n]);
5785  }
5786 #endif
5787  }
5788 #endif
5789 
5790 #endif // 0
5791 
5792  } // end node aware initial assignment of chares
5793 #endif // MSM_NODE_MAPPING
5794 
5795 } // ComputeMsmMgr::initialize2()
5796 
5797 
5798 void ComputeMsmMgr::initialize_create() {
5799  int i, j, k, n, level;
5800 
5801  if (CkMyPe() == 0) {
5802 
5803  // on PE 0, create 3D chare array of MsmBlock for each level;
5804  // broadcast this array of proxies to the rest of the group
5805  if (approx == C1HERMITE) {
5806  msmC1HermiteBlock.resize(nlevels);
5807  }
5808  else {
5809  msmBlock.resize(nlevels);
5810  }
5811  for (level = 0; level < nlevels; level++) {
5812  int ni = map.blockLevel[level].ni();
5813  int nj = map.blockLevel[level].nj();
5814  int nk = map.blockLevel[level].nk();
5815 #ifdef MSM_NODE_MAPPING
5816  CkPrintf("Using MsmBlockMap for level %d\n", level);
5817  CProxy_MsmBlockMap blockMap = CProxy_MsmBlockMap::ckNew(level);
5818  CkArrayOptions opts(ni, nj, nk);
5819  opts.setMap(blockMap);
5820  if (approx == C1HERMITE) {
5821  msmC1HermiteBlock[level] =
5822  CProxy_MsmC1HermiteBlock::ckNew(level, opts);
5823  }
5824  else {
5825  msmBlock[level] = CProxy_MsmBlock::ckNew(level, opts);
5826  }
5827 #else
5828  if (approx == C1HERMITE) {
5829  msmC1HermiteBlock[level] =
5830  CProxy_MsmC1HermiteBlock::ckNew(level, ni, nj, nk);
5831  }
5832  else {
5833  msmBlock[level] = CProxy_MsmBlock::ckNew(level, ni, nj, nk);
5834  }
5835 #endif
5836 #ifdef DEBUG_MSM_VERBOSE
5837  printf("Create MsmBlock[%d] 3D chare array ( %d x %d x %d )\n",
5838  level, ni, nj, nk);
5839 #endif
5840  char msg[128];
5841  int nijk = ni * nj * nk;
5842  sprintf(msg, "MSM grid level %d decomposed into %d block%s"
5843  " ( %d x %d x %d )\n",
5844  level, nijk, (nijk==1 ? "" : "s"), ni, nj, nk);
5845  iout << iINFO << msg;
5846  }
5847  if (approx == C1HERMITE) {
5848  MsmC1HermiteBlockProxyMsg *msg = new MsmC1HermiteBlockProxyMsg;
5849  msg->put(msmC1HermiteBlock);
5850  msmProxy.recvMsmC1HermiteBlockProxy(msg); // broadcast
5851  }
5852  else {
5853  MsmBlockProxyMsg *msg = new MsmBlockProxyMsg;
5854  msg->put(msmBlock);
5855  msmProxy.recvMsmBlockProxy(msg); // broadcast
5856  }
5857 
5858 #ifdef MSM_GRID_CUTOFF_DECOMP
5859  // on PE 0, create 1D chare array of MsmGridCutoff
5860  // broadcast this array proxy to the rest of the group
5861 #ifdef MSM_NODE_MAPPING
5862  CkPrintf("Using MsmGridCutoffMap\n");
5863  CProxy_MsmGridCutoffMap gcutMap = CProxy_MsmGridCutoffMap::ckNew();
5864  CkArrayOptions optsgcut(numGridCutoff);
5865  optsgcut.setMap(gcutMap);
5866  if (approx == C1HERMITE) {
5867  msmC1HermiteGridCutoff = CProxy_MsmC1HermiteGridCutoff::ckNew(optsgcut);
5868  }
5869  else {
5870  msmGridCutoff = CProxy_MsmGridCutoff::ckNew(optsgcut);
5871  }
5872 #else
5873  if (approx == C1HERMITE) {
5874  msmC1HermiteGridCutoff =
5875  CProxy_MsmC1HermiteGridCutoff::ckNew(numGridCutoff);
5876  }
5877  else {
5878  msmGridCutoff = CProxy_MsmGridCutoff::ckNew(numGridCutoff);
5879  }
5880 #endif
5881  if (approx == C1HERMITE) {
5882  MsmC1HermiteGridCutoffProxyMsg *gcmsg =
5883  new MsmC1HermiteGridCutoffProxyMsg;
5884  gcmsg->put(&msmC1HermiteGridCutoff);
5885  msmProxy.recvMsmC1HermiteGridCutoffProxy(gcmsg);
5886  }
5887  else {
5888  MsmGridCutoffProxyMsg *gcmsg = new MsmGridCutoffProxyMsg;
5889  gcmsg->put(&msmGridCutoff);
5890  msmProxy.recvMsmGridCutoffProxy(gcmsg);
5891  }
5892 
5893  // XXX PE 0 initializes each MsmGridCutoff
5894  // one-to-many
5895  // for M length chare array, better for each PE to initialize M/P?
5896  for (level = 0; level < nlevels; level++) { // for all levels
5897  msm::Grid<msm::BlockDiagram>& b = map.blockLevel[level];
5898  int bni = b.ni();
5899  int bnj = b.nj();
5900  int bnk = b.nk();
5901  for (k = 0; k < bnk; k++) { // for all blocks
5902  for (j = 0; j < bnj; j++) {
5903  for (i = 0; i < bni; i++) {
5904  // source for charges
5905  msm::BlockIndex bi = msm::BlockIndex(level, msm::Ivec(i,j,k));
5906  int numSendAcross = b(i,j,k).sendAcross.len();
5907  ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
5908  // for this source, loop over destinations for potentials
5909  for (n = 0; n < numSendAcross; n++) {
5910  msm::BlockSend &bs = b(i,j,k).sendAcross[n];
5911  int index = b(i,j,k).indexGridCutoff[n];
5912  MsmGridCutoffInitMsg *bsmsg = new MsmGridCutoffInitMsg(bi, bs);
5913  if (approx == C1HERMITE) {
5914  msmC1HermiteGridCutoff[index].setup(bsmsg);
5915  }
5916  else {
5917  msmGridCutoff[index].setup(bsmsg);
5918  }
5919  } // traverse sendAcross, indexGridCutoff arrays
5920 
5921  }
5922  }
5923  } // end for all blocks
5924 
5925  } // end for all levels
5926 
5927  iout << iINFO << "MSM grid cutoff calculation decomposed into "
5928  << numGridCutoff << " work objects\n";
5929 #endif
5930  iout << endi;
5931  }
5932 
5933 #ifdef DEBUG_MSM_VERBOSE
5934  printf("end of initialization\n");
5935 #endif
5936 } // ComputeMsmMgr::initialize_create()
5937 
5938 
5939 void ComputeMsmMgr::recvMsmBlockProxy(MsmBlockProxyMsg *msg)
5940 {
5941  msg->get(msmBlock);
5942  delete(msg);
5943 }
5944 
5945 void ComputeMsmMgr::recvMsmGridCutoffProxy(MsmGridCutoffProxyMsg *msg)
5946 {
5947  msg->get(&msmGridCutoff);
5948  delete(msg);
5949 }
5950 
5951 void ComputeMsmMgr::recvMsmC1HermiteBlockProxy(
5952  MsmC1HermiteBlockProxyMsg *msg
5953  )
5954 {
5955  msg->get(msmC1HermiteBlock);
5956  delete(msg);
5957 }
5958 
5959 void ComputeMsmMgr::recvMsmC1HermiteGridCutoffProxy(
5960  MsmC1HermiteGridCutoffProxyMsg *msg
5961  )
5962 {
5963  msg->get(&msmC1HermiteGridCutoff);
5964  delete(msg);
5965 }
5966 
5967 void ComputeMsmMgr::update(CkQdMsg *msg)
5968 {
5969 #ifdef DEBUG_MSM_VERBOSE
5970  printf("ComputeMsmMgr: update() PE %d\n", CkMyPe());
5971 #endif
5972  delete msg;
5973 
5974  // have to setup sections AFTER initialization is finished
5975  if (CkMyPe() == 0) {
5976  for (int level = 0; level < nlevels; level++) {
5977  if (approx == C1HERMITE) {
5978  msmC1HermiteBlock[level].setupSections();
5979  }
5980  else {
5981  msmBlock[level].setupSections();
5982  }
5983  }
5984  }
5985 
5986  // XXX how do update for constant pressure simulation?
5987 }
5988 
5989 
5990 void ComputeMsmMgr::compute(msm::Array<int>& patchIDList)
5991 {
5992 #ifdef DEBUG_MSM_VERBOSE
5993  printf("ComputeMsmMgr: compute() PE=%d\n", CkMyPe());
5994 #endif
5995 
5996  int n;
5997  for (n = 0; n < patchIDList.len(); n++) {
5998  int patchID = patchIDList[n];
5999  if (patchPtr[patchID] == NULL) {
6000  char msg[100];
6001  snprintf(msg, sizeof(msg),
6002  "Expected MSM data for patch %d does not exist on PE %d",
6003  patchID, CkMyPe());
6004  NAMD_die(msg);
6005  }
6006  if (approx == C1HERMITE) {
6007  patchPtr[patchID]->anterpolationC1Hermite();
6008  }
6009  else {
6010  patchPtr[patchID]->anterpolation();
6011  }
6012  // all else should follow from here
6013  }
6014  return;
6015 }
6016 
6017 
6018 void ComputeMsmMgr::addPotential(GridMsg *gm)
6019 {
6020  int pid; // receive patch ID
6021  int pseq;
6022  if (approx == C1HERMITE) {
6023  gm->get(subgrid_c1hermite, pid, pseq);
6024  }
6025  else {
6026  gm->get(subgrid, pid, pseq);
6027  }
6028  delete gm;
6029  if (patchPtr[pid] == NULL) {
6030  char msg[100];
6031  snprintf(msg, sizeof(msg), "Expecting patch %d to exist on PE %d",
6032  pid, CkMyPe());
6033  NAMD_die(msg);
6034  }
6035  if (approx == C1HERMITE) {
6036  patchPtr[pid]->addPotentialC1Hermite(subgrid_c1hermite);
6037  }
6038  else {
6039  patchPtr[pid]->addPotential(subgrid);
6040  }
6041 }
6042 
6043 
6044 void ComputeMsmMgr::doneCompute()
6045 {
6046  msmCompute->saveResults();
6047 }
6048 
6049 
6051 //
6052 // ComputeMsm
6053 // MSM compute objects, starts and finishes calculation;
6054 // there is up to one compute object per PE
6055 //
6056 
6057 ComputeMsm::ComputeMsm(ComputeID c) : ComputeHomePatches(c)
6058 {
6059  CProxy_ComputeMsmMgr::ckLocalBranch(
6060  CkpvAccess(BOCclass_group).computeMsmMgr)->setCompute(this);
6061  SimParameters *simParams = Node::Object()->simParameters;
6062  qscaling = sqrtf(COULOMB / simParams->dielectric);
6063  reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
6064 #ifdef DEBUG_MSM_VERBOSE
6065  printf("ComputeMsm: (constructor) PE=%d\n", CkMyPe());
6066 #endif
6067 }
6068 
6069 ComputeMsm::~ComputeMsm()
6070 {
6071  // free memory
6072 #ifdef DEBUG_MSM_VERBOSE
6073  printf("ComputeMsm: (destructor) PE=%d\n", CkMyPe());
6074 #endif
6075 }
6076 
6077 void ComputeMsm::doWork()
6078 {
6079  // for each patch do stuff
6080 #ifdef DEBUG_MSM_VERBOSE
6081  printf("ComputeMsm: doWork() PE=%d\n", CkMyPe());
6082 #endif
6083 
6084 #if 0
6085 #ifdef MSM_TIMING
6086  myMgr->initTiming();
6087 #endif
6088 #ifdef MSM_PROFILING
6089  myMgr->initProfiling();
6090 #endif
6091 #endif
6092 
6093  // patchList is inherited from ComputeHomePatches
6094  ResizeArrayIter<PatchElem> ap(patchList);
6095  numLocalPatches = patchList.size();
6096  cntLocalPatches = 0;
6097  ASSERT(cntLocalPatches < numLocalPatches);
6098 
6099 #ifdef DEBUG_MSM_VERBOSE
6100  printf("patchList size = %d\n", patchList.size() );
6101 #endif
6102 
6103  // Skip computations if nothing to do.
6104  if ( ! patchList[0].p->flags.doFullElectrostatics ) {
6105  for (ap = ap.begin(); ap != ap.end(); ap++) {
6106  CompAtom *x = (*ap).positionBox->open();
6107  Results *r = (*ap).forceBox->open();
6108  (*ap).positionBox->close(&x);
6109  (*ap).forceBox->close(&r);
6110  }
6111  reduction->submit();
6112  return;
6113  }
6114  msm::Map& map = myMgr->mapData();
6115  // This is the patchPtr array for MSM; any local patch will be set up
6116  // with a non-NULL pointer to its supporting data structure.
6117  msm::PatchPtrArray& patchPtr = myMgr->patchPtrArray();
6118  // also store just a list of IDs for the local patches
6119  msm::Array<int> patchIDList(numLocalPatches);
6120  patchIDList.resize(0); // to use append on pre-allocated array buffer
6121  int cnt=0, n;
6122  for (ap = ap.begin(); ap != ap.end(); ap++) {
6123  CompAtom *x = (*ap).positionBox->open();
6124  CompAtomExt *xExt = (*ap).p->getCompAtomExtInfo();
6125  if ( patchList[0].p->flags.doMolly ) {
6126  (*ap).positionBox->close(&x);
6127  x = (*ap).avgPositionBox->open();
6128  }
6129  int numAtoms = (*ap).p->getNumAtoms();
6130  int patchID = (*ap).patchID;
6131  patchIDList.append(patchID);
6132  if (patchPtr[patchID] == NULL) {
6133  // create PatchData if it doesn't exist for this patchID
6134  patchPtr[patchID] = new msm::PatchData(myMgr, patchID);
6135 #ifdef DEBUG_MSM_VERBOSE
6136  printf("Creating new PatchData: patchID=%d PE=%d\n",
6137  patchID, CkMyPe());
6138 #endif
6139  }
6140  msm::PatchData& patch = *(patchPtr[patchID]);
6141  patch.init(numAtoms);
6142  msm::AtomCoordArray& coord = patch.coordArray();
6143  ASSERT(coord.len() == numAtoms);
6144  for (n = 0; n < numAtoms; n++) {
6145  coord[n].position = x[n].position;
6146  coord[n].charge = qscaling * x[n].charge;
6147  coord[n].id = xExt[n].id;
6148  }
6149  if ( patchList[0].p->flags.doMolly ) {
6150  (*ap).avgPositionBox->close(&x);
6151  }
6152  else {
6153  (*ap).positionBox->close(&x);
6154  }
6155  patch.sequence = sequence();
6156  }
6157 
6158  myMgr->compute(patchIDList);
6159 }
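// Note on qscaling (set in the constructor above): folding
// sqrt(COULOMB/dielectric) into every charge as it is copied here means
// each later product q_i*q_j already carries the Coulomb constant and
// the dielectric, so the grid sums and the final energy/force
// accumulation in saveResults() need no additional prefactor.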
6160 
6161 void ComputeMsm::saveResults()
6162 {
6163  if (++cntLocalPatches != numLocalPatches) return;
6164 
6165  // NAMD patches
6166  ResizeArrayIter<PatchElem> ap(patchList);
6167 #ifdef DEBUG_MSM
6168  for (ap = ap.begin(); ap != ap.end(); ap++) {
6169  int patchID = (*ap).patchID;
6170  ASSERT(myMgr->patchPtrArray()[patchID]->cntRecvs ==
6171  myMgr->mapData().patchList[patchID].numRecvs);
6172  }
6173 #endif
6174 
6175  // get results from ComputeMsmMgr
6176  msm::PatchPtrArray& patchPtr = myMgr->patchPtrArray();
6177 
6178 #ifdef DEBUG_MSM_VERBOSE
6179  printf("ComputeMsm: saveResults() PE=%d\n", CkMyPe());
6180 #endif
6181  // store force updates
6182  // submit reductions
6183 
6184  // add in forces
6185  int cnt=0, n;
6186  for (ap = ap.begin(); ap != ap.end(); ap++) {
6187  Results *r = (*ap).forceBox->open();
6188  Force *f = r->f[Results::slow];
6189  int numAtoms = (*ap).p->getNumAtoms();
6190  int patchID = (*ap).patchID;
6191  if (patchPtr[patchID] == NULL) {
6192  char msg[100];
6193  snprintf(msg, sizeof(msg), "Expecting patch %d to exist on PE %d",
6194  patchID, CkMyPe());
6195  NAMD_die(msg);
6196  }
6197  msm::PatchData& patch = *(patchPtr[patchID]);
6198  ASSERT(numAtoms == patch.force.len() );
6199  for (n = 0; n < numAtoms; n++) {
6200  f[n] += patch.force[n];
6201  }
6202  (*ap).forceBox->close(&r);
6203 
6204  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += patch.energy;
6205 // reduction->item(REDUCTION_VIRIAL_SLOW_XX) += patch.virial[0][0];
6206 // reduction->item(REDUCTION_VIRIAL_SLOW_XY) += patch.virial[0][1];
6207 // reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += patch.virial[0][2];
6208 // reduction->item(REDUCTION_VIRIAL_SLOW_YX) += patch.virial[1][0];
6209 // reduction->item(REDUCTION_VIRIAL_SLOW_YY) += patch.virial[1][1];
6210 // reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += patch.virial[1][2];
6211 // reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += patch.virial[2][0];
6212 // reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += patch.virial[2][1];
6213 // reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += patch.virial[2][2];
6214  Float *virial = myMgr->virial;
6215  reduction->item(REDUCTION_VIRIAL_SLOW_XX) += virial[ComputeMsmMgr::VXX];
6216  reduction->item(REDUCTION_VIRIAL_SLOW_XY) += virial[ComputeMsmMgr::VXY];
6217  reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += virial[ComputeMsmMgr::VXZ];
6218  reduction->item(REDUCTION_VIRIAL_SLOW_YX) += virial[ComputeMsmMgr::VXY];
6219  reduction->item(REDUCTION_VIRIAL_SLOW_YY) += virial[ComputeMsmMgr::VYY];
6220  reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += virial[ComputeMsmMgr::VYZ];
6221  reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += virial[ComputeMsmMgr::VXZ];
6222  reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += virial[ComputeMsmMgr::VYZ];
6223  reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += virial[ComputeMsmMgr::VZZ];
6224  }
6225  reduction->submit();
6226 }
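// Note: the MSM virial tensor is symmetric, so the manager stores only
// the six independent components (VXX, VXY, VXZ, VYY, VYZ, VZZ) and the
// reduction above mirrors VXY, VXZ, VYZ into the transposed slots.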
6227 
6228 // method definitions for PatchData
6229 namespace msm {
6230 
6231  PatchData::PatchData(ComputeMsmMgr *pmgr, int pid) {
6232  mgr = pmgr;
6233  map = &(mgr->mapData());
6234  patchID = pid;
6235  //PatchMap *pm = PatchMap::Object();
6236  pd = &(map->patchList[pid]);
6237  if (mgr->approx == ComputeMsmMgr::C1HERMITE) {
6238  qh_c1hermite.init(pd->nrange);
6239  eh_c1hermite.init(pd->nrange);
6240  subgrid_c1hermite.resize(map->bsx[0] * map->bsy[0] * map->bsz[0]);
6241  }
6242  else {
6243  qh.init(pd->nrange);
6244  eh.init(pd->nrange);
6245  subgrid.resize(map->bsx[0] * map->bsy[0] * map->bsz[0]);
6246  }
6247 #ifdef MSM_TIMING
6248  mgr->addTiming();
6249 #endif
6250  }
6251 
6252  void PatchData::init(int natoms) {
6253  coord.resize(natoms);
6254  force.resize(natoms);
6255  cntRecvs = 0;
6256  energy = 0;
6257  //memset(virial, 0, 3*3*sizeof(BigReal));
6258  for (int i = 0; i < natoms; i++) force[i] = 0;
6259  if (mgr->approx == ComputeMsmMgr::C1HERMITE) {
6260  qh_c1hermite.reset(0);
6261  eh_c1hermite.reset(0);
6262  }
6263  else {
6264  qh.reset(0);
6265  eh.reset(0);
6266  }
6267  }
6268 
6269  void PatchData::anterpolation() {
6270 #ifdef DEBUG_MSM_GRID
6271  printf("patchID %d: anterpolation\n", patchID);
6272 #endif
6273 
6274 #ifdef MSM_TIMING
6275  double startTime, stopTime;
6276  startTime = CkWallTimer();
6277 #endif
6278 #ifndef MSM_COMM_ONLY
6279  Float xphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6280  Float yphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6281  Float zphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6282 
6283  const Double rs_edge = Double( mgr->s_edge );
6284  const int s_size = ComputeMsmMgr::PolyDegree[mgr->approx] + 1;
6285 
6286  const int ia = qh.ia();
6287  const int ib = qh.ib();
6288  const int ja = qh.ja();
6289  const int jb = qh.jb();
6290  const int ka = qh.ka();
6291  const int kb = qh.kb();
6292  const int ni = qh.ni();
6293  const int nj = qh.nj();
6294  Float *qhbuffer = qh.data().buffer();
6295 
6296  // loop over atoms
6297  for (int n = 0; n < coord.len(); n++) {
6298  Float q = coord[n].charge;
6299  if (0==q) continue;
6300 
6301  ScaledPosition s = mgr->lattice.scale(coord[n].position);
6302 
6303  BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
6304  BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
6305  BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
6306 
6307  BigReal xlo = floor(sx_hx) - rs_edge;
6308  BigReal ylo = floor(sy_hy) - rs_edge;
6309  BigReal zlo = floor(sz_hz) - rs_edge;
6310 
6311  // calculate Phi stencils along each dimension
6312  Float xdelta = Float(sx_hx - xlo);
6313  mgr->stencil_1d(xphi, xdelta);
6314  Float ydelta = Float(sy_hy - ylo);
6315  mgr->stencil_1d(yphi, ydelta);
6316  Float zdelta = Float(sz_hz - zlo);
6317  mgr->stencil_1d(zphi, zdelta);
6318 
6319  int ilo = int(xlo);
6320  int jlo = int(ylo);
6321  int klo = int(zlo);
6322 
6323  // test to see if stencil is within edges of grid
6324  int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
6325  ja <= jlo && (jlo+(s_size-1)) <= jb &&
6326  ka <= klo && (klo+(s_size-1)) <= kb );
6327 
6328  if ( ! iswithin ) {
6329 #if 0
6330  printf("PE %d: atom %d: pos= %g %g %g patchID=%d\n",
6331  CkMyPe(), coord[n].id,
6332  coord[n].position.x, coord[n].position.y, coord[n].position.z,
6333  patchID);
6334  printf("PE %d: atom subgrid [%d..%d] x [%d..%d] x [%d..%d]\n",
6335  CkMyPe(),
6336  ilo, ilo+s_size-1, jlo, jlo+s_size-1, klo, klo+s_size-1);
6337  printf("PE %d: patch grid [%d..%d] x [%d..%d] x [%d..%d]\n",
6338  CkMyPe(),
6339  ia, ib, ja, jb, ka, kb);
6340 #endif
6341  char msg[100];
6342  snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
6343  coord[n].id);
6344  NAMD_die(msg);
6345  }
6346 
6347  // determine charge on cube of grid points around atom
6348  for (int k = 0; k < s_size; k++) {
6349  int koff = ((k+klo) - ka) * nj;
6350  Float ck = zphi[k] * q;
6351  for (int j = 0; j < s_size; j++) {
6352  int jkoff = (koff + (j+jlo) - ja) * ni;
6353  Float cjk = yphi[j] * ck;
6354  for (int i = 0; i < s_size; i++) {
6355  int ijkoff = jkoff + (i+ilo) - ia;
6356  qhbuffer[ijkoff] += xphi[i] * cjk;
6357  }
6358  }
6359  }
6360 
6361  } // end loop over atoms
6362 #endif // !MSM_COMM_ONLY
6363 #ifdef MSM_TIMING
6364  stopTime = CkWallTimer();
6365  mgr->msmTiming[MsmTimer::ANTERP] += stopTime - startTime;
6366 #endif
6367 
6368  sendCharge();
6369  }
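// Note: anterpolation exploits the tensor-product form of the nodal
// basis: grid point (i,j,k) receives weight Phi_i(x)*Phi_j(y)*Phi_k(z),
// so spreading one charge costs three 1-D stencil evaluations of
// (polydeg+1) points each plus (polydeg+1)^3 multiply-adds; the partial
// products ck = zphi[k]*q and cjk = yphi[j]*ck are hoisted out of the
// inner loops above for exactly this reason.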
6370 
6371  void PatchData::sendCharge() {
6372 #ifdef MSM_TIMING
6373  double startTime, stopTime;
6374 #endif
6375  int priority = 1;
6376  // buffer portions of grid to send to Blocks on level 0
6377  // allocate the largest buffer space we'll need
6378  //Grid<BigReal> subgrid;
6379  //subgrid.resize(map->bsx[0] * map->bsy[0] * map->bsz[0]);
6380  for (int n = 0; n < pd->send.len(); n++) {
6381 #ifdef MSM_TIMING
6382  startTime = CkWallTimer();
6383 #endif
6384  // initialize the proper subgrid indexing range
6385  subgrid.init( pd->send[n].nrange );
6386  // extract the values from the larger grid into the subgrid
6387  qh.extract(subgrid);
6388  // translate the subgrid indexing range to match the MSM block
6389  subgrid.updateLower( pd->send[n].nrange_wrap.lower() );
6390  // add the subgrid charges into the block
6391  BlockIndex& bindex = pd->send[n].nblock_wrap;
6392  // place subgrid into message
6393  int msgsz = subgrid.data().len() * sizeof(Float);
6394  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
6395  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
6396  gm->put(subgrid, bindex.level, sequence);
6397 #ifdef MSM_TIMING
6398  stopTime = CkWallTimer();
6399  mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
6400 #endif
6401  mgr->msmBlock[bindex.level](
6402  bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
6403  }
6404  }
6405 
6406  void PatchData::addPotential(const Grid<Float>& epart) {
6407 #ifdef MSM_TIMING
6408  double startTime, stopTime;
6409  startTime = CkWallTimer();
6410 #endif
6411  eh += epart;
6412 #ifdef MSM_TIMING
6413  stopTime = CkWallTimer();
6414  mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
6415 #endif
6416  if (++cntRecvs == pd->numRecvs) {
6417  interpolation();
6418  }
6419  }
6420 
6421  void PatchData::interpolation() {
6422 #ifdef DEBUG_MSM_GRID
6423  printf("patchID %d: interpolation\n", patchID);
6424 #endif
6425 
6426 #ifdef MSM_TIMING
6427  double startTime, stopTime;
6428  startTime = CkWallTimer();
6429 #endif
6430 #ifndef MSM_COMM_ONLY
6431  BigReal energy_self = 0;
6432 
6433  Float xphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6434  Float dxphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6435  Float yphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6436  Float dyphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6437  Float zphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6438  Float dzphi[ComputeMsmMgr::MAX_POLY_DEGREE+1];
6439 
6440  const Double rs_edge = Double( mgr->s_edge );
6441  const int s_size = ComputeMsmMgr::PolyDegree[mgr->approx] + 1;
6442 
6443  const Float hx_1 = Float(mgr->hxlen_1); // real space inverse grid spacing
6444  const Float hy_1 = Float(mgr->hylen_1);
6445  const Float hz_1 = Float(mgr->hzlen_1);
6446 
6447  const int ia = eh.ia();
6448  const int ib = eh.ib();
6449  const int ja = eh.ja();
6450  const int jb = eh.jb();
6451  const int ka = eh.ka();
6452  const int kb = eh.kb();
6453  const int ni = eh.ni();
6454  const int nj = eh.nj();
6455  Float *ehbuffer = eh.data().buffer();
6456 
6457  // loop over atoms
6458  for (int n = 0; n < coord.len(); n++) {
6459  Float q = coord[n].charge;
6460  if (0==q) continue;
6461 
6462  ScaledPosition s = mgr->lattice.scale(coord[n].position);
6463 
6464  BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
6465  BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
6466  BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
6467 
6468  BigReal xlo = floor(sx_hx) - rs_edge;
6469  BigReal ylo = floor(sy_hy) - rs_edge;
6470  BigReal zlo = floor(sz_hz) - rs_edge;
6471 
6472  // calculate Phi stencils along each dimension
6473  Float xdelta = Float(sx_hx - xlo);
6474  mgr->d_stencil_1d(dxphi, xphi, xdelta, hx_1);
6475  Float ydelta = Float(sy_hy - ylo);
6476  mgr->d_stencil_1d(dyphi, yphi, ydelta, hy_1);
6477  Float zdelta = Float(sz_hz - zlo);
6478  mgr->d_stencil_1d(dzphi, zphi, zdelta, hz_1);
6479 
6480  int ilo = int(xlo);
6481  int jlo = int(ylo);
6482  int klo = int(zlo);
6483 
6484 #if 0
6485  // XXX don't need to test twice!
6486 
6487  // test to see if stencil is within edges of grid
6488  int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
6489  ja <= jlo && (jlo+(s_size-1)) <= jb &&
6490  ka <= klo && (klo+(s_size-1)) <= kb );
6491 
6492  if ( ! iswithin ) {
6493  char msg[100];
6494  snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
6495  coord[n].id);
6496  NAMD_die(msg);
6497  }
6498 #endif
6499 
6500  // determine force on atom from surrounding potential grid points
6501  //Force f = 0;
6502  //BigReal e = 0;
6503  Float fx=0, fy=0, fz=0, e=0;
6504  for (int k = 0; k < s_size; k++) {
6505  int koff = ((k+klo) - ka) * nj;
6506  for (int j = 0; j < s_size; j++) {
6507  int jkoff = (koff + (j+jlo) - ja) * ni;
6508  Float cx = yphi[j] * zphi[k];
6509  Float cy = dyphi[j] * zphi[k];
6510  Float cz = yphi[j] * dzphi[k];
6511  for (int i = 0; i < s_size; i++) {
6512  int ijkoff = jkoff + (i+ilo) - ia;
6513  Float ec = ehbuffer[ijkoff];
6514  fx += ec * dxphi[i] * cx;
6515  fy += ec * xphi[i] * cy;
6516  fz += ec * xphi[i] * cz;
6517  e += ec * xphi[i] * cx;
6518  }
6519  }
6520  }
6521 
6522 #if 0
6523  force[n].x -= q * (mgr->srx_x * fx + mgr->srx_y * fy + mgr->srx_z * fz);
6524  force[n].y -= q * (mgr->sry_x * fx + mgr->sry_y * fy + mgr->sry_z * fz);
6525  force[n].z -= q * (mgr->srz_x * fx + mgr->srz_y * fy + mgr->srz_z * fz);
6526 #endif
6527  force[n].x -= q * fx;
6528  force[n].y -= q * fy;
6529  force[n].z -= q * fz;
6530  energy += q * e;
6531  energy_self += q * q;
6532 
6533  } // end loop over atoms
6534 
6535  energy_self *= mgr->gzero;
6536  energy -= energy_self;
6537  energy *= 0.5;
6538 #endif // !MSM_COMM_ONLY
6539 #ifdef MSM_TIMING
6540  stopTime = CkWallTimer();
6541  mgr->msmTiming[MsmTimer::INTERP] += stopTime - startTime;
6542  mgr->doneTiming();
6543 #endif
6544  mgr->doneCompute();
6545  }
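// Note: per patch, the accumulation above computes
//   energy = 0.5 * ( sum_i q_i e_h(r_i) - gzero * sum_i q_i^2 )
// where e_h is the interpolated long-range potential and the gzero term
// removes each (scaled) charge's interaction with its own smoothing;
// forces come from the derivative stencils, f_i = -q_i grad e_h(r_i).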
6546 
6547  void PatchData::anterpolationC1Hermite() {
6548 #ifdef DEBUG_MSM_GRID
6549  printf("patchID %d: anterpolationC1Hermite\n", patchID);
6550 #endif
6551 
6552 #ifdef MSM_TIMING
6553  double startTime, stopTime;
6554  startTime = CkWallTimer();
6555 #endif
6556 #ifndef MSM_COMM_ONLY
6557  Float xphi[2], xpsi[2];
6558  Float yphi[2], ypsi[2];
6559  Float zphi[2], zpsi[2];
6560 
6561  const Float hx = Float(mgr->hxlen); // real space grid spacing
6562  const Float hy = Float(mgr->hylen);
6563  const Float hz = Float(mgr->hzlen);
6564 
6565  const int ia = qh_c1hermite.ia();
6566  const int ib = qh_c1hermite.ib();
6567  const int ja = qh_c1hermite.ja();
6568  const int jb = qh_c1hermite.jb();
6569  const int ka = qh_c1hermite.ka();
6570  const int kb = qh_c1hermite.kb();
6571  const int ni = qh_c1hermite.ni();
6572  const int nj = qh_c1hermite.nj();
6573  C1Vector *qhbuffer = qh_c1hermite.data().buffer();
6574 
6575  // loop over atoms
6576  for (int n = 0; n < coord.len(); n++) {
6577  Float q = coord[n].charge;
6578  if (0==q) continue;
6579 
6580  ScaledPosition s = mgr->lattice.scale(coord[n].position);
6581 
6582  BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
6583  BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
6584  BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
6585 
6586  BigReal xlo = floor(sx_hx);
6587  BigReal ylo = floor(sy_hy);
6588  BigReal zlo = floor(sz_hz);
6589 
6590  // calculate Phi stencils along each dimension
6591  Float xdelta = Float(sx_hx - xlo);
6592  mgr->stencil_1d_c1hermite(xphi, xpsi, xdelta, hx);
6593  Float ydelta = Float(sy_hy - ylo);
6594  mgr->stencil_1d_c1hermite(yphi, ypsi, ydelta, hy);
6595  Float zdelta = Float(sz_hz - zlo);
6596  mgr->stencil_1d_c1hermite(zphi, zpsi, zdelta, hz);
6597 
6598  int ilo = int(xlo);
6599  int jlo = int(ylo);
6600  int klo = int(zlo);
6601 
6602  // test to see if stencil is within edges of grid
6603  int iswithin = ( ia <= ilo && ilo < ib &&
6604  ja <= jlo && jlo < jb &&
6605  ka <= klo && klo < kb );
6606 
6607  if ( ! iswithin ) {
6608  char msg[100];
6609  snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
6610  coord[n].id);
6611  NAMD_die(msg);
6612  }
6613 
6614  // determine charge on cube of grid points around atom
6615  for (int k = 0; k < 2; k++) {
6616  int koff = ((k+klo) - ka) * nj;
6617  Float c_zphi = zphi[k] * q;
6618  Float c_zpsi = zpsi[k] * q;
6619  for (int j = 0; j < 2; j++) {
6620  int jkoff = (koff + (j+jlo) - ja) * ni;
6621  Float c_yphi_zphi = yphi[j] * c_zphi;
6622  Float c_ypsi_zphi = ypsi[j] * c_zphi;
6623  Float c_yphi_zpsi = yphi[j] * c_zpsi;
6624  Float c_ypsi_zpsi = ypsi[j] * c_zpsi;
6625  for (int i = 0; i < 2; i++) {
6626  int ijkoff = jkoff + (i+ilo) - ia;
6627  qhbuffer[ijkoff].velem[D000] += xphi[i] * c_yphi_zphi;
6628  qhbuffer[ijkoff].velem[D100] += xpsi[i] * c_yphi_zphi;
6629  qhbuffer[ijkoff].velem[D010] += xphi[i] * c_ypsi_zphi;
6630  qhbuffer[ijkoff].velem[D001] += xphi[i] * c_yphi_zpsi;
6631  qhbuffer[ijkoff].velem[D110] += xpsi[i] * c_ypsi_zphi;
6632  qhbuffer[ijkoff].velem[D101] += xpsi[i] * c_yphi_zpsi;
6633  qhbuffer[ijkoff].velem[D011] += xphi[i] * c_ypsi_zpsi;
6634  qhbuffer[ijkoff].velem[D111] += xpsi[i] * c_ypsi_zpsi;
6635  }
6636  }
6637  }
6638 
6639  } // end loop over atoms
6640 
6641 #endif // !MSM_COMM_ONLY
6642 #ifdef MSM_TIMING
6643  stopTime = CkWallTimer();
6644  mgr->msmTiming[MsmTimer::ANTERP] += stopTime - startTime;
6645 #endif
6646 
6647  sendChargeC1Hermite();
6648  }
6649 
6650  void PatchData::sendChargeC1Hermite() {
6651 #ifdef MSM_TIMING
6652  double startTime, stopTime;
6653 #endif
6654  int priority = 1;
6655  // buffer portions of grid to send to Blocks on level 0
6656  for (int n = 0; n < pd->send.len(); n++) {
6657 #ifdef MSM_TIMING
6658  startTime = CkWallTimer();
6659 #endif
6660  // initialize the proper subgrid indexing range
6661  subgrid_c1hermite.init( pd->send[n].nrange );
6662  // extract the values from the larger grid into the subgrid
6663  qh_c1hermite.extract(subgrid_c1hermite);
6664  // translate the subgrid indexing range to match the MSM block
6665  subgrid_c1hermite.updateLower( pd->send[n].nrange_wrap.lower() );
6666  // add the subgrid charges into the block
6667  BlockIndex& bindex = pd->send[n].nblock_wrap;
6668  // place subgrid into message
6669  int msgsz = subgrid_c1hermite.data().len() * sizeof(C1Vector);
6670  GridMsg *gm = new(msgsz, sizeof(int)) GridMsg;
6671  SET_PRIORITY(gm, sequence, MSM_PRIORITY + priority);
6672  gm->put(subgrid_c1hermite, bindex.level, sequence);
6673 #ifdef MSM_TIMING
6674  stopTime = CkWallTimer();
6675  mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
6676 #endif
6677  mgr->msmC1HermiteBlock[bindex.level](
6678  bindex.n.i, bindex.n.j, bindex.n.k).addCharge(gm);
6679  }
6680  }
6681 
6682  void PatchData::addPotentialC1Hermite(const Grid<C1Vector>& epart) {
6683 #ifdef MSM_TIMING
6684  double startTime, stopTime;
6685  startTime = CkWallTimer();
6686 #endif
6687  eh_c1hermite += epart;
6688 #ifdef MSM_TIMING
6689  stopTime = CkWallTimer();
6690  mgr->msmTiming[MsmTimer::COMM] += stopTime - startTime;
6691 #endif
6692  if (++cntRecvs == pd->numRecvs) {
6693  interpolationC1Hermite();
6694  }
6695  }
6696 
6697  void PatchData::interpolationC1Hermite() {
6698 #ifdef DEBUG_MSM_GRID
6699  printf("patchID %d: interpolationC1Hermite\n", patchID);
6700 #endif
6701 
6702 #ifdef MSM_TIMING
6703  double startTime, stopTime;
6704  startTime = CkWallTimer();
6705 #endif
6706 #ifndef MSM_COMM_ONLY
6707  BigReal energy_self = 0;
6708 
6709  Float xphi[2], dxphi[2], xpsi[2], dxpsi[2];
6710  Float yphi[2], dyphi[2], ypsi[2], dypsi[2];
6711  Float zphi[2], dzphi[2], zpsi[2], dzpsi[2];
6712 
6713  const Float hx = Float(mgr->hxlen); // real space grid spacing
6714  const Float hy = Float(mgr->hylen);
6715  const Float hz = Float(mgr->hzlen);
6716 
6717  const Float hx_1 = Float(mgr->hxlen_1); // real space inverse grid spacing
6718  const Float hy_1 = Float(mgr->hylen_1);
6719  const Float hz_1 = Float(mgr->hzlen_1);
6720 
6721  const int ia = eh_c1hermite.ia();
6722  const int ib = eh_c1hermite.ib();
6723  const int ja = eh_c1hermite.ja();
6724  const int jb = eh_c1hermite.jb();
6725  const int ka = eh_c1hermite.ka();
6726  const int kb = eh_c1hermite.kb();
6727  const int ni = eh_c1hermite.ni();
6728  const int nj = eh_c1hermite.nj();
6729  C1Vector *ehbuffer = eh_c1hermite.data().buffer();
6730 
6731  // loop over atoms
6732  for (int n = 0; n < coord.len(); n++) {
6733  Float q = coord[n].charge;
6734  if (0==q) continue;
6735 
6736  ScaledPosition s = mgr->lattice.scale(coord[n].position);
6737 
6738  BigReal sx_hx = (s.x - mgr->sglower.x) * mgr->shx_1;
6739  BigReal sy_hy = (s.y - mgr->sglower.y) * mgr->shy_1;
6740  BigReal sz_hz = (s.z - mgr->sglower.z) * mgr->shz_1;
6741 
6742  BigReal xlo = floor(sx_hx);
6743  BigReal ylo = floor(sy_hy);
6744  BigReal zlo = floor(sz_hz);
6745 
6746  // calculate Phi stencils along each dimension
6747  Float xdelta = Float(sx_hx - xlo);
6748  mgr->d_stencil_1d_c1hermite(dxphi, xphi, dxpsi, xpsi,
6749  xdelta, hx, hx_1);
6750  Float ydelta = Float(sy_hy - ylo);
6751  mgr->d_stencil_1d_c1hermite(dyphi, yphi, dypsi, ypsi,
6752  ydelta, hy, hy_1);
6753  Float zdelta = Float(sz_hz - zlo);
6754  mgr->d_stencil_1d_c1hermite(dzphi, zphi, dzpsi, zpsi,
6755  zdelta, hz, hz_1);
6756 
6757  int ilo = int(xlo);
6758  int jlo = int(ylo);
6759  int klo = int(zlo);
6760 
6761 #if 0
6762  // XXX don't need to test twice!
6763 
6764  // test to see if stencil is within edges of grid
6765  int iswithin = ( ia <= ilo && ilo < ib &&
6766  ja <= jlo && jlo < jb &&
6767  ka <= klo && klo < kb );
6768 
6769  if ( ! iswithin ) {
6770  char msg[100];
6771  snprintf(msg, sizeof(msg), "Atom %d is outside of the MSM grid.",
6772  coord[n].id);
6773  NAMD_die(msg);
6774  }
6775 #endif
6776 
6777  // determine force on atom from surrounding potential grid points
6778  Float fx=0, fy=0, fz=0, e=0;
6779  for (int k = 0; k < 2; k++) {
6780  int koff = ((k+klo) - ka) * nj;
6781  for (int j = 0; j < 2; j++) {
6782  int jkoff = (koff + (j+jlo) - ja) * ni;
6783  Float c_yphi_zphi = yphi[j] * zphi[k];
6784  Float c_ypsi_zphi = ypsi[j] * zphi[k];
6785  Float c_yphi_zpsi = yphi[j] * zpsi[k];
6786  Float c_ypsi_zpsi = ypsi[j] * zpsi[k];
6787  Float c_yphi_dzphi = yphi[j] * dzphi[k];
6788  Float c_ypsi_dzphi = ypsi[j] * dzphi[k];
6789  Float c_yphi_dzpsi = yphi[j] * dzpsi[k];
6790  Float c_ypsi_dzpsi = ypsi[j] * dzpsi[k];
6791  Float c_dyphi_zphi = dyphi[j] * zphi[k];
6792  Float c_dypsi_zphi = dypsi[j] * zphi[k];
6793  Float c_dyphi_zpsi = dyphi[j] * zpsi[k];
6794  Float c_dypsi_zpsi = dypsi[j] * zpsi[k];
6795  for (int i = 0; i < 2; i++) {
6796  int ijkoff = jkoff + (i+ilo) - ia;
6797  fx += dxphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D000]
6798  + c_ypsi_zphi * ehbuffer[ijkoff].velem[D010]
6799  + c_yphi_zpsi * ehbuffer[ijkoff].velem[D001]
6800  + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D011])
6801  + dxpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D100]
6802  + c_ypsi_zphi * ehbuffer[ijkoff].velem[D110]
6803  + c_yphi_zpsi * ehbuffer[ijkoff].velem[D101]
6804  + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D111]);
6805  fy += xphi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].velem[D000]
6806  + c_dypsi_zphi * ehbuffer[ijkoff].velem[D010]
6807  + c_dyphi_zpsi * ehbuffer[ijkoff].velem[D001]
6808  + c_dypsi_zpsi * ehbuffer[ijkoff].velem[D011])
6809  + xpsi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].velem[D100]
6810  + c_dypsi_zphi * ehbuffer[ijkoff].velem[D110]
6811  + c_dyphi_zpsi * ehbuffer[ijkoff].velem[D101]
6812  + c_dypsi_zpsi * ehbuffer[ijkoff].velem[D111]);
6813  fz += xphi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].velem[D000]
6814  + c_ypsi_dzphi * ehbuffer[ijkoff].velem[D010]
6815  + c_yphi_dzpsi * ehbuffer[ijkoff].velem[D001]
6816  + c_ypsi_dzpsi * ehbuffer[ijkoff].velem[D011])
6817  + xpsi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].velem[D100]
6818  + c_ypsi_dzphi * ehbuffer[ijkoff].velem[D110]
6819  + c_yphi_dzpsi * ehbuffer[ijkoff].velem[D101]
6820  + c_ypsi_dzpsi * ehbuffer[ijkoff].velem[D111]);
6821  e += xphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D000]
6822  + c_ypsi_zphi * ehbuffer[ijkoff].velem[D010]
6823  + c_yphi_zpsi * ehbuffer[ijkoff].velem[D001]
6824  + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D011])
6825  + xpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[D100]
6826  + c_ypsi_zphi * ehbuffer[ijkoff].velem[D110]
6827  + c_yphi_zpsi * ehbuffer[ijkoff].velem[D101]
6828  + c_ypsi_zpsi * ehbuffer[ijkoff].velem[D111]);
6829  }
6830  }
6831  }
6832 
6833 #if 0
6834  force[n].x -= q * (mgr->srx_x * fx + mgr->srx_y * fy + mgr->srx_z * fz);
6835  force[n].y -= q * (mgr->sry_x * fx + mgr->sry_y * fy + mgr->sry_z * fz);
6836  force[n].z -= q * (mgr->srz_x * fx + mgr->srz_y * fy + mgr->srz_z * fz);
6837 #endif
6838  force[n].x -= q * fx;
6839  force[n].y -= q * fy;
6840  force[n].z -= q * fz;
6841  energy += q * e;
6842  energy_self += q * q;
6843 
6844  } // end loop over atoms
6845 
6846  energy_self *= mgr->gzero;
6847  energy -= energy_self;
6848  energy *= 0.5;
6849 #endif // !MSM_COMM_ONLY
6850 #ifdef MSM_TIMING
6851  stopTime = CkWallTimer();
6852  mgr->msmTiming[MsmTimer::INTERP] += stopTime - startTime;
6853  mgr->doneTiming();
6854 #endif
6855  mgr->doneCompute();
6856  }
6857 
6858 } // namespace msm
6859 
6860 
6861 #include "ComputeMsmMgr.def.h"
BigReal gridspacing
Definition: ComputeMsm.C:595
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:278
#define iout
Definition: InfoStream.h:51
void doWork()
Definition: ComputeMsm.C:6077
msm::Grid< Vtype > subgrid
Definition: ComputeMsm.C:2753
BlockIndex blockOfGridIndex(const Ivec &n, int level) const
Definition: MsmMap.h:991
int ispz
Definition: MsmMap.h:958
void get(msm::Array< CProxy_MsmC1HermiteBlock > &a)
Definition: ComputeMsm.C:192
void setup_periodic_blocksize(int &bsize, int n)
Definition: ComputeMsm.C:3932
BigReal energy
Definition: ComputeMsm.C:1750
CProxy_ComputeMsmMgr mgrProxy
Definition: ComputeMsm.C:2735
ComputeMsmMgr * mgrLocal
Definition: ComputeMsm.C:2736
BigReal length(void) const
Definition: Vector.h:169
int nbytes
Definition: ComputeMsm.C:121
void extract(Grid< T > &g)
Definition: MsmMap.h:748
#define C1INDEX(drj, dri)
Definition: MsmMap.h:191
void set(Float r)
Definition: MsmMap.h:89
void put(const CProxyElement_MsmBlock *q)
Definition: ComputeMsm.C:252
char msmBlockProxyData[maxlevels *sizeof(CProxy_MsmBlock)]
Definition: ComputeMsm.C:155
BigReal shz_1
Definition: ComputeMsm.C:614
BigReal shx
Definition: ComputeMsm.C:613
void setbounds(int pia, int pib, int pja, int pjb, int pka, int pkb)
Definition: MsmMap.h:431
void sendCharge()
Definition: ComputeMsm.C:6371
void compute(msm::Array< int > &patchIDList)
Definition: ComputeMsm.C:5990
void stencil_1d(Float phi[], Float t)
Definition: ComputeMsm.C:761
BigReal hylen_1
Definition: ComputeMsm.C:600
Ivec clipIndexToLevel(const Ivec &n, int level) const
Definition: MsmMap.h:966
void restrictionKernel()
Definition: ComputeMsm.C:2833
BigReal gridScalingFactor
Definition: ComputeMsm.C:597
Vector b_r() const
Definition: Lattice.h:269
void doneVirialContrib()
Definition: ComputeMsm.C:539
BigReal min_b(int pid) const
Definition: PatchMap.h:93
void set(int pia, int pni, int pja, int pnj, int pka, int pnk)
Definition: MsmMap.h:608
Vector sglower
Definition: ComputeMsm.C:610
msm::BlockIndex blockIndex
Definition: ComputeMsm.C:2751
Array< BlockSend > send
Definition: MsmMap.h:899
void setbounds(int pia, int pib, int pja, int pjb, int pka, int pkb)
Definition: MsmMap.h:612
CProxyElement_MsmC1HermiteBlock msmBlockElementProxy
Definition: ComputeMsm.C:2332
Array< Grid< BlockDiagram > > blockLevel
Definition: MsmMap.h:956
void addPotentialC1Hermite(const Grid< C1Vector > &epart)
Definition: ComputeMsm.C:6682
void initVirialContrib()
Definition: ComputeMsm.C:529
static int restriction(NL_Msm *, int level)
Definition: msm_longrng.c:1485
BigReal hzlen_1
Definition: ComputeMsm.C:600
Charge charge
Definition: NamdTypes.h:54
void put(const msm::Array< CProxy_MsmBlock > &a)
Definition: ComputeMsm.C:159
Ivec extent() const
Definition: MsmMap.h:445
BigReal hxlen
Definition: ComputeMsm.C:599
int j
Definition: MsmMap.h:411
MsmC1HermiteBlock(int level)
Definition: ComputeMsm.C:3466
int cntVirialContrib
Definition: ComputeMsm.C:525
void recvMsmBlockProxy(MsmBlockProxyMsg *)
Definition: ComputeMsm.C:5939
CProxyElement_MsmBlock msmBlockElementProxy
Definition: ComputeMsm.C:2198
Definition: MsmMap.h:187
ResizeArrayIter< T > end(void) const
msm::BlockIndex qhBlockIndex
Definition: ComputeMsm.C:238
void init(const IndexRange &n)
Definition: MsmMap.h:603
void addPotential(GridMsg *)
Definition: ComputeMsm.C:6018
void recvMsmC1HermiteBlockProxy(MsmC1HermiteBlockProxyMsg *)
Definition: ComputeMsm.C:5951
static const Float PhiStencil[NUM_APPROX_FORMS][MAX_NSTENCIL_SKIP_ZERO]
Definition: ComputeMsm.C:660
WorkIndex(float w, int i)
Definition: ComputeMsm.C:350
ScaledPosition smax
Definition: ComputeMsm.C:594
void prolongationKernel()
Definition: ComputeMsm.C:2991
CProxy_ComputeMsmMgr msmProxy
Definition: ComputeMsm.C:461
Array< Grid< C1Matrix > > gres_c1hermite
Definition: MsmMap.h:951
int gridsize_a(void) const
Definition: PatchMap.h:64
MsmBlockKernel(CkMigrateMessage *m)
Definition: ComputeMsm.C:2758
MsmBlockMap(CkMigrateMessage *m)
Definition: ComputeMsm.C:1674
virtual ~ComputeMsm()
Definition: ComputeMsm.C:6069
static const int Nstencil[NUM_APPROX]
Definition: ComputeMsm.C:652
msm::Grid< Float > gvsum
Definition: ComputeMsm.C:523
char msmBlockProxyData[maxlevels *sizeof(CProxy_MsmC1HermiteBlock)]
Definition: ComputeMsm.C:178
static int sign(int n)
Definition: ComputeMsm.C:452
#define ASSERT(E)
int kb() const
Definition: MsmMap.h:439
ScaledPosition smax
Definition: ComputeMsm.h:21
const msm::Grid< Mtype > * pgc
Definition: ComputeMsm.C:1801
void get(CProxyElement_MsmBlock *q)
Definition: ComputeMsm.C:259
void get(CProxy_MsmC1HermiteGridCutoff *p)
Definition: ComputeMsm.C:229
int nlower_k
Definition: ComputeMsm.C:117
int registerArray(CkArrayIndex &numElements, CkArrayID aid)
Definition: ComputeMsm.C:1705
void setup_hgrid_1d(BigReal len, BigReal &hh, int &nn, int &ia, int &ib, int isperiodic)
Definition: ComputeMsm.C:3893
int nextent_j
Definition: ComputeMsm.C:119
int nn() const
Definition: MsmMap.h:443
Float virial[VMAX]
Definition: ComputeMsm.C:527
void addCharge(GridMsg *)
Definition: ComputeMsm.C:3202
gridSize z
void anterpolation()
Definition: ComputeMsm.C:6269
Force * f[maxNumForces]
Definition: PatchTypes.h:67
int index_b(int pid) const
Definition: PatchMap.h:87
BigReal hzlen
Definition: ComputeMsm.C:599
int xloopcnt[MAX]
Definition: ComputeMsm.C:340
int jb() const
Definition: MsmMap.h:437
char * gdata
Definition: ComputeMsm.C:113
BigReal shy
Definition: ComputeMsm.C:613
IndexRange nrange
Definition: MsmMap.h:868
int nj() const
Definition: MsmMap.h:441
BigReal x
Definition: Vector.h:66
float work
Definition: ComputeMsm.C:347
IndexRange nrange
Definition: MsmMap.h:911
Array< int > bsx
Definition: MsmMap.h:960
IndexRange nrangeCutoff
Definition: MsmMap.h:912
msm::Grid< Float > subgrid
Definition: ComputeMsm.C:480
CProxySection_MsmC1HermiteGridCutoff msmGridCutoffReduction
Definition: ComputeMsm.C:3464
msm::BlockSend ehBlockSend
Definition: ComputeMsm.C:239
Array< IndexRange > gridrange
Definition: MsmMap.h:942
BigReal MSMGridSpacing
void put(const msm::Grid< T > &g, int id, int seq)
Definition: ComputeMsm.C:126
AtomCoordArray & coordArray()
Definition: ComputeMsm.C:1756
void recvMsmGridCutoffProxy(MsmGridCutoffProxyMsg *)
Definition: ComputeMsm.C:5945
void NAMD_die(const char *err_msg)
Definition: common.C:85
int operator<=(const WorkIndex &wn)
Definition: ComputeMsm.C:351
CProxySection_MsmGridCutoff msmGridCutoffBroadcast
Definition: ComputeMsm.C:3076
msm::Grid< Vtype > qh
Definition: ComputeMsm.C:1798
MsmBlockMap(int lvl)
Definition: ComputeMsm.C:1664
void initialize_create()
Definition: ComputeMsm.C:5798
CProxy_MsmGridCutoff msmGridCutoff
Definition: ComputeMsm.C:467
void wrapBlockIndex(BlockIndex &bn) const
Definition: MsmMap.h:1214
void get(CProxy_MsmGridCutoff *p)
Definition: ComputeMsm.C:210
Definition: MsmMap.h:187
int nlower_j
Definition: ComputeMsm.C:116
IndexRange clipBlockToIndexRange(const BlockIndex &nb, const IndexRange &nrange) const
Definition: MsmMap.h:1053
ForceArray force
Definition: ComputeMsm.C:1743
void setupWeights(const msm::Grid< Mtype > *ptrgc, const msm::Grid< Mtype > *ptrgvc)
Definition: ComputeMsm.C:1895
void addPotential(const Grid< Float > &epart)
Definition: ComputeMsm.C:6406
CProxy_MsmC1HermiteGridCutoff msmC1HermiteGridCutoff
Definition: ComputeMsm.C:468
float Float
Definition: MsmMap.h:74
BigReal max_b(int pid) const
Definition: PatchMap.h:94
int blockFlatIndex(int level, int i, int j, int k)
Definition: ComputeMsm.C:487
static void ndsplitting(BigReal pg[], BigReal s, int n, int _split)
Definition: ComputeMsm.C:1266
int index_c(int pid) const
Definition: PatchMap.h:88
msm::Grid< Vtype > eh
Definition: ComputeMsm.C:2740
void updateLower(const Ivec &n)
Definition: MsmMap.h:677
PatchData(ComputeMsmMgr *pmgr, int pid)
Definition: ComputeMsm.C:6231
Float melem[C1_MATRIX_SIZE]
Definition: MsmMap.h:111
static int interpolation(NL_Msm *)
Definition: msm_longrng.c:960
void get(CProxyElement_MsmC1HermiteBlock *q)
Definition: ComputeMsm.C:282
void done(double tm[], int n)
Definition: ComputeMsm.C:299
BigReal max_a(int pid) const
Definition: PatchMap.h:92
void interpolation()
Definition: ComputeMsm.C:6421
void addPotential(GridMsg *)
Definition: ComputeMsm.C:3353
void restriction()
Definition: ComputeMsm.C:3119
BigReal length2(void) const
Definition: Vector.h:173
Definition: MsmMap.h:187
#define simParams
Definition: Output.C:127
void wrapBlockSend(BlockSend &bs) const
Definition: MsmMap.h:1106
void print()
Definition: ComputeMsm.C:329
const msm::Grid< Mtype > * proStencil
Definition: ComputeMsm.C:2746
int numPatches(void) const
Definition: PatchMap.h:59
void sendPatch()
Definition: ComputeMsm.C:3416
int node(int pid) const
Definition: PatchMap.h:114
int nk() const
Definition: MsmMap.h:442
msm::Grid< C1Vector > subgrid_c1hermite
Definition: ComputeMsm.C:481
void d_stencil_1d_c1hermite(Float dphi[], Float phi[], Float dpsi[], Float psi[], Float t, Float h, Float h_1)
Definition: ComputeMsm.C:1252
IndexRange nrange_wrap
Definition: MsmMap.h:870
int nextent_k
Definition: ComputeMsm.C:120
ScaledPosition smin
Definition: ComputeMsm.h:21
Array< FoldFactor > foldfactor
Definition: MsmMap.h:962
void subtractVirialContrib()
Definition: ComputeMsm.C:536
BigReal y
Definition: Vector.h:66
Vector b() const
Definition: Lattice.h:253
void d_stencil_1d(Float dphi[], Float phi[], Float t, Float h_1)
Definition: ComputeMsm.C:937
msm::Array< CProxy_MsmBlock > msmBlock
Definition: ComputeMsm.C:464
static const int PolyDegree[NUM_APPROX]
Definition: ComputeMsm.C:649
Definition: MsmMap.h:187
void stencil_1d_c1hermite(Float phi[], Float psi[], Float t, Float h)
Definition: ComputeMsm.C:1244
void setupSections()
Definition: ComputeMsm.C:3140
void setup(MsmGridCutoffInitMsg *bmsg)
Definition: ComputeMsm.C:1838
void resize(int n)
Definition: MsmMap.h:616
ComputeMsmMgr * mgrLocal
Definition: ComputeMsm.C:1792
AtomCoordArray coord
Definition: ComputeMsm.C:1742
static void splitting(BigReal &g, BigReal &dg, BigReal r_a, int _split)
Definition: ComputeMsm.C:667
msm::BlockDiagram * bd
Definition: ComputeMsm.C:2738
msm::Array< CProxy_MsmC1HermiteBlock > msmC1HermiteBlock
Definition: ComputeMsm.C:465
void recvMsmC1HermiteGridCutoffProxy(MsmC1HermiteGridCutoffProxyMsg *)
Definition: ComputeMsm.C:5959
msm::Grid< Vtype > qh
Definition: ComputeMsm.C:2739
msm::PatchPtrArray & patchPtrArray()
Definition: ComputeMsm.C:445
static void gc_c1hermite_elem_accum(C1Matrix &matrix, BigReal _c, Vector rv, BigReal _a, int _split)
Definition: ComputeMsm.C:1410
IndexRange nrange
Definition: MsmMap.h:898
Grid< C1Vector > eh_c1hermite
Definition: ComputeMsm.C:1748
void interpolationC1Hermite()
Definition: ComputeMsm.C:6697
msm::Array< int > gcutAssign
Definition: ComputeMsm.C:485
void update(CkQdMsg *)
Definition: ComputeMsm.C:5967
CkSectionInfo cookie
Definition: ComputeMsm.C:2333
Array< Grid< Float > > gc
Definition: MsmMap.h:944
PatchDiagram * pd
Definition: ComputeMsm.C:1741
msm::Grid< Vtype > ehProlongated
Definition: ComputeMsm.C:2748
ScaledPosition scale(Position p) const
Definition: Lattice.h:83
BigReal patchDimension
MsmBlockKernel(const msm::BlockIndex &)
Definition: ComputeMsm.C:2793
gridSize y
int ib() const
Definition: MsmMap.h:435
void sendDownPotential()
Definition: ComputeMsm.C:3774
void setMgr(ComputeMsmMgr *mgr)
Definition: ComputeMsm.h:32
Ivec lower() const
Definition: MsmMap.h:444
MsmBlock(CkMigrateMessage *m)
Definition: ComputeMsm.C:3091
Array< Grid< Float > > gvc
Definition: MsmMap.h:945
Vector sz_shz
Definition: ComputeMsm.C:617
void sendChargeC1Hermite()
Definition: ComputeMsm.C:6650
Definition: MsmMap.h:187
void put(const CProxyElement_MsmC1HermiteBlock *q)
Definition: ComputeMsm.C:274
BigReal dielectric
CProxySection_MsmGridCutoff msmGridCutoffReduction
Definition: ComputeMsm.C:3077
void submit(void)
Definition: ReductionMgr.h:323
int size(void) const
Definition: ResizeArray.h:127
msm::Map map
Definition: ComputeMsm.C:471
void set(Float r)
Definition: MsmMap.h:113
BigReal min_c(int pid) const
Definition: PatchMap.h:95
void put(const CProxy_MsmC1HermiteGridCutoff *p)
Definition: ComputeMsm.C:223
int ja() const
Definition: MsmMap.h:436
Grid< Float > grespro
Definition: MsmMap.h:946
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
Grid< Float > qh
Definition: ComputeMsm.C:1744
void get(msm::Grid< T > &g, int &id, int &seq)
Definition: ComputeMsm.C:141
int ni() const
Definition: MsmMap.h:440
int b_p() const
Definition: Lattice.h:274
MsmBlock(int level)
Definition: ComputeMsm.C:3079
char msmBlockElementProxyData[sizeof(CProxyElement_MsmBlock)]
Definition: ComputeMsm.C:249
void addVirialContrib()
Definition: ComputeMsm.C:533
BigReal MSMPadding
MsmGridCutoffInitMsg(const msm::BlockIndex &i, const msm::BlockSend &b)
Definition: ComputeMsm.C:240
void anterpolationC1Hermite()
Definition: ComputeMsm.C:6547
gridSize x
IndexRange nrange
Definition: MsmMap.h:885
Array< Grid< C1Matrix > > gc_c1hermite
Definition: MsmMap.h:949
BigReal shx_1
Definition: ComputeMsm.C:614
char msmBlockElementProxyData[sizeof(CProxyElement_MsmC1HermiteBlock)]
Definition: ComputeMsm.C:271
msm::Grid< Vtype > qhRestricted
Definition: ComputeMsm.C:2747
ComputeMsmMgr * mgr
Definition: ComputeMsm.C:1739
Grid< Float > subgrid
Definition: ComputeMsm.C:1746
int a_p() const
Definition: Lattice.h:273
BigReal padding
Definition: ComputeMsm.C:596
void reset()
Definition: MsmMap.h:901
BigReal gzero
Definition: ComputeMsm.C:608
const msm::Grid< Mtype > * pgvc
Definition: ComputeMsm.C:1802
BigReal hylen
Definition: ComputeMsm.C:599
Vector a() const
Definition: Lattice.h:252
int gridsize_b(void) const
Definition: PatchMap.h:65
void initialize(MsmInitMsg *)
Definition: ComputeMsm.C:3969
Float velem[C1_VECTOR_SIZE]
Definition: MsmMap.h:87
const T * buffer() const
Definition: MsmMap.h:259
double timing[MAX]
Definition: ComputeMsm.C:313
void sumReducedPotential(CkReductionMsg *msg)
Definition: ComputeMsm.C:3490
void setupStencils(const msm::Grid< Mtype > *res, const msm::Grid< Mtype > *pro)
Definition: ComputeMsm.C:2774
ResizeArrayIter< T > begin(void) const
int procNum(int, const CkArrayIndex &idx)
Definition: ComputeMsm.C:1708
Vector unit(void) const
Definition: Vector.h:182
Vector c() const
Definition: Lattice.h:254
static int prolongation(NL_Msm *, int level)
Definition: msm_longrng.c:1582
void saveResults()
Definition: ComputeMsm.C:6161
const msm::Grid< Mtype > * resStencil
Definition: ComputeMsm.C:2745
double BigReal
Definition: common.h:114
void init(int natoms)
Definition: ComputeMsm.C:6252
int c_p() const
Definition: Lattice.h:275
BigReal hxlen_1
Definition: ComputeMsm.C:600
void put(const CProxy_MsmGridCutoff *p)
Definition: ComputeMsm.C:205
void append(const T &t)
Definition: MsmMap.h:250
Lattice lattice
Definition: ComputeMsm.C:592