NAMD
ComputeMsmMsa.C
Go to the documentation of this file.
1 
7 #ifdef CHARM_HAS_MSA
8 
9 #include "InfoStream.h"
10 #include "Node.h"
11 #include "PDB.h"
12 #include "PatchMap.h"
13 #include "PatchMap.inl"
14 #include "AtomMap.h"
15 #include "ComputeMsmMsa.h"
16 #include "ComputeMsmMsaMgr.decl.h"
17 #include "PatchMgr.h"
18 #include "Molecule.h"
19 #include "ReductionMgr.h"
20 #include "ComputeMgr.h"
21 #include "ComputeMgr.decl.h"
22 // #define DEBUGM
23 #define MIN_DEBUG_LEVEL 3
24 #include "Debug.h"
25 #include "SimParameters.h"
26 #include "WorkDistrib.h"
27 #include "varsizemsg.h"
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <errno.h>
31 #include "pup_stl.h"
32 #include "MsmMacros.h"
33 
34 
35 void MsmMsaData::pup(PUP::er &p)
36 {
37  p|ispx, p|ispy, p|ispz;
38  p|hx_1, p|hy_1, p|hz_1;
39  p|a;
40  p|origin_x, p|origin_y, p|origin_z;
41  p|nlevels, p|maxlevels, p|toplevel;
42  p|approx, p|split;
43  p|qh, p|eh;
44  p|grid_len;
45  p|grid_idstart;
46  p|scaling;
47  p|gc_len;
48  p|gc_idstart;
49  p|gc;
50  p|gctop_len;
51  p|gctop_idstart;
52  p|gctop;
53  p|num_clients_qh, p|num_clients_eh;
54  p|num_anterpolation_chares;
55  p|num_interpolation_chares;
56  p|num_restriction_chares;
57  p|num_prolongation_chares;
58  p|num_gridcutoff_chares;
59  p|dim_gridcutoff_chares;
60  p|dim_gridtransfer_chares;
61  p|num_total_restriction_chares;
62  p|num_total_prolongation_chares;
63  p|num_total_gridcutoff_chares;
64  p|num_energy_chares;
65  p|num_points_per_chare;
66  p|self_energy_const;
67 }
68 
69 void MsmMsaData::print()
70 {
71 #if 0
72  printf("MSM data:\n");
73  printf("ispx=%d ispy=%d ispz=%d\n", ispx, ispy, ispz);
74  printf("hx=%g hy=%g hz=%g\n", 1/hx_1, 1/hy_1, 1/hz_1);
75  printf("a=%g\n", a);
76  printf("origin = %g %g %g\n", origin_x, origin_y, origin_z);
77  printf("nlevels=%d maxlevels=%d toplevel=%d\n", nlevels, maxlevels, toplevel);
78  printf("approx=%d split=%d\n", approx, split);
79 #endif
80 }
81 
82 struct ComputeMsmMsaAtom {
83  Position position;
84  float msmcharge; // scaled for MSM
85  int id;
86 };
87 
88 class MsmMsaCoordMsg : public CMessage_MsmMsaCoordMsg {
89 public:
90  int numAtoms;
91  Lattice lattice;
92  ComputeMsmMsaAtom *coord;
93 };
94 
95 class ComputeMsmMsaMgr : public CBase_ComputeMsmMsaMgr {
96 public:
97  ComputeMsmMsaMgr(); // entry
98  ~ComputeMsmMsaMgr();
99 
100  void initialize(CkQdMsg *); // entry with message
101 
102  void setCompute(ComputeMsmMsa *c) { msmCompute = c; } // local
103 
104  void recvMsmMsaData(const MsmMsaData &); // entry with parameter marshalling
105 
106  void initWorkers(CkQdMsg *); // entry with message
107  void startWorkers(CkQdMsg *); // entry with message
108 
109  void anterpolate(MsmMsaCoordMsg *); // entry with coordinates message
110  void interpolate(CkQdMsg *); // entry with message
111 
112  const MsmMsaData &getMsmMsaData() const { return msmData; } // local
113 
114 private:
115  CProxy_ComputeMsmMsaMgr msmProxy;
116  ComputeMsmMsa *msmCompute;
117 
118  MsmMsaCoordMsg *coordMsg;
119  int numAtoms;
120  MsmMsaForce *force;
121  int numForces;
122 
123  MsmMsaData msmData;
124 
125  CProxy_MsmMsaLevel msmLevelProxy; // 1D chare array (number of grid levels)
126  CProxy_MsmMsaEnergy msmEnergyProxy; // 1D chare array
127 
128  MsmMsaGrid::Accum qhacc; // accumulate charge grid for anterpolation
129  MsmMsaGrid::Read qhread; // read charge grid after anterpolation
130  MsmMsaGrid::Accum ehacc; // accumulate potential grid before interpolation
131  MsmMsaGrid::Read ehread; // read potential grid for interpolation
132  int msa_setup_anterpolation; // have MSAs been initially set up?
133  int msa_setup_interpolation; // have MSAs been initially set up?
134 };
135 
136 class MsmMsaLevel : public CBase_MsmMsaLevel {
137 public:
138  MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh, MsmMsaGrid &q2h, MsmMsaGrid &e2h);
139  MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh);
140  MsmMsaLevel(CkMigrateMessage *m) { }
141  void compute();
142 private:
143  int lastlevel;
144  CProxy_MsmMsaGridCutoff gridcutoff;
145  CProxy_MsmMsaRestriction restriction;
146  CProxy_MsmMsaProlongation prolongation;
147 };
148 
149 class MsmMsaGridCutoff : public CBase_MsmMsaGridCutoff {
150 public:
151  MsmMsaGridCutoff(int level, MsmMsaGrid &qh, MsmMsaGrid &eh);
152  MsmMsaGridCutoff(CkMigrateMessage *m) { }
153  void compute();
154 private:
155  MsmMsaGrid qh, eh; // MSA handles to this level charge and potential grids
156  MsmMsaGrid::Accum qhacc, ehacc;
157  MsmMsaGrid::Read qhread, ehread;
158  int msa_setup;
159  int mylevel; // which level am I on?
160  int mia, mja, mka; // my lowest grid point index of my sub-cube
161  int mib, mjb, mkb; // my highest grid point index of my sub-cube
162 };
163 
164 class MsmMsaRestriction : public CBase_MsmMsaRestriction {
165 public:
166  MsmMsaRestriction(int level, MsmMsaGrid &qh, MsmMsaGrid &q2h);
167  MsmMsaRestriction(CkMigrateMessage *m) { }
168  void compute();
169 private:
170  MsmMsaGrid qh, q2h; // MSA handles to charge grids mylevel, (mylevel+1)
171  MsmMsaGrid::Accum qhacc, q2hacc;
172  MsmMsaGrid::Read qhread, q2hread;
173  int msa_setup;
174  int mylevel; // which level am I on?
175  int mia, mja, mka; // my lowest grid point index of (mylevel+1) sub-cube
176  int mib, mjb, mkb; // my highest grid point index of (mylevel+1) sub-cube
177 };
178 
179 class MsmMsaProlongation : public CBase_MsmMsaProlongation {
180 public:
181  MsmMsaProlongation(int level, MsmMsaGrid &eh, MsmMsaGrid &e2h);
182  MsmMsaProlongation(CkMigrateMessage *m) { }
183  void compute();
184 private:
185  MsmMsaGrid eh, e2h; // MSA handles to potential grids mylevel, (mylevel+1)
186  MsmMsaGrid::Accum ehacc, e2hacc;
187  MsmMsaGrid::Read ehread, e2hread;
188  int msa_setup;
189  int mylevel; // which level am I on?
190  int mia, mja, mka; // my lowest grid point index of (mylevel+1) sub-cube
191  int mib, mjb, mkb; // my highest grid point index of (mylevel+1) sub-cube
192 };
193 
194 class MsmMsaEnergy : public CBase_MsmMsaEnergy {
195 public:
196  MsmMsaEnergy(MsmMsaGrid &qh, MsmMsaGrid &eh);
197  MsmMsaEnergy(CkMigrateMessage *m) { }
198  ~MsmMsaEnergy();
199  void compute();
200 private:
201  MsmMsaGrid qh, eh;
202  MsmMsaGrid::Accum qhacc, ehacc;
203  MsmMsaGrid::Read qhread, ehread;
204  float *qhbuffer;
205  int msa_setup;
206  int mli, mlj, mlk; // my 3D index within level 0
207  int mia, mja, mka; // my lowest grid point index of my sub-cube
208  int mib, mjb, mkb; // my highest grid point index of my sub-cube
209  SubmitReduction *reduction;
210 };
211 
212 
213 ComputeMsmMsa::ComputeMsmMsa(ComputeID c) :
215 {
216  CProxy_ComputeMsmMsaMgr::ckLocalBranch(
217  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->setCompute(this);
219  qscaling = sqrt(COULOMB / simParams->dielectric);
221 }
222 
223 ComputeMsmMsa::~ComputeMsmMsa()
224 {
225 }
226 
227 void ComputeMsmMsa::doWork()
228 {
229  ResizeArrayIter<PatchElem> ap(patchList);
230 
231  // Skip computations if nothing to do.
232  if ( ! patchList[0].p->flags.doFullElectrostatics ) {
233  for (ap = ap.begin(); ap != ap.end(); ap++) {
234  CompAtom *x = (*ap).positionBox->open();
235  Results *r = (*ap).forceBox->open();
236  (*ap).positionBox->close(&x);
237  (*ap).forceBox->close(&r);
238  reduction->submit();
239  }
240  return;
241  }
242 
243  // allocate message
244  int numLocalAtoms = 0;
245  for (ap = ap.begin(); ap != ap.end(); ap++) {
246  numLocalAtoms += (*ap).p->getNumAtoms();
247  }
248 
249  MsmMsaCoordMsg *msg = new (numLocalAtoms, 0) MsmMsaCoordMsg;
250  msg->numAtoms = numLocalAtoms;
251  msg->lattice = patchList[0].p->flags.lattice;
252  ComputeMsmMsaAtom *data_ptr = msg->coord;
253 
254  // get positions
255  for (ap = ap.begin(); ap != ap.end(); ap++) {
256  CompAtom *x = (*ap).positionBox->open();
257  CompAtomExt *xExt = (*ap).p->getCompAtomExtInfo();
258  if ( patchList[0].p->flags.doMolly ) {
259  (*ap).positionBox->close(&x);
260  x = (*ap).avgPositionBox->open();
261  }
262  int numAtoms = (*ap).p->getNumAtoms();
263 
264  for (int i=0; i < numAtoms; i++) {
265  data_ptr->position = x[i].position;
266  data_ptr->msmcharge = qscaling * x[i].charge;
267  data_ptr->id = xExt[i].id;
268  ++data_ptr;
269  }
270 
271  if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
272  else { (*ap).positionBox->close(&x); }
273  }
274 
275  CProxy_ComputeMsmMsaMgr msmProxy(
276  CkpvAccess(BOCclass_group).computeMsmMsaMgr);
277  msmProxy[CkMyPe()].anterpolate(msg);
278  msmProxy[CkMyPe()].interpolate(new CkQdMsg);
279 }
280 
281 
282 void ComputeMsmMsa::saveResults(int n, const MsmMsaForce force[], double self_energy)
283 {
284  ResizeArrayIter<PatchElem> ap(patchList);
285 
286  // add in forces
287  int j = 0;
288  for (ap = ap.begin(); ap != ap.end(); ap++) {
289  Results *r = (*ap).forceBox->open();
290  Force *f = r->f[Results::slow];
291  int numAtoms = (*ap).p->getNumAtoms();
292  for (int i=0; i < numAtoms; i++, j++) {
293  f[i].x += force[j].x;
294  f[i].y += force[j].y;
295  f[i].z += force[j].z;
296  }
297  (*ap).forceBox->close(&r);
298  }
299  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) -= self_energy;
300  reduction->submit();
301 }
302 
303 
304 ComputeMsmMsaMgr::ComputeMsmMsaMgr() :
305  msmProxy(thisgroup), msmCompute(0),
306  coordMsg(0), numAtoms(0), force(0), numForces(0),
307  msa_setup_anterpolation(0), msa_setup_interpolation(0)
308 {
309  CkpvAccess(BOCclass_group).computeMsmMsaMgr = thisgroup;
310 }
311 
312 ComputeMsmMsaMgr::~ComputeMsmMsaMgr()
313 {
314  delete [] force;
315 }
316 
317 
318 static void rescale_nonperiodic_cell(
319  Vector &u, Vector &v, Vector &w, Vector &c,
320  Vector &ru, Vector &rv, Vector &rw,
321  const Vector &smin, const Vector &smax,
322  double padding, double gridspacing, int isperiodic)
323 {
324  const double ZERO_TOLERANCE = 1e-4;
325  double xlen = 1, inv_xlen = 1; // don't rescale periodic directions
326  double ylen = 1, inv_ylen = 1;
327  double zlen = 1, inv_zlen = 1;
328  double ulen_1 = u.rlength();
329  double vlen_1 = v.rlength();
330  double wlen_1 = w.rlength();
331  double rpadx = padding * ulen_1; // padding distance in recip space
332  double rpady = padding * vlen_1;
333  double rpadz = padding * wlen_1;
334  double rhx = gridspacing * ulen_1; // grid spacing in recip space
335  double rhy = gridspacing * vlen_1;
336  double rhz = gridspacing * wlen_1;
337  double xmin = smin.x, xmax = smax.x; // scaled coordinate extremes
338  double ymin = smin.y, ymax = smax.y;
339  double zmin = smin.z, zmax = smax.z;
340  Vector rc = 0; // don't move center along periodic directions
341  int is_periodic_x = (isperiodic & PERIODIC_VEC1);
342  int is_periodic_y = (isperiodic & PERIODIC_VEC2);
343  int is_periodic_z = (isperiodic & PERIODIC_VEC3);
344 
345 #if 0
346  /* affine linear transformation of coordinates to reciprocal space,
347  * where periodic vector directions map into [-0.5, 0.5) */
348  //printf("*** center=%.4f %.4f %.4f\n", c.x, c.y, c.z);
349  d = coord[0].position - c;
350  s.x = ru * d; // dot product
351  s.y = rv * d;
352  s.z = rw * d;
353  xmin = xmax = s.x;
354  ymin = ymax = s.y;
355  zmin = zmax = s.z;
356  for (i = 1; i < numatoms; i++) {
357  d = coord[i].position - c;
358  s.x = ru * d; // dot product
359  s.y = rv * d;
360  s.z = rw * d;
361  if (s.x < xmin) xmin = s.x;
362  else if (s.x > xmax) xmax = s.x;
363  if (s.y < ymin) ymin = s.y;
364  else if (s.y > ymax) ymax = s.y;
365  if (s.z < zmin) zmin = s.z;
366  else if (s.z > zmax) zmax = s.z;
367  }
368 #if 0
369  printf("*** xmin=%.4f xmax=%.4f\n", xmin, xmax);
370  printf("*** ymin=%.4f ymax=%.4f\n", ymin, ymax);
371  printf("*** zmin=%.4f zmax=%.4f\n", zmin, zmax);
372 #endif
373 #endif
374 
375  if ( ! is_periodic_x) {
376  xmax += rpadx; // pad the edges
377  xmin -= rpadx;
378  if (rhx > 0) { // restrict center to rhx lattice points
379  double mupper = ceil(xmax / (2*rhx));
380  double mlower = floor(xmin / (2*rhx));
381  xmax = 2*rhx*mupper;
382  xmin = 2*rhx*mlower;
383  }
384  rc.x = 0.5*(xmin + xmax);
385  xlen = xmax - xmin;
386  if (xlen < ZERO_TOLERANCE) {
387  xlen = 1; // leave scaling unchanged
388  }
389  else {
390  inv_xlen = 1/xlen;
391  }
392  }
393 
394  if ( ! is_periodic_y) {
395  ymax += rpady; // pad the edges
396  ymin -= rpady;
397  if (rhy > 0) { // restrict center to rhy lattice points
398  double mupper = ceil(ymax / (2*rhy));
399  double mlower = floor(ymin / (2*rhy));
400  ymax = 2*rhy*mupper;
401  ymin = 2*rhy*mlower;
402  }
403  rc.y = 0.5*(ymin + ymax);
404  ylen = ymax - ymin;
405  if (ylen < ZERO_TOLERANCE) {
406  ylen = 1; // leave scaling unchanged
407  }
408  else {
409  inv_ylen = 1/ylen;
410  }
411  }
412 
413  if ( ! is_periodic_z) {
414  zmax += rpadz; // pad the edges
415  zmin -= rpadz;
416  if (rhz > 0) { // restrict center to rhz lattice points
417  double mupper = ceil(zmax / (2*rhz));
418  double mlower = floor(zmin / (2*rhz));
419  zmax = 2*rhz*mupper;
420  zmin = 2*rhz*mlower;
421  }
422  rc.z = 0.5*(zmin + zmax);
423  zlen = zmax - zmin;
424  if (zlen < ZERO_TOLERANCE) {
425  zlen = 1; // leave scaling unchanged
426  }
427  else {
428  inv_zlen = 1/zlen;
429  }
430  }
431 
432 #if 0
433  printf("xmin=%g xmax=%g\n", xmin, xmax);
434  printf("ymin=%g ymax=%g\n", ymin, ymax);
435  printf("zmin=%g zmax=%g\n", zmin, zmax);
436  printf("xlen=%g ylen=%g zlen=%g\n", xlen, ylen, zlen);
437  printf("rc_x=%g rc_y=%g rc_z=%g\n", rc.x, rc.y, rc.z);
438 #endif
439 
440  // transform new center back to real space, then rescale basis vectors
441  c.x = (u.x*rc.x + v.x*rc.y + w.x*rc.z) + c.x;
442  c.y = (u.y*rc.x + v.y*rc.y + w.y*rc.z) + c.y;
443  c.z = (u.z*rc.x + v.z*rc.y + w.z*rc.z) + c.z;
444 
445 #if 0
446  printf("c_x=%g c_y=%g c_z=%g\n", c.x, c.y, c.z);
447 #endif
448 
449  u *= xlen;
450  v *= ylen;
451  w *= zlen;
452 
453  ru *= inv_xlen;
454  rv *= inv_ylen;
455  rw *= inv_zlen;
456 }
457 
458 
459 struct MsmMsaInterpParams {
460  int nu;
461  int stencil;
462  int omega;
463 };
464 
465 // ordering must be identical to APPROX enum constants
466 static const MsmMsaInterpParams InterpParams[] = {
467  { 1, 4, 6 }, // cubic
468  { 2, 6, 10 }, // quintic
469  { 2, 6, 10 }, // quintic, C2
470  { 3, 8, 14 }, // septic
471  { 3, 8, 14 }, // septic, C3
472  { 4, 10, 18 }, // nonic
473  { 4, 10, 18 }, // nonic, C4
474  { 1, 4, 6 }, // B-spline
475 };
476 
477 
478 static int setup_hgrid_1d(
479  double len, // cell length
480  double gridspacing, // desired grid spacing
481  double &hh, // determine h to fit cell length
482  int &nn, // determine number of h's covering cell
483  int &aindex, // determine smallest grid index
484  int &bindex, // determine largest grid index
485  int isperiodic, // is this dimension periodic?
486  int approx // which approximation
487  ) {
488 
489  const int nu = InterpParams[approx].nu; // interp stencil radius
490 
491  // ASSERT(hmax > 0);
492  if (isperiodic) {
493  const double hmin = (4./5) * gridspacing; // minimum bound on h
494  const double hmax = 1.5 * hmin;
495  double h = len;
496  int n = 1; // start with one grid point across domain
497  while (h >= hmax) {
498  h *= 0.5; // halve h
499  n <<= 1; // double grid points
500  }
501  if (h < hmin) {
502  if (n < 4) { // either len is too small or hmin is too large
503  return -1;
504  }
505  h *= (4./3); // scale h by 4/3
506  n >>= 2; // scale n by 3/4
507  n *= 3;
508  }
509  // now we have: hmin <= h < hmax
510  // now we have: n is power of two times no more than one power of 3
511  hh = h;
512  nn = n;
513  aindex = 0;
514  bindex = n-1;
515  }
516  else { // non-periodic
517  double h = gridspacing;
518  int n = (int) floorf(len / h) + 1;
519  hh = h;
520  nn = n;
521  aindex = -nu;
522  bindex = n + nu;
523  }
524  return 0;
525 }
526 
527 
528 void ComputeMsmMsaMgr::initialize(CkQdMsg *msg)
529 {
530  delete msg;
531  //printf("MSM initialize PE=%d\n", CkMyPe());
532  if (CkMyPe() != 0) return; // initialize only on PE 0, broadcast MsmMsaData
533 
534  // initialize MSM here
535  SimParameters *simParams = Node::Object()->simParameters;
536  const double a = simParams->cutoff;
537  const double gridspacing = simParams->MSMGridSpacing;
538  const double padding = simParams->MSMPadding;
539  const int approx = simParams->MSMApprox;
540  const int split = simParams->MSMSplit;
541  int nlevels = simParams->MSMLevels;
542  const Lattice &lattice = simParams->lattice;
543  int msmflags = 0;
544  msmflags |= (lattice.a_p() ? PERIODIC_VEC1 : 0);
545  msmflags |= (lattice.b_p() ? PERIODIC_VEC2 : 0);
546  msmflags |= (lattice.c_p() ? PERIODIC_VEC3 : 0);
547  int allperiodic = (PERIODIC_VEC1 | PERIODIC_VEC2 | PERIODIC_VEC3);
548  Vector u(lattice.a()), v(lattice.b()), w(lattice.c());
549  Vector c(lattice.origin());
550  Vector ru(lattice.a_r()), rv(lattice.b_r()), rw(lattice.c_r());
551  if ( (msmflags & allperiodic) != allperiodic ) {
552  ScaledPosition smin, smax;
553  Node::Object()->pdb->get_extremes(smin, smax);
554  //printf("smin = %g %g %g smax = %g %g %g\n",
555  // smin.x, smin.y, smin.z, smax.x, smax.y, smax.z);
556  rescale_nonperiodic_cell(u, v, w, c, ru, rv, rw, smin, smax,
557  padding, gridspacing, msmflags);
558  }
559 
560  if (u.x <= 0 || u.y != 0 || u.z != 0 ||
561  v.x != 0 || v.y <= 0 || v.z != 0 ||
562  w.x != 0 || w.y != 0 || w.z <= 0) {
563  NAMD_die("MSM requires cell basis be along x, y, and z coordinate axes.");
564  }
565 
566  // setup grids
567  MsmMsaData &p = msmData; // the MSM data will be broadcast to all PEs
568 
569  const int nu = InterpParams[approx].nu;
570  const int omega = InterpParams[approx].omega;
571  const int ispx = ((msmflags & PERIODIC_VEC1) != 0);
572  const int ispy = ((msmflags & PERIODIC_VEC2) != 0);
573  const int ispz = ((msmflags & PERIODIC_VEC3) != 0);
574  const int ispany = ((msmflags & PERIODIC_ALL) != 0);
575 
576  const double xlen = u.x; // XXX want to extend to non-orthogonal cells
577  const double ylen = v.y;
578  const double zlen = w.z;
579 
580  double scaling;
581  double d; // temporary for SPOLY derivative
582 
583  int ia, ib, ja, jb, ka, kb, ni, nj, nk;
584  int nx, ny, nz; // counts the grid points that span just the domain
585 
586  double hx, hy, hz;
587  double gx, gy, gz;
588 
589  int i, j, k, n;
590  int index;
591  int level, toplevel, maxlevels;
592  int lastnelems = 1;
593  int isclamped = 0;
594  int done, alldone;
595 
596  int rc = 0; // return code
597 
598  //CkPrintf("ispx=%d ispy=%d ispz=%d\n", ispx, ispy, ispz);
599  p.ispx = ispx;
600  p.ispy = ispy;
601  p.ispz = ispz;
602 
603  rc = setup_hgrid_1d(xlen, gridspacing, hx, nx, ia, ib, ispx, approx);
604  if (rc) NAMD_die("MSM failed to setup grid along x dimension.");
605 
606  rc = setup_hgrid_1d(ylen, gridspacing, hy, ny, ja, jb, ispy, approx);
607  if (rc) NAMD_die("MSM failed to setup grid along y dimension.");
608 
609  rc = setup_hgrid_1d(zlen, gridspacing, hz, nz, ka, kb, ispz, approx);
610  if (rc) NAMD_die("MSM failed to setup grid along z dimension.");
611 
612  p.a = (float) a; // cutoff distance
613 
614  p.hx_1 = (float) (1/hx); // inverse of grid spacing
615  p.hy_1 = (float) (1/hy);
616  p.hz_1 = (float) (1/hz);
617 
618  // XXX set coordinate for h-grid (0,0,0) point
619  gx = c.x - ((nx >> 1) * hx);
620  gy = c.y - ((ny >> 1) * hy);
621  gz = c.z - ((nz >> 1) * hz);
622 
623  p.origin_x = (float) gx; // grid(0,0,0) location
624  p.origin_y = (float) gy;
625  p.origin_z = (float) gz;
626 
627  ni = ib - ia + 1;
628  nj = jb - ja + 1;
629  nk = kb - ka + 1;
630 
631  p.approx = approx;
632  p.split = split;
633 
634  if (nlevels <= 0) {
635  // automatically set number of levels
636  n = ni;
637  if (n < nj) n = nj;
638  if (n < nk) n = nk;
639  for (maxlevels = 1; n > 0; n >>= 1) maxlevels++;
640  nlevels = maxlevels;
641  if (ispany == 0) { // no periodicity
642  int omega3 = omega * omega * omega;
643  int nhalf = (int) sqrtf(ni*nj*nk); // scale down for performance?
644  lastnelems = (nhalf > omega3 ? nhalf : omega3);
645  isclamped = 1;
646  }
647  }
648  else {
649  // user-defined number of levels
650  maxlevels = nlevels;
651  }
652 
653  p.maxlevels = maxlevels;
654  p.grid_len.resize(maxlevels);
655  p.grid_idstart.resize(maxlevels);
656  p.scaling.resize(maxlevels);
657 
658 #if 0
659  /* allocate any additional levels that may be needed */
660  if (pm->maxlevels < maxlevels) {
661  void *vqh, *veh, *vgc;
662  if (issprec) {
663  vqh = realloc(pm->qh, maxlevels * sizeof(NL_MsmMsagrid_float));
664  if (NULL == vqh) return NL_MSM_ERROR_MALLOC;
665  veh = realloc(pm->eh, maxlevels * sizeof(NL_MsmMsagrid_float));
666  if (NULL == veh) return NL_MSM_ERROR_MALLOC;
667  vgc = realloc(pm->gc, maxlevels * sizeof(NL_MsmMsagrid_float));
668  if (NULL == vgc) return NL_MSM_ERROR_MALLOC;
669  pm->qh_f = (NL_MsmMsagrid_float *) vqh;
670  pm->eh_f = (NL_MsmMsagrid_float *) veh;
671  pm->gc_f = (NL_MsmMsagrid_float *) vgc;
672  /* initialize the newest grids appended to array */
673  for (level = pm->maxlevels; level < maxlevels; level++) {
674  GRID_INIT( &(pm->qh_f[level]) );
675  GRID_INIT( &(pm->eh_f[level]) );
676  GRID_INIT( &(pm->gc_f[level]) );
677  }
678  }
679  else {
680  vqh = realloc(pm->qh, maxlevels * sizeof(NL_MsmMsagrid_double));
681  if (NULL == vqh) return NL_MSM_ERROR_MALLOC;
682  veh = realloc(pm->eh, maxlevels * sizeof(NL_MsmMsagrid_double));
683  if (NULL == veh) return NL_MSM_ERROR_MALLOC;
684  vgc = realloc(pm->gc, maxlevels * sizeof(NL_MsmMsagrid_double));
685  if (NULL == vgc) return NL_MSM_ERROR_MALLOC;
686  pm->qh = (NL_MsmMsagrid_double *) vqh;
687  pm->eh = (NL_MsmMsagrid_double *) veh;
688  pm->gc = (NL_MsmMsagrid_double *) vgc;
689  /* initialize the newest grids appended to array */
690  for (level = pm->maxlevels; level < maxlevels; level++) {
691  GRID_INIT( &(pm->qh[level]) );
692  GRID_INIT( &(pm->eh[level]) );
693  GRID_INIT( &(pm->gc[level]) );
694  }
695  }
696  pm->maxlevels = maxlevels;
697  }
698 #endif
699 
700  level = 0;
701  done = 0;
702  alldone = 0;
703  scaling = 1.0;
704  do {
705 #if 0
706  if (issprec) {
707  GRID_RESIZE( &(pm->qh_f[level]), float, ia, ni, ja, nj, ka, nk);
708  GRID_RESIZE( &(pm->eh_f[level]), float, ia, ni, ja, nj, ka, nk);
709  }
710  else {
711  GRID_RESIZE( &(pm->qh[level]), double, ia, ni, ja, nj, ka, nk);
712  GRID_RESIZE( &(pm->eh[level]), double, ia, ni, ja, nj, ka, nk);
713  }
714 #endif
715  p.grid_len[level] = Int3(ni,nj,nk);
716  p.grid_idstart[level] = Int3(ia,ja,ka);
717  p.scaling[level] = (float) scaling;
718  scaling *= 0.5;
719 
720  if (++level == nlevels) done |= 0x07; // user limit on levels
721 
722  alldone = (done == 0x07); // make sure all dimensions are done
723 
724  if (isclamped) {
725  int nelems = ni * nj * nk;
726  if (nelems <= lastnelems) done |= 0x07;
727  }
728 
729  if (ispx) {
730  ni >>= 1;
731  ib = ni-1;
732  if (ni & 1) done |= 0x07; // == 3 or 1
733  else if (ni == 2) done |= 0x01; // can do one more
734  }
735  else {
736  ia = -((-ia+1)/2) - nu;
737  ib = (ib+1)/2 + nu;
738  ni = ib - ia + 1;
739  if (ni <= omega) done |= 0x01; // can do more restrictions
740  }
741 
742  if (ispy) {
743  nj >>= 1;
744  jb = nj-1;
745  if (nj & 1) done |= 0x07; // == 3 or 1
746  else if (nj == 2) done |= 0x02; // can do one more
747  }
748  else {
749  ja = -((-ja+1)/2) - nu;
750  jb = (jb+1)/2 + nu;
751  nj = jb - ja + 1;
752  if (nj <= omega) done |= 0x02; // can do more restrictions
753  }
754 
755  if (ispz) {
756  nk >>= 1;
757  kb = nk-1;
758  if (nk & 1) done |= 0x07; // == 3 or 1
759  else if (nk == 2) done |= 0x04; // can do one more
760  }
761  else {
762  ka = -((-ka+1)/2) - nu;
763  kb = (kb+1)/2 + nu;
764  nk = kb - ka + 1;
765  if (nk <= omega) done |= 0x04; // can do more restrictions
766  }
767 
768  } while ( ! alldone );
769  nlevels = level;
770  p.nlevels = nlevels;
771 
772  toplevel = (ispany ? nlevels : nlevels - 1);
773  p.toplevel = -1; // set only for non-periodic system
774 
775  // ellipsoid axes for grid cutoff weights
776  ni = (int) ceil(2*a/hx) - 1;
777  nj = (int) ceil(2*a/hy) - 1;
778  nk = (int) ceil(2*a/hz) - 1;
779 
780  p.gc_len = Int3(2*ni+1, 2*nj+1, 2*nk+1);
781  p.gc_idstart = Int3(-ni, -nj, -nk);
782  p.gc.resize( (2*ni+1)*(2*nj+1)*(2*nk+1) );
783 
784  index = 0;
785  for (k = -nk; k <= nk; k++) {
786  for (j = -nj; j <= nj; j++) {
787  for (i = -ni; i <= ni; i++) {
788  double s, t, gs, gt, g;
789  s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
790  t = 0.5 * s;
791  if (t >= 1) {
792  g = 0;
793  }
794  else if (s >= 1) {
795  gs = 1/s;
796  SPOLY(&gt, &d, t, split);
797  g = (gs - 0.5 * gt) / a;
798  }
799  else {
800  SPOLY(&gs, &d, s, split);
801  SPOLY(&gt, &d, t, split);
802  g = (gs - 0.5 * gt) / a;
803  }
804  p.gc[index] = (float) g;
805  index++;
806  }
807  }
808  }
809 
810  if (toplevel < nlevels) {
811  // nonperiodic in all dimensions,
812  // calculate top level weights, ellipsoid axes are length of grid
813 #ifdef MSM_DEBUG
814  CkPrintf("MSM setting top level weights\n");
815 #endif
816  ni = p.grid_len[toplevel].nx;
817  nj = p.grid_len[toplevel].ny;
818  nk = p.grid_len[toplevel].nz;
819 
820  p.toplevel = toplevel;
821  p.gctop_len = Int3(2*ni+1, 2*nj+1, 2*nk+1);
822  p.gctop_idstart = Int3(-ni, -nj, -nk);
823  p.gctop.resize( (2*ni+1)*(2*nj+1)*(2*nk+1) );
824 
825  index = 0;
826  for (k = -nk; k <= nk; k++) {
827  for (j = -nj; j <= nj; j++) {
828  for (i = -ni; i <= ni; i++) {
829  double s, gs;
830  s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
831  if (s >= 1) {
832  gs = 1/s;
833  }
834  else {
835  SPOLY(&gs, &d, s, split);
836  }
837  p.gctop[index] = (float) (gs / a);
838  index++;
839  }
840  }
841  } // end loops over k-j-i for coarsest level weights
842  }
843 
844  // calculate self energy factor for splitting
845  double s, gs;
846  s = 0;
847  SPOLY(&gs, &d, s, split);
848  p.self_energy_const = gs / a;
849 #if 0
850  s = 0;
851  for (n = 0; n < natoms; n++) {
852  double q = atom[4*n + 3];
853  s += q * q;
854  }
855  self_energy *= 0.5*s;
856 #endif
857 
858  // count number of client chares to the MSAs
859  p.num_points_per_chare.nx = simParams->MSMBlockSizeX;
860  p.num_points_per_chare.ny = simParams->MSMBlockSizeY;
861  p.num_points_per_chare.nz = simParams->MSMBlockSizeZ;
862  if (nlevels > 1) {
863  p.num_restriction_chares.resize(nlevels-1);
864  p.num_prolongation_chares.resize(nlevels-1);
865  p.dim_gridtransfer_chares.resize(nlevels-1);
866  }
867  p.num_gridcutoff_chares.resize(nlevels);
868  p.dim_gridcutoff_chares.resize(nlevels);
869  p.num_clients_qh.resize(nlevels);
870  p.num_clients_eh.resize(nlevels);
871 
872  p.num_anterpolation_chares = (PatchMap::Object())->numNodesWithPatches();
873  p.num_interpolation_chares = p.num_anterpolation_chares;
874  p.num_total_gridcutoff_chares = 0;
875  p.num_total_restriction_chares = 0;
876  p.num_total_prolongation_chares = 0;
877  for (n = 0; n < nlevels; n++) {
878  int ni = p.grid_len[n].nx;
879  int nj = p.grid_len[n].ny;
880  int nk = p.grid_len[n].nz;
881  int nci = ROUNDUP_QUOTIENT(ni, p.num_points_per_chare.nx);
882  int ncj = ROUNDUP_QUOTIENT(nj, p.num_points_per_chare.ny);
883  int nck = ROUNDUP_QUOTIENT(nk, p.num_points_per_chare.nz);
884  int nc = nci * ncj * nck;
885 #ifdef MSM_DEBUG
886  CkPrintf("n=%d nc=%d nci=%d ncj=%d nck=%d\n", n, nc, nci, ncj, nck);
887 #endif
888  p.num_gridcutoff_chares[n] = nc;
889  p.dim_gridcutoff_chares[n].nx = nci;
890  p.dim_gridcutoff_chares[n].ny = ncj;
891  p.dim_gridcutoff_chares[n].nz = nck;
892  p.num_total_gridcutoff_chares += nc;
893  if (n > 0) {
894  p.num_restriction_chares[n-1] = nc;
895  p.num_prolongation_chares[n-1] = nc;
896  p.dim_gridtransfer_chares[n-1].nx = nci;
897  p.dim_gridtransfer_chares[n-1].ny = ncj;
898  p.dim_gridtransfer_chares[n-1].nz = nck;
899  p.num_total_restriction_chares += nc;
900  p.num_total_prolongation_chares += nc;
901  }
902  else {
903  p.num_energy_chares = nc;
904  }
905  }
906 
907  p.num_clients_qh[0] = p.num_anterpolation_chares +
908  p.num_gridcutoff_chares[0] + p.num_energy_chares;
909  p.num_clients_eh[0] = p.num_interpolation_chares +
910  p.num_gridcutoff_chares[0] + p.num_energy_chares;
911  if (nlevels > 1) {
912  p.num_clients_qh[0] += p.num_restriction_chares[0];
913  p.num_clients_eh[0] += p.num_prolongation_chares[0];
914  }
915  for (n = 1; n < nlevels; n++) {
916  p.num_clients_qh[n] = p.num_gridcutoff_chares[n] +
917  p.num_restriction_chares[n-1];
918  p.num_clients_eh[n] = p.num_gridcutoff_chares[n] +
919  p.num_prolongation_chares[n-1];
920  if (n < nlevels-1) {
921  p.num_clients_qh[n] += p.num_restriction_chares[n];
922  p.num_clients_eh[n] += p.num_prolongation_chares[n];
923  }
924  }
925 
926 #ifdef MSM_DEBUG
927  CkPrintf("nlevels = %d\n", nlevels);
928  for (n = 0; n < nlevels; n++) {
929  CkPrintf("num clients qh[%d] = %d\n", n, p.num_clients_qh[n]);
930  CkPrintf("num clients eh[%d] = %d\n", n, p.num_clients_eh[n]);
931  }
932  CkPrintf("num anterpolation chares = %d\n", p.num_anterpolation_chares);
933  CkPrintf("num interpolation chares = %d\n", p.num_interpolation_chares);
934  for (n = 0; n < nlevels; n++) {
935  CkPrintf("num grid cutoff chares[%d] = %d\n",
936  n, p.num_gridcutoff_chares[n]);
937  }
938  for (n = 0; n < nlevels-1; n++) {
939  CkPrintf("num restriction chares[%d] = %d\n",
940  n, p.num_restriction_chares[n]);
941  }
942  for (n = 0; n < nlevels-1; n++) {
943  CkPrintf("num prolongation chares[%d] = %d\n",
944  n, p.num_prolongation_chares[n]);
945  }
946 #endif
947 
948 #if 0
949  // allocate MSAs
950  if (qh || eh) {
951  CkAbort("attempted to reallocate MSAs");
952  }
953  char *qh_memory = new char[nlevels * sizeof(MsmMsaGrid)];
954  qh = static_cast<MsmMsaGrid *>((void *)qh_memory);
955  char *eh_memory = new char[nlevels * sizeof(MsmMsaGrid)];
956  eh = static_cast<MsmMsaGrid *>((void *)eh_memory);
957 #else
958  p.qh.resize(nlevels);
959  p.eh.resize(nlevels);
960 #endif
961  for (n = 0; n < nlevels; n++) {
962  ia = p.grid_idstart[n].nx;
963  ib = p.grid_len[n].nx + ia - 1;
964  ja = p.grid_idstart[n].ny;
965  jb = p.grid_len[n].ny + ja - 1;
966  ka = p.grid_idstart[n].nz;
967  kb = p.grid_len[n].nz + ka - 1;
968 #if 0
969  // using placement new to call non-default constructor
970  // on each MsmMsaGrid element
971  new(qh_memory + n*sizeof(MsmMsaGrid))MsmMsaGrid(ia, ib, ja, jb, ka, kb,
972  p.num_clients_qh[n]);
973  new(eh_memory + n*sizeof(MsmMsaGrid))MsmMsaGrid(ia, ib, ja, jb, ka, kb,
974  p.num_clients_eh[n]);
975 #else
976  p.qh[n] = MsmMsaGrid(ia, ib, ja, jb, ka, kb, p.num_clients_qh[n]);
977  p.eh[n] = MsmMsaGrid(ia, ib, ja, jb, ka, kb, p.num_clients_eh[n]);
978 #endif
979  }
980  msmProxy.recvMsmMsaData(msmData); // broadcast MsmMsaData to chare group
981 }
982 
983 
984 void ComputeMsmMsaMgr::recvMsmMsaData(const MsmMsaData &m)
985 {
986  //printf("MSM recvMsmMsaData PE=%d\n", CkMyPe());
987  if (CkMyPe() != 0) {
988  msmData = m;
989  }
990  else {
991  msmData.print(); // PE 0
992  }
993 }
994 
995 
996 void ComputeMsmMsaMgr::initWorkers(CkQdMsg *msg)
997 {
998  delete msg;
999  //printf("MSM initWorkers PE=%d\n", CkMyPe());
1000  if (CkMyPe() != 0) return;
1001  // PE 0 creates the compute chare arrays
1002 
1003  MsmMsaData &p = msmData;
1004  int n;
1005  msmLevelProxy = CProxy_MsmMsaLevel::ckNew(); // create empty chare array
1006  for (n = 0; n < p.nlevels-1; n++) {
1007  msmLevelProxy[n].insert(p.qh[n], p.eh[n], p.qh[n+1], p.eh[n+1]);
1008  }
1009  msmLevelProxy[n].insert(p.qh[n], p.eh[n]); // top level
1010  msmLevelProxy.doneInserting();
1011 #ifdef MSM_DEBUG
1012  CkPrintf("Created %d MsmMsaLevel chares\n", p.nlevels);
1013 #endif
1014 
1015  msmEnergyProxy = CProxy_MsmMsaEnergy::ckNew(p.qh[0], p.eh[0], p.num_energy_chares);
1016 #ifdef MSM_DEBUG
1017  CkPrintf("Created %d MsmMsaEnergy chares\n", p.num_energy_chares);
1018 #endif
1019 }
1020 
1021 
1022 void ComputeMsmMsaMgr::startWorkers(CkQdMsg *msg)
1023 {
1024  delete msg;
1025  //printf("MSM startWorkers PE=%d\n", CkMyPe());
1026  if (CkMyPe() != 0) return;
1027  // PE 0 starts the workers;
1028  // they loop forever, waiting for compute work from the MSAs
1029 
1030  msmLevelProxy.compute();
1031  msmEnergyProxy.compute();
1032 }
1033 
1034 
1036 enum { MAX_POLY_DEGREE = 9 };
1037 
1040 static const int PolyDegree[APPROX_END] = {
1041  3, 5, 5, 7, 7, 9, 9, 3,
1042 };
1043 
1044 
1045 void ComputeMsmMsaMgr::anterpolate(MsmMsaCoordMsg *msg)
1046 {
1047 #ifdef MSM_DEBUG
1048  CkPrintf("Anterpolation compute %d starting, PE %d\n", thisIndex, CkMyPe());
1049 #endif
1050  coordMsg = msg; // save message for interpolation
1051  ComputeMsmMsaAtom *coord = coordMsg->coord;
1052  int numAtoms = coordMsg->numAtoms;
1053  MsmMsaData &p = msmData;
1054  if ( ! msa_setup_anterpolation) {
1055  p.qh[0].enroll(p.num_clients_qh[0]);
1056  qhacc = p.qh[0].getInitialAccum();
1057  qhread = qhacc.syncToRead(); // immediately sync to read phase
1058  msa_setup_anterpolation = 1;
1059  }
1060  int ni = p.grid_len[0].nx;
1061  int nj = p.grid_len[0].ny;
1062  int nk = p.grid_len[0].nz;
1063  int ia = p.grid_idstart[0].nx;
1064  int ja = p.grid_idstart[0].ny;
1065  int ka = p.grid_idstart[0].nz;
1066  int ib = ia + ni - 1;
1067  int jb = ja + nj - 1;
1068  int kb = ka + nk - 1;
1069  qhacc = qhread.syncToEAccum(); // we accumulate to it
1070 #ifdef MSM_DEBUG
1071  CkPrintf("Anterpolation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1072 #endif
1073  //
1074  // here is the compute work
1075  //
1076 
1077  float xphi[MAX_POLY_DEGREE+1]; // Phi stencil along x-dimension
1078  float yphi[MAX_POLY_DEGREE+1]; // Phi stencil along y-dimension
1079  float zphi[MAX_POLY_DEGREE+1]; // Phi stencil along z-dimension
1080 
1081  int n;
1082  const int s_size = PolyDegree[p.approx] + 1; // stencil size
1083  const int s_edge = (PolyDegree[p.approx] - 1) >> 1; // stencil "edge" size
1084 
1085  for (n = 0; n < numAtoms; n++) {
1086  // atomic charge
1087  float q = coord[n].msmcharge;
1088  if (0==q) continue;
1089 
1090  // distance between atom and origin measured in grid points
1091  // (does subtraction and division in double, then converts to float)
1092  float rx_hx = (coord[n].position.x - p.origin_x) * p.hx_1;
1093  float ry_hy = (coord[n].position.y - p.origin_y) * p.hy_1;
1094  float rz_hz = (coord[n].position.z - p.origin_z) * p.hz_1;
1095 
1096  // find smallest numbered grid point in stencil
1097  int ilo = (int) floorf(rx_hx) - s_edge;
1098  int jlo = (int) floorf(ry_hy) - s_edge;
1099  int klo = (int) floorf(rz_hz) - s_edge;
1100 
1101  // calculate Phi stencils along each dimension
1102  float delta;
1103  delta = rx_hx - (float) ilo;
1104  STENCIL_1D(xphi, delta, p.approx);
1105  delta = ry_hy - (float) jlo;
1106  STENCIL_1D(yphi, delta, p.approx);
1107  delta = rz_hz - (float) klo;
1108  STENCIL_1D(zphi, delta, p.approx);
1109 
1110  // test to see if stencil is within edges of grid
1111  int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
1112  ja <= jlo && (jlo+(s_size-1)) <= jb &&
1113  ka <= klo && (klo+(s_size-1)) <= kb );
1114 
1115  if ( iswithin ) { // no wrapping needed
1116  // determine charge on cube of grid points around atom
1117  int i, j, k;
1118  for (k = 0; k < s_size; k++) {
1119  float ck = zphi[k] * q;
1120  for (j = 0; j < s_size; j++) {
1121  float cjk = yphi[j] * ck;
1122  for (i = 0; i < s_size; i++) {
1123  qhacc.accumulate(i+ilo,j+jlo,k+klo) += xphi[i] * cjk;
1124  }
1125  }
1126  }
1127  } // if is within
1128  else { // requires wrapping around grid
1129  int ip, jp, kp;
1130 
1131  // adjust ilo, jlo, klo so they are within grid indexing
1132  if (p.ispx) {
1133  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
1134  else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
1135  }
1136  else if (ilo < ia || (ilo+(s_size-1)) > ib) {
1137  CkPrintf("Error anterpolation: ilo=%d out-of-range [%d,%d]\n",
1138  ilo, ia, (ib-s_size+1));
1139  continue /* XXX NL_MSM_ERROR_RANGE */;
1140  }
1141 
1142  if (p.ispy) {
1143  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
1144  else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
1145  }
1146  else if (jlo < ja || (jlo+(s_size-1)) > jb) {
1147  CkPrintf("Error anterpolation: jlo=%d out-of-range [%d,%d]\n",
1148  jlo, ja, (jb-s_size+1));
1149  continue /* XXX NL_MSM_ERROR_RANGE */;
1150  }
1151 
1152  if (p.ispz) {
1153  if (klo < ka) do { klo += nk; } while (klo < ka);
1154  else if (klo > kb) do { klo -= nk; } while (klo > kb);
1155  }
1156  else if (klo < ka || (klo+(s_size-1)) > kb) {
1157  CkPrintf("Error anterpolation: klo=%d out-of-range [%d,%d]\n",
1158  klo, ka, (kb-s_size+1));
1159  continue /* XXX NL_MSM_ERROR_RANGE */;
1160  }
1161 
1162  // determine charge on cube of grid points around atom, with wrapping
1163  int i, j, k;
1164  for (k = 0, kp = klo; k < s_size; k++, kp++) {
1165  if (kp > kb) kp = ka; /* wrap stencil around grid */
1166  float ck = zphi[k] * q;
1167  for (j = 0, jp = jlo; j < s_size; j++, jp++) {
1168  if (jp > jb) jp = ja; /* wrap stencil around grid */
1169  float cjk = yphi[j] * ck;
1170  for (i = 0, ip = ilo; i < s_size; i++, ip++) {
1171  if (ip > ib) ip = ia; /* wrap stencil around grid */
1172  qhacc.accumulate(ip,jp,kp) += xphi[i] * cjk;
1173  }
1174  }
1175  }
1176  } // else
1177  } // end loop over atoms
1178 
1179  //
1180  // end of compute work
1181  //
1182  qhread = qhacc.syncToRead(); // let consumers read
1183 #ifdef MSM_DEBUG
1184  CkPrintf("Anterpolation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1185 #endif
1186 }
1187 
1188 
1189 void ComputeMsmMsaMgr::interpolate(CkQdMsg *msg)
1190 {
1191  delete msg;
1192 #ifdef MSM_DEBUG
1193  CkPrintf("Interpolation compute %d starting, PE %d\n", thisIndex, CkMyPe());
1194 #endif
1195  ComputeMsmMsaAtom *coord = coordMsg->coord;
1196  int numAtoms = coordMsg->numAtoms;
1197  if (numForces < numAtoms) {
1198  delete [] force;
1199  force = new MsmMsaForce[numAtoms];
1200  numForces = numAtoms;
1201  }
1202  MsmMsaData &p = msmData;
1203  if ( ! msa_setup_interpolation) {
1204  p.eh[0].enroll(p.num_clients_eh[0]);
1205  ehacc = p.eh[0].getInitialAccum();
1206  ehread = ehacc.syncToRead(); // immediately sync to read phase
1207  msa_setup_interpolation = 1;
1208  }
1209  int ni = p.grid_len[0].nx;
1210  int nj = p.grid_len[0].ny;
1211  int nk = p.grid_len[0].nz;
1212  int ia = p.grid_idstart[0].nx;
1213  int ja = p.grid_idstart[0].ny;
1214  int ka = p.grid_idstart[0].nz;
1215  int ib = ia + ni - 1;
1216  int jb = ja + nj - 1;
1217  int kb = ka + nk - 1;
1218  ehacc = ehread.syncToEAccum(); // let producers accumulate
1219  ehread = ehacc.syncToRead(); // then we read it
1220 #ifdef MSM_DEBUG
1221  CkPrintf("Interpolation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1222 #endif
1223 #ifdef MSM_DEBUG
1224  if (thisIndex==0) {
1225 #if 1
1226  CkPrintf("+++ eh[0,0,0] = %g\n", ehread.get(0,0,0));
1227 #else
1228  int i, j, k;
1229  for (k = ka; k <= kb; k++) {
1230  for (j = ja; j <= jb; j++) {
1231  for (i = ia; i <= ib; i++) {
1232  CkPrintf("+++ eh[%d,%d,%d] = %g\n", i, j, k, ehread.get(i,j,k));
1233  }
1234  }
1235  }
1236 #endif
1237  }
1238 #endif
1239  //
1240  // here is the compute work
1241  //
1242  double qself = 0; // accumulate charge for self-energy
1243 
1244  float xphi[MAX_POLY_DEGREE+1]; // Phi stencil along x-dimension
1245  float yphi[MAX_POLY_DEGREE+1]; // Phi stencil along y-dimension
1246  float zphi[MAX_POLY_DEGREE+1]; // Phi stencil along z-dimension
1247  float dxphi[MAX_POLY_DEGREE+1]; // derivative of Phi along x-dimension
1248  float dyphi[MAX_POLY_DEGREE+1]; // derivative of Phi along y-dimension
1249  float dzphi[MAX_POLY_DEGREE+1]; // derivative of Phi along z-dimension
1250  float fx, fy, fz; // accumulate each force
1251 
1252  int n;
1253  const int s_size = PolyDegree[p.approx] + 1; // stencil size
1254  const int s_edge = (PolyDegree[p.approx] - 1) >> 1; // stencil "edge" size
1255 
1256  for (n = 0; n < numAtoms; n++) {
1257  // atomic charge
1258  float q = coord[n].msmcharge;
1259  if (0==q) {
1260  force[n].x = 0;
1261  force[n].y = 0;
1262  force[n].z = 0;
1263  continue;
1264  }
1265  qself += q*q;
1266 
1267  // distance between atom and origin measured in grid points
1268  // (does subtraction and division in double, then converts to float)
1269  float rx_hx = (coord[n].position.x - p.origin_x) * p.hx_1;
1270  float ry_hy = (coord[n].position.y - p.origin_y) * p.hy_1;
1271  float rz_hz = (coord[n].position.z - p.origin_z) * p.hz_1;
1272 
1273  // find smallest numbered grid point in stencil
1274  int ilo = (int) floorf(rx_hx) - s_edge;
1275  int jlo = (int) floorf(ry_hy) - s_edge;
1276  int klo = (int) floorf(rz_hz) - s_edge;
1277 
1278  // calculate Phi stencils along each dimension
1279  float delta;
1280  delta = rx_hx - (float) ilo;
1281  D_STENCIL_1D(dxphi, xphi, p.hx_1, delta, p.approx);
1282  delta = ry_hy - (float) jlo;
1283  D_STENCIL_1D(dyphi, yphi, p.hy_1, delta, p.approx);
1284  delta = rz_hz - (float) klo;
1285  D_STENCIL_1D(dzphi, zphi, p.hz_1, delta, p.approx);
1286 
1287  // test to see if stencil is within edges of grid
1288  int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
1289  ja <= jlo && (jlo+(s_size-1)) <= jb &&
1290  ka <= klo && (klo+(s_size-1)) <= kb );
1291 
1292  if ( iswithin ) { // no wrapping needed
1293  // determine charge on cube of grid points around atom
1294  int i, j, k;
1295  fx = fy = fz = 0;
1296  for (k = 0; k < s_size; k++) {
1297  for (j = 0; j < s_size; j++) {
1298  float cx = yphi[j] * zphi[k];
1299  float cy = dyphi[j] * zphi[k];
1300  float cz = yphi[j] * dzphi[k];
1301  for (i = 0; i < s_size; i++) {
1302  float e = ehread.get(i+ilo,j+jlo,k+klo);
1303  fx += e * dxphi[i] * cx;
1304  fy += e * xphi[i] * cy;
1305  fz += e * xphi[i] * cz;
1306  }
1307  }
1308  }
1309  } // if is within
1310  else { // requires wrapping around grid
1311  int ip, jp, kp;
1312 
1313  // adjust ilo, jlo, klo so they are within grid indexing
1314  if (p.ispx) {
1315  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
1316  else if (ilo > ib) do { ilo -= ni; } while (ilo > ib);
1317  }
1318  else if (ilo < ia || (ilo+(s_size-1)) > ib) {
1319  CkPrintf("Error interpolation: ilo=%d out-of-range [%d,%d]\n",
1320  ilo, ia, (ib-s_size+1));
1321  continue /* XXX NL_MSM_ERROR_RANGE */;
1322  }
1323 
1324  if (p.ispy) {
1325  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
1326  else if (jlo > jb) do { jlo -= nj; } while (jlo > jb);
1327  }
1328  else if (jlo < ja || (jlo+(s_size-1)) > jb) {
1329  CkPrintf("Error interpolation: jlo=%d out-of-range [%d,%d]\n",
1330  jlo, ja, (jb-s_size+1));
1331  continue /* XXX NL_MSM_ERROR_RANGE */;
1332  }
1333 
1334  if (p.ispz) {
1335  if (klo < ka) do { klo += nk; } while (klo < ka);
1336  else if (klo > kb) do { klo -= nk; } while (klo > kb);
1337  }
1338  else if (klo < ka || (klo+(s_size-1)) > kb) {
1339  CkPrintf("Error interpolation: klo=%d out-of-range [%d,%d]\n",
1340  klo, ka, (kb-s_size+1));
1341  continue /* XXX NL_MSM_ERROR_RANGE */;
1342  }
1343 
1344  // determine charge on cube of grid points around atom, with wrapping
1345  int i, j, k;
1346  fx = fy = fz = 0;
1347  for (k = 0, kp = klo; k < s_size; k++, kp++) {
1348  if (kp > kb) kp = ka; /* wrap stencil around grid */
1349  for (j = 0, jp = jlo; j < s_size; j++, jp++) {
1350  if (jp > jb) jp = ja; /* wrap stencil around grid */
1351  float cx = yphi[j] * zphi[k];
1352  float cy = dyphi[j] * zphi[k];
1353  float cz = yphi[j] * dzphi[k];
1354  for (i = 0, ip = ilo; i < s_size; i++, ip++) {
1355  if (ip > ib) ip = ia; /* wrap stencil around grid */
1356  float e = ehread.get(ip,jp,kp);
1357  fx += e * dxphi[i] * cx;
1358  fy += e * xphi[i] * cy;
1359  fz += e * xphi[i] * cz;
1360  }
1361  }
1362  }
1363  } // else
1364 
1365  // update force
1366  force[n].x = -q * fx;
1367  force[n].y = -q * fy;
1368  force[n].z = -q * fz;
1369 
1370  } // end loop over atoms
1371  double self_energy = 0.5*p.self_energy_const*qself;
1372 
1373  //
1374  // end of compute work
1375  //
1376 #ifdef MSM_DEBUG
1377  CkPrintf("Interpolation compute %d read sync done, PE %d\n",
1378  thisIndex, CkMyPe());
1379 #endif
1380  delete coordMsg; // get rid of earlier coordMsg
1381  msmCompute->saveResults(numAtoms, force, self_energy);
1382 #if 0
1383  int startAtomID = thisIndex * MsmMsa::NUM_ATOMS_PER_CHARE;
1384  msmProxy.doneForces(startAtomID, nforces, forcebuffer);
1385 #endif
1386 #ifdef MSM_DEBUG
1387  CkPrintf("Interpolation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1388 #endif
1389 }
1390 
1391 
1392 MsmMsaLevel::MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh, MsmMsaGrid &q2h, MsmMsaGrid &e2h)
1393 {
1394  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1395  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1396  int level = thisIndex;
1397  lastlevel = -1; // this is an intermediate level
1398  const Int3 &dg = p.dim_gridcutoff_chares[level];
1399  gridcutoff = CProxy_MsmMsaGridCutoff::ckNew(level, qh, eh, dg.nx, dg.ny, dg.nz);
1400  const Int3 &dt = p.dim_gridtransfer_chares[level];
1401  restriction = CProxy_MsmMsaRestriction::ckNew(level, qh, q2h, dt.nx, dt.ny, dt.nz);
1402  prolongation =CProxy_MsmMsaProlongation::ckNew(level, eh, e2h, dt.nx, dt.ny, dt.nz);
1403 #ifdef MSM_DEBUG
1404  CkPrintf("Created %d grid cutoff chares\n", p.num_gridcutoff_chares[level]);
1405 #endif
1406 }
1407 
1408 
1409 MsmMsaLevel::MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh)
1410 {
1411  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1412  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1413  int level = thisIndex;
1414  lastlevel = level;
1415  const Int3 &dg = p.dim_gridcutoff_chares[level];
1416  gridcutoff = CProxy_MsmMsaGridCutoff::ckNew(level, qh, eh, dg.nx, dg.ny, dg.nz);
1417 #ifdef MSM_DEBUG
1418  CkPrintf("Created %d grid cutoff chares\n", p.num_gridcutoff_chares[level]);
1419 #endif
1420 }
1421 
1422 
1423 void MsmMsaLevel::compute()
1424 {
1425  if (thisIndex != lastlevel) {
1426  restriction.compute();
1427  prolongation.compute();
1428  }
1429  gridcutoff.compute();
1430 }
1431 
1432 
1433 MsmMsaGridCutoff::MsmMsaGridCutoff(int level, MsmMsaGrid &qh_, MsmMsaGrid &eh_)
1434  : qh(qh_), eh(eh_)
1435 {
1436  mylevel = level;
1437  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1438  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1439  // find the points on my part of my level's qh and eh grids
1440  const Int3 &len = p.grid_len[mylevel];
1441  const Int3 &idstart = p.grid_idstart[mylevel];
1442  mia = idstart.nx + thisIndex.x * p.num_points_per_chare.nx;
1443  mja = idstart.ny + thisIndex.y * p.num_points_per_chare.ny;
1444  mka = idstart.nz + thisIndex.z * p.num_points_per_chare.nz;
1445  mib = mia + p.num_points_per_chare.nx - 1;
1446  mjb = mja + p.num_points_per_chare.ny - 1;
1447  mkb = mka + p.num_points_per_chare.nz - 1;
1448  if (mib > idstart.nx + len.nx - 1) {
1449  mib = idstart.nx + len.nx - 1;
1450  }
1451  if (mjb > idstart.ny + len.ny - 1) {
1452  mjb = idstart.ny + len.ny - 1;
1453  }
1454  if (mkb > idstart.nz + len.nz - 1) {
1455  mkb = idstart.nz + len.nz - 1;
1456  }
1457  msa_setup = 0;
1458 }
1459 
1460 
1461 void MsmMsaGridCutoff::compute()
1462 {
1463 #ifdef MSM_DEBUG
1464  CkPrintf("Grid cutoff compute (%d,%d,%d) PE %d\n",
1465  thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
1466 #endif
1467  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1468  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1469  if ( ! msa_setup) {
1470  qh.enroll(p.num_clients_qh[mylevel]);
1471  eh.enroll(p.num_clients_eh[mylevel]);
1472  qhacc = qh.getInitialAccum();
1473  qhread = qhacc.syncToRead(); // immediately sync to read phase
1474  ehacc = eh.getInitialAccum();
1475  ehread = ehacc.syncToRead(); // immediately sync to read phase
1476  msa_setup = 1;
1477  }
1478 for (;;) { // loop forever
1479  qhacc = qhread.syncToEAccum(); // let producers accumulate
1480  qhread = qhacc.syncToRead(); // then we read charge
1481  ehacc = ehread.syncToEAccum(); // and we accumulate potential
1482  //
1483  // here is the compute work
1484  //
1485 
1486  int ni = p.grid_len[mylevel].nx;
1487  int nj = p.grid_len[mylevel].ny;
1488  int nk = p.grid_len[mylevel].nz;
1489  int ia = p.grid_idstart[mylevel].nx;
1490  int ja = p.grid_idstart[mylevel].ny;
1491  int ka = p.grid_idstart[mylevel].nz;
1492  int ib = ia + ni - 1;
1493  int jb = ja + nj - 1;
1494  int kb = ka + nk - 1;
1495  int ispnone = ! ( p.ispx || p.ispy || p.ispz );
1496  int gia, gib, gja, gjb, gka, gkb, gni, gnj;
1497  const float *gc = 0;
1498  if (mylevel != p.toplevel) {
1499  gc = &(p.gc[0]);
1500  gia = p.gc_idstart.nx;
1501  gib = gia + p.gc_len.nx - 1;
1502  gja = p.gc_idstart.ny;
1503  gjb = gja + p.gc_len.ny - 1;
1504  gka = p.gc_idstart.nz;
1505  gkb = gka + p.gc_len.nz - 1;
1506  gni = p.gc_len.nx;
1507  gnj = p.gc_len.ny;
1508  }
1509  else {
1510  gc = &(p.gctop[0]);
1511  gia = p.gctop_idstart.nx;
1512  gib = gia + p.gctop_len.nx - 1;
1513  gja = p.gctop_idstart.ny;
1514  gjb = gja + p.gctop_len.ny - 1;
1515  gka = p.gctop_idstart.nz;
1516  gkb = gka + p.gctop_len.nz - 1;
1517  gni = p.gctop_len.nx;
1518  gnj = p.gctop_len.ny;
1519  }
1520  int i, j, k;
1521  int gia_clip, gib_clip;
1522  int gja_clip, gjb_clip;
1523  int gka_clip, gkb_clip;
1524  int id, jd, kd;
1525  int kgoff, jkgoff, ngindex;
1526 
1527  float scaling = p.scaling[mylevel];
1528  float eh_sum = 0;
1529 
1530 #ifdef MSM_DEBUG
1531  if (mylevel==0 && thisIndex.x==0 && thisIndex.y==0 && thisIndex.z==0) {
1532  int index = 0;
1533  CkPrintf("+++ qh[0,0,0] = %g\n", qhread.get(0,0,0));
1534  CkPrintf("+++ p.toplevel = %d\n", p.toplevel);
1535  CkPrintf("+++ scaling = %g\n", scaling);
1536 #if 0
1537  for (k = gka; k <= gkb; k++) {
1538  for (j = gja; j <= gjb; j++) {
1539  for (i = gia; i <= gib; i++, index++) {
1540  printf("+++ gc[%d,%d,%d] = %g\n", i, j, k, gc[index]);
1541  }
1542  }
1543  }
1544 #endif
1545  }
1546 #endif
1547 
1548  if ( ispnone ) { // non-periodic boundaries
1549 
1550  // loop over my sub-grid points
1551  for (k = mka; k <= mkb; k++) {
1552 
1553  // clip gc ranges to keep offset for k index within grid
1554  gka_clip = (k + gka < ka ? ka - k : gka);
1555  gkb_clip = (k + gkb > kb ? kb - k : gkb);
1556 
1557  for (j = mja; j <= mjb; j++) {
1558 
1559  // clip gc ranges to keep offset for j index within grid
1560  gja_clip = (j + gja < ja ? ja - j : gja);
1561  gjb_clip = (j + gjb > jb ? jb - j : gjb);
1562 
1563  for (i = mia; i <= mib; i++) {
1564 
1565  // clip gc ranges to keep offset for i index within grid
1566  gia_clip = (i + gia < ia ? ia - i : gia);
1567  gib_clip = (i + gib > ib ? ib - i : gib);
1568 
1569  // sum over "sphere" of weighted charge
1570  eh_sum = 0;
1571  for (kd = gka_clip; kd <= gkb_clip; kd++) {
1572  kgoff = (kd-gka) * gnj; // find gc flat index
1573 
1574  for (jd = gja_clip; jd <= gjb_clip; jd++) {
1575  jkgoff = (kgoff + (jd-gja)) * gni; // find gc flat index
1576 
1577  for (id = gia_clip; id <= gib_clip; id++) {
1578  ngindex = jkgoff + (id-gia); // gc flat index
1579  // sum weighted charge
1580  eh_sum += qhread.get(i+id,j+jd,k+kd) * gc[ngindex];
1581  }
1582  }
1583  } // end loop over "sphere" of charge
1584 
1585  // add contribution into MSA
1586  ehacc.accumulate(i,j,k) += scaling * eh_sum;
1587  }
1588  }
1589  } // end loop over my sub-grid points
1590 
1591  } // if non-periodic boundaries
1592  else {
1593  // some boundary is periodic
1594  int ilo, jlo, klo;
1595  int ip, jp, kp;
1596 
1597  // loop over my sub-grid points
1598  for (k = mka; k <= mkb; k++) {
1599  klo = k + gka;
1600  if ( ! p.ispz ) { // non-periodic z
1601  // clip gc ranges to keep offset for k index within grid
1602  gka_clip = (k + gka < ka ? ka - k : gka);
1603  gkb_clip = (k + gkb > kb ? kb - k : gkb);
1604  if (klo < ka) klo = ka; // keep lowest qh index within grid
1605  }
1606  else { // periodic z
1607  gka_clip = gka;
1608  gkb_clip = gkb;
1609  if (klo < ka) do { klo += nk; } while (klo < ka);
1610  }
1611  // ASSERT(klo <= kb);
1612 
1613  for (j = mja; j <= mjb; j++) {
1614  jlo = j + gja;
1615  if ( ! p.ispy ) { // non-periodic y
1616  // clip gc ranges to keep offset for j index within grid
1617  gja_clip = (j + gja < ja ? ja - j : gja);
1618  gjb_clip = (j + gjb > jb ? jb - j : gjb);
1619  if (jlo < ja) jlo = ja; // keep lowest qh index within grid
1620  }
1621  else { // periodic y
1622  gja_clip = gja;
1623  gjb_clip = gjb;
1624  if (jlo < ja) do { jlo += nj; } while (jlo < ja);
1625  }
1626  // ASSERT(jlo <= jb);
1627 
1628  for (i = mia; i <= mib; i++) {
1629  ilo = i + gia;
1630  if ( ! p.ispx ) { // nonperiodic x
1631  // clip gc ranges to keep offset for i index within grid
1632  gia_clip = (i + gia < ia ? ia - i : gia);
1633  gib_clip = (i + gib > ib ? ib - i : gib);
1634  if (ilo < ia) ilo = ia; // keep lowest qh index within grid
1635  }
1636  else { // periodic x
1637  gia_clip = gia;
1638  gib_clip = gib;
1639  if (ilo < ia) do { ilo += ni; } while (ilo < ia);
1640  }
1641  // ASSERT(ilo <= ib);
1642 
1643  // sum over "sphere" of weighted charge
1644  eh_sum = 0;
1645  for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) {
1646  // clipping makes conditional always fail for nonperiodic
1647  if (kp > kb) kp = ka; // wrap z direction
1648  kgoff = (kd-gka) * gnj; // find gc flat index
1649 
1650  for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) {
1651  // clipping makes conditional always fail for nonperiodic
1652  if (jp > jb) jp = ja; // wrap y direction
1653  jkgoff = (kgoff + (jd-gja)) * gni; // find gc flat index */
1654 
1655  for (id = gia_clip, ip = ilo; id <= gib_clip; id++, ip++) {
1656  // clipping makes conditional always fail for nonperiodic
1657  if (ip > ib) ip = ia; // wrap x direction
1658  ngindex = jkgoff + (id-gia); // gc flat index
1659  // sum weighted charge
1660  eh_sum += qhread.get(ip,jp,kp) * gc[ngindex];
1661  }
1662  }
1663  } // end loop over "sphere" of charge
1664 
1665  // add contribution into MSA
1666  ehacc.accumulate(i,j,k) += scaling * eh_sum;
1667  }
1668  }
1669  } // end loop over my sub-grid points
1670 
1671  } // else some boundary is periodic
1672 
1673  //
1674  // end of compute work
1675  //
1676  ehread = ehacc.syncToRead(); // let consumers read potential
1677 #ifdef MSM_DEBUG
1678  CkPrintf("Grid cutoff compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1679 #endif
1680 } // loop forever
1681 }
1682 
1683 
1686 enum { MAX_NSTENCIL = 11 };
1687 
1689 static const int Nstencil[APPROX_END] = {
1690  5, 7, 7, 9, 9, 11, 11, 7
1691 };
1692 
1695 static const int IndexOffset[APPROX_END][MAX_NSTENCIL] = {
1696  /* cubic */
1697  {-3, -1, 0, 1, 3},
1698 
1699  /* quintic C1 */
1700  {-5, -3, -1, 0, 1, 3, 5},
1701 
1702  /* quintic C2 (same as quintic C1) */
1703  {-5, -3, -1, 0, 1, 3, 5},
1704 
1705  /* septic C1 */
1706  {-7, -5, -3, -1, 0, 1, 3, 5, 7},
1707 
1708  /* septic C3 (same as septic C3) */
1709  {-7, -5, -3, -1, 0, 1, 3, 5, 7},
1710 
1711  /* nonic C1 */
1712  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
1713 
1714  /* nonic C4 (same as nonic C1) */
1715  {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
1716 
1717  /* bspline */
1718  {-3, -2, -1, 0, 1, 2, 3},
1719 };
1720 
1723 static const float PhiStencil[APPROX_END][MAX_NSTENCIL] = {
1724  /* cubic */
1725  {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
1726 
1727  /* quintic C1 */
1728  {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
1729 
1730  /* quintic C2 (same as quintic C1) */
1731  {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
1732 
1733  /* septic C1 */
1734  { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
1735  -245.f/2048, 49.f/2048, -5.f/2048 },
1736 
1737  /* septic C3 (same as septic C3) */
1738  { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
1739  -245.f/2048, 49.f/2048, -5.f/2048 },
1740 
1741  /* nonic C1 */
1742  { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
1743  19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
1744  -405.f/65536, 35.f/65536 },
1745 
1746  /* nonic C4 (same as nonic C1) */
1747  { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
1748  19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
1749  -405.f/65536, 35.f/65536 },
1750 
1751  /* bspline */
1752  { 1.f/48, 1.f/6, 23.f/48, 2.f/3, 23.f/48, 1.f/6, 1.f/48 },
1753 };
1754 
1755 
1756 MsmMsaRestriction::MsmMsaRestriction(int level, MsmMsaGrid &qh_, MsmMsaGrid &q2h_)
1757  : qh(qh_), q2h(q2h_)
1758 {
1759  mylevel = level;
1760  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1761  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1762  // find the points on my sub-grid of (mylevel+1) grid
1763  const Int3 &len = p.grid_len[mylevel+1];
1764  const Int3 &idstart = p.grid_idstart[mylevel+1];
1765  mia = idstart.nx + thisIndex.x * p.num_points_per_chare.nx;
1766  mja = idstart.ny + thisIndex.y * p.num_points_per_chare.ny;
1767  mka = idstart.nz + thisIndex.z * p.num_points_per_chare.nz;
1768  mib = mia + p.num_points_per_chare.nx - 1;
1769  mjb = mja + p.num_points_per_chare.ny - 1;
1770  mkb = mka + p.num_points_per_chare.nz - 1;
1771  if (mib > idstart.nx + len.nx - 1) {
1772  mib = idstart.nx + len.nx - 1;
1773  }
1774  if (mjb > idstart.ny + len.ny - 1) {
1775  mjb = idstart.ny + len.ny - 1;
1776  }
1777  if (mkb > idstart.nz + len.nz - 1) {
1778  mkb = idstart.nz + len.nz - 1;
1779  }
1780  msa_setup = 0;
1781 }
1782 
1783 
1784 void MsmMsaRestriction::compute()
1785 {
1786 #ifdef MSM_DEBUG
1787  CkPrintf("Restriction compute %d, PE %d\n", thisIndex, CkMyPe());
1788 #endif
1789  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1790  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1791  if ( ! msa_setup) {
1792  qh.enroll(p.num_clients_qh[mylevel]);
1793  q2h.enroll(p.num_clients_qh[mylevel+1]);
1794  qhacc = qh.getInitialAccum();
1795  qhread = qhacc.syncToRead(); // immediately sync to read phase
1796  q2hacc = q2h.getInitialAccum();
1797  q2hread = q2hacc.syncToRead(); // immediately sync to read phase
1798  msa_setup = 1;
1799  }
1800 for (;;) { // loop forever
1801  qhacc = qhread.syncToEAccum(); // let producers accumulate h-level charge
1802  qhread = qhacc.syncToRead(); // then we read h-level charge
1803  q2hacc = q2hread.syncToEAccum(); // and we accumulate 2h-level charge
1804 #ifdef MSM_DEBUG
1805  CkPrintf("Restriction compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1806 #endif
1807  //
1808  // here is the compute work
1809  //
1810 
1811  const int nstencil = Nstencil[p.approx];
1812  const int *offset = IndexOffset[p.approx];
1813  const float *phi = PhiStencil[p.approx];
1814 
1815  int ni1 = p.grid_len[mylevel].nx;
1816  int nj1 = p.grid_len[mylevel].ny;
1817  int nk1 = p.grid_len[mylevel].nz;
1818  int ia1 = p.grid_idstart[mylevel].nx;
1819  int ja1 = p.grid_idstart[mylevel].ny;
1820  int ka1 = p.grid_idstart[mylevel].nz;
1821  int ib1 = ia1 + ni1 - 1;
1822  int jb1 = ja1 + nj1 - 1;
1823  int kb1 = ka1 + nk1 - 1;
1824  int i, j, k, i1, j1, k1, i2, j2, k2;
1825  int i1off, j1off, k1off;
1826 
1827  float q2h_sum, cjk;
1828 
1829  for (k2 = mka; k2 <= mkb; k2++) {
1830  k1 = k2 * 2;
1831  for (j2 = mja; j2 <= mjb; j2++) {
1832  j1 = j2 * 2;
1833  for (i2 = mia; i2 <= mib; i2++) {
1834  i1 = i2 * 2;
1835 
1836  q2h_sum = 0;
1837  for (k = 0; k < nstencil; k++) {
1838  k1off = k1 + offset[k];
1839  if (k1off < ka1) {
1840  if (p.ispz) do { k1off += nk1; } while (k1off < ka1);
1841  else continue;
1842  }
1843  else if (k1off > kb1) {
1844  if (p.ispz) do { k1off -= nk1; } while (k1off > kb1);
1845  else break;
1846  }
1847  for (j = 0; j < nstencil; j++) {
1848  j1off = j1 + offset[j];
1849  if (j1off < ja1) {
1850  if (p.ispy) do { j1off += nj1; } while (j1off < ja1);
1851  else continue;
1852  }
1853  else if (j1off > jb1) {
1854  if (p.ispy) do { j1off -= nj1; } while (j1off > jb1);
1855  else break;
1856  }
1857  cjk = phi[j] * phi[k];
1858  for (i = 0; i < nstencil; i++) {
1859  i1off = i1 + offset[i];
1860  if (i1off < ia1) {
1861  if (p.ispx) do { i1off += ni1; } while (i1off < ia1);
1862  else continue;
1863  }
1864  else if (i1off > ib1) {
1865  if (p.ispx) do { i1off -= ni1; } while (i1off > ib1);
1866  else break;
1867  }
1868  q2h_sum += qhread.get(i1off,j1off,k1off) * phi[i] * cjk;
1869  }
1870  }
1871  } // end loop over finer grid stencil
1872 
1873  q2hacc.accumulate(i2,j2,k2) += q2h_sum;
1874  }
1875  }
1876  } // end loop over each coarser grid point
1877 
1878  //
1879  // end of compute work
1880  //
1881  q2hread = q2hacc.syncToRead(); // let consumers read 2h-level charge
1882 #ifdef MSM_DEBUG
1883  CkPrintf("Restriction compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1884 #endif
1885 } // loop forever
1886 }
1887 
1888 
1889 MsmMsaProlongation::MsmMsaProlongation(int level, MsmMsaGrid &eh_, MsmMsaGrid &e2h_)
1890  : eh(eh_), e2h(e2h_)
1891 {
1892  mylevel = level;
1893  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1894  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1895  // find the points on my sub-grid of (mylevel+1) grid
1896  const Int3 &len = p.grid_len[mylevel+1];
1897  const Int3 &idstart = p.grid_idstart[mylevel+1];
1898  mia = idstart.nx + thisIndex.x * p.num_points_per_chare.nx;
1899  mja = idstart.ny + thisIndex.y * p.num_points_per_chare.ny;
1900  mka = idstart.nz + thisIndex.z * p.num_points_per_chare.nz;
1901  mib = mia + p.num_points_per_chare.nx - 1;
1902  mjb = mja + p.num_points_per_chare.ny - 1;
1903  mkb = mka + p.num_points_per_chare.nz - 1;
1904  if (mib > idstart.nx + len.nx - 1) {
1905  mib = idstart.nx + len.nx - 1;
1906  }
1907  if (mjb > idstart.ny + len.ny - 1) {
1908  mjb = idstart.ny + len.ny - 1;
1909  }
1910  if (mkb > idstart.nz + len.nz - 1) {
1911  mkb = idstart.nz + len.nz - 1;
1912  }
1913  msa_setup = 0;
1914 }
1915 
1916 
1917 void MsmMsaProlongation::compute()
1918 {
1919 #ifdef MSM_DEBUG
1920  CkPrintf("Prolongation compute %d, PE %d\n", thisIndex, CkMyPe());
1921 #endif
1922  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1923  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1924  if ( ! msa_setup) {
1925  eh.enroll(p.num_clients_eh[mylevel]);
1926  e2h.enroll(p.num_clients_eh[mylevel+1]);
1927  ehacc = eh.getInitialAccum();
1928  ehread = ehacc.syncToRead(); // immediately sync to read phase
1929  e2hacc = e2h.getInitialAccum();
1930  e2hread = e2hacc.syncToRead(); // immediately sync to read phase
1931  msa_setup = 1;
1932  }
1933 for (;;) { // loop forever
1934  ehacc = ehread.syncToEAccum(); // we accumulate h-level potential
1935  e2hacc = e2hread.syncToEAccum(); // let producers accumulate 2h-level
1936  e2hread = e2hacc.syncToRead(); // then we read 2h-level potentials
1937 #ifdef MSM_DEBUG
1938  CkPrintf("Prolongation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1939 #endif
1940  //
1941  // here is the compute work
1942  //
1943 
1944  const int nstencil = Nstencil[p.approx];
1945  const int *offset = IndexOffset[p.approx];
1946  const float *phi = PhiStencil[p.approx];
1947 
1948  int ni1 = p.grid_len[mylevel].nx;
1949  int nj1 = p.grid_len[mylevel].ny;
1950  int nk1 = p.grid_len[mylevel].nz;
1951  int ia1 = p.grid_idstart[mylevel].nx;
1952  int ja1 = p.grid_idstart[mylevel].ny;
1953  int ka1 = p.grid_idstart[mylevel].nz;
1954  int ib1 = ia1 + ni1 - 1;
1955  int jb1 = ja1 + nj1 - 1;
1956  int kb1 = ka1 + nk1 - 1;
1957  int i, j, k, i1, j1, k1, i2, j2, k2;
1958  int i1off, j1off, k1off;
1959 
1960  float cjk;
1961 
1962  for (k2 = mka; k2 <= mkb; k2++) {
1963  k1 = k2 * 2;
1964  for (j2 = mja; j2 <= mjb; j2++) {
1965  j1 = j2 * 2;
1966  for (i2 = mia; i2 <= mib; i2++) {
1967  i1 = i2 * 2;
1968 
1969  for (k = 0; k < nstencil; k++) {
1970  k1off = k1 + offset[k];
1971  if (k1off < ka1) {
1972  if (p.ispz) do { k1off += nk1; } while (k1off < ka1);
1973  else continue;
1974  }
1975  else if (k1off > kb1) {
1976  if (p.ispz) do { k1off -= nk1; } while (k1off > kb1);
1977  else break;
1978  }
1979  for (j = 0; j < nstencil; j++) {
1980  j1off = j1 + offset[j];
1981  if (j1off < ja1) {
1982  if (p.ispy) do { j1off += nj1; } while (j1off < ja1);
1983  else continue;
1984  }
1985  else if (j1off > jb1) {
1986  if (p.ispy) do { j1off -= nj1; } while (j1off > jb1);
1987  else break;
1988  }
1989  cjk = phi[j] * phi[k];
1990  for (i = 0; i < nstencil; i++) {
1991  i1off = i1 + offset[i];
1992  if (i1off < ia1) {
1993  if (p.ispx) do { i1off += ni1; } while (i1off < ia1);
1994  else continue;
1995  }
1996  else if (i1off > ib1) {
1997  if (p.ispx) do { i1off -= ni1; } while (i1off > ib1);
1998  else break;
1999  }
2000  ehacc.accumulate(i1off,j1off,k1off) +=
2001  e2hread(i2,j2,k2) * phi[i] * cjk;
2002  }
2003  }
2004  } // end loop over finer grid stencil
2005 
2006  }
2007  }
2008  } // end loop over each coarser grid point
2009 
2010  //
2011  // end of compute work
2012  //
2013  ehread = ehacc.syncToRead(); // let consumers read h-level potential
2014 #ifdef MSM_DEBUG
2015  CkPrintf("Prolongation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
2016 #endif
2017 } // loop forever
2018 }
2019 
2020 
2021 MsmMsaEnergy::MsmMsaEnergy(MsmMsaGrid &qh_, MsmMsaGrid &eh_) : qh(qh_), eh(eh_)
2022 {
2023  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
2024  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
2025  int npoints = p.num_points_per_chare.nx *
2026  p.num_points_per_chare.ny * p.num_points_per_chare.nz;
2027  qhbuffer = new float[npoints];
2028  int ni = p.grid_len[0].nx;
2029  int nj = p.grid_len[0].ny;
2030  int nk = p.grid_len[0].nz;
2031  int nci = ROUNDUP_QUOTIENT(ni, p.num_points_per_chare.nx);
2032  int ncj = ROUNDUP_QUOTIENT(nj, p.num_points_per_chare.ny);
2033  int nck = ROUNDUP_QUOTIENT(nk, p.num_points_per_chare.nz);
2034  mlk = thisIndex / (ncj * nci);
2035  int krem = thisIndex % (ncj * nci);
2036  mlj = krem / nci;
2037  mli = krem % nci;
2038  // find the points on my part of my level's qh and eh grids
2039  mia = p.grid_idstart[0].nx + mli * p.num_points_per_chare.nx;
2040  mja = p.grid_idstart[0].ny + mlj * p.num_points_per_chare.ny;
2041  mka = p.grid_idstart[0].nz + mlk * p.num_points_per_chare.nz;
2042  mib = mia + p.num_points_per_chare.nx - 1;
2043  mjb = mja + p.num_points_per_chare.ny - 1;
2044  mkb = mka + p.num_points_per_chare.nz - 1;
2045  if (mib > p.grid_idstart[0].nx + ni - 1) {
2046  mib = p.grid_idstart[0].nx + ni - 1;
2047  }
2048  if (mjb > p.grid_idstart[0].ny + nj - 1) {
2049  mjb = p.grid_idstart[0].ny + nj - 1;
2050  }
2051  if (mkb > p.grid_idstart[0].nz + nk - 1) {
2052  mkb = p.grid_idstart[0].nz + nk - 1;
2053  }
2054  msa_setup = 0;
2056 }
2057 
2058 MsmMsaEnergy::~MsmMsaEnergy()
2059 {
2060  delete[] qhbuffer;
2061 }
2062 
2063 
2064 void MsmMsaEnergy::compute()
2065 {
2066 #ifdef MSM_DEBUG
2067  CkPrintf("Energy compute %d, PE %d\n", thisIndex, CkMyPe());
2068 #endif
2069  const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
2070  CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
2071  if ( ! msa_setup) {
2072  qh.enroll(p.num_clients_qh[0]);
2073  eh.enroll(p.num_clients_eh[0]);
2074  qhacc = qh.getInitialAccum();
2075  qhread = qhacc.syncToRead(); // immediately sync to read phase
2076  ehacc = eh.getInitialAccum();
2077  ehread = ehacc.syncToRead();
2078  msa_setup = 1;
2079  }
2080 for (;;) { // loop forever
2081  double sum = 0;
2082  qhacc = qhread.syncToEAccum(); // let producers accumulate charge
2083  qhread = qhacc.syncToRead(); // then we read it
2084  //
2085  // Must be very careful of phase sync order between eh and qh
2086  // qhread sync done must be after enrollment in eh
2087  // (otherwise block gridcutoff)
2088  // but before ehacc sync to read
2089  // (otherwise block prolongation)
2090  //
2091  // Since we can't have both qh and eh open for reading at once,
2092  // we read qh values into local storage
2093  //
2094  int i, j, k, index;
2095  for (index = 0, k = mka; k <= mkb; k++) {
2096  for (j = mja; j <= mjb; j++) {
2097  for (i = mia; i <= mib; i++, index++) {
2098  qhbuffer[index] = qhread.get(i,j,k);
2099  }
2100  }
2101  }
2102  //
2103  // Must do qhread sync done BEFORE ehacc sync to read
2104  //
2105  ehacc = ehread.syncToEAccum(); // let producers accumulate potential
2106  ehread = ehacc.syncToRead(); // then we read it
2107 #ifdef MSM_DEBUG
2108  CkPrintf("Energy compute %d doing work, PE %d\n", thisIndex, CkMyPe());
2109 #endif
2110  for (index = 0, k = mka; k <= mkb; k++) {
2111  for (j = mja; j <= mjb; j++) {
2112  for (i = mia; i <= mib; i++, index++) {
2113  sum += qhbuffer[index] * ehread.get(i,j,k);
2114  }
2115  }
2116  }
2117  //contribute(sizeof(double), &sum, CkReduction::sum_double);
2118  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += 0.5*sum;
2119  reduction->submit();
2120 #ifdef MSM_DEBUG
2121  CkPrintf("Energy compute %d exiting, PE %d\n", thisIndex, CkMyPe());
2122 #endif
2123 } // loop forever
2124 }
2125 
2126 
2127 #include "ComputeMsmMsaMgr.def.h"
2128 
2129 #endif // CHARM_HAS_MSA
2130 
static Node * Object()
Definition: Node.h:86
#define GRID_RESIZE(_p, TYPE, __i0, __ni, __j0, __nj, __k0, __nk)
Definition: msm_defn.h:77
Vector a_r() const
Definition: Lattice.h:268
int ComputeID
Definition: NamdTypes.h:183
static const int Nstencil[NL_MSM_APPROX_END]
Definition: msm_longrng.c:208
static PatchMap * Object()
Definition: PatchMap.h:27
Definition: Vector.h:64
SimParameters * simParameters
Definition: Node.h:178
Vector c_r() const
Definition: Lattice.h:270
static int numatoms
Definition: ScriptTcl.C:64
#define COULOMB
Definition: common.h:46
BigReal z
Definition: Vector.h:66
Position position
Definition: NamdTypes.h:53
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:365
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:278
Vector origin() const
Definition: Lattice.h:262
int nx
Definition: ComputeMoa.h:65
Vector b_r() const
Definition: Lattice.h:269
static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL]
Definition: msm_longrng.c:214
static int restriction(NL_Msm *, int level)
Definition: msm_longrng.c:1485
Charge charge
Definition: NamdTypes.h:54
int ny
Definition: ComputeMoa.h:65
#define GRID_INIT(_p)
Definition: msm_defn.h:55
BigReal rlength(void)
Definition: Vector.h:177
Force * f[maxNumForces]
Definition: PatchTypes.h:67
BigReal x
Definition: Vector.h:66
#define ROUNDUP_QUOTIENT(n, k)
Definition: MsmMacros.h:4
BigReal MSMGridSpacing
void NAMD_die(const char *err_msg)
Definition: common.C:85
PDB * pdb
Definition: Node.h:180
#define STENCIL_1D(_phi, _delta, _approx)
Definition: msm_longrng.c:280
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:73
static const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL]
Definition: msm_longrng.c:242
#define simParams
Definition: Output.C:127
static void rescale_nonperiodic_cell(Vector &u, Vector &v, Vector &w, Vector &c, Vector &ru, Vector &rv, Vector &rw, int isperiodic, int numatoms, const ComputeMsmSerialAtom *coord, double padding, double gridspacing)
static int setup_hgrid_1d(NL_Msm *pm, double len, double *hh, int *nn, int *aindex, int *bindex, int isperiodic)
Definition: msm_setup.c:658
BigReal y
Definition: Vector.h:66
Vector b() const
Definition: Lattice.h:253
static int gridcutoff(NL_Msm *, int level)
Definition: msm_longrng.c:1677
#define SPOLY(pg, pdg, ra, split)
Definition: msm_defn.h:140
int nz
Definition: ComputeMoa.h:65
BigReal dielectric
int b_p() const
Definition: Lattice.h:274
BigReal MSMPadding
gridSize x
static const int PolyDegree[NL_MSM_APPROX_END]
Definition: msm_longrng.c:163
int a_p() const
Definition: Lattice.h:273
Vector a() const
Definition: Lattice.h:252
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:104
#define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx)
Definition: msm_longrng.c:468
Vector c() const
Definition: Lattice.h:254
static int prolongation(NL_Msm *, int level)
Definition: msm_longrng.c:1582
int c_p() const
Definition: Lattice.h:275