NAMD
PmeSolverUtil.h
Go to the documentation of this file.
1 #ifndef PMESOLVERUTIL_H
2 #define PMESOLVERUTIL_H
3 
4 #include "PmeBase.h" // PmeGrid -structure
5 #include "NamdTypes.h" // CudaAtom, float2
6 #include "Lattice.h"
7 
8 // CHC: multiple grid evaluation
9 const unsigned int NUM_GRID_MAX = 5;
10 
11 //
12 // Stores the patch origin (x, y, z) and number of atoms (w)
13 //
14 struct PatchInfo {
15  int x, y, z, w;
16  PatchInfo() {}
17  PatchInfo(int x, int y, int z, int w) : x(x), y(y), z(z), w(w) {}
18 };
19 
21 
22 // y = absolute grid position [0 ... pmeGrid.K2-1]
23 static inline int getPencilIndexY(const PmeGrid& pmeGrid, const int y) {
24  return (y*pmeGrid.yBlocks + pmeGrid.yBlocks - 1)/pmeGrid.K2;
25 }
26 
27 // z = absolute grid position [0 ... pmeGrid.K3-1]
28 static inline int getPencilIndexZ(const PmeGrid& pmeGrid, const int z) {
29  return (z*pmeGrid.zBlocks + pmeGrid.zBlocks - 1)/pmeGrid.K3;
30 }
31 
32 static void getPencilDim(const PmeGrid& pmeGrid, const int permutation,
33  const int jblock, const int kblock,
34  int& i0, int& i1, int& j0, int& j1, int& k0, int& k1) {
35 
36  int isize, jsize, ksize;
37  int jblocks, kblocks;
38 
39  switch(permutation) {
40  case Perm_X_Y_Z:
41  isize = pmeGrid.K1;
42  jsize = pmeGrid.K2;
43  ksize = pmeGrid.K3;
44  jblocks = pmeGrid.yBlocks;
45  kblocks = pmeGrid.zBlocks;
46  break;
47  case Perm_cX_Y_Z:
48  isize = pmeGrid.K1/2+1;
49  jsize = pmeGrid.K2;
50  ksize = pmeGrid.K3;
51  jblocks = pmeGrid.yBlocks;
52  kblocks = pmeGrid.zBlocks;
53  break;
54  case Perm_Y_Z_cX:
55  isize = pmeGrid.K2;
56  jsize = pmeGrid.K3;
57  ksize = pmeGrid.K1/2+1;
58  jblocks = pmeGrid.zBlocks;
59  kblocks = pmeGrid.xBlocks;
60  break;
61  case Perm_Z_cX_Y:
62  isize = pmeGrid.K3;
63  jsize = pmeGrid.K1/2+1;
64  ksize = pmeGrid.K2;
65  jblocks = pmeGrid.xBlocks;
66  kblocks = pmeGrid.yBlocks;
67  break;
68  default:
69  NAMD_bug("getPencilDim, invalid permutation");
70  break;
71  }
72 
73  if (jblock < 0 || jblock >= jblocks || kblock < 0 || kblock >= kblocks)
74  NAMD_bug("getPencilDim, invalid block indices");
75 
76  i0 = 0;
77  i1 = isize - 1;
78 
79  j0 = jsize*jblock/jblocks;
80  j1 = jsize*(jblock+1)/jblocks - 1;
81 
82  k0 = ksize*kblock/kblocks;
83  k1 = ksize*(kblock+1)/kblocks - 1;
84 }
85 
86 //
87 // Return block dimensions [i0...i1] x [j0...j1] x [k0...k1]
88 //
89 static void getBlockDim(const PmeGrid& pmeGrid, const int permutation,
90  const int iblock, const int jblock, const int kblock,
91  int& i0, int& i1, int& j0, int& j1, int& k0, int& k1) {
92 
93  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
94 
95  int iblocks;
96 
97  switch(permutation) {
98  case Perm_X_Y_Z:
99  iblocks = pmeGrid.xBlocks;
100  break;
101  case Perm_cX_Y_Z:
102  iblocks = pmeGrid.xBlocks;
103  break;
104  case Perm_Y_Z_cX:
105  iblocks = pmeGrid.yBlocks;
106  break;
107  case Perm_Z_cX_Y:
108  iblocks = pmeGrid.zBlocks;
109  break;
110  default:
111  NAMD_bug("getBlockDim, invalid permutation");
112  break;
113  }
114 
115  if (iblock < 0 || iblock >= iblocks)
116  NAMD_bug("getBlockDim, invalid block index");
117 
118  int isize = i1-i0+1;
119 
120  i0 = isize*iblock/iblocks;
121  i1 = isize*(iblock+1)/iblocks - 1;
122 }
123 
124 //
125 // Abstract FFTCompute base class for out-of-place FFTs
126 // NOTE: Must call init -method to initialize this class
127 //
128 class FFTCompute {
129 public:
131  dataSrc = NULL;
132  dataSrcSize = 0;
133  dataSrcAllocated = false;
134  dataDst = NULL;
135  dataDstSize = 0;
136  dataDstAllocated = false;
137  }
138  void init(
139  float* dataSrc_in, int dataSrcSize_in,
140  float* dataDst_in, int dataDstSize_in,
141  int permutation, PmeGrid pmeGrid,
142  int pmePencilType, int jblock, int kblock, int flags) {
143 
144  if (dataSrc_in != NULL && dataSrc_in == dataDst_in)
145  NAMD_bug("FFTCompute::init, only out-of-place FFTs supported");
146 
147  int permutationDst = permutation;
148  if (permutation == Perm_X_Y_Z) {
149  permutationDst = Perm_cX_Y_Z;
150  }
151 
152  if (dataSrc_in == NULL) {
153  // Sets data and dataSize
154  dataSrcSize = getDataSizeRequired(permutation, pmeGrid, jblock, kblock);
155  dataSrc = allocateData(dataSrcSize);
156  dataSrcAllocated = true;
157  } else {
158  if (dataSrcSize_in < getDataSizeRequired(permutation, pmeGrid, jblock, kblock))
159  NAMD_bug("FFTCompute::init, invalid dataSrcSize_in");
160  dataSrcSize = dataSrcSize_in;
161  dataSrc = dataSrc_in;
162  dataSrcAllocated = false;
163  }
164 
165  if (dataDst_in == NULL) {
166  // Sets data and dataSize
167  dataDstSize = getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock);
168  dataDst = allocateData(dataDstSize);
169  dataDstAllocated = true;
170  } else {
171  if (dataDstSize_in < getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock))
172  NAMD_bug("FFTCompute::init, invalid dataDstSize_in");
173  dataDstSize = dataDstSize_in;
174  dataDst = dataDst_in;
175  dataDstAllocated = false;
176  }
177 
178  // Final sanity check
179  if (dataDst == NULL || dataSrc == NULL ||
180  dataDstSize < getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock) ||
181  dataSrcSize < getDataSizeRequired(permutation, pmeGrid, jblock, kblock))
182  NAMD_bug("FFTCompute::init, error setting up data buffers");
183 
184  // Now "data" is pointer to grid data with at least size getDataSizeRequired(...)
185  if (pmePencilType == 3) {
186  // 3D FFT
187  if (pmeGrid.xBlocks != 1 || pmeGrid.yBlocks != 1 || pmeGrid.zBlocks != 1)
188  NAMD_bug("FFTCompute::init, 3D FFT requires a single pencil");
189  int n[3] = {pmeGrid.K1, pmeGrid.K2, pmeGrid.K3};
190  plan3D(n, flags);
191  } else if (pmePencilType == 1) {
192  int i0, i1, j0, j1, k0, k1;
193  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
194  int n[1] = {i1-i0+1};
195  int howmany = (j1-j0+1)*(k1-k0+1);
196  if (permutation == Perm_X_Y_Z) {
197  plan1DX(n, howmany, flags);
198  } else if (permutation == Perm_Y_Z_cX) {
199  plan1DY(n, howmany, flags);
200  } else if (permutation == Perm_Z_cX_Y) {
201  plan1DZ(n, howmany, flags);
202  } else {
203  NAMD_bug("FFTCompute::init, invalid permutation");
204  }
205  } else if (pmePencilType == 2) {
206  // 2D FFTs of xy planes
207  int i0, i1, j0, j1, k0, k1;
208  getPencilDim(pmeGrid, permutation, 0, kblock, i0, i1, j0, j1, k0, k1);
209  int n[2] = {pmeGrid.K1, pmeGrid.K2};
210  int howmany = k1-k0+1;
211  plan2D(n, howmany, flags);
212  } else {
213  NAMD_bug("FFTCompute::init, invalid pmePencilType");
214  }
215  }
216  virtual ~FFTCompute() {}
217  virtual void forward()=0;
218  virtual void backward()=0;
219  float* getDataSrc() {return dataSrc;}
220  float* getDataDst() {return dataDst;}
221 protected:
224  // Pointer to data, allocated here only if dataAllocated = true
225  float* dataSrc;
226  float* dataDst;
227  // Size of data
230  // If dataAllocated = true, implementation must deallocate dataPtr
233 private:
234  // Return the size of data in floats that this FFT will require
235  int getDataSizeRequired(int permutation, PmeGrid pmeGrid,
236  int jblock, int kblock) {
237 
238  this->jblock = jblock;
239  this->kblock = kblock;
240 
241  int i0, i1, j0, j1, k0, k1;
242  getPencilDim(pmeGrid, permutation, jblock, kblock,
243  i0, i1, j0, j1, k0, k1);
244 
245  int size = (i1-i0+1)*(j1-j0+1)*(k1-k0+1);
246 
247  isize = (i1-i0+1);
248  jsize = (j1-j0+1);
249  ksize = (k1-k0+1);
250 
251  if (permutation == Perm_X_Y_Z)
252  return size;
253  else
254  return 2*size;
255  }
256  // Returns pointer to array of size dataSizeRequired
257  virtual float* allocateData(const int dataSizeRequired)=0;
258  virtual void plan3D(int *n, int flags)=0;
259  virtual void plan2D(int *n, int howmany, int flags)=0;
260  virtual void plan1DX(int *n, int howmany, int flags)=0;
261  virtual void plan1DY(int *n, int howmany, int flags)=0;
262  virtual void plan1DZ(int *n, int howmany, int flags)=0;
263 };
264 
265 //
266 // PmeKSpaceCompute base class
267 //
269 protected:
271  //int K1_len, K2_start, K2_end, K3_start, K3_end;
272  double *bm1, *bm2, *bm3;
273  double kappa;
274  const int permutation;
275  const int jblock, kblock;
277  int j0, k0;
278  // indicate the grid index in multiple grids
279  unsigned int multipleGridIndex;
280 public:
282  const int jblock, const int kblock, double kappa, unsigned int multipleGridIndex = 0) :
285 
286  bm1 = new double[pmeGrid.K1];
287  bm2 = new double[pmeGrid.K2];
288  bm3 = new double[pmeGrid.K3];
289  // Use compute_b_moduli from PmeKSpace.C
290  extern void compute_b_moduli(double *bm, int K, int order);
294 
295  int i0, i1, j1, k1;
296  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
297  size1 = i1-i0+1;
298  size2 = j1-j0+1;
299  size3 = k1-k0+1;
300  }
301  virtual ~PmeKSpaceCompute() {
302  delete [] bm1;
303  delete [] bm2;
304  delete [] bm3;
305  }
306  virtual void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float* data)=0;
307  virtual double getEnergy()=0;
308  virtual void getVirial(double *virial)=0;
309  virtual void setGrid(unsigned int iGrid) {
310  multipleGridIndex = iGrid;
311  }
312 };
313 
314 //
315 // Abstract PmeRealSpaceCompute base class
316 //
318 protected:
319  // Number of patches and atoms
320  // int numPatches;
321  int numAtoms;
322  // Grid definition
324  // Grid (y, z) location
325  int y0, z0;
326  // Grid size in real-space
328  // Grid data
329  int dataSize;
330  float *data;
331  // Pencil position
332  const int jblock, kblock;
333  // Grid index in multiple grids
334  unsigned int multipleGridIndex;
335 public:
336  PmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock, unsigned int grid = 0) :
338  int x0, x1, y1, z1;
339  getPencilDim(pmeGrid, Perm_X_Y_Z, jblock, kblock, x0, x1, y0, y1, z0, z1);
340  xsize = x1-x0+1;
341  ysize = y1-y0+1;
342  zsize = z1-z0+1;
343  // Allocate enough data for storing the complex data
344  // dataSize = 2*(xsize/2+1)*ysize*zsize;
345  // Only allocate enough data for storing the real-space data
346  // Complex data is stored in FFTCompute
348  }
349  virtual ~PmeRealSpaceCompute() {}
350  // Setup patches and atoms in preparation for charge spreading and force gathering
351  // virtual void setPatchesAtoms(const int numPatches, const PatchInfo* patches,
352  // const int numAtoms, const CudaAtom* atoms)=0;
353  // Setup atoms in preparation for charge spreading and force gathering
354  virtual void copyAtoms(const int numAtoms, const CudaAtom* atoms)=0;
355  // Spread charges on grid
356  virtual void spreadCharge(Lattice &lattice)=0;
357  // Gather forces off the grid
358  virtual void gatherForce(Lattice &lattice, CudaForce* force)=0;
359  // // Calculate self energy
360  // virtual double calcSelfEnergy()=0;
361  float* getData() {return data;}
362  int getDataSize() {return dataSize;}
363  void setGrid(unsigned int i) {
364  multipleGridIndex = i;
365  }
366 
367  static inline double calcGridCoord(const double x, const double recip11, const int nfftx) {
368  double w;
369  w = x*recip11 + 2.0;
370  return (double)((double)nfftx*(w - (floor(w + 0.5) - 0.5)));
371  }
372 
373  static inline void calcGridCoord(const double x, const double y, const double z,
374  const double recip11, const double recip22, const double recip33,
375  const int nfftx, const int nffty, const int nfftz,
376  double &frx, double &fry, double &frz) {
377  double w;
378  w = x*recip11 + 2.0;
379  frx = (double)((double)nfftx*(w - (floor(w + 0.5) - 0.5)));
380  w = y*recip22 + 2.0;
381  fry = (double)((double)nffty*(w - (floor(w + 0.5) - 0.5)));
382  w = z*recip33 + 2.0;
383  frz = (double)((double)nfftz*(w - (floor(w + 0.5) - 0.5)));
384  }
385 
386  static inline void calcGridCoord(const float x, const float y, const float z,
387  const float recip11, const float recip22, const float recip33,
388  const int nfftx, const int nffty, const int nfftz,
389  float &frx, float &fry, float &frz) {
390  float w;
391  w = x*recip11 + 2.0f;
392  frx = (float)(nfftx*(w - (floorf(w + 0.5f) - 0.5f)));
393  w = y*recip22 + 2.0f;
394  fry = (float)(nffty*(w - (floorf(w + 0.5f) - 0.5f)));
395  w = z*recip33 + 2.0f;
396  frz = (float)(nfftz*(w - (floorf(w + 0.5f) - 0.5f)));
397  }
398 
399  static inline void calcGridCoord(const float x, const float y, const float z,
400  const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz) {
401  frx = (float)(nfftx)*x;
402  fry = (float)(nffty)*y;
403  frz = (float)(nfftz)*z;
404  }
405 
406  static inline void calcGridCoord(const double x, const double y, const double z,
407  const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz) {
408  frx = (double)(nfftx)*x;
409  fry = (double)(nffty)*y;
410  frz = (double)(nfftz)*z;
411  }
412 
413 };
414 
415 // WORKAROUND-HIP: vector types are "using", not struct, and cannot be forward-declared
416 #ifdef NAMD_HIP
417 #include <hip/hip_vector_types.h>
418 #else
419 struct float2;
420 #endif
421 //
422 // Abstract PmeTranspose base class
423 //
425 protected:
427  const int permutation;
428  const int jblock, kblock;
430  int dataSize;
431  int nblock;
432  std::vector<int> pos;
433 public:
434  PmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock) :
436 
437  int i0, i1, j0, j1, k0, k1;
438  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
439  isize = (i1-i0+1);
440  jsize = (j1-j0+1);
441  ksize = (k1-k0+1);
442  dataSize = (i1-i0+1)*(j1-j0+1)*(k1-k0+1);
443 
444  switch(permutation) {
445  case Perm_cX_Y_Z:
447  break;
448  case Perm_Y_Z_cX:
450  break;
451  case Perm_Z_cX_Y:
453  break;
454  default:
455  NAMD_bug("PmeTranspose::PmeTranspose, invalid permutation");
456  break;
457  }
458 
459  // Pos marks the beginning of blocks
460  pos.resize(nblock+1);
461 
462  int x1;
463  for (int iblock=0;iblock < nblock;iblock++) {
464  // Get the dimension of the transpose block
465  // We are transposing nblock of these blocks and storing them into a pencil of size
466  // ny * nz * xsize at location ny*nz*x0
467  int x0, y0dummy, y1dummy, z0dummy, z1dummy;
468  getBlockDim(pmeGrid, permutation, iblock, jblock, kblock, x0, x1, y0dummy, y1dummy, z0dummy, z1dummy);
469 
470  pos[iblock] = x0;
471  }
472  // Last position begins at x1+1
473  pos[nblock] = x1+1;
474  }
475  virtual ~PmeTranspose() {}
476  virtual void transposeXYZtoYZX(const float2* data)=0;
477  virtual void transposeXYZtoZXY(const float2* data)=0;
478 };
479 
480 #endif // PMESOLVERUTIL_H
virtual void gatherForce(Lattice &lattice, CudaForce *force)=0
int zBlocks
Definition: PmeBase.h:25
virtual ~FFTCompute()
void compute_b_moduli(double *bm, int K, int order)
Definition: PmeKSpace.C:42
const int permutation
virtual void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)=0
PmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock, unsigned int grid=0)
int K2
Definition: PmeBase.h:21
int K1
Definition: PmeBase.h:21
virtual void backward()=0
static int getPencilIndexY(const PmeGrid &pmeGrid, const int y)
Definition: PmeSolverUtil.h:23
virtual ~PmeTranspose()
float * getDataSrc()
static void getPencilDim(const PmeGrid &pmeGrid, const int permutation, const int jblock, const int kblock, int &i0, int &i1, int &j0, int &j1, int &k0, int &k1)
Definition: PmeSolverUtil.h:32
static void calcGridCoord(const float x, const float y, const float z, const float recip11, const float recip22, const float recip33, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
std::vector< int > pos
const unsigned int NUM_GRID_MAX
Definition: PmeSolverUtil.h:9
virtual void transposeXYZtoYZX(const float2 *data)=0
static void calcGridCoord(const float x, const float y, const float z, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
virtual void transposeXYZtoZXY(const float2 *data)=0
PmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock)
PmeGrid pmeGrid
static double calcGridCoord(const double x, const double recip11, const int nfftx)
bool dataDstAllocated
int yBlocks
Definition: PmeBase.h:25
#define order
Definition: PmeRealSpace.C:235
int order
Definition: PmeBase.h:23
void NAMD_bug(const char *err_msg)
Definition: common.C:195
const int jblock
virtual void getVirial(double *virial)=0
float * getDataDst()
const int kblock
void setGrid(unsigned int i)
bool dataSrcAllocated
int K3
Definition: PmeBase.h:21
const int permutation
static void getBlockDim(const PmeGrid &pmeGrid, const int permutation, const int iblock, const int jblock, const int kblock, int &i0, int &i1, int &j0, int &j1, int &k0, int &k1)
Definition: PmeSolverUtil.h:89
static void calcGridCoord(const double x, const double y, const double z, const double recip11, const double recip22, const double recip33, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
unsigned int multipleGridIndex
void init(float *dataSrc_in, int dataSrcSize_in, float *dataDst_in, int dataDstSize_in, int permutation, PmeGrid pmeGrid, int pmePencilType, int jblock, int kblock, int flags)
static void calcGridCoord(const double x, const double y, const double z, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
virtual void spreadCharge(Lattice &lattice)=0
float * dataDst
virtual void forward()=0
virtual void setGrid(unsigned int iGrid)
int xBlocks
Definition: PmeBase.h:25
PmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, unsigned int multipleGridIndex=0)
virtual ~PmeKSpaceCompute()
float * dataSrc
PatchInfo(int x, int y, int z, int w)
Definition: PmeSolverUtil.h:17
virtual ~PmeRealSpaceCompute()
static int getPencilIndexZ(const PmeGrid &pmeGrid, const int z)
Definition: PmeSolverUtil.h:28
virtual double getEnergy()=0
virtual void copyAtoms(const int numAtoms, const CudaAtom *atoms)=0
unsigned int multipleGridIndex