NAMD
PmeSolverUtil.h
Go to the documentation of this file.
1 #ifndef PMESOLVERUTIL_H
2 #define PMESOLVERUTIL_H
3 
4 #include "PmeBase.h" // PmeGrid -structure
5 #include "NamdTypes.h" // CudaAtom, float2
6 #include "Lattice.h"
7 
8 //
9 // Stores the patch origin (x, y, z) and number of atoms (w)
10 //
11 struct PatchInfo {
12  int x, y, z, w;
13  PatchInfo() {}
14  PatchInfo(int x, int y, int z, int w) : x(x), y(y), z(z), w(w) {}
15 };
16 
18 
19 // y = absolute grid position [0 ... pmeGrid.K2-1]
20 static inline int getPencilIndexY(const PmeGrid& pmeGrid, const int y) {
21  return (y*pmeGrid.yBlocks + pmeGrid.yBlocks - 1)/pmeGrid.K2;
22 }
23 
24 // z = absolute grid position [0 ... pmeGrid.K3-1]
25 static inline int getPencilIndexZ(const PmeGrid& pmeGrid, const int z) {
26  return (z*pmeGrid.zBlocks + pmeGrid.zBlocks - 1)/pmeGrid.K3;
27 }
28 
29 static void getPencilDim(const PmeGrid& pmeGrid, const int permutation,
30  const int jblock, const int kblock,
31  int& i0, int& i1, int& j0, int& j1, int& k0, int& k1) {
32 
33  int isize, jsize, ksize;
34  int jblocks, kblocks;
35 
36  switch(permutation) {
37  case Perm_X_Y_Z:
38  isize = pmeGrid.K1;
39  jsize = pmeGrid.K2;
40  ksize = pmeGrid.K3;
41  jblocks = pmeGrid.yBlocks;
42  kblocks = pmeGrid.zBlocks;
43  break;
44  case Perm_cX_Y_Z:
45  isize = pmeGrid.K1/2+1;
46  jsize = pmeGrid.K2;
47  ksize = pmeGrid.K3;
48  jblocks = pmeGrid.yBlocks;
49  kblocks = pmeGrid.zBlocks;
50  break;
51  case Perm_Y_Z_cX:
52  isize = pmeGrid.K2;
53  jsize = pmeGrid.K3;
54  ksize = pmeGrid.K1/2+1;
55  jblocks = pmeGrid.zBlocks;
56  kblocks = pmeGrid.xBlocks;
57  break;
58  case Perm_Z_cX_Y:
59  isize = pmeGrid.K3;
60  jsize = pmeGrid.K1/2+1;
61  ksize = pmeGrid.K2;
62  jblocks = pmeGrid.xBlocks;
63  kblocks = pmeGrid.yBlocks;
64  break;
65  default:
66  NAMD_bug("getPencilDim, invalid permutation");
67  break;
68  }
69 
70  if (jblock < 0 || jblock >= jblocks || kblock < 0 || kblock >= kblocks)
71  NAMD_bug("getPencilDim, invalid block indices");
72 
73  i0 = 0;
74  i1 = isize - 1;
75 
76  j0 = jsize*jblock/jblocks;
77  j1 = jsize*(jblock+1)/jblocks - 1;
78 
79  k0 = ksize*kblock/kblocks;
80  k1 = ksize*(kblock+1)/kblocks - 1;
81 }
82 
83 //
84 // Return block dimensions [i0...i1] x [j0...j1] x [k0...k1]
85 //
86 static void getBlockDim(const PmeGrid& pmeGrid, const int permutation,
87  const int iblock, const int jblock, const int kblock,
88  int& i0, int& i1, int& j0, int& j1, int& k0, int& k1) {
89 
90  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
91 
92  int iblocks;
93 
94  switch(permutation) {
95  case Perm_X_Y_Z:
96  iblocks = pmeGrid.xBlocks;
97  break;
98  case Perm_cX_Y_Z:
99  iblocks = pmeGrid.xBlocks;
100  break;
101  case Perm_Y_Z_cX:
102  iblocks = pmeGrid.yBlocks;
103  break;
104  case Perm_Z_cX_Y:
105  iblocks = pmeGrid.zBlocks;
106  break;
107  default:
108  NAMD_bug("getBlockDim, invalid permutation");
109  break;
110  }
111 
112  if (iblock < 0 || iblock >= iblocks)
113  NAMD_bug("getBlockDim, invalid block index");
114 
115  int isize = i1-i0+1;
116 
117  i0 = isize*iblock/iblocks;
118  i1 = isize*(iblock+1)/iblocks - 1;
119 }
120 
121 //
122 // Abstract FFTCompute base class for out-of-place FFTs
123 // NOTE: Must call init -method to initialize this class
124 //
125 class FFTCompute {
126 public:
128  dataSrc = NULL;
129  dataSrcSize = 0;
130  dataSrcAllocated = false;
131  dataDst = NULL;
132  dataDstSize = 0;
133  dataDstAllocated = false;
134  }
135  void init(
136  float* dataSrc_in, int dataSrcSize_in,
137  float* dataDst_in, int dataDstSize_in,
138  int permutation, PmeGrid pmeGrid,
139  int pmePencilType, int jblock, int kblock, int flags) {
140 
141  if (dataSrc_in != NULL && dataSrc_in == dataDst_in)
142  NAMD_bug("FFTCompute::init, only out-of-place FFTs supported");
143 
144  int permutationDst = permutation;
145  if (permutation == Perm_X_Y_Z) {
146  permutationDst = Perm_cX_Y_Z;
147  }
148 
149  if (dataSrc_in == NULL) {
150  // Sets data and dataSize
151  dataSrcSize = getDataSizeRequired(permutation, pmeGrid, jblock, kblock);
152  dataSrc = allocateData(dataSrcSize);
153  dataSrcAllocated = true;
154  } else {
155  if (dataSrcSize_in < getDataSizeRequired(permutation, pmeGrid, jblock, kblock))
156  NAMD_bug("FFTCompute::init, invalid dataSrcSize_in");
157  dataSrcSize = dataSrcSize_in;
158  dataSrc = dataSrc_in;
159  dataSrcAllocated = false;
160  }
161 
162  if (dataDst_in == NULL) {
163  // Sets data and dataSize
164  dataDstSize = getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock);
165  dataDst = allocateData(dataDstSize);
166  dataDstAllocated = true;
167  } else {
168  if (dataDstSize_in < getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock))
169  NAMD_bug("FFTCompute::init, invalid dataDstSize_in");
170  dataDstSize = dataDstSize_in;
171  dataDst = dataDst_in;
172  dataDstAllocated = false;
173  }
174 
175  // Final sanity check
176  if (dataDst == NULL || dataSrc == NULL ||
177  dataDstSize < getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock) ||
178  dataSrcSize < getDataSizeRequired(permutation, pmeGrid, jblock, kblock))
179  NAMD_bug("FFTCompute::init, error setting up data buffers");
180 
181  // Now "data" is pointer to grid data with at least size getDataSizeRequired(...)
182  if (pmePencilType == 3) {
183  // 3D FFT
184  if (pmeGrid.xBlocks != 1 || pmeGrid.yBlocks != 1 || pmeGrid.zBlocks != 1)
185  NAMD_bug("FFTCompute::init, 3D FFT requires a single pencil");
186  int n[3] = {pmeGrid.K1, pmeGrid.K2, pmeGrid.K3};
187  plan3D(n, flags);
188  } else if (pmePencilType == 1) {
189  int i0, i1, j0, j1, k0, k1;
190  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
191  int n[1] = {i1-i0+1};
192  int howmany = (j1-j0+1)*(k1-k0+1);
193  if (permutation == Perm_X_Y_Z) {
194  plan1DX(n, howmany, flags);
195  } else if (permutation == Perm_Y_Z_cX) {
196  plan1DY(n, howmany, flags);
197  } else if (permutation == Perm_Z_cX_Y) {
198  plan1DZ(n, howmany, flags);
199  } else {
200  NAMD_bug("FFTCompute::init, invalid permutation");
201  }
202  } else if (pmePencilType == 2) {
203  // 2D FFTs of xy planes
204  int i0, i1, j0, j1, k0, k1;
205  getPencilDim(pmeGrid, permutation, 0, kblock, i0, i1, j0, j1, k0, k1);
206  int n[2] = {pmeGrid.K1, pmeGrid.K2};
207  int howmany = k1-k0+1;
208  plan2D(n, howmany, flags);
209  } else {
210  NAMD_bug("FFTCompute::init, invalid pmePencilType");
211  }
212  }
213  virtual ~FFTCompute() {}
214  virtual void forward()=0;
215  virtual void backward()=0;
216  float* getDataSrc() {return dataSrc;}
217  float* getDataDst() {return dataDst;}
218 protected:
221  // Pointer to data, allocated here only if dataAllocated = true
222  float* dataSrc;
223  float* dataDst;
224  // Size of data
227  // If dataAllocated = true, implementation must deallocate dataPtr
230 private:
231  // Return the size of data in floats that this FFT will require
232  int getDataSizeRequired(int permutation, PmeGrid pmeGrid,
233  int jblock, int kblock) {
234 
235  this->jblock = jblock;
236  this->kblock = kblock;
237 
238  int i0, i1, j0, j1, k0, k1;
239  getPencilDim(pmeGrid, permutation, jblock, kblock,
240  i0, i1, j0, j1, k0, k1);
241 
242  int size = (i1-i0+1)*(j1-j0+1)*(k1-k0+1);
243 
244  isize = (i1-i0+1);
245  jsize = (j1-j0+1);
246  ksize = (k1-k0+1);
247 
248  if (permutation == Perm_X_Y_Z)
249  return size;
250  else
251  return 2*size;
252  }
253  // Returns pointer to array of size dataSizeRequired
254  virtual float* allocateData(const int dataSizeRequired)=0;
255  virtual void plan3D(int *n, int flags)=0;
256  virtual void plan2D(int *n, int howmany, int flags)=0;
257  virtual void plan1DX(int *n, int howmany, int flags)=0;
258  virtual void plan1DY(int *n, int howmany, int flags)=0;
259  virtual void plan1DZ(int *n, int howmany, int flags)=0;
260 };
261 
262 //
263 // PmeKSpaceCompute base class
264 //
266 protected:
268  //int K1_len, K2_start, K2_end, K3_start, K3_end;
269  double *bm1, *bm2, *bm3;
270  double kappa;
271  const int permutation;
272  const int jblock, kblock;
274  int j0, k0;
275 public:
277  const int jblock, const int kblock, double kappa) :
278  pmeGrid(pmeGrid), permutation(permutation),
279  jblock(jblock), kblock(kblock), kappa(kappa) {
280 
281  bm1 = new double[pmeGrid.K1];
282  bm2 = new double[pmeGrid.K2];
283  bm3 = new double[pmeGrid.K3];
284  // Use compute_b_moduli from PmeKSpace.C
285  extern void compute_b_moduli(double *bm, int K, int order);
286  compute_b_moduli(bm1, pmeGrid.K1, pmeGrid.order);
287  compute_b_moduli(bm2, pmeGrid.K2, pmeGrid.order);
288  compute_b_moduli(bm3, pmeGrid.K3, pmeGrid.order);
289 
290  int i0, i1, j1, k1;
291  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
292  size1 = i1-i0+1;
293  size2 = j1-j0+1;
294  size3 = k1-k0+1;
295  }
296  virtual ~PmeKSpaceCompute() {
297  delete [] bm1;
298  delete [] bm2;
299  delete [] bm3;
300  }
301  virtual void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float* data)=0;
302  virtual double getEnergy()=0;
303  virtual void getVirial(double *virial)=0;
304 };
305 
306 //
307 // Abstract PmeRealSpaceCompute base class
308 //
310 protected:
311  // Number of patches and atoms
312  // int numPatches;
313  int numAtoms;
314  // Grid definition
316  // Grid (y, z) location
317  int y0, z0;
318  // Grid size in real-space
320  // Grid data
321  int dataSize;
322  float *data;
323  // Pencil position
324  const int jblock, kblock;
325 public:
326  PmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock) :
327  pmeGrid(pmeGrid), jblock(jblock), kblock(kblock), data(NULL) {
328  int x0, x1, y1, z1;
329  getPencilDim(pmeGrid, Perm_X_Y_Z, jblock, kblock, x0, x1, y0, y1, z0, z1);
330  xsize = x1-x0+1;
331  ysize = y1-y0+1;
332  zsize = z1-z0+1;
333  // Allocate enough data for storing the complex data
334  // dataSize = 2*(xsize/2+1)*ysize*zsize;
335  // Only allocate enough data for storing the real-space data
336  // Complex data is stored in FFTCompute
338  }
339  virtual ~PmeRealSpaceCompute() {}
340  // Setup patches and atoms in preparation for charge spreading and force gathering
341  // virtual void setPatchesAtoms(const int numPatches, const PatchInfo* patches,
342  // const int numAtoms, const CudaAtom* atoms)=0;
343  // Setup atoms in preparation for charge spreading and force gathering
344  virtual void copyAtoms(const int numAtoms, const CudaAtom* atoms)=0;
345  // Spread charges on grid
346  virtual void spreadCharge(Lattice &lattice)=0;
347  // Gather forces off the grid
348  virtual void gatherForce(Lattice &lattice, CudaForce* force)=0;
349  // // Calculate self energy
350  // virtual double calcSelfEnergy()=0;
351  float* getData() {return data;}
352  int getDataSize() {return dataSize;}
353 
354  static inline double calcGridCoord(const double x, const double recip11, const int nfftx) {
355  double w;
356  w = x*recip11 + 2.0;
357  return (double)((double)nfftx*(w - (floor(w + 0.5) - 0.5)));
358  }
359 
360  static inline void calcGridCoord(const double x, const double y, const double z,
361  const double recip11, const double recip22, const double recip33,
362  const int nfftx, const int nffty, const int nfftz,
363  double &frx, double &fry, double &frz) {
364  double w;
365  w = x*recip11 + 2.0;
366  frx = (double)((double)nfftx*(w - (floor(w + 0.5) - 0.5)));
367  w = y*recip22 + 2.0;
368  fry = (double)((double)nffty*(w - (floor(w + 0.5) - 0.5)));
369  w = z*recip33 + 2.0;
370  frz = (double)((double)nfftz*(w - (floor(w + 0.5) - 0.5)));
371  }
372 
373  static inline void calcGridCoord(const float x, const float y, const float z,
374  const float recip11, const float recip22, const float recip33,
375  const int nfftx, const int nffty, const int nfftz,
376  float &frx, float &fry, float &frz) {
377  float w;
378  w = x*recip11 + 2.0f;
379  frx = (float)(nfftx*(w - (floorf(w + 0.5f) - 0.5f)));
380  w = y*recip22 + 2.0f;
381  fry = (float)(nffty*(w - (floorf(w + 0.5f) - 0.5f)));
382  w = z*recip33 + 2.0f;
383  frz = (float)(nfftz*(w - (floorf(w + 0.5f) - 0.5f)));
384  }
385 
386  static inline void calcGridCoord(const float x, const float y, const float z,
387  const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz) {
388  frx = (float)(nfftx)*x;
389  fry = (float)(nffty)*y;
390  frz = (float)(nfftz)*z;
391  }
392 
393  static inline void calcGridCoord(const double x, const double y, const double z,
394  const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz) {
395  frx = (double)(nfftx)*x;
396  fry = (double)(nffty)*y;
397  frz = (double)(nfftz)*z;
398  }
399 
400 };
401 
402 // WORKAROUND-HIP: vector types are "using", not struct, and cannot be forward-declared
403 #ifdef NAMD_HIP
404 #include <hip/hip_vector_types.h>
405 #else
406 struct float2;
407 #endif
408 //
409 // Abstract PmeTranspose base class
410 //
412 protected:
414  const int permutation;
415  const int jblock, kblock;
417  int dataSize;
418  int nblock;
419  std::vector<int> pos;
420 public:
421  PmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock) :
422  pmeGrid(pmeGrid), permutation(permutation), jblock(jblock), kblock(kblock) {
423 
424  int i0, i1, j0, j1, k0, k1;
425  getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
426  isize = (i1-i0+1);
427  jsize = (j1-j0+1);
428  ksize = (k1-k0+1);
429  dataSize = (i1-i0+1)*(j1-j0+1)*(k1-k0+1);
430 
431  switch(permutation) {
432  case Perm_cX_Y_Z:
433  nblock = pmeGrid.xBlocks;
434  break;
435  case Perm_Y_Z_cX:
436  nblock = pmeGrid.yBlocks;
437  break;
438  case Perm_Z_cX_Y:
439  nblock = pmeGrid.zBlocks;
440  break;
441  default:
442  NAMD_bug("PmeTranspose::PmeTranspose, invalid permutation");
443  break;
444  }
445 
446  // Pos marks the beginning of blocks
447  pos.resize(nblock+1);
448 
449  int x1;
450  for (int iblock=0;iblock < nblock;iblock++) {
451  // Get the dimension of the transpose block
452  // We are transposing nblock of these blocks and storing them into a pencil of size
453  // ny * nz * xsize at location ny*nz*x0
454  int x0, y0dummy, y1dummy, z0dummy, z1dummy;
455  getBlockDim(pmeGrid, permutation, iblock, jblock, kblock, x0, x1, y0dummy, y1dummy, z0dummy, z1dummy);
456 
457  pos[iblock] = x0;
458  }
459  // Last position begins at x1+1
460  pos[nblock] = x1+1;
461  }
462  virtual ~PmeTranspose() {}
463  virtual void transposeXYZtoYZX(const float2* data)=0;
464  virtual void transposeXYZtoZXY(const float2* data)=0;
465 };
466 
467 #endif // PMESOLVERUTIL_H
virtual void gatherForce(Lattice &lattice, CudaForce *force)=0
int zBlocks
Definition: PmeBase.h:22
virtual ~FFTCompute()
void compute_b_moduli(double *bm, int K, int order)
Definition: PmeKSpace.C:42
const int permutation
virtual void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)=0
PmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock)
int K2
Definition: PmeBase.h:18
int K1
Definition: PmeBase.h:18
static __thread atom * atoms
virtual void backward()=0
static int getPencilIndexY(const PmeGrid &pmeGrid, const int y)
Definition: PmeSolverUtil.h:20
virtual ~PmeTranspose()
float * getDataSrc()
static void getPencilDim(const PmeGrid &pmeGrid, const int permutation, const int jblock, const int kblock, int &i0, int &i1, int &j0, int &j1, int &k0, int &k1)
Definition: PmeSolverUtil.h:29
static void calcGridCoord(const float x, const float y, const float z, const float recip11, const float recip22, const float recip33, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
std::vector< int > pos
virtual void transposeXYZtoYZX(const float2 *data)=0
static void calcGridCoord(const float x, const float y, const float z, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
virtual void transposeXYZtoZXY(const float2 *data)=0
PmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock)
PmeGrid pmeGrid
static double calcGridCoord(const double x, const double recip11, const int nfftx)
bool dataDstAllocated
int yBlocks
Definition: PmeBase.h:22
#define order
Definition: PmeRealSpace.C:235
int order
Definition: PmeBase.h:20
void NAMD_bug(const char *err_msg)
Definition: common.C:129
PmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa)
const int jblock
gridSize z
virtual void getVirial(double *virial)=0
float * getDataDst()
const int kblock
bool dataSrcAllocated
int K3
Definition: PmeBase.h:18
const int permutation
static void getBlockDim(const PmeGrid &pmeGrid, const int permutation, const int iblock, const int jblock, const int kblock, int &i0, int &i1, int &j0, int &j1, int &k0, int &k1)
Definition: PmeSolverUtil.h:86
static void calcGridCoord(const double x, const double y, const double z, const double recip11, const double recip22, const double recip33, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
void init(float *dataSrc_in, int dataSrcSize_in, float *dataDst_in, int dataDstSize_in, int permutation, PmeGrid pmeGrid, int pmePencilType, int jblock, int kblock, int flags)
static void calcGridCoord(const double x, const double y, const double z, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
virtual void spreadCharge(Lattice &lattice)=0
gridSize y
float * dataDst
virtual void forward()=0
int xBlocks
Definition: PmeBase.h:22
gridSize x
virtual ~PmeKSpaceCompute()
float * dataSrc
PatchInfo(int x, int y, int z, int w)
Definition: PmeSolverUtil.h:14
virtual ~PmeRealSpaceCompute()
static int getPencilIndexZ(const PmeGrid &pmeGrid, const int z)
Definition: PmeSolverUtil.h:25
virtual double getEnergy()=0
virtual void copyAtoms(const int numAtoms, const CudaAtom *atoms)=0