PmeSolverUtil.h

Go to the documentation of this file.
00001 #ifndef PMESOLVERUTIL_H
00002 #define PMESOLVERUTIL_H
00003 
00004 #include "PmeBase.h"   // PmeGrid -structure
00005 #include "NamdTypes.h" // CudaAtom, float2
00006 #include "Lattice.h"
00007 
00008 //
00009 // Stores the patch origin (x, y, z) and number of atoms (w)
00010 //
00011 struct PatchInfo {
00012   int x, y, z, w;
00013   PatchInfo() {}
00014   PatchInfo(int x, int y, int z, int w) : x(x), y(y), z(z), w(w) {}
00015 };
00016 
00017 enum {Perm_X_Y_Z, Perm_cX_Y_Z, Perm_Y_Z_cX, Perm_Z_cX_Y};
00018 
00019 // y = absolute grid position [0 ... pmeGrid.K2-1]
00020 static inline int getPencilIndexY(const PmeGrid& pmeGrid, const int y) {
00021   return (y*pmeGrid.yBlocks + pmeGrid.yBlocks - 1)/pmeGrid.K2;
00022 }
00023 
00024 // z = absolute grid position [0 ... pmeGrid.K3-1]
00025 static inline int getPencilIndexZ(const PmeGrid& pmeGrid, const int z) {
00026   return (z*pmeGrid.zBlocks + pmeGrid.zBlocks - 1)/pmeGrid.K3;
00027 }
00028 
00029 static void getPencilDim(const PmeGrid& pmeGrid, const int permutation,
00030   const int jblock, const int kblock,
00031   int& i0, int& i1, int& j0, int& j1, int& k0, int& k1) {
00032 
00033   int isize, jsize, ksize;
00034   int jblocks, kblocks;
00035 
00036   switch(permutation) {
00037     case Perm_X_Y_Z:
00038     isize = pmeGrid.K1;
00039     jsize = pmeGrid.K2;
00040     ksize = pmeGrid.K3;
00041     jblocks = pmeGrid.yBlocks;
00042     kblocks = pmeGrid.zBlocks;
00043     break;
00044     case Perm_cX_Y_Z:
00045     isize = pmeGrid.K1/2+1;
00046     jsize = pmeGrid.K2;
00047     ksize = pmeGrid.K3;
00048     jblocks = pmeGrid.yBlocks;
00049     kblocks = pmeGrid.zBlocks;
00050     break;
00051     case Perm_Y_Z_cX:
00052     isize = pmeGrid.K2;
00053     jsize = pmeGrid.K3;
00054     ksize = pmeGrid.K1/2+1;
00055     jblocks = pmeGrid.zBlocks;
00056     kblocks = pmeGrid.xBlocks;
00057     break;
00058     case Perm_Z_cX_Y:
00059     isize = pmeGrid.K3;
00060     jsize = pmeGrid.K1/2+1;
00061     ksize = pmeGrid.K2;
00062     jblocks = pmeGrid.xBlocks;
00063     kblocks = pmeGrid.yBlocks;
00064     break;
00065     default:
00066     NAMD_bug("getPencilDim, invalid permutation");
00067     break;
00068   }
00069 
00070   if (jblock < 0 || jblock >= jblocks || kblock < 0 || kblock >= kblocks)
00071     NAMD_bug("getPencilDim, invalid block indices");
00072 
00073   i0 = 0;
00074   i1 = isize - 1;
00075 
00076   j0 = jsize*jblock/jblocks;
00077   j1 = jsize*(jblock+1)/jblocks - 1;
00078 
00079   k0 = ksize*kblock/kblocks;
00080   k1 = ksize*(kblock+1)/kblocks - 1;
00081 }
00082 
00083 //
00084 // Return block dimensions [i0...i1] x [j0...j1] x [k0...k1]
00085 //
00086 static void getBlockDim(const PmeGrid& pmeGrid, const int permutation,
00087   const int iblock, const int jblock, const int kblock,
00088   int& i0, int& i1, int& j0, int& j1, int& k0, int& k1) {
00089 
00090   getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
00091 
00092   int iblocks;
00093 
00094   switch(permutation) {
00095     case Perm_X_Y_Z:
00096     iblocks = pmeGrid.xBlocks;
00097     break;
00098     case Perm_cX_Y_Z:
00099     iblocks = pmeGrid.xBlocks;
00100     break;
00101     case Perm_Y_Z_cX:
00102     iblocks = pmeGrid.yBlocks;
00103     break;
00104     case Perm_Z_cX_Y:
00105     iblocks = pmeGrid.zBlocks;
00106     break;
00107     default:
00108     NAMD_bug("getBlockDim, invalid permutation");
00109     break;
00110   }
00111 
00112   if (iblock < 0 || iblock >= iblocks)
00113     NAMD_bug("getBlockDim, invalid block index");
00114 
00115   int isize = i1-i0+1;
00116 
00117   i0 = isize*iblock/iblocks;
00118   i1 = isize*(iblock+1)/iblocks - 1;
00119 }
00120 
00121 //
00122 // Abstract FFTCompute base class for out-of-place FFTs
00123 // NOTE: Must call init -method to initialize this class
00124 //
00125 class FFTCompute {
00126 public:
00127   FFTCompute() {
00128     dataSrc = NULL;
00129     dataSrcSize = 0;
00130     dataSrcAllocated = false;
00131     dataDst = NULL;
00132     dataDstSize = 0;
00133     dataDstAllocated = false;
00134   }
00135   void init(
00136     float* dataSrc_in, int dataSrcSize_in,
00137     float* dataDst_in, int dataDstSize_in,
00138     int permutation, PmeGrid pmeGrid, 
00139     int pmePencilType, int jblock, int kblock, int flags) {
00140 
00141     if (dataSrc_in != NULL && dataSrc_in == dataDst_in)
00142       NAMD_bug("FFTCompute::init, only out-of-place FFTs supported");
00143 
00144     int permutationDst = permutation;
00145     if (permutation == Perm_X_Y_Z) {
00146       permutationDst = Perm_cX_Y_Z;
00147     }
00148 
00149     if (dataSrc_in == NULL) {
00150       // Sets data and dataSize
00151       dataSrcSize = getDataSizeRequired(permutation, pmeGrid, jblock, kblock);
00152       dataSrc = allocateData(dataSrcSize);
00153       dataSrcAllocated = true;
00154     } else {
00155       if (dataSrcSize_in < getDataSizeRequired(permutation, pmeGrid, jblock, kblock))
00156         NAMD_bug("FFTCompute::init, invalid dataSrcSize_in");
00157       dataSrcSize = dataSrcSize_in;
00158       dataSrc = dataSrc_in;
00159       dataSrcAllocated = false;
00160     }
00161 
00162     if (dataDst_in == NULL) {
00163       // Sets data and dataSize
00164       dataDstSize = getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock);
00165       dataDst = allocateData(dataDstSize);
00166       dataDstAllocated = true;
00167     } else {
00168       if (dataDstSize_in < getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock))
00169         NAMD_bug("FFTCompute::init, invalid dataDstSize_in");
00170       dataDstSize = dataDstSize_in;
00171       dataDst = dataDst_in;
00172       dataDstAllocated = false;
00173     }
00174 
00175     // Final sanity check
00176     if (dataDst == NULL || dataSrc == NULL ||
00177       dataDstSize < getDataSizeRequired(permutationDst, pmeGrid, jblock, kblock) ||
00178       dataSrcSize < getDataSizeRequired(permutation, pmeGrid, jblock, kblock))
00179       NAMD_bug("FFTCompute::init, error setting up data buffers");
00180 
00181     // Now "data" is pointer to grid data with at least size getDataSizeRequired(...)
00182     if (pmePencilType == 3) {
00183       // 3D FFT
00184       if (pmeGrid.xBlocks != 1 || pmeGrid.yBlocks != 1 || pmeGrid.zBlocks != 1)
00185         NAMD_bug("FFTCompute::init, 3D FFT requires a single pencil");
00186       int n[3] = {pmeGrid.K1, pmeGrid.K2, pmeGrid.K3};
00187       plan3D(n, flags);
00188     } else if (pmePencilType == 1) {
00189       int i0, i1, j0, j1, k0, k1;
00190       getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
00191       int n[1] = {i1-i0+1};
00192       int howmany = (j1-j0+1)*(k1-k0+1);
00193       if (permutation == Perm_X_Y_Z) {
00194         plan1DX(n, howmany, flags);
00195       } else if (permutation == Perm_Y_Z_cX) {
00196         plan1DY(n, howmany, flags);
00197       } else if (permutation == Perm_Z_cX_Y) {
00198         plan1DZ(n, howmany, flags);
00199       } else {
00200         NAMD_bug("FFTCompute::init, invalid permutation");
00201       }
00202     } else if (pmePencilType == 2) {
00203       // 2D FFTs of xy planes
00204       int i0, i1, j0, j1, k0, k1;
00205       getPencilDim(pmeGrid, permutation, 0, kblock, i0, i1, j0, j1, k0, k1);
00206       int n[2] = {pmeGrid.K1, pmeGrid.K2};
00207       int howmany = k1-k0+1;
00208       plan2D(n, howmany, flags);
00209     } else {
00210       NAMD_bug("FFTCompute::init, invalid pmePencilType");
00211     }       
00212   }
00213   virtual ~FFTCompute() {}
00214   virtual void forward()=0;
00215   virtual void backward()=0;
00216   float* getDataSrc() {return dataSrc;}
00217   float* getDataDst() {return dataDst;}
00218 protected:
00219   int jblock, kblock;
00220   int isize, jsize, ksize;
00221   // Pointer to data, allocated here only if dataAllocated = true
00222   float* dataSrc;
00223   float* dataDst;
00224   // Size of data
00225   int dataSrcSize;
00226   int dataDstSize;
00227   // If dataAllocated = true, implementation must deallocate dataPtr
00228   bool dataSrcAllocated;
00229   bool dataDstAllocated;
00230 private:
00231   // Return the size of data in floats that this FFT will require
00232   int getDataSizeRequired(int permutation, PmeGrid pmeGrid, 
00233     int jblock, int kblock) {
00234 
00235     this->jblock = jblock;
00236     this->kblock = kblock;
00237 
00238     int i0, i1, j0, j1, k0, k1;
00239     getPencilDim(pmeGrid, permutation, jblock, kblock,
00240       i0, i1, j0, j1, k0, k1);
00241     
00242     int size = (i1-i0+1)*(j1-j0+1)*(k1-k0+1);
00243 
00244     isize = (i1-i0+1);
00245     jsize = (j1-j0+1);
00246     ksize = (k1-k0+1);
00247 
00248     if (permutation == Perm_X_Y_Z)
00249       return size;
00250     else
00251       return 2*size;
00252   }
00253   // Returns pointer to array of size dataSizeRequired
00254   virtual float* allocateData(const int dataSizeRequired)=0;
00255   virtual void plan3D(int *n, int flags)=0;
00256   virtual void plan2D(int *n, int howmany, int flags)=0;
00257   virtual void plan1DX(int *n, int howmany, int flags)=0;
00258   virtual void plan1DY(int *n, int howmany, int flags)=0;
00259   virtual void plan1DZ(int *n, int howmany, int flags)=0;
00260 };
00261 
00262 //
00263 // PmeKSpaceCompute base class
00264 //
00265 class PmeKSpaceCompute {
00266 protected:
00267   PmeGrid pmeGrid;
00268   //int K1_len, K2_start, K2_end, K3_start, K3_end;
00269   double *bm1, *bm2, *bm3;
00270   double kappa;
00271   const int permutation;
00272   const int jblock, kblock;
00273   int size1, size2, size3;
00274   int j0, k0;
00275 public:
00276   PmeKSpaceCompute(PmeGrid pmeGrid, const int permutation,
00277     const int jblock, const int kblock, double kappa) : 
00278     pmeGrid(pmeGrid), permutation(permutation),
00279     jblock(jblock), kblock(kblock), kappa(kappa) {
00280 
00281     bm1 = new double[pmeGrid.K1];
00282     bm2 = new double[pmeGrid.K2];
00283     bm3 = new double[pmeGrid.K3];
00284     // Use compute_b_moduli from PmeKSpace.C
00285     extern void compute_b_moduli(double *bm, int K, int order);
00286     compute_b_moduli(bm1, pmeGrid.K1, pmeGrid.order);
00287     compute_b_moduli(bm2, pmeGrid.K2, pmeGrid.order);
00288     compute_b_moduli(bm3, pmeGrid.K3, pmeGrid.order);
00289 
00290     int i0, i1, j1, k1;
00291     getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
00292     size1 = i1-i0+1;
00293     size2 = j1-j0+1;
00294     size3 = k1-k0+1;
00295   }
00296   virtual ~PmeKSpaceCompute() {
00297     delete [] bm1;
00298     delete [] bm2;
00299     delete [] bm3;
00300   }
00301   virtual void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float* data)=0;
00302   virtual double getEnergy()=0;
00303   virtual void getVirial(double *virial)=0;
00304 };
00305 
00306 //
00307 // Abstract PmeRealSpaceCompute base class
00308 //
00309 class PmeRealSpaceCompute {
00310 protected:
00311   // Number of patches and atoms
00312   // int numPatches;
00313   int numAtoms;
00314   // Grid definition
00315   PmeGrid pmeGrid;
00316   // Grid (y, z) location
00317   int y0, z0;
00318   // Grid size in real-space
00319   int xsize, ysize, zsize;
00320   // Grid data
00321   int dataSize;
00322   float *data;
00323   // Pencil position 
00324   const int jblock, kblock;
00325 public:
00326   PmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock) : 
00327     pmeGrid(pmeGrid), jblock(jblock), kblock(kblock), data(NULL) {
00328       int x0, x1, y1, z1;
00329       getPencilDim(pmeGrid, Perm_X_Y_Z, jblock, kblock, x0, x1, y0, y1, z0, z1);
00330       xsize = x1-x0+1;
00331       ysize = y1-y0+1;
00332       zsize = z1-z0+1;
00333       // Allocate enough data for storing the complex data
00334       // dataSize = 2*(xsize/2+1)*ysize*zsize;
00335       // Only allocate enough data for storing the real-space data
00336       // Complex data is stored in FFTCompute
00337       dataSize = xsize*ysize*zsize;
00338     }
00339   virtual ~PmeRealSpaceCompute() {}
00340   // Setup patches and atoms in preparation for charge spreading and force gathering
00341   // virtual void setPatchesAtoms(const int numPatches, const PatchInfo* patches,
00342   //  const int numAtoms, const CudaAtom* atoms)=0;
00343   // Setup atoms in preparation for charge spreading and force gathering
00344   virtual void copyAtoms(const int numAtoms, const CudaAtom* atoms)=0;
00345   // Spread charges on grid
00346   virtual void spreadCharge(Lattice &lattice)=0;
00347   // Gather forces off the grid
00348   virtual void gatherForce(Lattice &lattice, CudaForce* force)=0;
00349   // // Calculate self energy
00350   // virtual double calcSelfEnergy()=0;
00351   float* getData() {return data;}
00352   int getDataSize() {return dataSize;}
00353 
00354   static inline double calcGridCoord(const double x, const double recip11, const int nfftx) {
00355     double w;
00356     w = x*recip11 + 2.0;
00357     return (double)((double)nfftx*(w - (floor(w + 0.5) - 0.5)));
00358   }
00359 
00360   static inline void calcGridCoord(const double x, const double y, const double z,
00361     const double recip11, const double recip22, const double recip33,
00362     const int nfftx, const int nffty, const int nfftz,
00363     double &frx, double &fry, double &frz) {
00364     double w;
00365     w = x*recip11 + 2.0;
00366     frx = (double)((double)nfftx*(w - (floor(w + 0.5) - 0.5)));
00367     w = y*recip22 + 2.0;
00368     fry = (double)((double)nffty*(w - (floor(w + 0.5) - 0.5)));
00369     w = z*recip33 + 2.0;
00370     frz = (double)((double)nfftz*(w - (floor(w + 0.5) - 0.5)));
00371   }
00372 
00373   static inline void calcGridCoord(const float x, const float y, const float z,
00374     const float recip11, const float recip22, const float recip33,
00375     const int nfftx, const int nffty, const int nfftz,
00376     float &frx, float &fry, float &frz) {
00377     float w;
00378     w = x*recip11 + 2.0f;
00379     frx = (float)(nfftx*(w - (floorf(w + 0.5f) - 0.5f)));
00380     w = y*recip22 + 2.0f;
00381     fry = (float)(nffty*(w - (floorf(w + 0.5f) - 0.5f)));
00382     w = z*recip33 + 2.0f;
00383     frz = (float)(nfftz*(w - (floorf(w + 0.5f) - 0.5f)));
00384   }
00385 
00386   static inline void calcGridCoord(const float x, const float y, const float z,
00387     const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz) {
00388     frx = (float)(nfftx)*x;
00389     fry = (float)(nffty)*y;
00390     frz = (float)(nfftz)*z;
00391   }
00392 
00393   static inline void calcGridCoord(const double x, const double y, const double z,
00394     const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz) {
00395     frx = (double)(nfftx)*x;
00396     fry = (double)(nffty)*y;
00397     frz = (double)(nfftz)*z;
00398   }
00399 
00400 };
00401 
00402 struct float2;
00403 //
00404 // Abstract PmeTranspose base class
00405 //
00406 class PmeTranspose {
00407 protected:
00408   PmeGrid pmeGrid;
00409   const int permutation;
00410   const int jblock, kblock;
00411   int isize, jsize, ksize;
00412   int dataSize;
00413   int nblock;
00414   std::vector<int> pos;
00415 public:
00416   PmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock) : 
00417     pmeGrid(pmeGrid), permutation(permutation), jblock(jblock), kblock(kblock) {
00418 
00419     int i0, i1, j0, j1, k0, k1;
00420     getPencilDim(pmeGrid, permutation, jblock, kblock, i0, i1, j0, j1, k0, k1);
00421     isize = (i1-i0+1);
00422     jsize = (j1-j0+1);
00423     ksize = (k1-k0+1);
00424     dataSize = (i1-i0+1)*(j1-j0+1)*(k1-k0+1);
00425 
00426     switch(permutation) {
00427       case Perm_cX_Y_Z:
00428       nblock = pmeGrid.xBlocks;
00429       break;
00430       case Perm_Y_Z_cX:
00431       nblock = pmeGrid.yBlocks;
00432       break;
00433       case Perm_Z_cX_Y:
00434       nblock = pmeGrid.zBlocks;
00435       break;
00436       default:
00437       NAMD_bug("PmeTranspose::PmeTranspose, invalid permutation");
00438       break;
00439     }
00440 
00441     // Pos marks the beginning of blocks
00442     pos.resize(nblock+1);
00443 
00444     int x1;
00445     for (int iblock=0;iblock < nblock;iblock++) {
00446       // Get the dimension of the transpose block
00447       // We are transposing nblock of these blocks and storing them into a pencil of size
00448       // ny * nz * xsize at location ny*nz*x0
00449       int x0, y0dummy, y1dummy, z0dummy, z1dummy;
00450       getBlockDim(pmeGrid, permutation, iblock, jblock, kblock, x0, x1, y0dummy, y1dummy, z0dummy, z1dummy);
00451 
00452       pos[iblock] = x0;
00453     }
00454     // Last position begins at x1+1
00455     pos[nblock] = x1+1;
00456   }
00457   virtual ~PmeTranspose() {}
00458   virtual void transposeXYZtoYZX(const float2* data)=0;
00459   virtual void transposeXYZtoZXY(const float2* data)=0;
00460 };
00461 
00462 #endif // PMESOLVERUTIL_H

Generated on Thu Sep 21 01:17:14 2017 for NAMD by  doxygen 1.4.7