NAMD
CudaPmeSolverUtilKernel.h
Go to the documentation of this file.
1 #ifndef CUDAPMESOLVERUTILKERNEL_H
2 #define CUDAPMESOLVERUTILKERNEL_H
3 #include "HipDefines.h"
4 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
5 
// NOTE(review): this listing omits original lines 6, 13-15, 21 and 27, so the
// line that opens this aggregate is not shown; it is closed by `};` below.
// Presumably this is the PatchLevelPmeData type that spread_charge_v2 and
// gather_force take by value -- confirm against the real header.
//
// Compile-time integer constants shared by users of this type.
7  static constexpr int kDim = 3;
8  static constexpr int kNumThreads = 128;
9  static constexpr int kThetaPad = 4;
// The padded value is 2 larger than kPatchGridDim (24 vs 22).
10  static constexpr int kPatchGridDimPad = 24;
11  static constexpr int kPatchGridDim = 22;
12 
// Three independent flags, each defaulting to false.
16  bool deviceCompatible = false;
17  bool simulationCompatible = false;
18  bool latticeCompatible = false;
19 
// The statement inside compatible() (listing line 21) is missing from this
// listing. NOTE(review): presumably it combines the three flags above --
// TODO confirm against the real header.
20  bool compatible() const {
22  }
23 
// Patch count (default 0) and a read-only pointer to CudaLocalRecord entries
// (default nullptr).
24  int numPatches = 0;
25  const CudaLocalRecord* localRecords = nullptr;
26 
// Two int3 arrays, both default nullptr.
// NOTE(review): the d_/h_ prefixes presumably mean device-side and host-side
// copies of the same offsets -- verify where they are allocated.
28  int3* d_patchGridOffsets = nullptr;
29  int3* h_patchGridOffsets = nullptr;
30 };
31 
32 
// Declaration only; the body is not in this header.
// Signature summary: `atoms` is a read-only float4 array with `numAtoms`
// entries; nfftx/nffty/nfftz, xsize/ysize/zsize, xdim, y00/z00 and `order` are
// integers; periodicY/periodicZ are booleans. `data` is the only pointer
// parameter not declared const.
// NOTE(review): presumably this spreads per-atom charge onto the grid stored in
// `data` and enqueues the work on `stream` -- confirm against the implementation.
33 void spread_charge(const float4 *atoms, const int numAtoms,
34  const int nfftx, const int nffty, const int nfftz,
35  const int xsize, const int ysize, const int zsize,
36  const int xdim, const int y00, const int z00,
37  const bool periodicY, const bool periodicZ,
38  float* data, const int order, cudaStream_t stream);
39 
// Declaration only; the body is not in this header.
// Compared with spread_charge, this signature additionally takes a
// PatchLevelPmeData by value, float counterparts of the three FFT sizes
// (nfftx_f/nffty_f/nfftz_f) and an `order3` integer; the remaining parameters
// have the same names and types.
// NOTE(review): `order3` is presumably order*order*order and the *_f values
// presumably mirror nfftx/nffty/nfftz -- confirm with the caller.
40 void spread_charge_v2(
41  const PatchLevelPmeData patchLevelPmeData,
42  const float4 *atoms, const int numAtoms,
43  const int nfftx, const int nffty, const int nfftz,
44  const float nfftx_f, const float nffty_f, const float nfftz_f,
45  const int order3,
46  const int xsize, const int ysize, const int zsize,
47  const int xdim, const int y00, const int z00,
48  const bool periodicY, const bool periodicZ,
49  float* data, const int order, cudaStream_t stream);
50 
// Declaration only; the body is not in this header.
// Signature summary: `kappa` and `volume` are double, while the nine recip*
// components and the three prefac arrays are float. `energy`, `virial` and
// `data` are the pointer parameters not declared const.
// The `cuda_arch` parameter below is commented out and is not part of the
// signature.
// NOTE(review): `energy`/`virial` are presumably only written when
// `doEnergyVirial` is true -- confirm against the implementation.
51 void scalar_sum(const bool orderXYZ, const int nfft1, const int nfft2, const int nfft3,
52  const int size1, const int size2, const int size3, const double kappa,
53  const float recip1x, const float recip1y, const float recip1z,
54  const float recip2x, const float recip2y, const float recip2z,
55  const float recip3x, const float recip3y, const float recip3z,
56  const double volume,
57  const float* prefac1, const float* prefac2, const float* prefac3,
58  const int k2_00, const int k3_00,
59  const bool doEnergyVirial, double* energy, double* virial, float2* data,
60  // const int cuda_arch,
61  cudaStream_t stream);
62 
// Declaration only; the body is not in this header.
// Signature summary: `data` is read-only here (const float*), and `force`
// (float3*) is the only pointer parameter not declared const.
// The `gridTexObj` parameter exists only when NAMD_CUDA is defined, so the
// parameter list differs between builds; every caller and the definition must
// be compiled under the same guard.
// The recip11/recip22/recip33 parameters below are commented out and are not
// part of the signature.
// NOTE(review): presumably reads the grid in `data` and writes per-atom forces
// into `force` -- confirm against the implementation.
63 void gather_force(
64  const PatchLevelPmeData patchLevelPmeData,
65  const float4 *atoms, const int numAtoms,
66  // const float recip11, const float recip22, const float recip33,
67  const int nfftx, const int nffty, const int nfftz,
68  const int xsize, const int ysize, const int zsize,
69  const int xdim, const int y00, const int z00,
70  const bool periodicY, const bool periodicZ,
71  const float* data, const int order, float3* force,
72 #ifdef NAMD_CUDA
73  const cudaTextureObject_t gridTexObj,
74 #endif
75  cudaStream_t stream);
76 
// Declaration only; the body is not in this header.
// Returns a double by value. `d_selfEnergy` is a non-const double pointer and
// `d_atoms` is a read-only float4 array; `natoms` and `ewaldcof` are passed by
// value.
// NOTE(review): the d_ prefix presumably marks device pointers, and returning a
// value to the host presumably means the call waits on `stream` -- verify both
// against the implementation.
77 double compute_selfEnergy(
78  double *d_selfEnergy,
79  const float4 *d_atoms,
80  int natoms,
81  double ewaldcof,
82  cudaStream_t stream);
83 
84 // void calc_sum_charge_squared(const float4 *atoms, const int numAtoms, double* sum_charge_squared,
85 // cudaStream_t stream);
86 
// Class template over the element type T.
// NOTE(review): listing lines 88 and 90 are missing, so the line that names and
// opens this aggregate and one member are not shown. It is presumably the
// TransposeBatch<T> type taken by the batchTranspose_* declarations -- confirm
// against the real header.
87 template <typename T>
// Visible members: a T pointer and four integer fields.
89  T* data_in;
91  int nx;
92  int zsize_out;
93  int xsize_out;
94  int ysize_out;
95 };
96 
// NOTE(review): listing line 97 (return type and function name) is missing.
// This parameter list matches the `transpose_xyz_yzx` signature in the symbol
// index at the end of this page.
// `data_in` is read-only; `data_out` is the only pointer parameter not declared
// const. Output extents are given as ysize_out/zsize_out.
98  const int nx, const int ny, const int nz,
99  const int xsize_in, const int ysize_in,
100  const int ysize_out, const int zsize_out,
101  const float2* data_in, float2* data_out, cudaStream_t stream);
102 
// NOTE(review): listing line 103 (return type and function name) is missing.
// This parameter list matches the `batchTranspose_xyz_yzx` signature in the
// symbol index at the end of this page.
// Takes `numBatches` and a non-const pointer to TransposeBatch<float2> entries.
// `max_nx` is presumably the largest per-batch nx -- confirm with the caller.
104  const int numBatches, TransposeBatch<float2>* batches,
105  const int max_nx, const int ny, const int nz,
106  const int xsize_in, const int ysize_in, cudaStream_t stream);
107 
// Declaration only; the body is not in this header.
// `data_in` is read-only; `data_out` is the only pointer parameter not declared
// const. Input extents are xsize_in/ysize_in and output extents are
// zsize_out/xsize_out.
// NOTE(review): the name suggests a reordering of a 3-D array from x,y,z to
// z,x,y axis order -- confirm against the implementation.
108 void transpose_xyz_zxy(
109  const int nx, const int ny, const int nz,
110  const int xsize_in, const int ysize_in,
111  const int zsize_out, const int xsize_out,
112  const float2* data_in, float2* data_out, cudaStream_t stream);
113 
// NOTE(review): listing line 114 (return type and function name) is missing.
// This parameter list matches the `batchTranspose_xyz_zxy` signature in the
// symbol index at the end of this page.
// Takes `numBatches` and a non-const pointer to TransposeBatch<float2> entries.
// `max_nx` is presumably the largest per-batch nx -- confirm with the caller.
115  const int numBatches, TransposeBatch<float2>* batches,
116  const int max_nx, const int ny, const int nz,
117  const int xsize_in, const int ysize_in,
118  cudaStream_t stream);
119 
120 // void prepareAlchemicalAtomArraysWrapper(
121 // const unsigned grid,
122 // const int num_atoms,
123 // const int* d_partition,
124 // float4* d_atoms,
125 // cudaStream_t stream);
126 
// NOTE(review): listing line 127 (return type and function name) is missing.
// This parameter list matches the `calcSelfEnergyFEPWrapper` signature in the
// symbol index at the end of this page.
// `h_selfEnergy` and `h_selfEnergyFEP` are non-const references, so the callee
// can modify the caller's variables. `d_atoms` and `d_partition` are read-only.
// NOTE(review): the d_/h_ prefixes presumably mean device and host storage --
// verify against the implementation.
128  double* d_selfEnergy,
129  double* d_selfEnergy_FEP,
130  double& h_selfEnergy,
131  double& h_selfEnergyFEP,
132  const float4* d_atoms,
133  const int* d_partition,
134  const int num_atoms,
135  const double ewaldcof,
136  const bool alchDecouple,
137  const double lambda1Up,
138  const double lambda2Up,
139  const double lambda1Down,
140  const double lambda2Down,
141  cudaStream_t stream);
142 
// NOTE(review): listing line 143 (return type and function name) is missing.
// This parameter list matches the `calcSelfEnergyTIWrapper` signature in the
// symbol index at the end of this page.
// The three h_* parameters are non-const references, so the callee can modify
// the caller's variables. `d_atoms` and `d_partition` are read-only.
// Unlike the FEP parameter list, only lambda1Up/lambda1Down are taken here.
// NOTE(review): the d_/h_ prefixes presumably mean device and host storage --
// verify against the implementation.
144  double* d_selfEnergy,
145  double* d_selfEnergy_TI_1,
146  double* d_selfEnergy_TI_2,
147  double& h_selfEnergy,
148  double& h_selfEnergy_TI_1,
149  double& h_selfEnergy_TI_2,
150  const float4* d_atoms,
151  const int* d_partition,
152  const int num_atoms,
153  const double ewaldcof,
154  const bool alchDecouple,
155  const double lambda1Up,
156  const double lambda1Down,
157  cudaStream_t stream);
158 
// NOTE(review): listing line 159 (return type and function name) is missing.
// This parameter list matches the `scaleAndMergeForceWrapper` signature in the
// symbol index at the end of this page.
// `forces` is the only pointer parameter not declared const; `factors` is
// read-only. Note the mixed count types: `num_arrays` is size_t while
// `num_atoms` is int.
// NOTE(review): presumably `factors` holds one scale per force array -- confirm
// the expected layout of `forces` with the caller.
160  float3* forces,
161  const float* factors,
162  const size_t num_arrays,
163  const int num_atoms,
164  cudaStream_t stream);
165 
166 #endif // NAMD_CUDA || NAMD_HIP
167 
168 #endif // CUDAPMESOLVERUTILKERNEL_H
void spread_charge_v2(const PatchLevelPmeData patchLevelPmeData, const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const float nfftx_f, const float nffty_f, const float nfftz_f, const int order3, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, float *data, const int order, cudaStream_t stream)
static constexpr int kNumThreads
void batchTranspose_xyz_yzx(const int numBatches, TransposeBatch< float2 > *batches, const int max_nx, const int ny, const int nz, const int xsize_in, const int ysize_in, cudaStream_t stream)
void transpose_xyz_yzx(const int nx, const int ny, const int nz, const int xsize_in, const int ysize_in, const int ysize_out, const int zsize_out, const float2 *data_in, float2 *data_out, cudaStream_t stream)
void scalar_sum(const bool orderXYZ, const int nfft1, const int nfft2, const int nfft3, const int size1, const int size2, const int size3, const double kappa, const float recip1x, const float recip1y, const float recip1z, const float recip2x, const float recip2y, const float recip2z, const float recip3x, const float recip3y, const float recip3z, const double volume, const float *prefac1, const float *prefac2, const float *prefac3, const int k2_00, const int k3_00, const bool doEnergyVirial, double *energy, double *virial, float2 *data, cudaStream_t stream)
void calcSelfEnergyTIWrapper(double *d_selfEnergy, double *d_selfEnergy_TI_1, double *d_selfEnergy_TI_2, double &h_selfEnergy, double &h_selfEnergy_TI_1, double &h_selfEnergy_TI_2, const float4 *d_atoms, const int *d_partition, const int num_atoms, const double ewaldcof, const bool alchDecouple, const double lambda1Up, const double lambda1Down, cudaStream_t stream)
#define order
Definition: PmeRealSpace.C:235
void transpose_xyz_zxy(const int nx, const int ny, const int nz, const int xsize_in, const int ysize_in, const int zsize_out, const int xsize_out, const float2 *data_in, float2 *data_out, cudaStream_t stream)
void gather_force(const PatchLevelPmeData patchLevelPmeData, const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, const float *data, const int order, float3 *force, const cudaTextureObject_t gridTexObj, cudaStream_t stream)
void calcSelfEnergyFEPWrapper(double *d_selfEnergy, double *d_selfEnergy_FEP, double &h_selfEnergy, double &h_selfEnergyFEP, const float4 *d_atoms, const int *d_partition, const int num_atoms, const double ewaldcof, const bool alchDecouple, const double lambda1Up, const double lambda2Up, const double lambda1Down, const double lambda2Down, cudaStream_t stream)
static constexpr int kPatchGridDimPad
double compute_selfEnergy(double *d_selfEnergy, const float4 *d_atoms, int natoms, double ewaldcof, cudaStream_t stream)
void spread_charge(const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, float *data, const int order, cudaStream_t stream)
void scaleAndMergeForceWrapper(float3 *forces, const float *factors, const size_t num_arrays, const int num_atoms, cudaStream_t stream)
const CudaLocalRecord * localRecords
static constexpr int kDim
void batchTranspose_xyz_zxy(const int numBatches, TransposeBatch< float2 > *batches, const int max_nx, const int ny, const int nz, const int xsize_in, const int ysize_in, cudaStream_t stream)
static constexpr int kThetaPad
static constexpr int kPatchGridDim