1 #ifndef CUDAPMESOLVERUTILKERNEL_H 2 #define CUDAPMESOLVERUTILKERNEL_H 4 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 7 static constexpr
int kDim = 3;
34 const int nfftx,
const int nffty,
const int nfftz,
35 const int xsize,
const int ysize,
const int zsize,
36 const int xdim,
const int y00,
const int z00,
37 const bool periodicY,
const bool periodicZ,
38 float* data,
const int order, cudaStream_t stream);
42 const float4 *atoms,
const int numAtoms,
43 const int nfftx,
const int nffty,
const int nfftz,
44 const float nfftx_f,
const float nffty_f,
const float nfftz_f,
46 const int xsize,
const int ysize,
const int zsize,
47 const int xdim,
const int y00,
const int z00,
48 const bool periodicY,
const bool periodicZ,
49 float* data,
const int order, cudaStream_t stream);
51 void scalar_sum(
const bool orderXYZ,
const int nfft1,
const int nfft2,
const int nfft3,
52 const int size1,
const int size2,
const int size3,
const double kappa,
53 const float recip1x,
const float recip1y,
const float recip1z,
54 const float recip2x,
const float recip2y,
const float recip2z,
55 const float recip3x,
const float recip3y,
const float recip3z,
57 const float* prefac1,
const float* prefac2,
const float* prefac3,
58 const int k2_00,
const int k3_00,
59 const bool doEnergyVirial,
double* energy,
double* virial, float2* data,
65 const float4 *atoms,
const int numAtoms,
67 const int nfftx,
const int nffty,
const int nfftz,
68 const int xsize,
const int ysize,
const int zsize,
69 const int xdim,
const int y00,
const int z00,
70 const bool periodicY,
const bool periodicZ,
71 const float* data,
const int order, float3* force,
73 const cudaTextureObject_t gridTexObj,
79 const float4 *d_atoms,
98 const int nx,
const int ny,
const int nz,
99 const int xsize_in,
const int ysize_in,
100 const int ysize_out,
const int zsize_out,
101 const float2* data_in, float2* data_out, cudaStream_t stream);
105 const int max_nx,
const int ny,
const int nz,
106 const int xsize_in,
const int ysize_in, cudaStream_t stream);
109 const int nx,
const int ny,
const int nz,
110 const int xsize_in,
const int ysize_in,
111 const int zsize_out,
const int xsize_out,
112 const float2* data_in, float2* data_out, cudaStream_t stream);
116 const int max_nx,
const int ny,
const int nz,
117 const int xsize_in,
const int ysize_in,
118 cudaStream_t stream);
128 double* d_selfEnergy,
129 double* d_selfEnergy_FEP,
130 double& h_selfEnergy,
131 double& h_selfEnergyFEP,
132 const float4* d_atoms,
133 const int* d_partition,
135 const double ewaldcof,
136 const bool alchDecouple,
137 const double lambda1Up,
138 const double lambda2Up,
139 const double lambda1Down,
140 const double lambda2Down,
141 cudaStream_t stream);
144 double* d_selfEnergy,
145 double* d_selfEnergy_TI_1,
146 double* d_selfEnergy_TI_2,
147 double& h_selfEnergy,
148 double& h_selfEnergy_TI_1,
149 double& h_selfEnergy_TI_2,
150 const float4* d_atoms,
151 const int* d_partition,
153 const double ewaldcof,
154 const bool alchDecouple,
155 const double lambda1Up,
156 const double lambda1Down,
157 cudaStream_t stream);
161 const float* factors,
162 const size_t num_arrays,
164 cudaStream_t stream);
168 #endif // CUDAPMESOLVERUTILKERNEL_H void spread_charge_v2(const PatchLevelPmeData patchLevelPmeData, const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const float nfftx_f, const float nffty_f, const float nfftz_f, const int order3, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, float *data, const int order, cudaStream_t stream)
int gatherForceSharedBytes
static constexpr int kNumThreads
void batchTranspose_xyz_yzx(const int numBatches, TransposeBatch< float2 > *batches, const int max_nx, const int ny, const int nz, const int xsize_in, const int ysize_in, cudaStream_t stream)
void transpose_xyz_yzx(const int nx, const int ny, const int nz, const int xsize_in, const int ysize_in, const int ysize_out, const int zsize_out, const float2 *data_in, float2 *data_out, cudaStream_t stream)
bool simulationCompatible
int3 * d_patchGridOffsets
void scalar_sum(const bool orderXYZ, const int nfft1, const int nfft2, const int nfft3, const int size1, const int size2, const int size3, const double kappa, const float recip1x, const float recip1y, const float recip1z, const float recip2x, const float recip2y, const float recip2z, const float recip3x, const float recip3y, const float recip3z, const double volume, const float *prefac1, const float *prefac2, const float *prefac3, const int k2_00, const int k3_00, const bool doEnergyVirial, double *energy, double *virial, float2 *data, cudaStream_t stream)
int spreadChargeSharedBytes
void calcSelfEnergyTIWrapper(double *d_selfEnergy, double *d_selfEnergy_TI_1, double *d_selfEnergy_TI_2, double &h_selfEnergy, double &h_selfEnergy_TI_1, double &h_selfEnergy_TI_2, const float4 *d_atoms, const int *d_partition, const int num_atoms, const double ewaldcof, const bool alchDecouple, const double lambda1Up, const double lambda1Down, cudaStream_t stream)
void transpose_xyz_zxy(const int nx, const int ny, const int nz, const int xsize_in, const int ysize_in, const int zsize_out, const int xsize_out, const float2 *data_in, float2 *data_out, cudaStream_t stream)
void gather_force(const PatchLevelPmeData patchLevelPmeData, const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, const float *data, const int order, float3 *force, const cudaTextureObject_t gridTexObj, cudaStream_t stream)
void calcSelfEnergyFEPWrapper(double *d_selfEnergy, double *d_selfEnergy_FEP, double &h_selfEnergy, double &h_selfEnergyFEP, const float4 *d_atoms, const int *d_partition, const int num_atoms, const double ewaldcof, const bool alchDecouple, const double lambda1Up, const double lambda2Up, const double lambda1Down, const double lambda2Down, cudaStream_t stream)
static constexpr int kPatchGridDimPad
double compute_selfEnergy(double *d_selfEnergy, const float4 *d_atoms, int natoms, double ewaldcof, cudaStream_t stream)
void spread_charge(const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, float *data, const int order, cudaStream_t stream)
void scaleAndMergeForceWrapper(float3 *forces, const float *factors, const size_t num_arrays, const int num_atoms, cudaStream_t stream)
const CudaLocalRecord * localRecords
static constexpr int kDim
void batchTranspose_xyz_zxy(const int numBatches, TransposeBatch< float2 > *batches, const int max_nx, const int ny, const int nz, const int xsize_in, const int ysize_in, cudaStream_t stream)
static constexpr int kThetaPad
static constexpr int kPatchGridDim
int3 * h_patchGridOffsets