1 #ifndef CUDAPMESOLVERUTIL_H 2 #define CUDAPMESOLVERUTIL_H 11 #include <hipfft/hipfft.h> 23 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 24 void writeComplexToDisk(
const float2 *d_data,
const int size,
const char* filename, cudaStream_t stream);
26 void writeRealToDisk(
const float *d_data,
const int size,
const char* filename, cudaStream_t stream);
28 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 29 #define cufftCheck(stmt) do { \ 30 cufftResult err = stmt; \ 31 if (err != CUFFT_SUCCESS) { \ 33 sprintf(msg, "%s in file %s, function %s\n", #stmt,__FILE__,__FUNCTION__); \ 43 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 44 cufftHandle forwardPlan, backwardPlan;
45 cufftType_t forwardType, backwardType;
52 float* allocateData(
const int dataSizeRequired);
53 void plan3D(
int *n,
int flags);
54 void plan2D(
int *n,
int howmany,
int flags);
55 void plan1DX(
int *n,
int howmany,
int flags);
56 void plan1DY(
int *n,
int howmany,
int flags);
57 void plan1DZ(
int *n,
int howmany,
int flags);
78 float *d_bm1, *d_bm2, *d_bm3;
84 EnergyVirial* d_energyVirial;
85 EnergyVirial* h_energyVirial;
86 cudaEvent_t copyEnergyVirialEvent;
90 static void energyAndVirialCheck(
void *arg,
double walltime);
96 int deviceID, cudaStream_t stream,
unsigned int iGrid = 0);
98 void solve(
Lattice &lattice,
const bool doEnergy,
const bool doVirial,
float* data);
115 bool gridTexObjActive;
116 cudaTextureObject_t gridTexObj;
125 void setupGridData(
float*
data,
int data_len);
127 size_t d_atomsCapacity;
133 size_t d_forceCapacity;
138 cudaEvent_t gatherForceEvent;
143 static void cuda_gatherforce_check(
void *arg,
double walltime);
146 int deviceID, cudaStream_t stream);
163 #ifndef P2P_ENABLE_3D 170 std::vector<float2*> dataPtrsYZX;
171 std::vector<float2*> dataPtrsZXY;
179 void copyDataToPeerDevice(
const int iblock,
180 const int iblock_out,
const int jblock_out,
const int kblock_out,
181 int deviceID_out,
int permutation_out, float2* data_out);
184 const int jblock,
const int kblock,
int deviceID, cudaStream_t stream);
186 void setDataPtrsYZX(std::vector<float2*>& dataPtrsNew, float2* data);
187 void setDataPtrsZXY(std::vector<float2*>& dataPtrsNew, float2* data);
194 #ifndef P2P_ENABLE_3D 223 #ifndef USE_TABLE_ARRAYS 236 #if defined(NAMD_CUDA) || defined(NAMD_HIP) //to enable when hipfft full support is ready 338 double3* patchMin, double3* patchMax, double3* awayDists);
341 void calcSelfEnergyAlch(
int step);
342 void scaleAndComputeFEPEnergyVirials(
const EnergyVirial* energyVirials,
int step,
double& energy,
double& energy_F,
double (&virial)[9]);
343 void scaleAndComputeTIEnergyVirials(
const EnergyVirial* energyVirials,
int step,
double& energy,
double& energy_TI_1,
double& energy_TI_2,
double (&virial)[9]);
344 void scaleAndMergeForce(
int step);
349 #endif // CUDAPMESOLVERUTIL_H void finishReduction(bool doEnergyVirial)
CudaPmeOneDevice(PmeGrid pmeGrid_, int deviceID_, int deviceIndex_)
cufftHandle * backwardPlans
void energyAndVirialSetCallback(CudaPmePencilXYZ *pencilPtr)
bool force_scaling_alch_first_time
CudaPmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
double * d_selfEnergy_FEP
~CudaPmeRealSpaceCompute()
EnergyVirial * d_energyVirials
void checkPatchLevelLatticeCompatibilityAndComputeOffsets(const Lattice &lattice, const int numPatches, const CudaLocalRecord *localRecords, double3 *patchMin, double3 *patchMax, double3 *awayDists)
void checkPatchLevelSimParamCompatibility(const int order, const bool periodicY, const bool periodicZ)
void spreadCharge(Lattice &lattice)
void copyAtoms(const int numAtoms, const CudaAtom *atoms)
PatchLevelPmeData patchLevelPmeData
CudaPmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
void copyDataToPeerDeviceZXY(const int iblock, int deviceID_out, int permutation_out, float2 *data_out)
void copyDataDeviceToDevice(const int iblock, float2 *data_out)
CudaFFTCompute(int deviceID, cudaStream_t stream)
int computeSharedMemoryPatchLevelSpreadCharge(const int numThreads, const int3 patchGridDim, const int order)
void copyDataDeviceToHost(const int iblock, float2 *h_data, const int h_dataSize)
bool self_energy_alch_first_time
void getVirial(double *virial)
cudaTextureObject_t * gridTexObjArrays
double * d_selfEnergy_TI_2
void writeHostComplexToDisk(const float2 *h_data, const int size, const char *filename)
void gatherForce(Lattice &lattice, CudaForce *force)
void writeComplexToDisk(const float2 *d_data, const int size, const char *filename, cudaStream_t stream)
void waitStreamSynchronize()
void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)
void gatherForceSetCallback(ComputePmeCUDADevice *devicePtr_in)
void setDataPtrsYZX(std::vector< float2 *> &dataPtrsNew, float2 *data)
void transposeXYZtoYZX(const float2 *data)
void setDataPtrsZXY(std::vector< float2 *> &dataPtrsNew, float2 *data)
CudaPmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream, unsigned int iGrid=0)
void writeRealToDisk(const float *d_data, const int size, const char *filename, cudaStream_t stream)
cufftHandle * forwardPlans
void copyDataHostToDevice(const int iblock, float2 *data_in, float2 *data_out)
void transposeXYZtoZXY(const float2 *data)
void waitGatherForceDone()
void copyDataToPeerDeviceYZX(const int iblock, int deviceID_out, int permutation_out, float2 *data_out)
void waitEnergyAndVirial()
int computeSharedMemoryPatchLevelGatherForce(const int numThreads, const int3 patchGridDim, const int order)
float2 * getBuffer(const int iblock)
int getShiftedGrid(const double x, const int grid)
double * d_selfEnergy_TI_1
EnergyVirial * h_energyVirials
float * d_scaling_factors
void checkPatchLevelDeviceCompatibility()
void compute(const Lattice &lattice, int doEnergyVirial, int step)