1 #ifndef CUDAPMESOLVERUTIL_H
2 #define CUDAPMESOLVERUTIL_H
18 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
24 #define cufftCheck(stmt) do { \
25 cufftResult err = stmt; \
26 if (err != CUFFT_SUCCESS) { \
28 sprintf(msg, "%s in file %s, function %s\n", #stmt,__FILE__,__FUNCTION__); \
33 #define rocfftCheck(stmt) do { \
34 rocfft_status err = stmt; \
35 if (err != rocfft_status_success) { \
37 sprintf(msg, "%s in file %s, function %s\n", #stmt,__FILE__,__FUNCTION__); \
49 cufftHandle forwardPlan, backwardPlan;
50 cufftType_t forwardType, backwardType;
52 rocfft_plan forwardPlan, backwardPlan;
53 rocfft_execution_info forwardPlanInfo, backwardPlanInfo;
54 void * forwardWorkBuffer;
55 void * backwardWorkBuffer;
57 rocfft_transform_type forwardTransformType, rocfft_transform_type backwardTransformType,
58 size_t dimensions,
const size_t* lengths,
size_t howmany);
// Declaration only: takes the required data size as a const int and returns
// a float pointer.
// NOTE(review): the unit of dataSizeRequired (elements vs. bytes) and who
// owns/frees the returned buffer are not visible here — confirm against the
// implementation.
65 float* allocateData(
const int dataSizeRequired);
// Declaration only: 3D planning entry point. Unlike the 2D/1D variants
// declared alongside it, it takes no howmany argument.
//   n     - pointer to int; presumably the transform dimensions — TODO confirm
//           expected array length and ordering.
//   flags - meaning not visible in this declaration; verify against the
//           implementation.
66 void plan3D(
int *n,
int flags);
// Declaration only: 2D planning entry point.
//   n       - pointer to int; presumably the transform dimensions — TODO
//             confirm expected array length and ordering.
//   howmany - presumably the number of batched transforms — confirm.
//   flags   - meaning not visible in this declaration.
67 void plan2D(
int *n,
int howmany,
int flags);
// Declaration only: 1D planning entry point; the name suggests the X
// direction — NOTE(review): confirm against the implementation.
//   n       - pointer to int; presumably the transform dimensions — TODO
//             confirm which element(s) are read.
//   howmany - presumably the number of batched transforms — confirm.
//   flags   - meaning not visible in this declaration.
68 void plan1DX(
int *n,
int howmany,
int flags);
// Declaration only: 1D planning entry point; the name suggests the Y
// direction — NOTE(review): confirm against the implementation.
//   n       - pointer to int; presumably the transform dimensions — TODO
//             confirm which element(s) are read.
//   howmany - presumably the number of batched transforms — confirm.
//   flags   - meaning not visible in this declaration.
69 void plan1DY(
int *n,
int howmany,
int flags);
// Declaration only: 1D planning entry point; the name suggests the Z
// direction — NOTE(review): confirm against the implementation.
//   n       - pointer to int; presumably the transform dimensions — TODO
//             confirm which element(s) are read.
//   howmany - presumably the number of batched transforms — confirm.
//   flags   - meaning not visible in this declaration.
70 void plan1DZ(
int *n,
int howmany,
int flags);
// Constructor: copies the deviceID and stream arguments into the members of
// the same name through the initializer list; the body is empty, so no other
// work is done here.
// In each initializer the name outside the parentheses is the member and the
// name inside is the constructor parameter (the parameter shadows the member).
74 CudaFFTCompute(
int deviceID, cudaStream_t stream) : deviceID(deviceID), stream(stream) {}
91 float *d_bm1, *d_bm2, *d_bm3;
97 EnergyVirial* d_energyVirial;
98 EnergyVirial* h_energyVirial;
99 cudaEvent_t copyEnergyVirialEvent;
// Declaration only: static function taking an opaque pointer and a wall-clock
// time value, returning nothing.
// NOTE(review): the (void*, double) signature looks like a polling/callback
// hook; what arg points to and how this function gets registered are not
// visible here — confirm against the implementation.
103 static void energyAndVirialCheck(
void *arg,
double walltime);
109 int deviceID, cudaStream_t stream);
// Declaration only.
//   lattice  - passed by non-const reference; whether it is modified is not
//              visible here.
//   doEnergy - presumably requests the energy calculation — confirm.
//   doVirial - presumably requests the virial calculation — confirm.
//   data     - float pointer; NOTE(review): memory space (host/device),
//              layout and length are not shown in this declaration — verify
//              against the caller.
111 void solve(
Lattice &lattice,
const bool doEnergy,
const bool doVirial,
float* data);
128 bool gridTexObjActive;
129 cudaTextureObject_t gridTexObj;
// Declaration only.
//   data     - float pointer; presumably the grid buffer — TODO confirm its
//              memory space and layout.
//   data_len - int length associated with data; NOTE(review): unit (elements
//              vs. bytes) is not visible here — confirm.
138 void setupGridData(
float*
data,
int data_len);
151 cudaEvent_t gatherForceEvent;
// Declaration only: static function taking an opaque pointer and a wall-clock
// time value, returning nothing.
// NOTE(review): the (void*, double) signature looks like a polling/callback
// hook; what arg points to and how this function gets registered are not
// visible here — confirm against the implementation.
156 static void cuda_gatherforce_check(
void *arg,
double walltime);
159 int deviceID, cudaStream_t stream);
176 #ifndef P2P_ENABLE_3D
183 std::vector<float2*> dataPtrsYZX;
184 std::vector<float2*> dataPtrsZXY;
// Declaration only.
//   iblock                              - const int; presumably the source
//                                         block index — confirm.
//   iblock_out, jblock_out, kblock_out  - const ints; presumably block indices
//                                         on the destination side — confirm.
//   deviceID_out                        - int; presumably the destination
//                                         device — confirm.
//   permutation_out                     - int; meaning not visible here.
//   data_out                            - float2 pointer; NOTE(review): memory
//                                         space and required size are not
//                                         shown in this declaration.
192 void copyDataToPeerDevice(
const int iblock,
193 const int iblock_out,
const int jblock_out,
const int kblock_out,
194 int deviceID_out,
int permutation_out,
float2* data_out);
197 const int jblock,
const int kblock,
int deviceID, cudaStream_t stream);
207 #ifndef P2P_ENABLE_3D
215 #endif // CUDAPMESOLVERUTIL_H
void energyAndVirialSetCallback(CudaPmePencilXYZ *pencilPtr)
void setDataPtrsYZX(std::vector< float2 * > &dataPtrsNew, float2 *data)
CudaPmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
~CudaPmeRealSpaceCompute()
static __thread atom * atoms
void spreadCharge(Lattice &lattice)
void copyAtoms(const int numAtoms, const CudaAtom *atoms)
CudaPmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
void copyDataToPeerDeviceZXY(const int iblock, int deviceID_out, int permutation_out, float2 *data_out)
__thread cudaStream_t stream
void copyDataDeviceToDevice(const int iblock, float2 *data_out)
CudaFFTCompute(int deviceID, cudaStream_t stream)
void copyDataDeviceToHost(const int iblock, float2 *h_data, const int h_dataSize)
void writeHostComplexToDisk(const float2 *h_data, const int size, const char *filename)
void getVirial(double *virial)
CudaPmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream)
void writeComplexToDisk(const float2 *d_data, const int size, const char *filename, cudaStream_t stream)
void setDataPtrsZXY(std::vector< float2 * > &dataPtrsNew, float2 *data)
void gatherForce(Lattice &lattice, CudaForce *force)
void waitStreamSynchronize()
void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)
void gatherForceSetCallback(ComputePmeCUDADevice *devicePtr_in)
void transposeXYZtoYZX(const float2 *data)
void copyDataHostToDevice(const int iblock, float2 *data_in, float2 *data_out)
void transposeXYZtoZXY(const float2 *data)
void waitGatherForceDone()
void copyDataToPeerDeviceYZX(const int iblock, int deviceID_out, int permutation_out, float2 *data_out)
float2 * getBuffer(const int iblock)
void writeRealToDisk(const float *d_data, const int size, const char *filename, cudaStream_t stream)