#ifndef CUDACOMPUTENONBONDEDKERNEL_HIP_H
#define CUDACOMPUTENONBONDEDKERNEL_HIP_H

// ...

class CudaComputeNonbondedKernel {
private:
  // ...
  const bool doStreaming;

  // Per-atom exclusion data
  int2 *d_exclusionsByAtom = NULL;
  unsigned int* overflowExclusions;
  size_t overflowExclusionsSize;

  int2* exclIndexMaxDiff;
  size_t exclIndexMaxDiffSize;

  // ...

  unsigned int* patchNumCount;
  size_t patchNumCountSize;

  // ...
  size_t patchReadyQueueSize;

  // Structure-of-arrays (SOA) force buffers
  float *force_x, *force_y, *force_z, *force_w;
  // ...
  float *forceSlow_x, *forceSlow_y, *forceSlow_z, *forceSlow_w;
  // ...

public:

  // Number of tiles needed to cover numAtoms atoms
  static __device__ __host__ __forceinline__
  int computeNumTiles(const int numAtoms, const int tilesize=WARPSIZE) {
    return (numAtoms+tilesize-1)/tilesize;
  }

  // Atom count padded up to a whole number of tiles
  static __device__ __host__ __forceinline__
  int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE) {
    return computeNumTiles(numAtoms, tilesize)*tilesize;
  }
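  // Illustrative arithmetic, assuming tilesize == 32:
  //   computeNumTiles(1000) == (1000 + 31) / 32 == 32
  //   computeAtomPad(1000)  == 32 * 32          == 1024
  // i.e. 1000 atoms occupy 32 tiles and are padded to 1024 atom slots.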
  void updateVdwTypesExcl(const int atomStorageSize, const int* h_vdwTypes,
    const int2* h_exclIndexMaxDiff, const int* h_atomIndex, cudaStream_t stream);
  void updateVdwTypesExclOnGPU(CudaTileListKernel& tlKernel,
    const int numPatches, const int atomStorageSize, const bool alchOn,
    CudaLocalRecord* localRecords,
    const int* d_vdwTypes, const int* d_id, const int* d_sortOrder,
    const int* d_partition, cudaStream_t stream);
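  // Illustrative host-side update after atom migration (the h_* buffers here are
  // placeholders, not NAMD variable names):
  //   kernel.updateVdwTypesExcl(atomStorageSize, h_vdwTypes,
  //                             h_exclIndexMaxDiff, h_atomIndex, stream);
  // With device-side migration, updateVdwTypesExclOnGPU builds the same per-atom
  // data from the device-resident d_* arrays, using the CudaLocalRecord entries.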
  void nonbondedForce(CudaTileListKernel& tlKernel, const int atomStorageSize,
    const bool atomsChanged, const bool doMinimize, const bool doPairlist,
    const bool doEnergy, const bool doVirial, const bool doSlow, const bool doAlch,
    const bool doAlchVdwForceSwitching, const bool doFEP, const bool doTI,
    const bool doTable,
    const float3 lata, const float3 latb, const float3 latc,
    const float4* h_xyzq, const float cutoff2, const CudaNBConstants nbConstants,
    float4* d_forces, float4* d_forcesSlow,
    float4* h_forces, float4* h_forcesSlow, AlchData *srcFlags,
    bool lambdaWindowUpdated, char *part, bool CUDASOAintegratorOn,
    bool useDeviceMigration, cudaStream_t stream);
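  // Call-site note (illustrative values, not taken from NAMD source): cutoff2 is the
  // squared pair cutoff, e.g. for a 12 A cutoff
  //   const float cutoff2 = 12.0f * 12.0f;
  // lata/latb/latc are the periodic lattice vectors, and the do* flags select which
  // optional terms (pairlist build, energy, virial, slow forces, alchemical terms)
  // are computed on a given step.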
  void reduceVirialEnergy(CudaTileListKernel& tlKernel,
    const int atomStorageSize, const bool doEnergy, const bool doVirial,
    const bool doSlow, const bool doGBIS,
    float4* d_forces, float4* d_forcesSlow,
    VirialEnergy* d_virialEnergy, cudaStream_t stream);
  void bindExclusions(int numExclusions, unsigned int* exclusion_bits);
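  // Illustrative only: exclusion_bits is treated here as a packed bit array, for which
  // a generic lookup of bit i would be
  //   bool excluded = (exclusion_bits[i >> 5] >> (i & 31)) & 1;
  // The exact indexing used by the kernels (through exclIndexMaxDiff and
  // overflowExclusions) is not shown in this listing.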
  // ...
};

#endif // CUDACOMPUTENONBONDEDKERNEL_HIP_H
AlchData: the alchemical data structure that holds the lambda-relevant parameters for FEP/TI (passed to nonbondedForce as the AlchData* argument).
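AlchData itself is not shown in this listing. As a purely hypothetical illustration (the struct and field names below are invented for this sketch, not the actual AlchData layout), such a structure would group the per-partition lambda scaling factors consumed by the FEP/TI kernels:

    // Hypothetical sketch only -- not the real AlchData definition.
    struct AlchLambdaSketch {
      float elecLambdaUp, elecLambdaDown;  // electrostatic coupling of the two alchemical partitions
      float vdwLambdaUp,  vdwLambdaDown;   // van der Waals coupling
      float vdwShiftUp,   vdwShiftDown;    // soft-core shift coefficients
    };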
Member function signatures (a usage sketch follows the list):

CudaComputeNonbondedKernel(int deviceID, CudaNonbondedTables &cudaNonbondedTables, bool doStreaming)
~CudaComputeNonbondedKernel()

void updateVdwTypesExcl(const int atomStorageSize, const int *h_vdwTypes,
    const int2 *h_exclIndexMaxDiff, const int *h_atomIndex, cudaStream_t stream)
void updateVdwTypesExclOnGPU(CudaTileListKernel &tlKernel, const int numPatches,
    const int atomStorageSize, const bool alchOn, CudaLocalRecord *localRecords,
    const int *d_vdwTypes, const int *d_id, const int *d_sortOrder,
    const int *d_partition, cudaStream_t stream)

void nonbondedForce(CudaTileListKernel &tlKernel, const int atomStorageSize,
    const bool atomsChanged, const bool doMinimize, const bool doPairlist,
    const bool doEnergy, const bool doVirial, const bool doSlow, const bool doAlch,
    const bool doAlchVdwForceSwitching, const bool doFEP, const bool doTI,
    const bool doTable, const float3 lata, const float3 latb, const float3 latc,
    const float4 *h_xyzq, const float cutoff2, const CudaNBConstants nbConstants,
    float4 *d_forces, float4 *d_forcesSlow, float4 *h_forces, float4 *h_forcesSlow,
    AlchData *fepFlags, bool lambdaWindowUpdated, char *part,
    bool CUDASOAintegratorOn, bool useDeviceMigration, cudaStream_t stream)

void reduceVirialEnergy(CudaTileListKernel &tlKernel, const int atomStorageSize,
    const bool doEnergy, const bool doVirial, const bool doSlow, const bool doGBIS,
    float4 *d_forces, float4 *d_forcesSlow, VirialEnergy *d_virialEnergy,
    cudaStream_t stream)
void getVirialEnergy(VirialEnergy *h_virialEnergy, cudaStream_t stream)

void setExclusionsByAtom(int2 *h_data, const int num_atoms)
void bindExclusions(int numExclusions, unsigned int *exclusion_bits)

void reallocate_forceSOA(int atomStorageSize)
int *getPatchReadyQueue()

static __device__ __host__ __forceinline__ int computeNumTiles(const int numAtoms, const int tilesize=WARPSIZE)
static __device__ __host__ __forceinline__ int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE)
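The sketch below (assuming this header and its dependencies are included) shows one plausible per-step driver for the force path above; it is not NAMD source, and the alchemical, GBIS and streaming options are simply disabled:

    // Hedged sketch only: one possible single-step driver. Allocation, patch setup and
    // tile-list construction are assumed to have been done by the caller.
    void nonbondedStepSketch(CudaComputeNonbondedKernel& kernel, CudaTileListKernel& tlKernel,
                             int atomStorageSize, bool atomsChanged, bool doEnergy, bool doVirial,
                             bool doSlow, float3 lata, float3 latb, float3 latc,
                             const float4* h_xyzq, float cutoff2, CudaNBConstants nbConstants,
                             float4* d_forces, float4* d_forcesSlow,
                             float4* h_forces, float4* h_forcesSlow,
                             VirialEnergy* d_virialEnergy, VirialEnergy* h_virialEnergy,
                             cudaStream_t stream) {
      // Launch the tile-based nonbonded force kernel (alchemical path disabled here).
      kernel.nonbondedForce(tlKernel, atomStorageSize, atomsChanged,
                            /*doMinimize*/ false, /*doPairlist*/ atomsChanged,
                            doEnergy, doVirial, doSlow,
                            /*doAlch*/ false, /*doAlchVdwForceSwitching*/ false,
                            /*doFEP*/ false, /*doTI*/ false, /*doTable*/ true,
                            lata, latb, latc, h_xyzq, cutoff2, nbConstants,
                            d_forces, d_forcesSlow, h_forces, h_forcesSlow,
                            /*srcFlags*/ nullptr, /*lambdaWindowUpdated*/ false, /*part*/ nullptr,
                            /*CUDASOAintegratorOn*/ false, /*useDeviceMigration*/ false, stream);

      // Reduce and fetch energies/virials only on steps that need them.
      if (doEnergy || doVirial) {
        kernel.reduceVirialEnergy(tlKernel, atomStorageSize, doEnergy, doVirial, doSlow,
                                  /*doGBIS*/ false, d_forces, d_forcesSlow, d_virialEnergy, stream);
        kernel.getVirialEnergy(h_virialEnergy, stream);
      }
    }

When the kernel object is constructed with doStreaming enabled, getPatchReadyQueue() presumably exposes the per-patch completion queue that the caller polls instead of waiting on the whole stream.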