9 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
10 #error Cannot build with GPU Acceleration (NAMD_CUDA or NAMD_HIP) and NAMD_AVXTILES defined
// Per-atom record made of four single-precision values.
// realloc() reinterprets a CudaAtom buffer as an array of this type, so the
// member order and types presumably have to mirror CudaAtom - confirm before
// changing anything here.
struct AVXTilesAtom {
  float x;
  float y;
  float z;
  float q;  // presumably the charge, with x/y/z the position - TODO confirm
};
// Per-atom force record made of four single-precision values.
// zeroForces() clears arrays of this type with memset, which relies on the
// struct holding nothing but plain floats.
struct AVXTilesForce {
  float x;
  float y;
  float z;
  float w;  // fourth slot; its meaning is not visible in this header - TODO confirm
};
26 inline int numAtoms()
const {
return _numAtoms; }
27 inline int numFreeAtoms()
const {
return _numFreeAtoms; }
28 inline int numTiles()
const {
return _numTiles; }
32 inline void touch() { _touched =
true; }
// Points `atoms` at the supplied buffer and, when the atom count differs from
// the stored one, recomputes the tile count.
//   numAtoms - new atom count, compared against the stored _numAtoms
//   ac       - CudaAtom buffer, reinterpreted as an AVXTilesAtom array
// NOTE(review): this extract skips some original lines and stops before the
// closing braces, so statements may be missing (for example, no assignment to
// _numAtoms is visible here) - confirm against the complete header.
34 inline void realloc(
const int numAtoms,
CudaAtom *ac) {
// Pointer cast only; no atom data is copied by this statement.
35 atoms = (AVXTilesAtom *)ac;
36 if (numAtoms != _numAtoms) {
// A larger atom count sets the _touched flag.
37 if (numAtoms > _numAtoms) _touched =
true;
// Equals ceil(numAtoms / 16) for numAtoms >= 1, i.e. one tile per 16 atoms.
39 _numTiles = ((numAtoms - 1) >> 4) + 1;
// _realloc() is called only when the tile count exceeds _numTilesAlloc.
40 if (_numTiles > _numTilesAlloc) _realloc();
// Zero-fills _numAtoms entries of `forces`, and of `forcesSlow` as well when
// doSlow is nonzero.
//   doSlow - nonzero to also clear the forcesSlow array
// NOTE(review): the closing brace of this function is not part of this extract.
44 inline void zeroForces(
const int doSlow) {
// Byte-wise fill with 0, covering every field of each AVXTilesForce entry.
46 memset(
forces, 0, _numAtoms *
sizeof(AVXTilesForce));
47 if (doSlow) memset(forcesSlow, 0, _numAtoms *
sizeof(AVXTilesForce));
52 void atomUpdate(
const CompAtom *compAtom,
// Forwards to _buildBoundingBoxes(step) unless `step` equals _lastBuild.
//   step - value compared against _lastBuild and passed through unchanged
// NOTE(review): presumably _lastBuild records the step of the previous build,
// making repeat calls for the same step no-ops - confirm in _buildBoundingBoxes.
// The closing brace of this function is not part of this extract.
57 inline void buildBoundingBoxes(
const int step) {
58 if (step != _lastBuild) _buildBoundingBoxes(step);
// Declaration only; the definition is not part of this extract.
//   doSlow, doVirial     - run-time int flags; the private
//                          _nativeForceVirialUpdate template uses the same
//                          names as compile-time template parameters
//   natForces,
//   natForcesSlow        - pointers to non-const Force
//   natForcesVirial,
//   natForcesSlowVirial  - pointers to const Force
//   virial, virialSlow   - written as double[6]; as array parameters they are
//                          passed as double*, so the 6 is not enforced
// __restrict__ tells the compiler the four Force pointers do not alias.
// NOTE(review): the private template also takes `const CompAtom *p` and a
// Vector parameter ahead of natForces; no such parameters appear here - check
// whether a line was lost from this declaration.
61 void nativeForceVirialUpdate(
const int doSlow,
const int doVirial,
63 Force * __restrict__ natForces,
64 Force * __restrict__ natForcesSlow,
65 const Force * __restrict__ natForcesVirial,
66 const Force * __restrict__ natForcesSlowVirial,
67 double virial[6],
double virialSlow[6]);
72 AVXTilesForce *forcesSlow;
76 #ifdef MEM_OPT_VERSION
// NOTE(review): presumably bounding-box arrays filled by _buildBoundingBoxes();
// what x/y/z versus wx/wy/wz hold is not visible in this header - confirm.
84 float *bbox_x, *bbox_y, *bbox_z, *bbox_wx, *bbox_wy, *bbox_wz;
// _numAtoms, _numFreeAtoms and _numTiles are returned by numAtoms(),
// numFreeAtoms() and numTiles(). realloc() recomputes _numTiles from the atom
// count and calls _realloc() when it exceeds _numTilesAlloc.
// buildBoundingBoxes() compares _lastBuild with the requested step.
87 int _numAtoms, _numFreeAtoms, _numTiles, _numTilesAlloc, _lastBuild;
// Out-of-line worker; buildBoundingBoxes() calls it with the same `step`
// whenever that step differs from _lastBuild.
void _buildBoundingBoxes(const int step);
91 template <
int doSlow,
int doVirial,
int touched>
92 void _nativeForceVirialUpdate(
const CompAtom *p,
const Vector ¢er,
93 Force * __restrict__ natForces,
94 Force * __restrict__ natForcesSlow,
95 const Force * __restrict__ natForcesVirial,
96 const Force * __restrict__ natForcesSlowVirial,
97 double virial[6],
double virialSlow[6]);
100 #endif // NAMD_AVXTILES
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ vdwTypes
static __thread atom * atoms
static __thread float4 * forces
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ exclIndexMaxDiff
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ atomIndex