NAMD
CudaComputeNonbondedKernel.h
Go to the documentation of this file.
#ifndef CUDACOMPUTENONBONDEDKERNEL_H
#define CUDACOMPUTENONBONDEDKERNEL_H
#include "CudaUtils.h"
#include "CudaRecord.h"
#include "CudaTileListKernel.h"
#include "CudaNonbondedTables.h"
#ifdef NAMD_CUDA

/// Alchemical data structure that holds the lambda-relevant parameters for FEP/TI.
struct AlchData {
  float scaling;
  float switchdist2;
  float cutoff2;
  float switchfactor;
  // float alchLambda;
  float lambdaUp;
  float lambdaDown;
  float elecLambdaUp;
  float vdwLambdaUp;

  float lambda2Up;
  float lambda2Down;
  float vdwLambda2Up;

  float vdwShiftUp;
  float vdwShift2Up;
  float vdwShiftDown;
};

class CudaComputeNonbondedKernel {
private:

  const int deviceID;
  CudaNonbondedTables& cudaNonbondedTables;
  const bool doStreaming;

  // Exclusions
  int2* d_exclusionsByAtom;
  unsigned int* overflowExclusions;
  size_t overflowExclusionsSize;

  int2* exclIndexMaxDiff;
  size_t exclIndexMaxDiffSize;

  // Atom indices
  int* atomIndex;
  size_t atomIndexSize;

  // VdW types
  int* vdwTypes;
  size_t vdwTypesSize;

  unsigned int* patchNumCount;
  size_t patchNumCountSize;

  int* patchReadyQueue;
  size_t patchReadyQueueSize;

  float *force_x, *force_y, *force_z, *force_w;
  size_t forceSize;
  float *forceSlow_x, *forceSlow_y, *forceSlow_z, *forceSlow_w;
  size_t forceSlowSize;

  // Drude/NbThole
  int* isDrude;
  size_t isDrudeSize;
  float* drudeAtomAlpha;
  size_t drudeAtomAlphaSize;

public:
  CudaComputeNonbondedKernel(int deviceID, CudaNonbondedTables& cudaNonbondedTables, bool doStreaming);

  static __device__ __host__ __forceinline__ int
  computeNumTiles(const int numAtoms, const int tilesize = WARPSIZE) {
    return (numAtoms + tilesize - 1) / tilesize;
  }

  static __device__ __host__ __forceinline__ int
  computeAtomPad(const int numAtoms, const int tilesize = WARPSIZE) {
    return computeNumTiles(numAtoms, tilesize) * tilesize;
  }

  void updateVdwTypesExcl(const int atomStorageSize, const int* h_vdwTypes,
    const int2* h_exclIndexMaxDiff, const int* h_atomIndex, cudaStream_t stream);

  void updateVdwTypesExclOnGPU(CudaTileListKernel& tlKernel,
    const int numPatches, const int atomStorageSize, const bool alchOn,
    CudaLocalRecord* localRecords,
    const int* d_vdwTypes, const int* d_id, const int* d_sortOrder,
    const int* d_partition, cudaStream_t stream);

  void nonbondedForce(CudaTileListKernel& tlKernel,
    const int atomStorageSize, const bool atomsChanged, const bool doMinimize,
    const bool doPairlist, const bool doEnergy, const bool doVirial,
    const bool doSlow, const bool doAlch, const bool doAlchVdwForceSwitching,
    const bool doFEP, const bool doTI,
    const bool doNbThole, const bool doTable,
    const float3 lata, const float3 latb, const float3 latc,
    const float4* h_xyzq, const float cutoff2,
    const CudaNBConstants nbConstants,
    float4* d_forces, float4* d_forcesSlow,
    float4* h_forces, float4* h_forcesSlow, AlchData* fepFlags,
    bool lambdaWindowUpdated, char* part,
    bool CUDASOAintegratorOn, bool useDeviceMigration,
    const float drudeNbtholeCut2,
    cudaStream_t stream);

  void reduceVirialEnergy(CudaTileListKernel& tlKernel,
    const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const bool doGBIS,
    float4* d_forces, float4* d_forcesSlow,
    VirialEnergy* d_virialEnergy, cudaStream_t stream);

  void getVirialEnergy(VirialEnergy* h_virialEnergy, cudaStream_t stream);

  void bindExclusions(int numExclusions, unsigned int* exclusion_bits);

  int* getPatchReadyQueue();

  void reallocate_forceSOA(int atomStorageSize);

  void setExclusionsByAtom(int2* h_data, const int num_atoms);

  void updateDrudeData(const int atomStorageSize, const float* h_drudeAtomAlpha, const int* h_isDrude, cudaStream_t stream);
};

#endif // NAMD_CUDA
#endif // CUDACOMPUTENONBONDEDKERNEL_H
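
For reference, a minimal standalone sketch of the tile arithmetic performed by computeNumTiles() and computeAtomPad() above: both round the atom count up to whole warp-sized tiles. The snippet assumes WARPSIZE is 32 (the real value comes from the WARPSIZE macro in CudaUtils.h), and numTilesSketch/atomPadSketch are hypothetical re-implementations of the two inline helpers so the example compiles without NAMD.

// Standalone sketch of the tile/padding arithmetic, assuming WARPSIZE == 32.
#include <cstdio>

static int numTilesSketch(int numAtoms, int tilesize = 32) {
  return (numAtoms + tilesize - 1) / tilesize;           // ceiling division: tiles needed
}

static int atomPadSketch(int numAtoms, int tilesize = 32) {
  return numTilesSketch(numAtoms, tilesize) * tilesize;  // round up to a full tile
}

int main() {
  // 100 atoms occupy 4 tiles of 32 atoms and are padded to 128 slots.
  std::printf("tiles=%d pad=%d\n", numTilesSketch(100), atomPadSketch(100));
  return 0;
}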
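
The AlchData struct above carries per-window FEP/TI parameters into nonbondedForce() via the fepFlags argument. Below is a minimal sketch of how its distance- and lambda-related fields might be filled for one alchemical window (only usable inside a NAMD CUDA build); makeAlchDataSketch is a hypothetical helper, and the squared distances, the lambdaDown = 1 - lambdaUp relation, and the switchfactor expression are illustrative assumptions rather than NAMD's own setup code.

// Hypothetical helper, not part of NAMD: fills only the AlchData fields whose
// meaning is apparent from the header; the remaining couplings and soft-core
// shifts are left to the caller.
#include "CudaComputeNonbondedKernel.h"

static AlchData makeAlchDataSketch(float cutoff, float switchdist, float lambda) {
  AlchData a = {};                                  // zero-initialize every field
  a.scaling     = 1.0f;                             // overall nonbonded scaling (assumption)
  a.cutoff2     = cutoff * cutoff;                  // squared cutoff distance
  a.switchdist2 = switchdist * switchdist;          // squared switching distance
  const float range = a.cutoff2 - a.switchdist2;
  a.switchfactor = 1.0f / (range * range * range);  // assumed vdW switching prefactor
  a.lambdaUp   = lambda;                            // coupling of the appearing group
  a.lambdaDown = 1.0f - lambda;                     // coupling of the disappearing group (assumption)
  // elecLambdaUp, vdwLambdaUp, lambda2Up/lambda2Down, vdwLambda2Up and the
  // vdwShift* fields depend on the alchemical schedule and soft-core settings,
  // so they are not guessed here.
  return a;
}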