1 #ifndef CUDATILELISTKERNEL_H
2 #define CUDATILELISTKERNEL_H
3 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
92 PtrSize(T* ptr,
int size) : ptr(ptr), size(size) {}
// NOTE(review): the leading integers on the lines below are line numbers left
// over from a source listing, not part of the code; many neighbouring
// declarations from the original header are missing from this view.

// Event and flag associated with the tile-list statistics.
// NOTE(review): presumably the flag tracks whether the event has been
// recorded -- confirm against the implementation file.
100 cudaEvent_t tileListStatEvent;
101 bool tileListStatEventRecord;
// Count of GBIS tile lists (exposed through getNumTileListsGBIS(), judging by
// the accessor name -- TODO confirm).
116 int numTileListsGBIS;
// The "*Size" integers in this section appear to accompany buffers whose
// pointer declarations are mostly not visible here.
// NOTE(review): presumably allocated capacities rather than used lengths --
// verify before relying on them.
128 int cudaComputesSize;
// Immutable streaming switch. At least one accessor in this header returns
// NULL when this flag is false.
131 const bool doStreaming;
133 int patchNumListsSize;
136 int emptyPatchesSize;
// NOTE(review): the "h_" prefix suggests a host-side counterpart of
// emptyPatchesSize -- confirm.
138 int h_emptyPatchesSize;
// Sort-key buffers and their related length.
// NOTE(review): Src/Dst look like input/output buffers of a key sort --
// confirm against the sorting code.
141 unsigned int* sortKeySrc;
143 unsigned int* sortKeyDst;
146 int maxTileListLen_sortKeys;
148 unsigned int* sortKeys;
152 int minmaxListLenSize;
173 int tileListsGBISSize;
// Two tileJatomStart buffers (1 and 2) plus a GBIS variant, each with its
// own size field.
// NOTE(review): the 1/2 pair is presumably selected by activeBuffer in the
// same way as tileListDepth1/2 below -- confirm.
182 int* tileJatomStart1;
183 int tileJatomStart1Size;
184 int* tileJatomStart2;
185 int tileJatomStart2Size;
186 int* tileJatomStartGBIS;
187 int tileJatomStartGBISSize;
191 int boundingBoxesSize;
// Double-buffered tile list depths: getTileListDepth() returns
// tileListDepth1 when activeBuffer == 1 and tileListDepth2 otherwise.
194 unsigned int* tileListDepth1;
195 int tileListDepth1Size;
196 unsigned int* tileListDepth2;
197 int tileListDepth2Size;
201 int tileListOrder1Size;
203 int tileListOrder2Size;
// Size and length fields for the per-tile-list virial/energy storage.
// NOTE(review): "Size" vs "Length" presumably distinguishes capacity from
// entries in use -- confirm; the Length values have dedicated
// setters/getters elsewhere in the class.
232 int tileListVirialEnergySize;
234 int tileListVirialEnergyLength;
235 int tileListVirialEnergyGBISLength;
239 void setActiveBuffer(
int activeBufferIn) {activeBuffer = activeBufferIn;}
242 const bool useJtiles,
243 const int begin_bit,
const bool highDepthBitsSet,
246 PtrSize<TileList> tileListsSrc, PtrSize<int> tileJatomStartSrc,
248 PtrSize<PatchPairRecord> patchPairsSrc, PtrSize<TileExcl> tileExclsSrc,
251 PtrSize<TileList> tileListsDst, PtrSize<int> tileJatomStartDst,
253 PtrSize<PatchPairRecord> patchPairsDst, PtrSize<TileExcl> tileExclsDst,
256 void writeTileList(
const char* filename,
const int numTileLists,
257 const TileList* d_tileLists, cudaStream_t stream);
258 void writeTileJatomStart(
const char* filename,
const int numJtiles,
259 const int* d_tileJatomStart, cudaStream_t stream);
290 return ((activeBuffer == 1) ? tileLists1 : tileLists2);
292 unsigned int*
getTileListDepth() {
return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);}
311 const int numPatchesIn,
const int atomStorageSizeIn,
const int maxTileListLenIn,
312 const float3
lata,
const float3
latb,
const float3
latc,
313 const CudaPatchRecord* h_cudaPatches,
const float4* h_xyzq,
const float plcutoff2In,
314 const size_t maxShmemPerBlock, cudaStream_t stream);
328 if (!doStreaming)
return NULL;
338 #endif // CUDATILELISTKERNEL_H
CudaTileListKernel(int deviceID, bool doStreaming)
void prepareTileList(cudaStream_t stream)
void setTileListVirialEnergyLength(int len)
const int const int begin_bit
PatchPairRecord * getPatchPairs()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 lata
void clearTileListStat(cudaStream_t stream)
TileExcl * getTileExcls()
void setTileListVirialEnergyGBISLength(int len)
int getTileListVirialEnergyGBISLength()
CudaPatchRecord * getCudaPatches()
unsigned int * getTileListDepth()
const int const int const int const keyT keyT *__restrict__ keyT *__restrict__ valT *__restrict__ tileListOrderSrc
BoundingBox * getBoundingBoxes()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 latb
__thread cudaStream_t stream
void updateComputes(const int numComputesIn, const CudaComputeRecord *h_cudaComputes, cudaStream_t stream)
TileList * getTileListsGBIS()
TileListStat * getTileListStatDevPtr()
TileList * getTileLists()
const int numTileListsDst
int getNumTileListsGBIS()
void finishTileList(cudaStream_t stream)
int getTileListVirialEnergyLength()
int * getTileJatomStart()
int * getTileJatomStartGBIS()
const int const int const int const keyT keyT *__restrict__ keyT *__restrict__ tileListDepthDst
TileListVirialEnergy * getTileListVirialEnergy()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 latc
void buildTileLists(const int numTileListsPrev, const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn, const float3 lata, const float3 latb, const float3 latc, const CudaPatchRecord *h_cudaPatches, const float4 *h_xyzq, const float plcutoff2In, const size_t maxShmemPerBlock, cudaStream_t stream)
const int const int const int const keyT keyT *__restrict__ tileListDepthSrc
void reSortTileLists(const bool doGBIS, cudaStream_t stream)