1 #ifndef CUDATILELISTKERNEL_H 2 #define CUDATILELISTKERNEL_H 5 #include <cuda_runtime.h> 55 #ifdef NODEGROUP_FORCE_REGISTER 124 template <
typename T>
126 PtrSize(T* ptr,
size_t size) : ptr(ptr), size(size) {}
// Data members of the tile-list kernel class (only a subset is visible in
// this view; several buffer pointers that pair with the *Size members below
// are not shown).
// NOTE(review): each `<name>Size` member presumably tracks the allocated
// capacity of the buffer called `<name>` — confirm the units (elements vs
// bytes) against the implementation file.

// CUDA event plus a flag associated with the tile-list statistics.
// NOTE(review): the flag presumably remembers whether the event has been
// recorded — confirm.
134 cudaEvent_t tileListStatEvent;
135 bool tileListStatEventRecord;
// Number of tile lists for the GBIS variant.
150 int numTileListsGBIS;
159 size_t cudaPatchesSize;
162 size_t cudaComputesSize;
// Immutable after construction (const member); the constructor takes a
// `doStreaming` argument and at least one accessor returns NULL when it is false.
165 const bool doStreaming;
167 size_t patchNumListsSize;
170 size_t emptyPatchesSize;
// NOTE(review): the h_ prefix presumably marks a host-side counterpart — confirm.
172 size_t h_emptyPatchesSize;
// Source / destination key buffers for sorting, with their sizes.
175 unsigned int* sortKeySrc;
176 size_t sortKeySrcSize;
177 unsigned int* sortKeyDst;
178 size_t sortKeyDstSize;
180 int maxTileListLen_sortKeys;
182 unsigned int* sortKeys;
186 size_t minmaxListLenSize;
// Members suffixed 1 / 2 come in pairs; the accessors choose between them by
// testing `activeBuffer == 1`.
206 size_t tileLists1Size;
208 size_t tileLists2Size;
210 size_t tileListsGBISSize;
214 size_t patchPairs1Size;
216 size_t patchPairs2Size;
// J-atom start indices: buffers 1 and 2 plus a separate GBIS buffer.
219 int* tileJatomStart1;
220 size_t tileJatomStart1Size;
221 int* tileJatomStart2;
222 size_t tileJatomStart2Size;
223 int* tileJatomStartGBIS;
224 size_t tileJatomStartGBISSize;
228 size_t boundingBoxesSize;
// Tile-list depth buffers; getTileListDepth() returns tileListDepth1 when
// activeBuffer == 1 and tileListDepth2 otherwise.
231 unsigned int* tileListDepth1;
232 size_t tileListDepth1Size;
233 unsigned int* tileListDepth2;
234 size_t tileListDepth2Size;
238 size_t tileListOrder1Size;
240 size_t tileListOrder2Size;
244 size_t tileListPosSize;
256 size_t tileExcls1Size;
258 size_t tileExcls2Size;
// NOTE(review): presumably scratch space for a device sort/scan — confirm.
262 size_t tempStorageSize;
269 size_t tileListVirialEnergySize;
// Lengths exposed through the get/setTileListVirialEnergy[GBIS]Length accessors.
271 int tileListVirialEnergyLength;
272 int tileListVirialEnergyGBISLength;
276 void setActiveBuffer(
int activeBufferIn) {activeBuffer = activeBufferIn;}
279 const bool useJtiles,
280 const int begin_bit,
const bool highDepthBitsSet,
282 const int numTileListsSrc,
const int numJtilesSrc,
283 PtrSize<TileList> tileListsSrc, PtrSize<int> tileJatomStartSrc,
284 PtrSize<unsigned int> tileListDepthSrc, PtrSize<int> tileListOrderSrc,
285 PtrSize<PatchPairRecord> patchPairsSrc, PtrSize<TileExcl> tileExclsSrc,
287 const int numTileListsDst,
const int numJtilesDst,
288 PtrSize<TileList> tileListsDst, PtrSize<int> tileJatomStartDst,
289 PtrSize<unsigned int> tileListDepthDst, PtrSize<int> tileListOrderDst,
290 PtrSize<PatchPairRecord> patchPairsDst, PtrSize<TileExcl> tileExclsDst,
291 cudaStream_t stream);
// Declaration only; the definition is not visible in this view.
// Parameters: a file name, a tile-list count, a pointer to TileList entries
// and a CUDA stream.
// NOTE(review): presumably dumps `numTileLists` entries from the device array
// `d_tileLists` to `filename` for debugging — confirm against the definition.
293 void writeTileList(
const char* filename,
const int numTileLists,
294 const TileList* d_tileLists, cudaStream_t stream);
// Declaration only; the definition is not visible in this view.
// Parameters: a file name, a J-tile count, a pointer to int start indices
// and a CUDA stream.
// NOTE(review): presumably dumps `numJtiles` entries from the device array
// `d_tileJatomStart` to `filename` for debugging — confirm against the definition.
295 void writeTileJatomStart(
const char* filename,
const int numJtiles,
296 const int* d_tileJatomStart, cudaStream_t stream);
301 size_t outputOrderSize;
328 return ((activeBuffer == 1) ? tileLists1 : tileLists2);
330 unsigned int*
getTileListDepth() {
return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);}
350 int atomStorageSizeIn,
int numPatchesIn,
352 cudaStream_t stream);
355 const int numPatchesIn,
const int atomStorageSizeIn,
const int maxTileListLenIn,
356 const float3 lata,
const float3 latb,
const float3 latc,
357 const CudaPatchRecord* h_cudaPatches,
const float4* h_xyzq,
const float plcutoff2In,
358 const size_t maxShmemPerBlock, cudaStream_t stream,
const bool atomsChanged,
359 const bool allocatePart,
bool CUDASOAintegratorOn,
bool deviceMigration);
373 if (!doStreaming)
return NULL;
383 #endif // CUDATILELISTKERNEL_H
CudaTileListKernel(int deviceID, bool doStreaming)
void prepareTileList(cudaStream_t stream)
void setTileListVirialEnergyLength(int len)
PatchPairRecord * getPatchPairs()
void clearTileListStat(cudaStream_t stream)
TileExcl * getTileExcls()
void setTileListVirialEnergyGBISLength(int len)
void prepareBuffers(int atomStorageSizeIn, int numPatchesIn, const CudaPatchRecord *h_cudaPatches, cudaStream_t stream)
int getTileListVirialEnergyGBISLength()
CudaPatchRecord * getCudaPatches()
unsigned int * getTileListDepth()
BoundingBox * getBoundingBoxes()
void updateComputes(const int numComputesIn, const CudaComputeRecord *h_cudaComputes, cudaStream_t stream)
TileList * getTileListsGBIS()
TileListStat * getTileListStatDevPtr()
TileList * getTileLists()
int getNumTileListsGBIS()
void finishTileList(cudaStream_t stream)
int getTileListVirialEnergyLength()
int * getTileJatomStart()
int * getTileJatomStartGBIS()
TileListVirialEnergy * getTileListVirialEnergy()
void buildTileLists(const int numTileListsPrev, const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn, const float3 lata, const float3 latb, const float3 latc, const CudaPatchRecord *h_cudaPatches, const float4 *h_xyzq, const float plcutoff2In, const size_t maxShmemPerBlock, cudaStream_t stream, const bool atomsChanged, const bool allocatePart, bool CUDASOAintegratorOn, bool deviceMigration)
void reSortTileLists(const bool doGBIS, cudaStream_t stream)