1 #ifndef CUDATILELISTKERNEL_H 2 #define CUDATILELISTKERNEL_H 53 #ifdef NODEGROUP_FORCE_REGISTER 122 template <
typename T>
124 PtrSize(T* ptr,
size_t size) : ptr(ptr), size(size) {}
132 cudaEvent_t tileListStatEvent;
133 bool tileListStatEventRecord;
148 int numTileListsGBIS;
157 size_t cudaPatchesSize;
160 size_t cudaComputesSize;
163 const bool doStreaming;
165 size_t patchNumListsSize;
168 size_t emptyPatchesSize;
170 size_t h_emptyPatchesSize;
173 unsigned int* sortKeySrc;
174 size_t sortKeySrcSize;
175 unsigned int* sortKeyDst;
176 size_t sortKeyDstSize;
178 int maxTileListLen_sortKeys;
180 unsigned int* sortKeys;
184 size_t minmaxListLenSize;
204 size_t tileLists1Size;
206 size_t tileLists2Size;
208 size_t tileListsGBISSize;
212 size_t patchPairs1Size;
214 size_t patchPairs2Size;
217 int* tileJatomStart1;
218 size_t tileJatomStart1Size;
219 int* tileJatomStart2;
220 size_t tileJatomStart2Size;
221 int* tileJatomStartGBIS;
222 size_t tileJatomStartGBISSize;
226 size_t boundingBoxesSize;
229 unsigned int* tileListDepth1;
230 size_t tileListDepth1Size;
231 unsigned int* tileListDepth2;
232 size_t tileListDepth2Size;
236 size_t tileListOrder1Size;
238 size_t tileListOrder2Size;
242 size_t tileListPosSize;
254 size_t tileExcls1Size;
256 size_t tileExcls2Size;
260 size_t tempStorageSize;
267 size_t tileListVirialEnergySize;
269 int tileListVirialEnergyLength;
270 int tileListVirialEnergyGBISLength;
274 void setActiveBuffer(
int activeBufferIn) {activeBuffer = activeBufferIn;}
277 const bool useJtiles,
278 const int begin_bit,
const bool highDepthBitsSet,
280 const int numTileListsSrc,
const int numJtilesSrc,
281 PtrSize<TileList> tileListsSrc, PtrSize<int> tileJatomStartSrc,
282 PtrSize<unsigned int> tileListDepthSrc, PtrSize<int> tileListOrderSrc,
283 PtrSize<PatchPairRecord> patchPairsSrc, PtrSize<TileExcl> tileExclsSrc,
285 const int numTileListsDst,
const int numJtilesDst,
286 PtrSize<TileList> tileListsDst, PtrSize<int> tileJatomStartDst,
287 PtrSize<unsigned int> tileListDepthDst, PtrSize<int> tileListOrderDst,
288 PtrSize<PatchPairRecord> patchPairsDst, PtrSize<TileExcl> tileExclsDst,
289 cudaStream_t stream);
291 void writeTileList(
const char* filename,
const int numTileLists,
292 const TileList* d_tileLists, cudaStream_t stream);
293 void writeTileJatomStart(
const char* filename,
const int numJtiles,
294 const int* d_tileJatomStart, cudaStream_t stream);
299 size_t outputOrderSize;
326 return ((activeBuffer == 1) ? tileLists1 : tileLists2);
328 unsigned int*
getTileListDepth() {
return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);}
348 int atomStorageSizeIn,
int numPatchesIn,
350 cudaStream_t stream);
353 const int numPatchesIn,
const int atomStorageSizeIn,
const int maxTileListLenIn,
354 const float3 lata,
const float3 latb,
const float3 latc,
355 const CudaPatchRecord* h_cudaPatches,
const float4* h_xyzq,
const float plcutoff2In,
356 const size_t maxShmemPerBlock, cudaStream_t stream,
const bool atomsChanged,
357 const bool allocatePart,
bool CUDASOAintegratorOn,
bool deviceMigration);
371 if (!doStreaming)
return NULL;
381 #endif // CUDATILELISTKERNEL_H
CudaTileListKernel(int deviceID, bool doStreaming)
void prepareTileList(cudaStream_t stream)
void setTileListVirialEnergyLength(int len)
PatchPairRecord * getPatchPairs()
void clearTileListStat(cudaStream_t stream)
TileExcl * getTileExcls()
void setTileListVirialEnergyGBISLength(int len)
void prepareBuffers(int atomStorageSizeIn, int numPatchesIn, const CudaPatchRecord *h_cudaPatches, cudaStream_t stream)
int getTileListVirialEnergyGBISLength()
CudaPatchRecord * getCudaPatches()
unsigned int * getTileListDepth()
BoundingBox * getBoundingBoxes()
void updateComputes(const int numComputesIn, const CudaComputeRecord *h_cudaComputes, cudaStream_t stream)
TileList * getTileListsGBIS()
TileListStat * getTileListStatDevPtr()
TileList * getTileLists()
int getNumTileListsGBIS()
void finishTileList(cudaStream_t stream)
int getTileListVirialEnergyLength()
int * getTileJatomStart()
int * getTileJatomStartGBIS()
TileListVirialEnergy * getTileListVirialEnergy()
void buildTileLists(const int numTileListsPrev, const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn, const float3 lata, const float3 latb, const float3 latc, const CudaPatchRecord *h_cudaPatches, const float4 *h_xyzq, const float plcutoff2In, const size_t maxShmemPerBlock, cudaStream_t stream, const bool atomsChanged, const bool allocatePart, bool CUDASOAintegratorOn, bool deviceMigration)
void reSortTileLists(const bool doGBIS, cudaStream_t stream)