1039 atomStorageSize = atomStorageSizeIn;
1040 maxTileListLen = maxTileListLenIn;
1045 reallocate_device<int>(&patchNumLists, &patchNumListsSize,
numPatches);
1046 reallocate_device<int>(&emptyPatches, &emptyPatchesSize,
numPatches+1);
1047 reallocate_host<int>(&h_emptyPatches, &h_emptyPatchesSize,
numPatches+1);
1051 reallocate_device<TileList>(&tileLists1, &tileLists1Size, numTileListsPrev,
OVERALLOC);
1052 reallocate_device<PatchPairRecord>(&patchPairs1, &patchPairs1Size, numTileListsPrev,
OVERALLOC);
1059 reallocate_device<int>(&tilePos, &tilePosSize, numComputes,
OVERALLOC);
1064 calcTileListPosKernel<DEFAULTKERNEL_NUM_THREAD> <<< nblock, nthread, 0,
stream >>> (numComputes, cudaComputes,
cudaPatches, tilePos);
1072 updatePatchesKernel<WARPSIZE> <<< nblock, nthread, 0,
stream >>> (numComputes, tilePos, cudaComputes,
cudaPatches, tileLists1);
1081 reallocate_device<unsigned int>(&tileListDepth2, &tileListDepth2Size, numTileListsPrev + 1,
OVERALLOC);
1082 reallocate_device<int>(&tileListOrder2, &tileListOrder2Size, numTileListsPrev,
OVERALLOC);
1085 reallocate_device<unsigned int>(&tileListDepth1, &tileListDepth1Size, numTileListsPrev + 1,
OVERALLOC);
1087 reallocate_device<int>(&tileListOrder1, &tileListOrder1Size, numTileListsPrev,
OVERALLOC);
1089 reallocate_device<float4>(&
xyzq, &xyzqSize, atomStorageSize,
OVERALLOC);
1091 copy_HtoD<float4>(h_xyzq,
xyzq, atomStorageSize,
stream);
1095 int numBoundingBoxes = atomStorageSize/
WARPSIZE;
1101 buildBoundingBoxesKernel <<< nblock, nthread, 0, stream >>> (atomStorageSize,
xyzq,
boundingBoxes);
1111 if(shmem_size > maxShmemPerBlock){
1112 NAMD_die(
"CudaTileListKernel::buildTileLists, maximum shared memory allocation exceeded. Too many atoms in a patch");
1119 int reallocCount = 0;
1121 reallocate_device<int>(&tileJatomStart1, &tileJatomStart1Size, numJtiles,
OVERALLOC);
1125 buildTileListsBBKernel <<< nblock, nthread, shmem_size, stream >>> (
1126 numTileListsPrev, tileLists1,
cudaPatches, tilePos,
1129 tileListDepth1, tileListOrder1, patchPairs1,
1135 copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1,
stream);
1137 numJtiles = h_tileListStat->numJtiles;
1139 if (h_tileListStat->tilesSizeExceeded) {
1141 if (reallocCount > 1) {
1142 NAMD_die(
"CudaTileListKernel::buildTileLists, multiple reallocations detected");
1156 reallocate_device<TileListVirialEnergy>(&tileListVirialEnergy, &tileListVirialEnergySize,
numTileLists,
OVERALLOC);
1160 reallocate_device<int>(&tileJatomStart2, &tileJatomStart2Size, numJtiles,
OVERALLOC);
1161 reallocate_device<TileExcl>(&tileExcls1, &tileExcls1Size, numJtiles,
OVERALLOC);
1162 reallocate_device<TileExcl>(&tileExcls2, &tileExcls2Size, numJtiles,
OVERALLOC);
1165 int numJtilesSrc = numJtiles;
1167 int numJtilesDst = numJtiles;
1173 numTileListsSrc, numJtilesSrc,
1174 PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
1175 PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
1176 PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
1177 numTileListsDst, numJtilesDst,
1178 PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
1179 PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
1180 PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 lata
void clearTileListStat(cudaStream_t stream)
#define UPDATEPATCHESKERNEL_NUM_THREAD
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 latb
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float plcutoff2
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ jtiles
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ cudaPatches
#define DEFAULTKERNEL_NUM_THREAD
const int numTileListsDst
__host__ __device__ __forceinline__ int buildTileListsBBKernel_shmem_sizePerThread(const int maxTileListLen)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
void NAMD_die(const char *err_msg)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ xyzq
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ boundingBoxes
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ outputOrder
__global__ void const int numTileLists
#define BOUNDINGBOXKERNEL_NUM_WARP
__thread DeviceCUDA * deviceCUDA
#define TILELISTKERNELNEW_NUM_WARP
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 latc