CudaTileListKernel Class Reference

#include <CudaTileListKernel.h>

List of all members.

Public Member Functions

 CudaTileListKernel (int deviceID, bool doStreaming)
 ~CudaTileListKernel ()
int getNumEmptyPatches ()
int * getEmptyPatches ()
int getNumExcluded ()
float get_plcutoff2 ()
int getNumTileLists ()
int getNumTileListsGBIS ()
int getNumJtiles ()
BoundingBoxgetBoundingBoxes ()
int * getJtiles ()
float4 * get_xyzq ()
TileListStatgetTileListStatDevPtr ()
void clearTileListStat (cudaStream_t stream)
int * getTileJatomStart ()
TileListgetTileLists ()
unsigned int * getTileListDepth ()
int * getTileListOrder ()
TileExclgetTileExcls ()
PatchPairRecordgetPatchPairs ()
int * getTileJatomStartGBIS ()
TileListgetTileListsGBIS ()
TileListVirialEnergygetTileListVirialEnergy ()
CudaPatchRecordgetCudaPatches ()
void prepareTileList (cudaStream_t stream)
void finishTileList (cudaStream_t stream)
void updateComputes (const int numComputesIn, const CudaComputeRecord *h_cudaComputes, cudaStream_t stream)
void buildTileLists (const int numTileListsPrev, const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn, const float3 lata, const float3 latb, const float3 latc, const CudaPatchRecord *h_cudaPatches, const float4 *h_xyzq, const float plcutoff2In, cudaStream_t stream)
void reSortTileLists (const bool doGBIS, cudaStream_t stream)
void setTileListVirialEnergyLength (int len)
void setTileListVirialEnergyGBISLength (int len)
int getTileListVirialEnergyLength ()
int getTileListVirialEnergyGBISLength ()
int getNumPatches ()
int getNumComputes ()
int * getOutputOrder ()

Classes

struct  PtrSize


Detailed Description

Definition at line 87 of file CudaTileListKernel.h.


Constructor & Destructor Documentation

CudaTileListKernel::CudaTileListKernel ( int  deviceID,
bool  doStreaming 
)

CudaTileListKernel::~CudaTileListKernel (  ) 

Definition at line 804 of file CudaTileListKernel.cu.

References cudaCheck.

00804                                         {
00805   cudaCheck(cudaSetDevice(deviceID));
00806   deallocate_device<TileListStat>(&d_tileListStat);
00807   deallocate_host<TileListStat>(&h_tileListStat);
00808   //
00809   if (patchNumLists != NULL) deallocate_device<int>(&patchNumLists);
00810   if (emptyPatches != NULL) deallocate_device<int>(&emptyPatches);
00811   if (h_emptyPatches != NULL) deallocate_host<int>(&h_emptyPatches);
00812   if (sortKeySrc != NULL) deallocate_device<unsigned int>(&sortKeySrc);
00813   if (sortKeyDst != NULL) deallocate_device<unsigned int>(&sortKeyDst);
00814   //
00815   if (cudaPatches != NULL) deallocate_device<CudaPatchRecord>(&cudaPatches);
00816   if (cudaComputes != NULL) deallocate_device<CudaComputeRecord>(&cudaComputes);
00817   if (patchPairs1 != NULL) deallocate_device<PatchPairRecord>(&patchPairs1);
00818   if (patchPairs2 != NULL) deallocate_device<PatchPairRecord>(&patchPairs2);
00819   if (tileLists1 != NULL) deallocate_device<TileList>(&tileLists1);
00820   if (tileLists2 != NULL) deallocate_device<TileList>(&tileLists2);
00821   if (tileJatomStart1 != NULL) deallocate_device<int>(&tileJatomStart1);
00822   if (tileJatomStart2 != NULL) deallocate_device<int>(&tileJatomStart2);
00823   if (boundingBoxes != NULL) deallocate_device<BoundingBox>(&boundingBoxes);
00824   if (tileListDepth1 != NULL) deallocate_device<unsigned int>(&tileListDepth1);
00825   if (tileListDepth2 != NULL) deallocate_device<unsigned int>(&tileListDepth2);
00826   if (tileListOrder1 != NULL) deallocate_device<int>(&tileListOrder1);
00827   if (tileListOrder2 != NULL) deallocate_device<int>(&tileListOrder2);
00828   if (tileListPos != NULL) deallocate_device<int>(&tileListPos);
00829   if (tileExcls1 != NULL) deallocate_device<TileExcl>(&tileExcls1);
00830   if (tileExcls2 != NULL) deallocate_device<TileExcl>(&tileExcls2);
00831   if (tempStorage != NULL) deallocate_device<char>(&tempStorage);
00832   if (jtiles != NULL) deallocate_device<int>(&jtiles);
00833   if (tilePos != NULL) deallocate_device<int>(&tilePos);
00834 
00835   if (tileListsGBIS != NULL) deallocate_device<TileList>(&tileListsGBIS);
00836   if (tileJatomStartGBIS != NULL) deallocate_device<int>(&tileJatomStartGBIS);
00837 
00838   if (tileListVirialEnergy != NULL) deallocate_device<TileListVirialEnergy>(&tileListVirialEnergy);
00839 
00840   if (xyzq != NULL) deallocate_device<float4>(&xyzq);
00841 
00842   if (sortKeys != NULL) deallocate_device<unsigned int>(&sortKeys);
00843   if (minmaxListLen != NULL) deallocate_device<int2>(&minmaxListLen);
00844 
00845   cudaCheck(cudaEventDestroy(tileListStatEvent));
00846 }


Member Function Documentation

void CudaTileListKernel::buildTileLists ( const int  numTileListsPrev,
const int  numPatchesIn,
const int  atomStorageSizeIn,
const int  maxTileListLenIn,
const float3  lata,
const float3  latb,
const float3  latc,
const CudaPatchRecord h_cudaPatches,
const float4 *  h_xyzq,
const float  plcutoff2In,
cudaStream_t  stream 
)

Definition at line 982 of file CudaTileListKernel.cu.

References BOUNDINGBOXKERNEL_NUM_WARP, buildTileListsBBKernel_shmem_sizePerThread(), clearTileListStat(), cudaCheck, deviceCUDA, DeviceCUDA::getMaxNumBlocks(), NAMD_die(), TileListStat::numTileLists, numTileListsDst, OVERALLOC, TILELISTKERNELNEW_NUM_WARP, TileListStat::tilesSizeExceeded, and WARPSIZE.

00986                        {
00987 
00988   numPatches = numPatchesIn;
00989   atomStorageSize = atomStorageSizeIn;
00990   maxTileListLen = maxTileListLenIn;
00991   plcutoff2 = plcutoff2In;
00992 
00993   if (doStreaming) {
00994     // Re-allocate patchNumLists
00995     reallocate_device<int>(&patchNumLists, &patchNumListsSize, numPatches);
00996     reallocate_device<int>(&emptyPatches, &emptyPatchesSize, numPatches+1);
00997     reallocate_host<int>(&h_emptyPatches, &h_emptyPatchesSize, numPatches+1);
00998   }
00999 
01000   // Re-allocate (tileLists1, patchPairs1
01001   reallocate_device<TileList>(&tileLists1, &tileLists1Size, numTileListsPrev, OVERALLOC);
01002   reallocate_device<PatchPairRecord>(&patchPairs1, &patchPairs1Size, numTileListsPrev, OVERALLOC);
01003 
01004   // Copy cudaPatches to device
01005   reallocate_device<CudaPatchRecord>(&cudaPatches, &cudaPatchesSize, numPatches);
01006   copy_HtoD<CudaPatchRecord>(h_cudaPatches, cudaPatches, numPatches, stream);
01007 
01008   // Re-allocate temporary storage
01009   reallocate_device<int>(&tilePos, &tilePosSize, numComputes, OVERALLOC);
01010   // Calculate tile list positions (tilePos)
01011   {
01012     int nthread = 1024;
01013     int nblock = 1;
01014     calcTileListPosKernel<1024> <<< nblock, nthread, 0, stream >>>
01015     (numComputes, cudaComputes, cudaPatches, tilePos);
01016     cudaCheck(cudaGetLastError());
01017   }
01018 
01019   // Build (tileLists1.patchInd, tileLists1.offsetXYZ)
01020   {
01021     int nthread = 512;
01022     int nblock = min(deviceCUDA->getMaxNumBlocks(), (numComputes-1)/(nthread/32)+1);
01023     updatePatchesKernel<32> <<< nblock, nthread, 0, stream >>>
01024     (numComputes, tilePos, cudaComputes, cudaPatches, tileLists1);
01025     cudaCheck(cudaGetLastError());
01026   }
01027 
01028   // ---------------------------------------------------------------------------------------------
01029 
01030 
01031   // NOTE: tileListDepth2 and tileListOrder2 must have at least same size as
01032   // tileListDepth2 and tileListOrder2 since they're used in sorting
01033   reallocate_device<unsigned int>(&tileListDepth2, &tileListDepth2Size, numTileListsPrev + 1, OVERALLOC);
01034   reallocate_device<int>(&tileListOrder2, &tileListOrder2Size, numTileListsPrev, OVERALLOC);
01035 
01036   // Allocate with +1 to include last term in the exclusive sum
01037   reallocate_device<unsigned int>(&tileListDepth1, &tileListDepth1Size, numTileListsPrev + 1, OVERALLOC);
01038 
01039   reallocate_device<int>(&tileListOrder1, &tileListOrder1Size, numTileListsPrev, OVERALLOC);
01040 
01041   reallocate_device<float4>(&xyzq, &xyzqSize, atomStorageSize, OVERALLOC);
01042 
01043   copy_HtoD<float4>(h_xyzq, xyzq, atomStorageSize, stream);
01044 
01045   // Fills in boundingBoxes[0 ... numBoundingBoxes-1]
01046   {
01047     int numBoundingBoxes = atomStorageSize/WARPSIZE;
01048     reallocate_device<BoundingBox>(&boundingBoxes, &boundingBoxesSize, numBoundingBoxes, OVERALLOC);
01049 
01050     int nwarp = BOUNDINGBOXKERNEL_NUM_WARP;
01051     int nthread = WARPSIZE*nwarp;
01052     int nblock = min(deviceCUDA->getMaxNumBlocks(), (atomStorageSize-1)/nthread+1);
01053     buildBoundingBoxesKernel <<< nblock, nthread, 0, stream >>> (atomStorageSize, xyzq, boundingBoxes);
01054     cudaCheck(cudaGetLastError());
01055   }
01056 
01057   {
01058     int nwarp = TILELISTKERNELNEW_NUM_WARP;
01059     int nthread = WARPSIZE*nwarp;
01060     int nblock = min(deviceCUDA->getMaxNumBlocks(), (numTileListsPrev-1)/nthread+1);
01061 
01062     int shmem_size = buildTileListsBBKernel_shmem_sizePerThread(maxTileListLen)*nthread;
01063 
01064     // NOTE: In the first call numJtiles = 1. buildTileListsBBKernel will return and
01065     //       tell the required size in h_tileListStat->numJtiles. In subsequent calls,
01066     //       re-allocation only happens when the size is exceeded.
01067     h_tileListStat->tilesSizeExceeded = true;
01068     int reallocCount = 0;
01069     while (h_tileListStat->tilesSizeExceeded) {
01070       reallocate_device<int>(&tileJatomStart1, &tileJatomStart1Size, numJtiles, OVERALLOC);
01071 
01072       clearTileListStat(stream);
01073       // clear_device_array<TileListStat>(d_tileListStat, 1, stream);
01074 
01075       buildTileListsBBKernel <<< nblock, nthread, shmem_size, stream >>>
01076       (numTileListsPrev, tileLists1, cudaPatches, tilePos,
01077         lata, latb, latc, plcutoff2, maxTileListLen,
01078         boundingBoxes, tileJatomStart1, tileJatomStart1Size,
01079         tileListDepth1, tileListOrder1, patchPairs1,
01080         d_tileListStat);
01081 
01082       cudaCheck(cudaGetLastError());
01083 
01084       // get (numATileLists, numJtiles, tilesSizeExceeded)
01085       copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1, stream);
01086       cudaCheck(cudaStreamSynchronize(stream));
01087       numJtiles = h_tileListStat->numJtiles;
01088 
01089       if (h_tileListStat->tilesSizeExceeded) {
01090         reallocCount++;
01091         if (reallocCount > 1) {
01092           NAMD_die("CudaTileListKernel::buildTileLists, multiple reallocations detected");
01093         }
01094       }
01095 
01096     }
01097 
01098     numTileLists = h_tileListStat->numTileLists;
01099 
01100     reallocate_device<int>(&jtiles, &jtilesSize, numJtiles, OVERALLOC);
01101   }
01102 
01103   // Re-allocate tileListVirialEnergy.
01104   // NOTE: Since numTileLists here is an upper estimate (since it's based on bounding boxes),
01105   //       we're quaranteed to have enough space
01106   reallocate_device<TileListVirialEnergy>(&tileListVirialEnergy, &tileListVirialEnergySize, numTileLists, OVERALLOC);
01107 
01108   reallocate_device<TileList>(&tileLists2, &tileLists2Size, numTileLists, OVERALLOC);
01109   reallocate_device<PatchPairRecord>(&patchPairs2, &patchPairs2Size, numTileLists, OVERALLOC);
01110   reallocate_device<int>(&tileJatomStart2, &tileJatomStart2Size, numJtiles, OVERALLOC);
01111   reallocate_device<TileExcl>(&tileExcls1, &tileExcls1Size, numJtiles, OVERALLOC);
01112   reallocate_device<TileExcl>(&tileExcls2, &tileExcls2Size, numJtiles, OVERALLOC);
01113 
01114   int numTileListsSrc = numTileListsPrev;
01115   int numJtilesSrc    = numJtiles;
01116   int numTileListsDst = numTileLists;
01117   int numJtilesDst    = numJtiles;
01118 
01119   // Sort tiles
01120   sortTileLists(
01121     false,
01122     0, false,
01123     numTileListsSrc, numJtilesSrc,
01124     PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
01125     PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01126     PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
01127     numTileListsDst, numJtilesDst,
01128     PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01129     PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01130     PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
01131     stream);
01132 
01133   // Set active buffer to 2
01134   setActiveBuffer(2);
01135 
01136   if (doOutputOrder) reallocate_device<int>(&outputOrder, &outputOrderSize, numTileLists, OVERALLOC);
01137 }

void CudaTileListKernel::clearTileListStat ( cudaStream_t  stream  ) 

Definition at line 852 of file CudaTileListKernel.cu.

References getNumEmptyPatches(), and TileListStat::patchReadyQueueCount.

Referenced by buildTileLists(), and CudaComputeNonbondedKernel::nonbondedForce().

00852                                                               {
00853   // clear tileListStat, for patchReadyQueueCount, which is set equal to the number of empty patches
00854   memset(h_tileListStat, 0, sizeof(TileListStat));
00855   h_tileListStat->patchReadyQueueCount = getNumEmptyPatches();
00856   copy_HtoD<TileListStat>(h_tileListStat, d_tileListStat, 1, stream);
00857 }

void CudaTileListKernel::finishTileList ( cudaStream_t  stream  ) 

Definition at line 859 of file CudaTileListKernel.cu.

References cudaCheck.

00859                                                            {
00860   copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1, stream);
00861   cudaCheck(cudaEventRecord(tileListStatEvent, stream));
00862   tileListStatEventRecord = true;
00863 }

float CudaTileListKernel::get_plcutoff2 (  )  [inline]

Definition at line 277 of file CudaTileListKernel.h.

00277 {return plcutoff2;}

float4* CudaTileListKernel::get_xyzq (  )  [inline]

Definition at line 283 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::nonbondedForce(), and CudaComputeNonbondedKernel::reduceVirialEnergy().

00283 {return xyzq;}

BoundingBox* CudaTileListKernel::getBoundingBoxes (  )  [inline]

Definition at line 281 of file CudaTileListKernel.h.

00281 {return boundingBoxes;}

CudaPatchRecord* CudaTileListKernel::getCudaPatches (  )  [inline]

Definition at line 302 of file CudaTileListKernel.h.

00302 {return cudaPatches;}

int* CudaTileListKernel::getEmptyPatches (  )  [inline]

Definition at line 273 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbonded::launchWork().

00273 {return h_emptyPatches;}

int* CudaTileListKernel::getJtiles (  )  [inline]

Definition at line 282 of file CudaTileListKernel.h.

00282 {return jtiles;}

int CudaTileListKernel::getNumComputes (  )  [inline]

Definition at line 325 of file CudaTileListKernel.h.

00325 {return numComputes;}

int CudaTileListKernel::getNumEmptyPatches (  )  [inline]

Definition at line 272 of file CudaTileListKernel.h.

Referenced by clearTileListStat(), and CudaComputeNonbonded::launchWork().

00272 {return numEmptyPatches;}

int CudaTileListKernel::getNumExcluded (  )  [inline]

Definition at line 275 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbonded::finishReductions().

00275 {return numExcluded;}

int CudaTileListKernel::getNumJtiles (  )  [inline]

Definition at line 280 of file CudaTileListKernel.h.

00280 {return numJtiles;}

int CudaTileListKernel::getNumPatches (  )  [inline]

Definition at line 323 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbonded::launchWork(), and CudaComputeNonbondedKernel::nonbondedForce().

00323 {return numPatches;}

int CudaTileListKernel::getNumTileLists (  )  [inline]

Definition at line 278 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

00278 {return numTileLists;}

int CudaTileListKernel::getNumTileListsGBIS (  )  [inline]

Definition at line 279 of file CudaTileListKernel.h.

00279 {return numTileListsGBIS;}

int* CudaTileListKernel::getOutputOrder (  )  [inline]

Definition at line 326 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

00326                         {
00327     if (!doStreaming) return NULL;
00328     if (doOutputOrder) {
00329       return outputOrder;
00330     } else {
00331       return NULL;
00332     }
00333   }

PatchPairRecord* CudaTileListKernel::getPatchPairs (  )  [inline]

Definition at line 295 of file CudaTileListKernel.h.

00295 {return ((activeBuffer == 1) ? patchPairs1 : patchPairs2);}

TileExcl* CudaTileListKernel::getTileExcls (  )  [inline]

Definition at line 294 of file CudaTileListKernel.h.

00294 {return ((activeBuffer == 1) ? tileExcls1 : tileExcls2);}

int* CudaTileListKernel::getTileJatomStart (  )  [inline]

Definition at line 288 of file CudaTileListKernel.h.

00288 {return ((activeBuffer == 1) ? tileJatomStart1 : tileJatomStart2);}

int* CudaTileListKernel::getTileJatomStartGBIS (  )  [inline]

Definition at line 297 of file CudaTileListKernel.h.

00297 {return tileJatomStartGBIS;}

unsigned int* CudaTileListKernel::getTileListDepth (  )  [inline]

Definition at line 292 of file CudaTileListKernel.h.

00292 {return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);}

int* CudaTileListKernel::getTileListOrder (  )  [inline]

Definition at line 293 of file CudaTileListKernel.h.

00293 {return ((activeBuffer == 1) ? tileListOrder1 : tileListOrder2);}

TileList* CudaTileListKernel::getTileLists (  )  [inline]

Definition at line 289 of file CudaTileListKernel.h.

00289                            {
00290     return ((activeBuffer == 1) ? tileLists1 : tileLists2);
00291   }

TileList* CudaTileListKernel::getTileListsGBIS (  )  [inline]

Definition at line 298 of file CudaTileListKernel.h.

00298 {return tileListsGBIS;}

TileListStat* CudaTileListKernel::getTileListStatDevPtr (  )  [inline]

Definition at line 285 of file CudaTileListKernel.h.

00285 {return d_tileListStat;}

TileListVirialEnergy* CudaTileListKernel::getTileListVirialEnergy (  )  [inline]

Definition at line 300 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00300 {return tileListVirialEnergy;}

int CudaTileListKernel::getTileListVirialEnergyGBISLength (  )  [inline]

Definition at line 321 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00321 {return tileListVirialEnergyGBISLength;}

int CudaTileListKernel::getTileListVirialEnergyLength (  )  [inline]

Definition at line 320 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00320 {return tileListVirialEnergyLength;}

void CudaTileListKernel::prepareTileList ( cudaStream_t  stream  ) 

Definition at line 848 of file CudaTileListKernel.cu.

00848                                                             {
00849   clear_device_array<int>(jtiles, numJtiles, stream);
00850 }

void CudaTileListKernel::reSortTileLists ( const bool  doGBIS,
cudaStream_t  stream 
)

Definition at line 1550 of file CudaTileListKernel.cu.

References cudaCheck, NAMD_die(), TileListStat::numExcluded, TileListStat::numTileLists, TileListStat::numTileListsGBIS, and OVERALLOC.

01550                                                                                {
01551   // Store previous number of active lists
01552   int numTileListsPrev = numTileLists;
01553 
01554   // Wait for finishTileList() to stop copying
01555   if (!tileListStatEventRecord)
01556     NAMD_die("CudaTileListKernel::reSortTileLists, tileListStatEvent not recorded");
01557   cudaCheck(cudaEventSynchronize(tileListStatEvent));
01558 
01559   // Get numTileLists, numTileListsGBIS, and numExcluded
01560   {
01561     numTileLists     = h_tileListStat->numTileLists;
01562     numTileListsGBIS = h_tileListStat->numTileListsGBIS;
01563     numExcluded      = h_tileListStat->numExcluded;
01564   }
01565 
01566   // Sort {tileLists2, tileJatomStart2, tileExcl2} => {tileLists1, tileJatomStart1, tileExcl1}
01567   // VdW tile list in {tileLists1, tileJatomStart1, tileExcl1}
01568   sortTileLists(true, 0, true,
01569     numTileListsPrev, numJtiles,
01570     PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01571     PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01572     PtrSize<PatchPairRecord>(NULL, 0), PtrSize<TileExcl>(tileExcls2, tileExcls2Size),
01573     numTileLists, numJtiles,
01574     PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
01575     PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01576     PtrSize<PatchPairRecord>(NULL, 0), PtrSize<TileExcl>(tileExcls1, tileExcls1Size),
01577     stream);
01578 
01579   // fprintf(stderr, "reSortTileLists, writing tile lists to disk...\n");
01580   // writeTileList("tileList.txt", numTileLists, tileLists1, stream);
01581   // writeTileJatomStart("tileJatomStart.txt", numJtiles, tileJatomStart1, stream);
01582 
01583   // markJtileOverlap(4, numTileLists, tileLists1, numJtiles, tileJatomStart1, stream);
01584 
01585   // NOTE:
01586   // Only {tileList1, tileJatomStart1, tileExcl1} are used from here on,
01587   // the rest {tileListDepth1, tileListOrder1, patchPairs1} may be re-used by the GBIS sorting
01588 
01589   if (doGBIS) {
01590     // GBIS is used => produce a second tile list
01591     // GBIS tile list in {tileListGBIS, tileJatomStartGBIS, patchPairs1}
01592     reallocate_device<TileList>(&tileListsGBIS, &tileListsGBISSize, numTileListsGBIS, OVERALLOC);
01593     reallocate_device<int>(&tileJatomStartGBIS, &tileJatomStartGBISSize, numJtiles, OVERALLOC);
01594 
01595     sortTileLists(true, 16, true,
01596       numTileListsPrev, numJtiles,
01597       PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01598       PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01599       PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
01600       numTileListsGBIS, numJtiles,
01601       PtrSize<TileList>(tileListsGBIS, tileListsGBISSize), PtrSize<int>(tileJatomStartGBIS, tileJatomStartGBISSize),
01602       PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01603       PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
01604       stream);
01605   }
01606 
01607   // Set active buffer to be 1
01608   setActiveBuffer(1);
01609 
01610 }

void CudaTileListKernel::setTileListVirialEnergyGBISLength ( int  len  ) 

Definition at line 1648 of file CudaTileListKernel.cu.

References NAMD_die().

01648                                                                   {
01649   if (len > tileListVirialEnergySize) {
01650     NAMD_die("CudaTileListKernel::setTileListVirialEnergyGBISLength, size overflow");
01651   }
01652   tileListVirialEnergyGBISLength = len;
01653 }

void CudaTileListKernel::setTileListVirialEnergyLength ( int  len  ) 

Definition at line 1641 of file CudaTileListKernel.cu.

References NAMD_die().

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

01641                                                               {
01642   if (len > tileListVirialEnergySize) {
01643     NAMD_die("CudaTileListKernel::setTileListVirialEnergyLength, size overflow");
01644   }
01645   tileListVirialEnergyLength = len;
01646 }

void CudaTileListKernel::updateComputes ( const int  numComputesIn,
const CudaComputeRecord h_cudaComputes,
cudaStream_t  stream 
)

Definition at line 865 of file CudaTileListKernel.cu.

00866                                                                 {
00867 
00868   numComputes = numComputesIn;
00869 
00870   reallocate_device<CudaComputeRecord>(&cudaComputes, &cudaComputesSize, numComputes);
00871   copy_HtoD<CudaComputeRecord>(h_cudaComputes, cudaComputes, numComputes, stream);
00872 
00873   if (doStreaming) doOutputOrder = true;
00874 }


The documentation for this class was generated from the following files:
Generated on Sat Sep 23 01:17:19 2017 for NAMD by  doxygen 1.4.7