CudaTileListKernel.h

Go to the documentation of this file.
00001 #ifndef CUDATILELISTKERNEL_H
00002 #define CUDATILELISTKERNEL_H
00003 #ifdef NAMD_CUDA
00004 
// Exclusion bitmask.
// A set bit (1) marks an atom pair as included; a cleared bit (0) marks it as excluded.
struct TileExcl {
  unsigned excl[32];   // 32 mask words
};
00009 
// Record describing a single tile list.
struct TileList {
  int iatomStart;
  int jtileStart;
  int jtileEnd;
  float3 offsetXYZ;

  // Patch indices for this list
  int2 patchInd;

  // Anonymous union with one active member; the `int icompute` alternative
  // is disabled here and icompute is a separate member below.
  union {
    // Number of lists contributing to each patch
    int2 patchNumList;
  };

  int icompute;
};
00022 
// Atom counts for the i-side and j-side of a patch pair:
// a total size and a "free" size for each side.
struct PatchPairRecord {
  // i side
  int iatomSize;
  int iatomFreeSize;
  // j side
  int jatomSize;
  int jatomFreeSize;
};
00029 
//
// Axis-aligned bounding box stored as a center point plus half-widths
//
struct BoundingBox {
  // Center
  float x;
  float y;
  float z;
  // Half-width along each axis
  float wx;
  float wy;
  float wz;
};
00037 
//
// Compute record reduced to the fields kept in the CUDA version
//
struct CudaComputeRecord {
  int2 patchInd;      // pair of patch indices
  float3 offsetXYZ;   // x/y/z offset
};
00045 
//
// Patch record reduced to the fields kept in the CUDA version
//
struct CudaPatchRecord {
  int numAtoms;       // number of atoms
  int numFreeAtoms;   // number of free atoms
  int atomStart;      // start index of this patch's atoms
};
00054 
//
// Tile list status block.
// Carries tile list sizes back and forth between the GPU and the CPU.
//
struct TileListStat {
  // Counters
  int numTileLists;
  int numTileListsGBIS;
  int numJtiles;
  int numExcluded;
  int patchReadyQueueCount;
  int outputOrderIndex;
  // Flag
  bool tilesSizeExceeded;
};
00067 
// Virial/energy record for a tile list: three single-precision triples
// (sh*, force*, forceSlow*) followed by four double-precision energies.
struct TileListVirialEnergy {
  float shx;
  float shy;
  float shz;

  float forcex;
  float forcey;
  float forcez;

  float forceSlowx;
  float forceSlowy;
  float forceSlowz;

  double energyVdw;
  double energyElec;
  double energySlow;
  double energyGBIS;
};
00077 
// Double-precision virials (two 9-element arrays) and energies.
struct VirialEnergy {
  // Virials, 9 entries each
  double virial[9];
  double virialSlow[9];
  // Energies
  double energyVdw;
  double energyElec;
  double energySlow;
  double energyGBIS;
};
00086 
//
// Holds the tile-list buffers (tile lists, j-tile starts, depths, orders,
// exclusion masks, patch pairs, sort keys, ...) together with their sizes,
// and declares the operations that build and re-sort them on a CUDA stream.
// Non-inline members are defined outside this header.
//
class CudaTileListKernel {
private:

  // Copying is disabled: these two members are declared but intentionally
  // never defined. The class holds raw buffer pointers and a cudaEvent_t and
  // declares a destructor, so a member-wise copy would alias every one of them.
  // NOTE(review): presumably the destructor releases those resources, which
  // would make a copy a double-release - confirm against the implementation.
  CudaTileListKernel(const CudaTileListKernel&);
  CudaTileListKernel& operator=(const CudaTileListKernel&);

  // Buffer pointer bundled with its size; parameter type of sortTileLists()
  template <typename T>
  struct PtrSize {
    PtrSize(T* ptr, int size) : ptr(ptr), size(size) {}
    T* ptr;
    int size;
  };

  const int deviceID;

  // Events
  cudaEvent_t tileListStatEvent;
  bool tileListStatEventRecord;

  // Pair list cutoff squared
  float plcutoff2;

  // Number of patches
  int numPatches;

  // Number of computes
  int numComputes;

  // Number of tile lists
  int numTileLists;

  // Number of tile lists for GBIS
  int numTileListsGBIS;

  // Number of tiles
  int numJtiles;

  // Maximum number of tiles per tile list
  int maxTileListLen;

  CudaPatchRecord* cudaPatches;
  int cudaPatchesSize;

  CudaComputeRecord* cudaComputes;
  int cudaComputesSize;

  // --- For Streaming ---
  const bool doStreaming;
  int* patchNumLists;
  int patchNumListsSize;

  int* emptyPatches;
  int emptyPatchesSize;
  int* h_emptyPatches;
  int h_emptyPatchesSize;
  int numEmptyPatches;

  unsigned int* sortKeySrc;
  int sortKeySrcSize;
  unsigned int* sortKeyDst;
  int sortKeyDstSize;

  int maxTileListLen_sortKeys;

  unsigned int* sortKeys;
  int sortKeysSize;

  int2* minmaxListLen;
  int minmaxListLenSize;

  int sortKeys_endbit;
  // ---------------------

  // Single entry pinned host and device buffers for communicating tile list status
  TileListStat* h_tileListStat;
  TileListStat* d_tileListStat;

  // Atom coordinates and charge
  float4* xyzq;
  int xyzqSize;
  // Atom coordinate storage size
  int atomStorageSize;

  // Tile lists (buffers 1 and 2 are selected through activeBuffer)
  TileList* tileLists1;
  int tileLists1Size;
  TileList* tileLists2;
  int tileLists2Size;
  TileList* tileListsGBIS;
  int tileListsGBISSize;

  // Patch pairs
  PatchPairRecord* patchPairs1;
  int patchPairs1Size;
  PatchPairRecord* patchPairs2;
  int patchPairs2Size;

  // j-atom start for tiles
  int* tileJatomStart1;
  int tileJatomStart1Size;
  int* tileJatomStart2;
  int tileJatomStart2Size;
  int* tileJatomStartGBIS;
  int tileJatomStartGBISSize;

  // Bounding boxes
  BoundingBox* boundingBoxes;
  int boundingBoxesSize;

  // Depth of each tile list
  unsigned int* tileListDepth1;
  int tileListDepth1Size;
  unsigned int* tileListDepth2;
  int tileListDepth2Size;

  // Tile list order
  int* tileListOrder1;
  int tileListOrder1Size;
  int* tileListOrder2;
  int tileListOrder2Size;

  // Position of each tile list = ExclusiveSum(tileListDepths)
  int* tileListPos;
  int tileListPosSize;

  // jtile occupancy and position
  int* jtiles;
  int jtilesSize;

  // Temporary buffers used in buildTileLists
  int* tilePos;
  int tilePosSize;

  // Exclusions
  TileExcl* tileExcls1;
  int tileExcls1Size;
  TileExcl* tileExcls2;
  int tileExcls2Size;

  // Temporary storage for CUB
  char* tempStorage;
  int tempStorageSize;

  // Number of exclusions detected
  int numExcluded;

  // Virials and energies for tile lists
  TileListVirialEnergy* tileListVirialEnergy;
  int tileListVirialEnergySize;

  int tileListVirialEnergyLength;
  int tileListVirialEnergyGBISLength;

  // Selects which of the "1"/"2" buffer sets the getters below return:
  // value 1 selects set 1, any other value selects set 2.
  int activeBuffer;

  void setActiveBuffer(int activeBufferIn) {activeBuffer = activeBufferIn;}

  // Sorts tile lists from the Src buffer set into the Dst buffer set on `stream`.
  void sortTileLists(
    const bool useJtiles,
    const int begin_bit, const bool highDepthBitsSet,
    // Source
    const int numTileListsSrc, const int numJtilesSrc,
    PtrSize<TileList> tileListsSrc, PtrSize<int> tileJatomStartSrc,
    PtrSize<unsigned int> tileListDepthSrc, PtrSize<int> tileListOrderSrc,
    PtrSize<PatchPairRecord> patchPairsSrc, PtrSize<TileExcl> tileExclsSrc,
    // Destination
    const int numTileListsDst, const int numJtilesDst,
    PtrSize<TileList> tileListsDst, PtrSize<int> tileJatomStartDst,
    PtrSize<unsigned int> tileListDepthDst, PtrSize<int> tileListOrderDst,
    PtrSize<PatchPairRecord> patchPairsDst, PtrSize<TileExcl> tileExclsDst,
    cudaStream_t stream);

  // Write helpers taking a file name and a d_-prefixed buffer pointer
  void writeTileList(const char* filename, const int numTileLists,
    const TileList* d_tileLists, cudaStream_t stream);
  void writeTileJatomStart(const char* filename, const int numJtiles,
    const int* d_tileJatomStart, cudaStream_t stream);
  // void markJtileOverlap(const int width, const int numTileLists, TileList* d_tileLists,
  //   const int numJtiles, int* d_tileJatomStart, cudaStream_t stream);

  int* outputOrder;
  int outputOrderSize;
  bool doOutputOrder;

public:

  CudaTileListKernel(int deviceID, bool doStreaming);
  ~CudaTileListKernel();

  // --- Read-only accessors (const-qualified: they only read members) ---

  int getNumEmptyPatches() const {return numEmptyPatches;}
  int* getEmptyPatches() const {return h_emptyPatches;}

  int getNumExcluded() const {return numExcluded;}

  float get_plcutoff2() const {return plcutoff2;}
  int getNumTileLists() const {return numTileLists;}
  int getNumTileListsGBIS() const {return numTileListsGBIS;}
  int getNumJtiles() const {return numJtiles;}
  BoundingBox* getBoundingBoxes() const {return boundingBoxes;}
  int* getJtiles() const {return jtiles;}
  float4* get_xyzq() const {return xyzq;}

  TileListStat* getTileListStatDevPtr() const {return d_tileListStat;}
  void clearTileListStat(cudaStream_t stream);

  // Accessors for the currently active buffer set
  int* getTileJatomStart() const {
    return ((activeBuffer == 1) ? tileJatomStart1 : tileJatomStart2);
  }
  TileList* getTileLists() const {
    return ((activeBuffer == 1) ? tileLists1 : tileLists2);
  }
  unsigned int* getTileListDepth() const {
    return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);
  }
  int* getTileListOrder() const {
    return ((activeBuffer == 1) ? tileListOrder1 : tileListOrder2);
  }
  TileExcl* getTileExcls() const {
    return ((activeBuffer == 1) ? tileExcls1 : tileExcls2);
  }
  PatchPairRecord* getPatchPairs() const {
    return ((activeBuffer == 1) ? patchPairs1 : patchPairs2);
  }

  int* getTileJatomStartGBIS() const {return tileJatomStartGBIS;}
  TileList* getTileListsGBIS() const {return tileListsGBIS;}

  TileListVirialEnergy* getTileListVirialEnergy() const {return tileListVirialEnergy;}

  CudaPatchRecord* getCudaPatches() const {return cudaPatches;}

  // --- Operations (defined outside this header) ---

  void prepareTileList(cudaStream_t stream);
  void finishTileList(cudaStream_t stream);

  void updateComputes(const int numComputesIn,
    const CudaComputeRecord* h_cudaComputes, cudaStream_t stream);

  void buildTileLists(const int numTileListsPrev,
    const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn,
    const float3 lata, const float3 latb, const float3 latc,
    const CudaPatchRecord* h_cudaPatches, const float4* h_xyzq, const float plcutoff2In, cudaStream_t stream);

  void reSortTileLists(const bool doGBIS, cudaStream_t stream);
  // void applyOutputOrder(cudaStream_t stream);

  void setTileListVirialEnergyLength(int len);
  void setTileListVirialEnergyGBISLength(int len);
  int getTileListVirialEnergyLength() const {return tileListVirialEnergyLength;}
  int getTileListVirialEnergyGBISLength() const {return tileListVirialEnergyGBISLength;}

  int getNumPatches() const {return numPatches;}

  int getNumComputes() const {return numComputes;}

  // Returns outputOrder only when both doStreaming and doOutputOrder are set;
  // returns NULL in every other case.
  int* getOutputOrder() const {
    return (doStreaming && doOutputOrder) ? outputOrder : NULL;
  }

};
00336 #endif // NAMD_CUDA
00337 #endif // CUDATILELISTKERNEL_H

Generated on Mon Nov 20 01:17:12 2017 for NAMD by  doxygen 1.4.7