CudaComputeNonbonded.h

Go to the documentation of this file.
00001 #ifndef CUDACOMPUTENONBONDED_H
00002 #define CUDACOMPUTENONBONDED_H
00003 #include <vector>
00004 #include "Compute.h"
00005 #include "Box.h"
00006 #include "PatchTypes.h"
00007 #include "CudaUtils.h"
00008 #include "ComputeNonbondedUtil.h"
00009 #include "CudaNonbondedTables.h"
00010 #include "CudaTileListKernel.h"
00011 #include "CudaComputeNonbondedKernel.h"
00012 #include "CudaComputeGBISKernel.h"
00013 #include "ComputeMgr.h"
00014 #ifdef NAMD_CUDA
00015 #include <cuda.h>
00016 
00017 // Sizing of the exclusion bit array: 2^11 ints * 2^5 bits = 2^16 bits = range of an unsigned short excl_index
00018 // 2^27 ints * 2^5 bits = 2^32 bits = range of an unsigned int excl_index (this is the case MAX_EXCLUSIONS below corresponds to)
00019 #define MAX_EXCLUSIONS (1<<27)
00020 
00021 class CudaComputeNonbonded : public Compute, public ComputeNonbondedUtil { // Compute / ComputeNonbondedUtil subclass holding the nonbonded, GBIS and tile-list kernel objects, host/device buffers, and per-patch / per-compute records. Only declarations are visible in this header.
00022 public:
00023   struct ComputeRecord { // One compute entry: its ComputeID, two patch IDs, two patch indices and an offset vector.
00024     ComputeID cid;
00025     PatchID pid[2];
00026     // Indices into the patches[] array
00027     int patchInd[2];
00028     Vector offset; // NOTE(review): addCompute() takes a matching "offset" argument; presumably the shift between the two patches - confirm
00029   };
00030 
00031   struct PatchRecord { // Per-patch bookkeeping: identity, counts, owning Pe, boxes and the pointers kept while boxes are open.
00032     PatchRecord(PatchID patchID) : patchID(patchID) { // Stores patchID and sets patch, compAtom, results and all seven box pointers to NULL.
00033       patch = NULL;
00034       compAtom = NULL;
00035       results = NULL;
00036       positionBox = NULL;
00037       forceBox = NULL;
00038       intRadBox = NULL;
00039       psiSumBox = NULL;
00040       bornRadBox = NULL;
00041       dEdaSumBox = NULL;
00042       dHdrPrefixBox = NULL;
00043     } // NOTE(review): the int/bool fields and the five GBIS array pointers (intRad..dHdrPrefix) are not initialized here; presumably assigned before first use - confirm
00044     PatchID patchID;
00045     Patch *patch;
00046     int numAtoms;
00047     int numFreeAtoms;
00048     int atomStart; // NOTE(review): presumably this patch's first index in the device-wide atom arrays - confirm
00049     // Pe where the patch was registered
00050     int pe;
00051     // For priority sorting
00052     int reversePriorityRankInPe;
00053     bool isSamePhysicalNode;
00054     bool isSameNode;
00055     // Storage for open positionBox
00056     CompAtom *compAtom;
00057     // Storage for open forceBox
00058     Results *results;
00059     // Boxes
00060     Box<Patch,CompAtom> *positionBox;
00061     Box<Patch,Results> *forceBox;
00062     Box<Patch,Real>   *intRadBox; // Five GBIS boxes: intRad, psiSum, bornRad, dEdaSum, dHdrPrefix
00063     Box<Patch,GBReal> *psiSumBox;
00064     Box<Patch,Real>   *bornRadBox;
00065     Box<Patch,GBReal> *dEdaSumBox;
00066     Box<Patch,Real>   *dHdrPrefixBox;
00067     Real   *intRad; // Five GBIS arrays, named after the five GBIS boxes above
00068     GBReal *psiSum;
00069     Real   *bornRad;
00070     GBReal *dEdaSum;
00071     Real   *dHdrPrefix;
00072     bool operator < (const PatchRecord& pr) const { // Orders records by patchID only.
00073       return (patchID < pr.patchID);
00074     }
00075     bool operator == (const PatchRecord& pr) const { // Two records are equal when their patchID matches; other fields are ignored.
00076       return (patchID == pr.patchID);
00077     }
00078   };
00079 
00080 private:
00081   // This variable is set in atomUpdate() by any Pe
00082   bool atomsChangedIn;
00083   // This variable is set in doWork() by masterPe
00084   bool atomsChanged;
00085 
00086   bool computesChanged;
00087 
00088   const int deviceID; // const: fixed at construction
00089   cudaStream_t stream; // CUDA stream handle
00090 
00091   // PME and VdW CUDA kernels
00092   CudaComputeNonbondedKernel nonbondedKernel;
00093 
00094   // GBIS kernel
00095   CudaComputeGBISKernel GBISKernel;
00096 
00097   // Tile list CUDA kernels
00098   CudaTileListKernel tileListKernel;
00099 
00100   // Exclusions
00101   int2 *exclusionsByAtom;
00102 
00103   // VdW-types
00104   // Pinned host memory
00105   int* vdwTypes;
00106   int vdwTypesSize; // NOTE(review): this and the other *Size members presumably track the allocated capacity of the matching array - confirm
00107 
00108   // Maximum number of tiles per tile list
00109   int maxTileListLen;
00110 
00111   // Pinned host memory
00112   int2* exclIndexMaxDiff;
00113   int exclIndexMaxDiffSize;
00114 
00115   // Pinned host memory
00116   int* atomIndex;
00117   int atomIndexSize;
00118 
00119   // Required (xyzq, vdwTypes) storage
00120         int atomStorageSize;
00121 
00122   // Atom and charge storage
00123   // Pinned host memory
00124   CudaAtom* atoms;
00125   int atomsSize;
00126 
00127   // Force storage
00128   float4* h_forces; // NOTE(review): h_ / d_ prefixes presumably denote host and device buffers - confirm
00129   int h_forcesSize;
00130   float4* h_forcesSlow;
00131   int h_forcesSlowSize;
00132 
00133   float4* d_forces;
00134   int d_forcesSize;
00135   float4* d_forcesSlow;
00136   int d_forcesSlowSize;
00137 
00138   // Virial and energy storage
00139   VirialEnergy* h_virialEnergy;
00140   VirialEnergy* d_virialEnergy;
00141 
00142   // GBIS storage
00143   //--------------
00144   // Pinned host memory
00145   float* intRad0H;
00146   int intRad0HSize;
00147   // Pinned host memory
00148   float* intRadSH;
00149   int intRadSHSize;
00150   // Mapped host memory
00151   GBReal* psiSumH;
00152   int psiSumHSize;
00153   // Pinned host memory
00154   float* bornRadH;
00155   int bornRadHSize;
00156   // Mapped host memory
00157   GBReal* dEdaSumH;
00158   int dEdaSumHSize;
00159   // Pinned host memory
00160   float* dHdrPrefixH;
00161   int dHdrPrefixHSize;
00162 
00163   // Event, plus a sanity-check flag for making sure the event was actually recorded
00164   cudaEvent_t forceDoneEvent;
00165   bool forceDoneEventRecord;
00166   // Check counter for event polling
00167   int checkCount;
00168 
00169   // Node lock
00170   CmiNodeLock lock;
00171   // List of local PEs that have patches
00172   std::vector<int> pes;
00173   // List of patch indices on each rank
00174   std::vector< std::vector<int> > rankPatches;
00175   // Master Pe = Pe where this Compute and reduction lives
00176   int masterPe;
00177 
00178   // Are we in skip?
00179   bool doSkip;
00180 
00181   // Device-wide patch and compute records, and the list of patches
00182   std::vector<ComputeRecord> computes;
00183   std::vector<PatchRecord> patches;
00184 
00185   // CUDA versions of patches
00186   // Pinned host memory
00187   CudaPatchRecord* cudaPatches;
00188 
00189   SubmitReduction *reduction;
00190 
00191   // Pair lists
00192   int pairlistsValid;
00193   float pairlistTolerance;
00194   int usePairlists;
00195   int savePairlists;
00196   float plcutoff2;
00197 
00198   bool reSortDone;
00199 
00200   // Flags
00201   bool doSlow;
00202   bool doEnergy;
00203   bool doVirial;
00204 
00205   // Walltime for force compute start
00206   double beforeForceCompute;
00207 
00208   static inline void updateVdwTypesExclLoop(int first, int last, void *result, int paraNum, void *param); // NOTE(review): static loop-callback form of updateVdwTypesExclSubset(); param presumably carries the object pointer - confirm
00209   void updateVdwTypesExclSubset(int first, int last);
00210 
00211   static inline void copyAtomsLoop(int first, int last, void *result, int paraNum, void *param); // NOTE(review): static loop-callback form of copyAtomsSubset(); param presumably carries the object pointer - confirm
00212   void copyAtomsSubset(int first, int last);
00213 
00214   void addPatch(PatchID pid);
00215   void addCompute(ComputeID cid, PatchID pid1, PatchID pid2, Vector offset);
00216   void updatePatches();
00217   int calcNumTileLists();
00218   void getMaxMovementTolerance(float& maxAtomMovement, float& maxPatchTolerance); // Both results are returned through the reference parameters.
00219   void updateVdwTypesExcl();
00220   void buildNeighborlist();
00221   void skip();
00222   void doGBISphase1();
00223   void doGBISphase2();
00224   void doGBISphase3();
00225   void doForce();
00226   void finishSetOfPatchesOnPe(std::vector<int>& patchSet);
00227   void finishPatches();
00228   void finishGBISPhase(int i);
00229   void finishTimers();
00230   void reSortTileLists();
00231   void forceDone();
00232   static void forceDoneCheck(void *arg, double walltime); // Static callback signature; NOTE(review): arg is presumably the CudaComputeNonbonded instance - confirm
00233   void forceDoneSetCallback();
00234   void updateComputes();
00235   void buildExclusions();
00236   void skipPatch(int i);
00237   void openBox(int i);
00238   void reallocateArrays();
00239   void copyGBISphase(int i);
00240   void updatePatch(int i);
00241   int findPid(PatchID pid);
00242   void assignPatch(int i);
00243   ComputeMgr* computeMgr;
00244   int patchesCounter;
00245 
00246   const bool doStreaming; // const: fixed at construction
00247   int* patchReadyQueue;
00248   int patchReadyQueueNext, patchReadyQueueLen;
00249 
00250   void finishPatch(int i);
00251   void unregisterBox(int i);
00252 
00253   // void writeId(const char* filename);
00254   // void writeXYZ(const char* filename);
00255 
00256 public: // Public interface: construction/destruction, compute registration, the virtual Compute-style hooks, and the *OnPe() methods.
00257   CudaComputeNonbonded(ComputeID c, int deviceID, CudaNonbondedTables& cudaNonbondedTables, bool doStreaming);
00258   ~CudaComputeNonbonded();
00259   void registerComputeSelf(ComputeID cid, PatchID pid);
00260   void registerComputePair(ComputeID cid, PatchID* pid, int* trans); // NOTE(review): pid and trans presumably each point to two entries, one per patch of the pair - confirm
00261   void assignPatches(ComputeMgr* computeMgrIn);
00262   virtual void initialize();
00263   virtual void atomUpdate();
00264   virtual int noWork();
00265   virtual void doWork();
00266   void launchWork();
00267   void finishReductions();
00268   void unregisterBoxesOnPe();
00269   void assignPatchesOnPe();
00270   void openBoxesOnPe();
00271   void skipPatchesOnPe();
00272   void finishPatchesOnPe();
00273   void finishPatchOnPe(int i);
00274   void messageEnqueueWork();
00275   virtual void patchReady(PatchID, int doneMigration, int seq);
00276   virtual void gbisP2PatchReady(PatchID, int seq);
00277   virtual void gbisP3PatchReady(PatchID, int seq);
00278 };
00279 
00280 #endif // NAMD_CUDA
00281 #endif // CUDACOMPUTENONBONDED_H

Generated on Thu Nov 23 01:17:12 2017 for NAMD by  doxygen 1.4.7