ComputeBondedCUDA.h

Go to the documentation of this file.
00001 #ifndef COMPUTEBONDEDCUDA_H
00002 #define COMPUTEBONDEDCUDA_H
00003 #include "Compute.h"
00004 #include "ComputeMap.h"
00005 #include "CudaNonbondedTables.h"
00006 #include "ComputeBondedCUDAKernel.h"
00007 #include "ComputeHomeTuples.h"
00008 #ifdef NAMD_CUDA
00009 #ifdef BONDED_CUDA
00010 
00011 #include <vector>
00012 #include <array>
00013 
00014 class ComputeBondedCUDA : public Compute {
00015 
00016 public:
00017 
      // Element byte size for each tuple type, indexed by tuple type
      // (Tuples::NUM_TUPLE_TYPES entries); the definition lives in the
      // implementation file.
00018   static const int CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES];
00019 
00020 private:
      // Set once initialize() has run.
      // NOTE(review): presumably guards against repeated initialization --
      // confirm against the implementation file.
00021   bool initializeCalled;
00022 
00023   // Device ID and stream
00024   const int deviceID;
00025   cudaStream_t stream;
00026 
00027   // Master PE for this compute
00028   const int masterPe;
00029 
00030   // List of all patch IDs on this object
00031   std::vector<int> allPatchIDs;
00032 
00033   // List of tuple patches for the entire compute (i.e. across all PEs)
00034   TuplePatchList tuplePatchList;
00035 
00036   // For every PE, list of patches that it has registered
00037   std::vector< std::vector<int> > patchIDsPerRank;
00038 
00039   // List of PEs involved in the computation
00040   std::vector<int> pes;
00041 
00042   // Self compute: covers tuples of a single type; instances are matched by
00043   // tuple type via operator== below.
00043   struct SelfCompute {
00044     int type;
00045     std::vector<int> patchIDs;
00046     Tuples* tuples;
        // tuples starts out NULL and is filled in later (outside this header).
00047     SelfCompute(int type=-1) : type(type), tuples(NULL) {}
        // Equality compares tuple type only, so a SelfCompute can be found in
        // a container by its type alone.
00048     int operator==(const SelfCompute &elem) const {
00049       return (elem.type == type);
00050     }
00051   };
00052 
00053   // Home compute, each PE has one
00054   struct HomeCompute {
00055     std::vector<char> isBasePatch;
00056     std::vector<int> patchIDs;
00057     // Multiple tuples per PE, each of a different kind
00058     std::vector< Tuples* > tuples;
00059   };
00060 
00061   // Computes for each PE
00062   struct ComputeRecord {
00063     HomeCompute homeCompute;
00064     // Self computes, organized by type
00065     std::vector< SelfCompute > selfComputes;
00066   };
00067 
00068   // Collection of all computes for each PE
00069   std::vector< ComputeRecord > computes;
00070 
00071   // For every tuple type, list of tuples
00072   // NOTE: These are pointers to the data recorded in "computes" and
00073   //       are here to make it easier to traverse across all tuples of a certain kind
00074   std::array< std::list<Tuples*>, Tuples::NUM_TUPLE_TYPES > tupleList;
00075 
      // Number of tuples currently held for each tuple type.
00076   int numTuplesPerType[Tuples::NUM_TUPLE_TYPES];
00077 
00078   AtomMap atomMap;
00079   std::vector< AtomMapper* > atomMappers;
00080 
      // Per-patch bookkeeping: start offset of the patch's atoms in the
      // contiguous atom array, and the patch's atom count.
00081   struct PatchRecord {
00082     int atomStart;
00083     int numAtoms;
00084   };
00085   std::vector<PatchRecord> patches;
00086 
00087   // Patch "patchID" is found in patches[patchIndex[patchID]]
00088   std::vector<int> patchIndex;
00089 
00090   // Maps multiplicity indices (for dihedrals and impropers)
00091   std::vector<int> dihedralMultMap;
00092   std::vector<int> improperMultMap;
00093 
00094   // Number of exclusions per rank, separated into modified and non-modified
00095   struct NumExcl {
00096     int numModifiedExclusions;
00097     int numExclusions;
00098   };
00099   std::vector<NumExcl> numExclPerRank;
00100 
00101   // Flags that indicate whether this GPU has exclusions and modified exclusions
00102   bool hasExclusions;
00103   bool hasModifiedExclusions;
00104 
00105   // All tuple data: raw packed buffer and its allocated size in bytes
      // (NOTE(review): byte units inferred from the char* type -- confirm).
00106   char* tupleData;
00107   int tupleDataSize;
00108 
00109   // Bonded CUDA kernel
00110   ComputeBondedCUDAKernel bondedKernel;
00111 
00112   // Pointer to computeMgr that created this object
00113   ComputeMgr* computeMgr;
00114 
00115   // Node-wide counter for patches.
00116   int patchesCounter;
00117 
00118   // "Force done event" for event polling
00119   cudaEvent_t forceDoneEvent;
00120 
00121   // Check counter for event polling
00122   int checkCount;
00123 
00124   // Node lock
00125   CmiNodeLock lock;
00126 
00127   // This variable is set in atomUpdate() by any Pe
00128   bool atomsChangedIn;
00129   // This variable is set in doWork() by masterPe
00130   bool atomsChanged;
00131 
00132   // Reduction
00133   SubmitReduction *reduction;
00134 
00135   // Required storage
00136   int atomStorageSize;
00137 
00138   // Flags pointer
00139   Flags* flags;
00140 
00141   // Lattice and energy and virial booleans
00142   Lattice lattice;
00143   bool doEnergy;
00144   bool doVirial;
00145   bool doSlow;
00146   bool doMolly;
00147 
00148   // Walltime for force compute start
00149   double beforeForceCompute;
00150 
      // Accelerated-MD dihedral flag
      // (NOTE(review): semantics inferred from the name -- confirm in the .C file).
00151   bool accelMDdoDihe;
00152 
00153   // Atom storage in pinned host memory
00154   CudaAtom* atoms;
00155   int atomsSize;
00156 
00157   // Force storage in pinned host memory
00158   FORCE_TYPE* forces;
00159   int forcesSize;
00160 
      // Host-side buffer for energy and virial results.
00161   double* energies_virials;
00162 
00163   void mapAtoms();
00164   void unmapAtoms();
00165 
00166   void updatePatches();
00167 
      // Polling callback pair: forceDoneCheck is the static entry registered
      // with the runtime (arg carries the object), forceDoneSetCallback arms it.
00168   static void forceDoneCheck(void *arg, double walltime);
00169   void forceDoneSetCallback();
00170 
00171   void finishPatches();
00172 
00173   // ------------ For copyTupleData -------------------
      // One unit of tuple-copy work: the tuple type, the tuple count, the
      // source element list, and the write position into the tuple data buffer.
00174   struct TupleCopyWork {
00175     int tupletype;
00176     int ntuples;
00177     void* tupleElemList;
00178     int tupleDataPos;
00179   };
00180 
00181   std::vector<TupleCopyWork> tupleCopyWorkList;
00182 
      // Start positions for the two exclusion destinations (see the pos/pos2
      // outputs of copyExclusionData below).
00183   int exclusionStartPos;
00184   int exclusionStartPos2;
00185 
00186   void copyBondData(const int ntuples, const BondElem* __restrict__ src,
00187     const BondValue* __restrict__ bond_array, CudaBond* __restrict__ dst);
00188 
00189   void copyAngleData(const int ntuples, const AngleElem* __restrict__ src,
00190     const AngleValue* __restrict__ angle_array, CudaAngle* __restrict__ dst);
00191 
      // Shared copy routine for dihedrals and impropers; doDihedral selects
      // between the two, T/P are the element and parameter-array types.
00192   template <bool doDihedral, typename T, typename P>
00193   void copyDihedralData(const int ntuples, const T* __restrict__ src,
00194     const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);
00195 
      // Writes exclusions into two destinations, advancing pos and pos2
      // (modified vs. non-modified -- NOTE(review): split inferred from
      // NumExcl above; confirm in the implementation).
00196   void copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
00197     CudaExclusion* __restrict__ dst1, CudaExclusion* __restrict__ dst2, int& pos, int& pos2);
00198 
00199   void copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,
00200     const CrosstermValue* __restrict__ crossterm_array, CudaCrossterm* __restrict__ dst);
00201 
      // Static entry point for parallelized tuple copying (forwards to the
      // member overload below for the [first, last] range).
00202   static void tupleCopyWorker(int first, int last, void *result, int paraNum, void *param);
00203   void tupleCopyWorker(int first, int last);
00204   // --------------------------------------------------
00205 
00206 public:
00207 
00208   ComputeBondedCUDA(ComputeID c, ComputeMgr* computeMgr, int deviceID, CudaNonbondedTables& cudaNonbondedTables);
00209   ~ComputeBondedCUDA();
      // Registration of home/self computes per PE (called before initialize()).
00210   void registerCompute(int pe, int type, PatchIDList& pids);
00211   void registerSelfCompute(int pe, int type, int pid);
00212   void unregisterBoxesOnPe();
00213   void assignPatchesOnPe();
      // Compute interface overrides
00214   virtual void patchReady(PatchID, int doneMigration, int seq);
00215   virtual void initialize();
00216   virtual void atomUpdate();
00217   virtual int noWork();
00218   virtual void doWork();
00219   void messageEnqueueWork();
00220   // void updatePatches();
      // Per-step pipeline entry points, in rough execution order.
00221   void openBoxesOnPe();
00222   void loadTuplesOnPe();
00223   void copyTupleData();
00224   void launchWork();
00225 
00226   void finishPatchesOnPe();
00227   void finishReductions();
00228   
00229 };
00230 
00231 #endif // BONDED_CUDA
00232 #endif // NAMD_CUDA
00233 #endif // COMPUTEBONDEDCUDA_H

Generated on Sat Sep 23 01:17:11 2017 for NAMD by  doxygen 1.4.7