#ifndef COMPUTEBONDEDCUDA_H
#define COMPUTEBONDEDCUDA_H

#if defined(NAMD_CUDA) || defined(NAMD_HIP)

class ComputeBondedCUDA : public Compute {
  static const int CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES];
  static const int CudaTupleTypeSizeStage[Tuples::NUM_TUPLE_TYPES];

  bool initializeCalled;
#ifdef NODEGROUP_FORCE_REGISTER
  std::atomic<int> tupleWorkIndex;
#endif  // NODEGROUP_FORCE_REGISTER
  // List of PEs involved; returned by getBondedPes() below.
  std::vector<int> pes;

  std::vector<int> allPatchIDs;

  std::vector< std::vector<int> > patchIDsPerRank;
  struct SelfCompute {
    int type;
    std::vector<int> patchIDs;
    Tuples* tuples;
    SelfCompute(int type=-1) : type(type), tuples(NULL) {}
    int operator==(const SelfCompute &elem) const {
      return (elem.type == type);
    }
  };
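  // Illustrative sketch (not part of the class interface): operator== above
  // compares by tuple type only, so a SelfCompute of a given type can be
  // looked up with std::find, e.g.
  //
  //   auto it = std::find(selfComputes.begin(), selfComputes.end(),
  //                       SelfCompute(wantedType));   // wantedType: hypothetical int
  //   if (it == selfComputes.end()) selfComputes.push_back(SelfCompute(wantedType));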
  struct HomeCompute {
    std::vector<char> isBasePatch;
    std::vector<int> patchIDs;
    std::vector< Tuples* > tuples;
  };
  struct ComputeRecord {
    HomeCompute homeCompute;
    std::vector< SelfCompute > selfComputes;
  };

  std::vector< ComputeRecord > computes;
  std::array< std::list<Tuples*>, Tuples::NUM_TUPLE_TYPES > tupleList;

  int numTuplesPerType[Tuples::NUM_TUPLE_TYPES];
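  // Illustrative sketch (assumption, not the actual implementation): the total
  // staged tuple footprint can be accumulated from the per-type counts and the
  // per-type element sizes declared above, e.g.
  //
  //   size_t bytes = 0;
  //   for (int i = 0; i < Tuples::NUM_TUPLE_TYPES; ++i)
  //     bytes += (size_t)numTuplesPerType[i] * CudaTupleTypeSize[i];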
  std::vector< AtomMapper* > atomMappers;

  std::vector<PatchRecord> patches;

  std::vector<int> patchIndex;

  std::vector<int> dihedralMultMap;
  std::vector<int> improperMultMap;
  struct NumExcl {
    int numModifiedExclusions;
    int numExclusions;
  };
  std::vector<NumExcl> numExclPerRank;
  bool hasModifiedExclusions;

  size_t tupleDataSize;

  std::vector<CudaBondStage> bondTupleData;
  std::vector<CudaAngleStage> angleTupleData;
  std::vector<CudaDihedralStage> dihedralTupleData;
  std::vector<CudaDihedralStage> improperTupleData;
  std::vector<CudaExclusionStage> modifiedExclusionTupleData;
  std::vector<CudaExclusionStage> exclusionTupleData;
  std::vector<CudaCrosstermStage> crosstermTupleData;
  std::vector<CudaTholeStage> tholeTupleData;
  std::vector<CudaAnisoStage> anisoTupleData;
  std::vector<CudaOneFourNbTholeStage> oneFourNbTholeTupleData;
#ifdef NODEGROUP_FORCE_REGISTER
  MigrationBondedCUDAKernel migrationKernel;
#endif  // NODEGROUP_FORCE_REGISTER

  double3* h_patchMapCenter;
  double3* d_patchMapCenter;
  cudaEvent_t forceDoneEvent;

  CmiNodeLock printLock;

  double beforeForceCompute;

  double* energies_virials;

  int pswitchTable[3*3];
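  // Illustrative note (assumption): a flat 3x3 lookup keyed by the alchemical
  // partitions (0, 1, 2) of the two atoms in a tuple, e.g.
  //
  //   int pswitch = pswitchTable[3*part1 + part2];   // part1, part2: hypothetical partition values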
  void updatePatches();

  static void forceDoneCheck(void *arg, double walltime);
  void forceDoneSetCallback();
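  // Illustrative sketch (assumption): the (void*, double walltime) signature
  // matches a Converse timer callback, so forceDoneCheck() presumably polls
  // the CUDA event recorded after the force kernels, roughly:
  //
  //   ComputeBondedCUDA* c = (ComputeBondedCUDA*)arg;
  //   cudaError_t err = cudaEventQuery(c->forceDoneEvent);
  //   if (err == cudaSuccess)            { /* forces ready: finish patches */ }
  //   else if (err == cudaErrorNotReady) { /* re-arm the callback and poll again */ }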
  struct TupleCopyWork {
    int64_t tupleDataPos;
  };

  std::vector<TupleCopyWork> tupleCopyWorkList;

  int64_t exclusionStartPos;
  int64_t exclusionStartPos2;
  std::vector<CudaBondStage> hostCudaBondStage;
#ifdef NODEGROUP_FORCE_REGISTER
  template <typename T>
  void sortTupleList(std::vector<T>& tuples, std::vector<int>& tupleCounts, std::vector<int>& tupleOffsets);
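  // Illustrative sketch (assumption): a counting-sort style pass that buckets
  // tuples by a key (e.g. destination patch) using per-bucket counts and
  // exclusive-scan offsets, roughly:
  //
  //   std::vector<T> sorted(tuples.size());
  //   std::vector<int> cursor(tupleOffsets);          // running write positions per bucket
  //   for (const T& t : tuples) sorted[cursor[keyOf(t)]++] = t;
  //   tuples.swap(sorted);
  //
  // where keyOf() is a hypothetical bucket-key accessor.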
  void sortAndCopyToDevice();
  void migrateTuples(bool startup);

  template <typename T, typename P, typename D>
  void copyTupleToStage(const T& src, const P* __restrict__ p_array, D& dstval);

  template <typename T, typename P, typename D>
  void copyToStage(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, std::vector<D>& dst);
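  // Illustrative sketch (assumption): copyToStage() presumably applies the
  // per-tuple conversion above across a whole array, e.g.
  //
  //   dst.resize(ntuples);
  //   for (int i = 0; i < ntuples; ++i)
  //     copyTupleToStage(src[i], p_array, dst[i]);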
  void copyExclusionDataStage(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    std::vector<CudaExclusionStage>& dst1, std::vector<CudaExclusionStage>& dst2, int64_t& pos, int64_t& pos2);
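  // Illustrative sketch (assumption): exclusions are routed into two staging
  // streams (modified, i.e. scaled 1-4, vs. plain), each with its own running
  // position, roughly:
  //
  //   for (int i = 0; i < ntuples; ++i) {
  //     if (isModified(src[i]))                            // hypothetical predicate
  //       { stageInto(dst1, src[i]); pos  += typeSize; }   // stageInto: hypothetical converter
  //     else
  //       { stageInto(dst2, src[i]); pos2 += typeSize; }
  //   }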
#endif  // NODEGROUP_FORCE_REGISTER

  void copyBondData(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBond* __restrict__ dst);

  void copyBondDatafp32(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBond* __restrict__ dst);

  void copyAngleData(const int ntuples, const AngleElem* __restrict__ src,
    const AngleValue* __restrict__ angle_array, CudaAngle* __restrict__ dst);
  template <bool doDihedral, typename T, typename P>
  void copyDihedralData(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);

  template <bool doDihedral, typename T, typename P>
  void copyDihedralDatafp32(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);
  void copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    CudaExclusion* __restrict__ dst1, CudaExclusion* __restrict__ dst2, int64_t& pos, int64_t& pos2);

  void copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,
    const CrosstermValue* __restrict__ crossterm_array, CudaCrossterm* __restrict__ dst);
  void copyTholeData(const int ntuples, const TholeElem* __restrict__ src,
    const TholeValue* __restrict__ thole_array, CudaThole* __restrict__ dst);

  void copyAnisoData(const int ntuples, const AnisoElem* __restrict__ src,
    const AnisoValue* __restrict__ aniso_array, CudaAniso* __restrict__ dst);

  void copyOneFourNbTholeData(const int ntuples, const OneFourNbTholeElem* __restrict__ src,
    const OneFourNbTholeValue* __restrict__ onefour_array, CudaOneFourNbThole* __restrict__ dst);
  static void tupleCopyWorker(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorker(int first, int last);
  static void tupleCopyWorkerExcl(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorkerExcl(int first, int last);
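  // Illustrative sketch (assumption): the static overloads look like CkLoop
  // worker trampolines that recover the object from 'param' and forward the
  // index range to the member functions, e.g.
  //
  //   void ComputeBondedCUDA::tupleCopyWorker(int first, int last, void*, int, void* param) {
  //     ((ComputeBondedCUDA*)param)->tupleCopyWorker(first, last);
  //   }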
#ifdef NODEGROUP_FORCE_REGISTER
  void tupleCopyWorkerType(int tupletype);
#endif  // NODEGROUP_FORCE_REGISTER
public:

  ~ComputeBondedCUDA();
  void registerCompute(int pe, int type, PatchIDList& pids);
  void registerSelfCompute(int pe, int type, int pid);
  void unregisterBoxesOnPe();
  void assignPatchesOnPe();
  virtual void initialize();
  virtual void atomUpdate();
  virtual void patchReady(PatchID, int doneMigration, int seq);
  void messageEnqueueWork();
  void openBoxesOnPe(int startup = 1);
  void loadTuplesOnPe(const int startup = 1);
  void copyTupleData();
  void copyTupleDataSN();
  void updateCudaAlchParameters();
  void updateHostCudaAlchFlags();
  void updateKernelCudaAlchFlags();
  void updateHostCudaAlchParameters();
  void updateKernelCudaAlchParameters();
  void updateHostCudaAlchLambdas();
  void updateKernelCudaAlchLambdas();
#ifdef NODEGROUP_FORCE_REGISTER
  void updatePatchRecords();
  void registerPointersToHost();
  void copyHostRegisterToDevice();
  void copyPatchData();
  void copyTupleDataGPU(const int startup);
  void updatePatchOrder(const std::vector<CudaLocalRecord>& data);
#endif  // NODEGROUP_FORCE_REGISTER

  void finishPatchesOnPe();
  void finishPatches();
  void finishReductions();
  std::vector<int>& getBondedPes(void) { return pes; }

  std::vector<PatchRecord>& getPatches() { return patches; }
};

#endif // NAMD_CUDA || NAMD_HIP
#endif // COMPUTEBONDEDCUDA_H