namd/doxygen/PatchData_8h_source.html

 #ifndef PATCHDATA_H
 #define PATCHDATA_H
 #if __cplusplus < 201103L
 #undef USING_ATOMICS
 #else
 #define USING_ATOMICS
 #endif

 #ifdef USING_ATOMICS
 #include <atomic>
 #endif
 #include "ComputeCUDAMgr.h"
 #include "PatchData.decl.h"
 #include "NamdTypes.h"
 #include "HomePatch.h"
 #include "Sequencer.h"
 #include "ResizeArray.h"
 #include "CudaUtils.h"
 #include "CudaTileListKernel.h"    // CudaPatchRecord
 #include "CudaTileListKernel.hip.h"
 #include "CudaRecord.h"
 #include "ReductionMgr.h"
 #include "Controller.h"
 #include "TupleTypesCUDA.h"
 #include "ComputeMgr.h"

 #ifdef NAMD_CUDA
 #include <cuda.h>
 #endif
 #ifdef NAMD_HIP
 #include <hip/hip_runtime.h>
 #endif

 #include "HipDefines.h"

 #ifdef NODEGROUP_FORCE_REGISTER
 #ifdef NAMD_NCCL_ALLREDUCE
 #ifdef NAMD_CUDA
 #include "nccl.h"
 #endif
 #ifdef NAMD_HIP
 #include "rccl.h"
 #endif
 #endif


 class CollectionMaster;
 class Output;
 class IMDOutput;

 // DeviceData holds the CUDAMgr pointers to force data.
 // These are particular to each device.
 //
 // This struct is only declared when NODEGROUP_FORCE_REGISTER is defined (it
 // sits inside that #ifdef). All members are public; the constructor and
 // destructor are declared here and defined elsewhere, so which of the raw
 // pointers below are owned/freed by this struct cannot be told from this
 // header -- TODO confirm against the implementation file.
 // Naming convention used in this struct: "h_" / "d_" prefixes presumably
 // denote host-side and device-side storage respectively -- TODO confirm.
 struct DeviceData{
   // NONBONDED STUFF
   float4              *f_nbond;
   float4              *f_nbond_slow;
   int                  f_nbond_size;   // presumably the size associated with f_nbond -- TODO confirm units (elements vs. bytes)
   CudaPatchRecord     *nbond_precord;
   CudaTileListKernel  *nbond_tkernel;
   cudaStream_t         nbond_stream;
   float4* nb_datoms; // The idea is to fill this after integration
   size_t size_nb_datoms;
   int nb_precord_size;


   // BONDED STUFF
   double      *f_bond;
   double      *f_bond_nbond;
   double      *f_bond_slow;
   int          f_bond_size;
   int          bond_pr_size;   // presumably the size associated with bond_pr -- TODO confirm
   int          bond_pi_size;   // presumably the size associated with bond_pi -- TODO confirm
   int          forceStride;
   PatchRecord *bond_pr;
   int         *bond_pi;
   float4      *b_datoms;

   // SLOW STUFF
   // Each pointer below is followed by an int "...Size" companion field.
   CudaForce *f_slow;
   int        f_slow_size;
   int*       slow_patchPositions;
   int        slow_patchPositionsSize;
   int*       slow_pencilPatchIndex;
   int        slow_pencilPatchIndexSize;
   int*       slow_patchID;
   int        slow_patchIDSize;
   CudaAtom*  s_datoms; // slow atoms that will be built for PME
   int*       s_datoms_partition;

   // Host/device pair of bool arrays (per the h_/d_ prefixes -- TODO confirm
   // what they are indexed by).
   bool*      h_hasPatches;
   bool*      d_hasPatches;

   // ID / device lookup tables (meaning inferred from names only -- TODO confirm).
   int*       d_globalToLocalID;
   int*       d_patchToDeviceMap;

   std::vector<HomePatch*> patches; // Pointers to HomePatches this device owns

   // Mapping data
   // Each record type is kept both as a std::vector (h_*) and as a raw
   // pointer (d_*); presumably the latter is a device copy of the former --
   // TODO confirm.
   std::vector<CudaLocalRecord> h_localPatches;
   CudaLocalRecord* d_localPatches;

   std::vector<CudaPeerRecord> h_peerPatches;
   CudaPeerRecord* d_peerPatches;

   // Atom / patch counts for this device (semantics inferred from names --
   // TODO confirm).
   int numAtomsHome;
   int numPatchesHome;
   int numPatchesHomeAndProxy;

   // Declared only; definitions live outside this header.
   DeviceData();
   ~DeviceData();
 };

 #endif

 // PatchData: class derived from CBase_PatchData (presumably the base class
 // generated into PatchData.decl.h -- TODO confirm). Per the comments below it
 // acts as a node-level holder of shared pointers, flags and reduction objects.
 // All members are public.
 //
 // NOTE(review): the std::atomic members below are compiled whenever
 // NODEGROUP_FORCE_REGISTER is defined, but <atomic> is only included by this
 // header when USING_ATOMICS is defined (i.e. __cplusplus >= 201103L).
 // NOTE(review): std::vector is used here without a direct #include <vector>
 // in this header; it relies on one of the project headers providing it.
 class PatchData : public CBase_PatchData {

   public:

     // Declared only; definitions live outside this header.
     PatchData();
     ~PatchData();
     void setDeviceKernelUpdateCounter();

 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
     // Pressure control
     // (only present in CUDA or HIP builds; "d_" prefix presumably marks
     // device-side storage -- TODO confirm)
     cudaTensor *d_langevinPiston_strainRate;
     cudaTensor *d_langevinPiston_origStrainRate;
     cudaTensor *d_strainRate_old;  // for langevinPistonBarrier no
     cudaTensor *d_positionRescaleFactor;  // for langevinPistonBarrier no
 #endif

     // I need "reduction-like" object here to reduce stuff on a single-node
     // avoiding reduction objects will let me avoid calling  submit() on every ULT
     NodeReduction *reduction;
     // backup reduction values for MC barostat
     NodeReduction *nodeReductionSave;

     // Single node flags are stored here and copied by patches
     Flags flags;

     // Controller pointer for outputting stuff
     Controller *c_out;
     // Lattice from masterPe
     Lattice *lat;
     Tensor  *factor;
     Vector  *origin;

     // Provide global (node level) access to the collection chare
     CollectionMaster *ptrCollectionMaster;
     Output *ptrOutput;
     IMDOutput *imd;
     PDB* pdb;

     // Pointer to script
     ScriptTcl *script;

     // Node lock; judging by its name it presumably serializes printing --
     // TODO confirm against its users. (It is a lock, not a migration flag.)
     CmiNodeLock printlock;
 #ifdef NODEGROUP_FORCE_REGISTER
     // Everything from here to the matching #endif exists only when
     // NODEGROUP_FORCE_REGISTER is defined.

     // SOA register of all device SOA force arrays
     // Each member is a pointer-to-pointer; presumably one inner pointer per
     // device -- TODO confirm.
     // Bonded SOA pointers
     double** h_soa_fb_x;
     double** h_soa_fb_y;
     double** h_soa_fb_z;

     // Nonbonded SOA pointers
     double** h_soa_fn_x;
     double** h_soa_fn_y;
     double** h_soa_fn_z;

     // Slow SOA pointers
     double** h_soa_fs_x;
     double** h_soa_fs_y;
     double** h_soa_fs_z;

     // Position SOA pointers (per the names)
     double** h_soa_pos_x;
     double** h_soa_pos_y;
     double** h_soa_pos_z;

     // Velocity SOA pointers (per the names)
     double** h_soa_vel_x;
     double** h_soa_vel_y;
     double** h_soa_vel_z;

     float**   h_soa_charge;

     // Device Migration
     int**     h_soa_sortOrder;
     int**     h_soa_unsortOrder;
     int**     h_soa_id;
     int**     h_soa_vdwType;
     double3** h_soa_patchCenter;
     int4**    h_soa_migrationDestination;
     int**    h_soa_sortSoluteIndex;

     FullAtom** h_atomdata_AoS;
     CudaLocalRecord** h_peer_record;

     int**     h_soa_partition;

     // we do need a device queue for registering work done
     // work queue for each device
     bool**         h_devHasForces;
     unsigned int** d_queues;
     unsigned int*  d_queueCounters;
     std::vector<int> migrationFlagPerDevice; // Migration flag for each device
     std::vector<int> tupleReallocationFlagPerDevice; // Tuple reallocation flag for each device (per the name -- TODO confirm)
     std::vector<int> atomReallocationFlagPerDevice; // Atom reallocation flag for each device (per the name -- TODO confirm)

     // Atomic counters, one per tuple kind; by name presumably track the
     // maximum tuple counts seen -- TODO confirm how they are updated.
     std::atomic<int> maxNumBonds;
     std::atomic<int> maxNumAngles;
     std::atomic<int> maxNumDihedrals;
     std::atomic<int> maxNumImpropers;
     std::atomic<int> maxNumModifiedExclusions;
     std::atomic<int> maxNumExclusions;
     std::atomic<int> maxNumCrossterms;
     std::vector<int> devicePatchMapFlag; // Device Patch Map Creation flag per PE
 #ifdef NAMD_NCCL_ALLREDUCE
     // Only present when NAMD_NCCL_ALLREDUCE is defined.
     ncclUniqueId  ncclId;
 #endif

      // Tuple staging data and per-tuple counts/offsets (types declared in
      // TupleTypesCUDA.h per the Doxygen index; semantics not visible here).
      TupleDataStagePeer h_tupleDataStage;
      TupleIntArraysPeer h_tupleCount;
      TupleIntArraysPeer h_tupleOffset;

     // Pointer-to-pointer lists of the bonded / nonbonded compute objects
     // (presumably one entry per device -- TODO confirm).
     ComputeBondedCUDA     **cudaBondedList;
     CudaComputeNonbonded  **cudaNonbondedList;

     std::vector<DeviceData> devData; // DeviceData records (presumably one per device -- TODO confirm)
     ComputeMgr *master_mgr;
     CmiNodeLock nodeLock;
     std::vector<int> cbStore;
   std::atomic<int> suspendCounter;

     std::atomic<int> updateCounter;
 #endif
 };

 #endif // PATCHDATA_H
CudaUtils.h

Controller
Definition: Controller.h:106

PDB
Definition: PDB.h:36

Sequencer.h

ComputeCUDAMgr.h

IMDOutput
Definition: IMDOutput.h:20

HomePatch.h

PatchData::printlock
CmiNodeLock printlock
Definition: PatchData.h:157

Vector
Definition: Vector.h:72

HipDefines.h

ResizeArray.h

CudaForce
Definition: CudaRecord.h:62

PatchData::c_out
Controller * c_out
Definition: PatchData.h:141

CudaTileListKernel
Definition: CudaTileListKernel.h:119

NodeReduction
Definition: ReductionMgr.h:364

ComputeMgr.h

PatchRecord
Definition: CudaRecord.h:15

CudaLocalRecord
Definition: CudaRecord.h:35

PatchData::imd
IMDOutput * imd
Definition: PatchData.h:150

CudaPatchRecord
Definition: CudaTileListKernel.h:49

PatchData::reduction
NodeReduction * reduction
Definition: PatchData.h:133

PatchData
Definition: PatchData.h:115

PatchData::lat
Lattice * lat
Definition: PatchData.h:143

PatchData::d_positionRescaleFactor
cudaTensor * d_positionRescaleFactor
Definition: PatchData.h:128

PatchData::flags
Flags flags
Definition: PatchData.h:138

PatchData::origin
Vector * origin
Definition: PatchData.h:145

TupleTypesCUDA.h

PatchData::PatchData
PatchData()
Definition: PatchData.C:49

TupleDataStagePeer
Definition: TupleTypesCUDA.h:259

ScriptTcl
Definition: ScriptTcl.h:22

Output
Definition: Output.h:35

Flags
Definition: PatchTypes.h:13

CudaComputeNonbonded
Definition: CudaComputeNonbonded.h:31

PatchData::ptrCollectionMaster
CollectionMaster * ptrCollectionMaster
Definition: PatchData.h:148

PatchData::script
ScriptTcl * script
Definition: PatchData.h:154

PatchData::d_langevinPiston_strainRate
cudaTensor * d_langevinPiston_strainRate
Definition: PatchData.h:125

ComputeMgr
Definition: ComputeMgr.h:62

PatchData::d_langevinPiston_origStrainRate
cudaTensor * d_langevinPiston_origStrainRate
Definition: PatchData.h:126

ReductionMgr.h

CudaAtom
Definition: CudaRecord.h:58

PatchData::pdb
PDB * pdb
Definition: PatchData.h:151

TupleIntArraysPeer
Definition: TupleTypesCUDA.h:229

CudaTileListKernel.h

FullAtom
Definition: NamdTypes.h:200

PatchData::d_strainRate_old
cudaTensor * d_strainRate_old
Definition: PatchData.h:127

PatchData::ptrOutput
Output * ptrOutput
Definition: PatchData.h:149

PatchData::nodeReductionSave
NodeReduction * nodeReductionSave
Definition: PatchData.h:135

CudaPeerRecord
Definition: CudaRecord.h:21

PatchData::setDeviceKernelUpdateCounter
void setDeviceKernelUpdateCounter()
Definition: PatchData.C:67

CudaTileListKernel.hip.h

NamdTypes.h

Controller.h

Tensor
Definition: Tensor.h:15

PatchData::factor
Tensor * factor
Definition: PatchData.h:144

cudaTensor
Definition: CudaUtils.h:75

PatchData::~PatchData
~PatchData()
Definition: PatchData.C:73

Lattice
Definition: Lattice.h:17

CollectionMaster
Definition: CollectionMaster.h:43

CudaRecord.h