namd/doxygen/ComputeNonbondedCUDAKernel_8h_source.html

 #if defined(NAMD_CUDA) || defined(NAMD_HIP)

 #include "HipDefines.h"

 // Floating-point type used by the cuda_bind_GBIS_psiSum / cuda_bind_GBIS_dEdaSum
 // interfaces declared in this header.
 // NOTE: this typedef is also defined in other files, so every copy must stay
 // in agreement with this one.

 typedef float GBReal;


 // Error-check hook; `msg` is a caller-supplied C string.
 // Only declared here (the definition lives outside this header).
 // NOTE(review): presumably reports/aborts on a pending CUDA error and uses
 // msg to identify the call site -- confirm against the definition.
 void cuda_errcheck(const char *msg);


 // Number of warps per thread block for the non-bonded CUDA kernel.
 // NOTE(review): the kernel launch is not visible in this header; presumably
 // the block size is NUM_WARP * warp size -- confirm at the launch site.

 #define NUM_WARP 4


 // Exclusion mask: a set bit (1) means the corresponding atom pair is excluded.
 // Stored as 32 unsigned int words.
 // NOTE(review): presumably one word per row of a 32x32 atom tile -- confirm
 // against the kernel that indexes excl[].

 struct exclmask {

   unsigned int excl[32];

 };


 // Describes one pair of patches (patch1, patch2) handed to the non-bonded
 // kernel via cuda_bind_patch_pairs(). Declared with 16-byte alignment.
 // The struct's size in 4-byte words is exposed as PATCH_PAIR_SIZE, so any
 // change to the member list changes that constant as well.
 struct __align__(16) patch_pair {

   float3 offset;         // NOTE(review): presumably the image/lattice shift applied between the two patches -- confirm

   int patch1_start;      // Coordinate/force start for this patch

   int patch1_size;       // Size of the patch

   int patch2_start;      // Same as patch1_start, for the second patch

   int patch2_size;       // Same as patch1_size, for the second patch

   int patch1_ind;        // Patch index

   int patch2_ind;        // Patch index of the second patch

   int patch1_num_pairs;  // Number of pairs that involve this patch

   int patch2_num_pairs;  // Number of pairs that involve the second patch

   // The two views below share the same storage: patch_done[] overlays
   // plist_start/plist_size, so writing one view clobbers the other.
   union {

     bool patch_done[2];      // After-GPU-computation shared memory temporary storage

     struct {

       int plist_start;       // Pair list start

       int plist_size;        // Pair list size

     };

   };

   int exclmask_start;    // Exclusion mask start

   int patch1_free_size;  // Size of the free atoms in patch

   int patch2_free_size;  // Size of the free atoms in patch

 // Explicit padding members are currently disabled:
 //  int pad1, pad2;

 };


 // Size of one patch_pair measured in 4-byte units (sizeof(patch_pair) / 4).
 #define PATCH_PAIR_SIZE (sizeof(patch_pair)/4)


 // Per-atom coordinate record: a position vector plus a charge value.
 // Declared with 16-byte alignment; the size must stay a multiple of 16 bytes.
 struct __align__(16) atom {  // must be multiple of 16!

   float3 position;  // x, y, z components (copied member-wise by COPY_ATOM)

   float charge;

 };


 // Per-atom parameter record passed to cuda_bind_atom_params().
 // Declared with 16-byte alignment; the size must stay a multiple of 16 bytes.
 struct __align__(16) atom_param {  // must be multiple of 16!

   int vdw_type;      // NOTE(review): presumably an index into the table bound by cuda_bind_lj_table -- confirm

   int index;

   int excl_index;    // NOTE(review): presumably a bit offset into the exclusion table (see MAX_EXCLUSIONS) -- confirm

   int excl_maxdiff;  // maxdiff == 0 -> only excluded from self

 };


 // Copies position.x, position.y, position.z and charge from SOURCE to DEST.
 // DEST must be a modifiable lvalue expression; both arguments are evaluated
 // four times, so they must not have side effects.
 // Arguments are parenthesized so that expressions such as *ptr or arr[i]
 // bind correctly before the member access.
 // The body is a plain brace block (not do { } while (0)) because the
 // COPY_ATOM_TO_SHARED / COPY_ATOM_FROM_SHARED macros in this header invoke
 // it without a trailing semicolon.
 #define COPY_ATOM( DEST, SOURCE ) { \
   (DEST).position.x = (SOURCE).position.x; \
   (DEST).position.y = (SOURCE).position.y; \
   (DEST).position.z = (SOURCE).position.z; \
   (DEST).charge = (SOURCE).charge; \
   }


 // Copies every member of an atom_param-shaped record (vdw_type, index,
 // excl_index, excl_maxdiff) from SOURCE to DEST.
 // The previous version copied sqrt_epsilon / half_sigma, which are not
 // members of atom_param as declared in this header; vdw_type is copied
 // instead so the macro matches the struct.
 // DEST must be a modifiable lvalue expression; both arguments are evaluated
 // several times, so they must not have side effects. Arguments are
 // parenthesized so expressions such as *ptr or arr[i] bind correctly.
 // The body is a plain brace block (not do { } while (0)) because the
 // COPY_ATOM_TO_SHARED / COPY_ATOM_FROM_SHARED macros in this header invoke
 // it without a trailing semicolon.
 #define COPY_PARAM( DEST, SOURCE ) { \
   (DEST).vdw_type = (SOURCE).vdw_type; \
   (DEST).index = (SOURCE).index; \
   (DEST).excl_index = (SOURCE).excl_index; \
   (DEST).excl_maxdiff = (SOURCE).excl_maxdiff; \
   }


 // Fills SHARED from two separate records: the atom members of ATOM (via
 // COPY_ATOM) followed by the parameter members of PARAM (via COPY_PARAM).
 // SHARED must therefore expose the members written by both helper macros.
 // Each continuation backslash is immediately followed by the next body
 // line; a blank line after a backslash would end the macro definition early.
 #define COPY_ATOM_TO_SHARED( ATOM, PARAM, SHARED ) { \
     COPY_ATOM( SHARED, ATOM ) \
     COPY_PARAM( SHARED, PARAM ) \
   }


 // Inverse of COPY_ATOM_TO_SHARED: reads SHARED and writes its atom members
 // into ATOM (via COPY_ATOM) and its parameter members into PARAM (via
 // COPY_PARAM).
 // Each continuation backslash is immediately followed by the next body
 // line; a blank line after a backslash would end the macro definition early.
 #define COPY_ATOM_FROM_SHARED( ATOM, PARAM, SHARED ) { \
     COPY_ATOM( ATOM, SHARED ) \
     COPY_PARAM( PARAM, SHARED ) \
   }


 // Sizing of the exclusion bit table, counted in 32-bit ints:
 //   2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
 //   2^27 ints * 2^5 bits = 2^32 bits = range of unsigned int excl_index
 // MAX_EXCLUSIONS uses the larger (2^27 ints) limit.

 #define MAX_EXCLUSIONS (1<<27)

 // 2048 ints = 8 KB, matching the cache size noted below.
 #define MAX_CONST_EXCLUSIONS 2048  // cache size is 8k


 // Hands the exclusion table `t` and a count `n` to the kernel module.
 // NOTE(review): presumably n is the number of unsigned int words in t and is
 // bounded by MAX_EXCLUSIONS -- confirm against the definition.
 void cuda_bind_exclusions(const unsigned int *t, int n);


 // Hands the Lennard-Jones table `t` (float2 entries) and its size to the
 // kernel module.
 // NOTE(review): whether _lj_table_size is the full entry count or the table
 // dimension is not visible here -- confirm against the definition.
 void cuda_bind_lj_table(const float2 *t, int _lj_table_size);


 // Previously tried values, kept for reference:
 // #define FORCE_TABLE_SIZE 512

 // maximum size of CUDA array 1D texture reference is 2^13 = 8192

 // #define FORCE_TABLE_SIZE 8192

 // Despite the documented 8192 limit, older devices can only handle 4096,
 // so that is the value used.

 #define FORCE_TABLE_SIZE 4096


 // Hands two float4 tables, `t` and `et`, to the kernel module.
 // NOTE(review): presumably t is the force table and et the energy table,
 // each with FORCE_TABLE_SIZE entries -- confirm against the definition.
 void cuda_bind_force_table(const float4 *t, const float4 *et);


 // One-time initialization entry point for the kernel module; takes no
 // arguments and returns nothing.
 void cuda_init();


 // Hands the patch-pair array and the associated sizes to the kernel module.
 //   h_patch_pairs  array of patch_pair records
 //   npatch_pairs   number of entries in h_patch_pairs
 //   npatches       number of patches
 //   natoms         number of atoms
 //   plist_len      pair-list length
 //   nexclmask      number of exclusion masks
 // The last two parameter names follow the order used by the definition in
 // ComputeNonbondedCUDAKernel.cu (plist_len first, then nexclmask); the
 // parameter types and positions are unchanged, so existing callers are
 // unaffected.
 void cuda_bind_patch_pairs(patch_pair *h_patch_pairs, int npatch_pairs,
                            int npatches, int natoms, int plist_len, int nexclmask);


 // Hands the atom_param array `t` to the kernel module.
 void cuda_bind_atom_params(const atom_param *t);

 // Hands an int array of van der Waals types `t` to the kernel module.
 // NOTE(review): presumably one entry per atom -- confirm against the definition.
 void cuda_bind_vdw_types(const int *t);


 // Hands the atom array `a` (position + charge records) to the kernel module.
 void cuda_bind_atoms(const atom *a);


 // Hands two float4 force buffers to the kernel module.
 // NOTE(review): presumably f receives the regular forces and f_slow the
 // slow (doSlow) forces -- confirm against the definition.
 void cuda_bind_forces(float4 *f, float4 *f_slow);


 // Hands the virial buffer `v` and two int buffers, `queue` and `blockorder`,
 // to the kernel module.
 // NOTE(review): the roles of queue/blockorder are not visible here; they
 // look related to the doStreaming/saveOrder flags of cuda_nonbonded_forces
 // -- confirm against the definition.
 void cuda_bind_virials(float *v, int *queue, int *blockorder);


 // Entry point that runs the non-bonded force computation on stream `strm`.
 //   lata, latb, latc   three float3 lattice vectors
 //   cutoff2            cutoff value (NOTE(review): presumably squared distance -- confirm)
 //   plcutoff2          pair-list cutoff value (same caveat)
 //   cbegin, ccount     first entry and number of entries to process
 //   ctotal             total number of entries
 //   doSlow, doEnergy, usePairlists, savePairlists, doStreaming, saveOrder
 //                      int flags selecting optional behavior
 //   strm               stream, passed by reference
 void cuda_nonbonded_forces(
   float3 lata, float3 latb, float3 latc,
   float cutoff2, float plcutoff2,
   int cbegin, int ccount, int ctotal,
   int doSlow, int doEnergy,
   int usePairlists, int savePairlists,
   int doStreaming, int saveOrder,
   cudaStream_t &strm);


 //GBIS methods

 // GBIS entry point "P1", issued on stream `strm`.
 //   cbegin, ccount     first entry and number of entries of the "c" range
 //   pbegin, pcount     first entry and number of entries of the "p" range
 //   a_cut, rho_0       scalar model parameters
 //   lata, latb, latc   three float3 lattice vectors
 //   strm               stream, passed by reference
 // NOTE(review): "P1" presumably denotes the first GBIS phase -- confirm.
 void cuda_GBIS_P1(
   int cbegin, int ccount,
   int pbegin, int pcount,
   float a_cut, float rho_0,
   float3 lata, float3 latb, float3 latc,
   cudaStream_t &strm);

 // GBIS entry point "P2", issued on stream `strm`.
 //   cbegin, ccount        first entry and number of entries of the "c" range
 //   pbegin, pcount        first entry and number of entries of the "p" range
 //   a_cut, r_cut          cutoff parameters
 //   scaling, kappa, smoothDist, epsilon_p, epsilon_s
 //                         scalar model parameters
 //   lata, latb, latc      three float3 lattice vectors
 //   doEnergy, doFullElec  int flags selecting optional behavior
 //   strm                  stream, passed by reference
 // NOTE(review): "P2" presumably denotes the second GBIS phase -- confirm.
 void cuda_GBIS_P2(
   int cbegin, int ccount,
   int pbegin, int pcount,
   float a_cut, float r_cut,
   float scaling, float kappa, float smoothDist,
   float epsilon_p, float epsilon_s,
   float3 lata, float3 latb, float3 latc,
   int doEnergy, int doFullElec,
   cudaStream_t &strm);

 // GBIS entry point "P3", issued on stream `strm`.
 //   cbegin, ccount         first entry and number of entries of the "c" range
 //   pbegin, pcount         first entry and number of entries of the "p" range
 //   a_cut, rho_0, scaling  scalar model parameters
 //   lata, latb, latc       three float3 lattice vectors
 //   strm                   stream, passed by reference
 // NOTE(review): "P3" presumably denotes the third GBIS phase -- confirm.
 void cuda_GBIS_P3(
   int cbegin, int ccount,
   int pbegin, int pcount,
   float a_cut, float rho_0, float scaling,
   float3 lata, float3 latb, float3 latc,
   cudaStream_t &strm);


 // Buffer-binding entry points for the GBIS code path. Each one hands one or
 // two caller-owned buffers to the kernel module.
 // NOTE(review): the trailing "H" in the parameter names presumably marks
 // host-side buffers -- confirm against the definitions.

 // Two float buffers: intRad0H and intRadSH.
 void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH);

 // One float buffer for the GBIS energy.
 void cuda_bind_GBIS_energy(float *energy_gbis);

 // One GBReal buffer (psiSumH).
 void cuda_bind_GBIS_psiSum(GBReal *psiSumH);

 // One float buffer (bornRadH).
 void cuda_bind_GBIS_bornRad(float *bornRadH);

 // One GBReal buffer (dEdaSumH).
 void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH);

 // One float buffer (dHdrPrefixH).
 void cuda_bind_GBIS_dHdrPrefix(float *dHdrPrefixH);


 //end GBIS methods


 // Status query returning an int.
 // NOTE(review): presumably non-zero once the work queued on the module's
 // stream has completed -- confirm against the definition.
 int cuda_stream_finished();


 #endif  // NAMD_CUDA || NAMD_HIP


cuda_bind_force_table
void cuda_bind_force_table(const float4 *t, const float4 *et)
Definition: ComputeNonbondedCUDAKernel.cu:90

exclmask
Definition: ComputeNonbondedCUDAKernel.h:12

cuda_bind_forces
void cuda_bind_forces(float4 *f, float4 *f_slow)
Definition: ComputeNonbondedCUDAKernel.cu:371

float2
Definition: PmeSolver.C:4

cuda_stream_finished
int cuda_stream_finished()

cuda_bind_exclusions
void cuda_bind_exclusions(const unsigned int *t, int n)
Definition: ComputeNonbondedCUDAKernel.cu:22

cuda_bind_atoms
void cuda_bind_atoms(const atom *a)
Definition: ComputeNonbondedCUDAKernel.cu:364

HipDefines.h

lata
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 lata
Definition: CudaComputeNonbondedKernel.cu:254

bornRadH
static __thread float * bornRadH
Definition: ComputeNonbondedCUDA.C:865

dHdrPrefixH
static __thread float * dHdrPrefixH
Definition: ComputeNonbondedCUDA.C:867

plist_size
static __thread int plist_size
Definition: ComputeNonbondedCUDAKernel.cu:149

cuda_bind_GBIS_energy
void cuda_bind_GBIS_energy(float *e)
Definition: ComputeNonbondedCUDAKernel.cu:389

cuda_bind_GBIS_dEdaSum
void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH)
Definition: ComputeNonbondedCUDAKernel.cu:412

latb
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 latb
Definition: CudaComputeNonbondedKernel.cu:254

plcutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float plcutoff2
Definition: CudaComputeNonbondedKernel.cu:254

cuda_nonbonded_forces
void cuda_nonbonded_forces(float3 lata, float3 latb, float3 latc, float cutoff2, float plcutoff2, int cbegin, int ccount, int ctotal, int doSlow, int doEnergy, int usePairlists, int savePairlists, int doStreaming, int saveOrder, cudaStream_t &strm)
Definition: ComputeNonbondedCUDAKernel.cu:499

intRadSH
static __thread float * intRadSH
Definition: ComputeNonbondedCUDA.C:863

cuda_bind_patch_pairs
void cuda_bind_patch_pairs(patch_pair *h_patch_pairs, int npatch_pairs, int npatches, int natoms, int plist_len, int nexclmask)
Definition: ComputeNonbondedCUDAKernel.cu:293

cuda_bind_vdw_types
void cuda_bind_vdw_types(const int *t)
Definition: ComputeNonbondedCUDAKernel.cu:358

cuda_bind_lj_table
void cuda_bind_lj_table(const float2 *t, int _lj_table_size)
Definition: ComputeNonbondedCUDAKernel.cu:59

exclmask::excl
unsigned int excl[32]
Definition: ComputeNonbondedCUDAKernel.h:13

cuda_bind_GBIS_dHdrPrefix
void cuda_bind_GBIS_dHdrPrefix(float *dHdrPrefixH)
Definition: ComputeNonbondedCUDAKernel.cu:417

cuda_bind_GBIS_bornRad
void cuda_bind_GBIS_bornRad(float *bornRadH)
Definition: ComputeNonbondedCUDAKernel.cu:406

cuda_bind_virials
void cuda_bind_virials(float *v, int *queue, int *blockorder)
Definition: ComputeNonbondedCUDAKernel.cu:378

cuda_bind_GBIS_psiSum
void cuda_bind_GBIS_psiSum(GBReal *psiSumH)
Definition: ComputeNonbondedCUDAKernel.cu:401

cuda_GBIS_P3
void cuda_GBIS_P3(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float scaling, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
Definition: ComputeNonbondedCUDAKernel.cu:675

cuda_GBIS_P1
void cuda_GBIS_P1(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
Definition: ComputeNonbondedCUDAKernel.cu:568

cuda_init
void cuda_init()
Definition: ComputeNonbondedCUDAKernel.cu:203

cutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
Definition: CudaComputeNonbondedKernel.cu:254

cuda_errcheck
void cuda_errcheck(const char *msg)
Definition: ComputeNonbondedCUDA.C:45

cuda_bind_GBIS_intRad
void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH)
Definition: ComputeNonbondedCUDAKernel.cu:393

energy_gbis
static __thread float * energy_gbis
Definition: ComputeNonbondedCUDA.C:857

__align__
struct __align__(16) patch_pair
Definition: ComputeNonbondedCUDAKernel.h:16

charge
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
Definition: ComputeNonbondedBase2.h:179

cuda_GBIS_P2
void cuda_GBIS_P2(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float r_cut, float scaling, float kappa, float smoothDist, float epsilon_p, float epsilon_s, float3 lata, float3 latb, float3 latc, int doEnergy, int doFullElec, cudaStream_t &strm)
Definition: ComputeNonbondedCUDAKernel.cu:613

latc
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 latc
Definition: CudaComputeNonbondedKernel.cu:254

cuda_bind_atom_params
void cuda_bind_atom_params(const atom_param *t)
Definition: ComputeNonbondedCUDAKernel.cu:352

intRad0H
static __thread float * intRad0H
Definition: ComputeNonbondedCUDA.C:861

GBReal
float GBReal
Definition: ComputeGBIS.inl:17