ComputeNonbondedCUDAKernelBase.h File Reference

Go to the source code of this file.

Defines

#define NAME(X)   SLOWNAME( X )
#define SLOW(X)
#define SLOWNAME(X)   ENERGYNAME( X )
#define ENERGY(X)
#define ENERGYNAME(X)   PAIRLISTNAME( X )
#define GENPAIRLIST(X)
#define USEPAIRLIST(X)   X
#define PAIRLISTNAME(X)   LAST( X )
#define LAST(X)   X
#define SH_BUF_SIZE   NUM_WARP*WARPSIZE*3*sizeof(float)

Functions

__device__ static __forceinline__
void NAME() 
finish_forces_virials (const int start, const int size, const int patch_ind, const atom *atoms, volatile float *sh_buf, volatile float *sh_slow_buf, volatile float *sh_vcc, float4 *tmpforces, float4 *slow_tmpforces, float4 *forces, float4 *slow_forces, float *tmpvirials, float *slow_tmpvirials, float *virials, float *slow_virials)
template<typename T, int n, int sh_buf_size>
__device__ __forceinline__
void NAME() 
reduceVariables (volatile T *sh_buf, T *dst, T val1, T val2, T val3)
static __global__ void GENPAIRLIST (__launch_bounds__(NUM_WARP *WARPSIZE, 10)) USEPAIRLIST(__launch_bounds__(NUM_WARP *WARPSIZE

Variables

static __global__ void const
patch_pair * 
patch_pairs
static __global__ void const
patch_pair const atom * 
atoms
static __global__ void const
patch_pair const atom const
atom_param * 
atom_params
static __global__ void const
patch_pair const atom const
atom_param const int * 
vdw_types
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int * 
plist
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 * 
tmpforces
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 * 
slow_tmpforces
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 * 
forces
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4 * 
slow_forces
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float * 
tmpvirials
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float * 
slow_tmpvirials
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float * 
virials
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float * 
slow_virials
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int * 
global_counters
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int * 
force_ready_queue
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int * 
overflow_exclusions
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int 
npatches
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int 
block_begin
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int 
total_block_count
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int * 
block_order
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int exclmask
exclmasks
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int exclmask const int 
lj_table_size
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int exclmask const int
const float3 
lata
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int exclmask const int
const float3 const float3 
latb
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int exclmask const int
const float3 const float3
const float3 
latc
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int exclmask const int
const float3 const float3
const float3 const float 
cutoff2
static __global__ void const
patch_pair const atom const
atom_param const int unsigned
int float4 float4 float4 float4
float float float float unsigned
int int const unsigned int
const int const int const
int int exclmask const int
const float3 const float3
const float3 const float const
float 
plcutoff2


Define Documentation

#define ENERGY (  ) 

Definition at line 22 of file ComputeNonbondedCUDAKernelBase.h.

#define ENERGYNAME (  )     PAIRLISTNAME( X )

Definition at line 23 of file ComputeNonbondedCUDAKernelBase.h.

#define GENPAIRLIST (  ) 

Definition at line 34 of file ComputeNonbondedCUDAKernelBase.h.

#define LAST (  )     X

Definition at line 39 of file ComputeNonbondedCUDAKernelBase.h.

#define NAME (  )     SLOWNAME( X )

Definition at line 4 of file ComputeNonbondedCUDAKernelBase.h.

#define PAIRLISTNAME (  )     LAST( X )

Definition at line 36 of file ComputeNonbondedCUDAKernelBase.h.

#define SH_BUF_SIZE   NUM_WARP*WARPSIZE*3*sizeof(float)

#define SLOW (  ) 

Definition at line 12 of file ComputeNonbondedCUDAKernelBase.h.

#define SLOWNAME (  )     ENERGYNAME( X )

Definition at line 13 of file ComputeNonbondedCUDAKernelBase.h.

#define USEPAIRLIST (  )     X

Definition at line 35 of file ComputeNonbondedCUDAKernelBase.h.


Function Documentation

__device__ static __forceinline__ void NAME() finish_forces_virials ( const int  start,
const int  size,
const int  patch_ind,
const atom *  atoms,
volatile float *  sh_buf,
volatile float *  sh_slow_buf,
volatile float *  sh_vcc,
float4 *  tmpforces,
float4 *  slow_tmpforces,
float4 *  forces,
float4 *  slow_forces,
float *  tmpvirials,
float *  slow_tmpvirials,
float *  virials,
float *  slow_virials 
) [static]

static __global__ void GENPAIRLIST ( __launch_bounds__(NUM_WARP *WARPSIZE, 10)   )  [static]

template<typename T, int n, int sh_buf_size>
__device__ __forceinline__ void NAME() reduceVariables ( volatile T *  sh_buf,
T *  dst,
val1,
val2,
val3 
)

Definition at line 92 of file ComputeNonbondedCUDAKernelBase.h.

References cuda_static_assert, if(), NUM_WARP, WARP_FULL_MASK, WARP_SHUFFLE_XOR, and WARPSIZE.

00092                                                                                {
00093         // Sanity check
00094         cuda_static_assert(n > 0 && n <= NUM_WARP);
00095 #ifdef KEPLER_SHUFFLE
00096   // Requires NUM_WARP*n*sizeof(float) shared memory
00097   cuda_static_assert(sh_buf_size >= NUM_WARP*n*sizeof(T));
00098   // Reduce within warp
00099   for (int i=WARPSIZE/2;i >= 1;i/=2) {
00100     if (n >= 1) val1 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, val1, i, WARPSIZE);
00101     if (n >= 2) val2 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, val2, i, WARPSIZE);
00102     if (n >= 3) val3 += WARP_SHUFFLE_XOR(WARP_FULL_MASK, val3, i, WARPSIZE);
00103   }
00104   if (threadIdx.x == 0) {
00105     if (n >= 1) sh_buf[threadIdx.y*n + 0] = val1;
00106     if (n >= 2) sh_buf[threadIdx.y*n + 1] = val2;
00107     if (n >= 3) sh_buf[threadIdx.y*n + 2] = val3;
00108   }
00109   BLOCK_SYNC;
00110   if (threadIdx.x < n && threadIdx.y == 0) {
00111     T finalval = (T)0;
00112 #pragma unroll
00113     for (int i=0;i < NUM_WARP;++i) {
00114       finalval += sh_buf[i*n + threadIdx.x];
00115     }
00116     atomicAdd(&dst[threadIdx.x], finalval);
00117   }
00118 #else // ! KEPLER_SHUFFLE
00119   // Requires NUM_WARP*n*WARPSIZE*sizeof(float) shared memory
00120   cuda_static_assert(sh_buf_size >= NUM_WARP*n*WARPSIZE*sizeof(T));
00121   volatile T* sh_bufy = &sh_buf[threadIdx.y*n*WARPSIZE];
00122   if (n >= 1) sh_bufy[threadIdx.x*n + 0] = val1;
00123   if (n >= 2) sh_bufy[threadIdx.x*n + 1] = val2;
00124   if (n >= 3) sh_bufy[threadIdx.x*n + 2] = val3;
00125   // Reducue within warp
00126   for (int d=1;d < WARPSIZE;d*=2) {
00127     int pos = threadIdx.x + d;
00128     T val1t, val2t, val3t;
00129     if (n >= 1) val1t = (pos < WARPSIZE) ? sh_bufy[pos*n + 0] : (T)0;
00130     if (n >= 2) val2t = (pos < WARPSIZE) ? sh_bufy[pos*n + 1] : (T)0;
00131     if (n >= 3) val3t = (pos < WARPSIZE) ? sh_bufy[pos*n + 2] : (T)0;
00132     if (n >= 1) sh_bufy[threadIdx.x*n + 0] += val1t;
00133     if (n >= 2) sh_bufy[threadIdx.x*n + 1] += val2t;
00134     if (n >= 3) sh_bufy[threadIdx.x*n + 2] += val3t;
00135   }
00136   BLOCK_SYNC;
00137   if (threadIdx.x < n && threadIdx.y == 0) {
00138     T finalval = (T)0;
00139 #pragma unroll
00140     for (int i=0;i < NUM_WARP;++i) {
00141       finalval += sh_buf[i*n*WARPSIZE + threadIdx.x];
00142     }
00143     atomicAdd(&dst[threadIdx.x], finalval);
00144   }
00145 #endif // KEPLER_SHUFFLE
00146 }


Variable Documentation

__global__ void const patch_pair const atom const atom_param* atom_params

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom* atoms

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int block_begin

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int* block_order

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int exclmask const int const float3 const float3 const float3 const float cutoff2

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

Referenced by SELF().

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int exclmask* exclmasks

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int* force_ready_queue

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4* forces

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int* global_counters

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int exclmask const int const float3 lata

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int exclmask const int const float3 const float3 latb

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int exclmask const int const float3 const float3 const float3 latc

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int exclmask const int lj_table_size

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int npatches

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

Referenced by ComputeNonbondedCUDA::assignPatches(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::registerPatches(), and WorkDistrib::sortPmePes().

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int* overflow_exclusions

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair* patch_pairs

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int int exclmask const int const float3 const float3 const float3 const float const float plcutoff2

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

Referenced by SELF().

__global__ void const patch_pair const atom const atom_param const int unsigned int* plist

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4* slow_forces

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4* slow_tmpforces

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float* slow_tmpvirials

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float* slow_virials

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4* tmpforces

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float* tmpvirials

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float float unsigned int int const unsigned int const int const int const int total_block_count

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int* vdw_types

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.

__global__ void const patch_pair const atom const atom_param const int unsigned int float4 float4 float4 float4 float float float* virials

Definition at line 157 of file ComputeNonbondedCUDAKernelBase.h.


Generated on Wed Nov 22 01:17:17 2017 for NAMD by  doxygen 1.4.7