CudaComputeGBISKernel.cu File Reference

#include <cuda.h>
#include <cub/cub.cuh>
#include "CudaUtils.h"
#include "CudaTileListKernel.h"
#include "CudaComputeGBISKernel.h"
#include "ComputeGBIS.inl"
#include "DeviceCUDA.h"

Go to the source code of this file.

Classes

struct  GBISParam< phase >
struct  GBISInput< phase >
struct  GBISResults< phase >
struct  GBISParam< 1 >
struct  GBISInput< 1 >
struct  GBISResults< 1 >
struct  GBISParam< 2 >
struct  GBISInput< 2 >
struct  GBISResults< 2 >
struct  GBISParam< 3 >
struct  GBISInput< 3 >
struct  GBISResults< 3 >

Defines

#define GBIS_CUDA
#define LARGE_FLOAT   (float)(1.0e10)
#define GBISKERNEL_NUM_WARP   4
#define CALL(DOENERGY, DOSLOW)

Functions

__device__ __forceinline__
void 
shuffleNext (float &w)
template<bool doEnergy, bool doSlow>
__device__ __forceinline__
void 
calcGBISPhase (const float r2, const float dx, const float dy, const float dz, const float cutoff2, const GBISParam< 1 > param, const GBISInput< 1 > inp, GBISResults< 1 > &resI, GBISResults< 1 > &resJ, float &energy)
__device__ __forceinline__
void 
writeResult (const int i, const GBISResults< 1 > res, float *out, float4 *forces)
template<bool doEnergy, bool doSlow>
__device__ __forceinline__
void 
calcGBISPhase (const float r2, const float dx, const float dy, const float dz, const float cutoff2, const GBISParam< 2 > param, const GBISInput< 2 > inp, GBISResults< 2 > &resI, GBISResults< 2 > &resJ, float &energyT)
__device__ __forceinline__
void 
writeResult (const int i, const GBISResults< 2 > res, float *out, float4 *forces)
template<bool doEnergy, bool doSlow>
__device__ __forceinline__
void 
calcGBISPhase (const float r2, const float dx, const float dy, const float dz, const float cutoff2, const GBISParam< 3 > param, const GBISInput< 3 > inp, GBISResults< 3 > &resI, GBISResults< 3 > &resJ, float &energy)
__device__ __forceinline__
void 
writeResult (const int i, const GBISResults< 3 > res, float *out, float4 *forces)
template<bool doEnergy, bool doSlow, int phase>
__global__ void __launch_bounds__ (32 *GBISKERNEL_NUM_WARP, 6) GBIS_Kernel(const int numTileLists

Variables

__thread DeviceCUDAdeviceCUDA
__global__ void const TileList
*__restrict__ 
tileLists
__global__ void const TileList
*__restrict__ const int *__restrict__ 
tileJatomStart
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__ 
patchPairs
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 
lata
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3 
latb
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 
latc
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ 
xyzq
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ const float 
cutoff2
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ const float
const GBISParam< phase > 
param
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ const float
const GBISParam< phase > const
float *__restrict__ 
inp1
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ const float
const GBISParam< phase > const
float *__restrict__ const
float *__restrict__ 
inp2
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ const float
const GBISParam< phase > const
float *__restrict__ const
float *__restrict__ const
float *__restrict__ 
inp3
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ const float
const GBISParam< phase > const
float *__restrict__ const
float *__restrict__ const
float *__restrict__ float
*__restrict__ 
out
__global__ void const TileList
*__restrict__ const int *__restrict__
const PatchPairRecord *__restrict__
const float3 const float3
const float3 const float4
*__restrict__ const float
const GBISParam< phase > const
float *__restrict__ const
float *__restrict__ const
float *__restrict__ float
*__restrict__ float4 *__restrict__ 
forces


Define Documentation

#define CALL ( DOENERGY,
DOSLOW   ) 

Value:

GBIS_Kernel<DOENERGY, DOSLOW, 2> \
     <<< nblock, nthread, 0, stream >>> \
    (tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), \
      tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), param.r_cut2, \
      param, bornRad, NULL, NULL, dEdaSum, d_forces, tlKernel.getTileListVirialEnergy())

#define GBIS_CUDA

Definition at line 6 of file CudaComputeGBISKernel.cu.

#define GBISKERNEL_NUM_WARP   4

Definition at line 16 of file CudaComputeGBISKernel.cu.

#define LARGE_FLOAT   (float)(1.0e10)

Definition at line 14 of file CudaComputeGBISKernel.cu.


Function Documentation

template<bool doEnergy, bool doSlow, int phase>
__global__ void __launch_bounds__ ( 32 *  GBISKERNEL_NUM_WARP,
 
) const

template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcGBISPhase ( const float  r2,
const float  dx,
const float  dy,
const float  dz,
const float  cutoff2,
const GBISParam< 3 >  param,
const GBISInput< 3 >  inp,
GBISResults< 3 > &  resI,
GBISResults< 3 > &  resJ,
float &  energy 
)

Definition at line 262 of file CudaComputeGBISKernel.cu.

References CalcDH(), f, and param.

00264                                                              {
00265 
00266   if (r2 < cutoff2 && r2 > 0.01f) {
00267     float r_i = rsqrtf(r2);
00268     float r  = r2 * r_i;
00269     float dhij, dhji;
00270     int dij, dji;
00271     CalcDH(r, r2, r_i, param.a_cut, inp.qi,       inp.intRadSJ, dhij, dij);
00272     CalcDH(r, r2, r_i, param.a_cut, inp.intRadJ0, inp.intRadIS, dhji, dji);
00273 
00274     float forceAlpha = r_i*(inp.dHdrPrefixI*dhij + inp.dHdrPrefixJ*dhji);
00275     float tmpx = dx * forceAlpha;
00276     float tmpy = dy * forceAlpha;
00277     float tmpz = dz * forceAlpha;
00278     resI.force.x += tmpx;
00279     resI.force.y += tmpy;
00280     resI.force.z += tmpz;
00281     resJ.force.x -= tmpx;
00282     resJ.force.y -= tmpy;
00283     resJ.force.z -= tmpz;
00284   }
00285 
00286 }

template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcGBISPhase ( const float  r2,
const float  dx,
const float  dy,
const float  dz,
const float  cutoff2,
const GBISParam< 2 >  param,
const GBISInput< 2 >  inp,
GBISResults< 2 > &  resI,
GBISResults< 2 > &  resJ,
float &  energyT 
)

Definition at line 135 of file CudaComputeGBISKernel.cu.

References f, if(), and param.

00137                                                               {
00138 
00139   if (r2 < cutoff2 && r2 > 0.01f) {
00140     float r_i = rsqrtf(r2);
00141     float r  = r2 * r_i;
00142     //float bornRadJ = sh_jBornRad[j];
00143 
00144     //calculate GB energy
00145     float qiqj = inp.qi*inp.qj;
00146     float aiaj = inp.bornRadI*inp.bornRadJ;
00147     float aiaj4 = 4*aiaj;
00148     float expr2aiaj4 = exp(-r2/aiaj4);
00149     float fij = sqrt(r2 + aiaj*expr2aiaj4);
00150     float f_i = 1/fij;
00151     float expkappa = exp(-param.kappa*fij);
00152     float Dij = param.epsilon_p_i - expkappa*param.epsilon_s_i;
00153     float gbEij = qiqj*Dij*f_i;
00154 
00155     //calculate energy derivatives
00156     float ddrfij = r*f_i*(1.f - 0.25f*expr2aiaj4);
00157     float ddrf_i = -ddrfij*f_i*f_i;
00158     float ddrDij = param.kappa*expkappa*ddrfij*param.epsilon_s_i;
00159     float ddrGbEij = qiqj*(ddrDij*f_i+Dij*ddrf_i);
00160 
00161     //NAMD smoothing function
00162     float scale = 1.f;
00163     float ddrScale = 0.f;
00164     float forcedEdr;
00165     if (param.smoothDist > 0.f) {
00166       scale = r2 * param.r_cut_2 - 1.f;
00167       scale *= scale;
00168       ddrScale = r*(r2-param.r_cut2)*param.r_cut_4;
00169       if (doEnergy) energyT += gbEij * scale;
00170       forcedEdr = (ddrGbEij)*scale + (gbEij)*ddrScale;
00171     } else {
00172       if (doEnergy) energyT += gbEij;
00173       forcedEdr = ddrGbEij;
00174     }
00175 
00176     //add dEda
00177     if (doSlow) {
00178       float dEdai = 0.5f*qiqj*f_i*f_i
00179                 *(param.kappa*param.epsilon_s_i*expkappa-Dij*f_i)
00180                 *(aiaj+0.25f*r2)*expr2aiaj4/inp.bornRadI*scale;//0
00181       resI.dEdaSum += dEdai;
00182       float dEdaj = 0.5f*qiqj*f_i*f_i
00183                 *(param.kappa*param.epsilon_s_i*expkappa-Dij*f_i)
00184                 *(aiaj+0.25f*r2)*expr2aiaj4/inp.bornRadJ*scale;//0
00185       resJ.dEdaSum += dEdaj;
00186     }
00187 
00188     forcedEdr *= r_i;
00189     float tmpx = dx*forcedEdr;
00190     float tmpy = dy*forcedEdr;
00191     float tmpz = dz*forcedEdr;
00192     resI.force.x += tmpx;
00193     resI.force.y += tmpy;
00194     resI.force.z += tmpz;
00195     resJ.force.x -= tmpx;
00196     resJ.force.y -= tmpy;
00197     resJ.force.z -= tmpz;
00198   }
00199 
00200   // GB Self Energy
00201   if (doEnergy) {
00202     if (r2 < 0.01f) {
00203       float fij = inp.bornRadI;//inf
00204       float expkappa = exp(-param.kappa*fij);//0
00205       float Dij = param.epsilon_p_i - expkappa*param.epsilon_s_i;
00206       float gbEij = inp.qi*(inp.qi / (-param.scaling) )*Dij/fij;
00207       energyT += 0.5f*gbEij;
00208     } //same atom or within cutoff
00209   }
00210 }

template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcGBISPhase ( const float  r2,
const float  dx,
const float  dy,
const float  dz,
const float  cutoff2,
const GBISParam< 1 >  param,
const GBISInput< 1 >  inp,
GBISResults< 1 > &  resI,
GBISResults< 1 > &  resJ,
float &  energy 
)

Definition at line 66 of file CudaComputeGBISKernel.cu.

References CalcH(), f, and param.

00068                                                              {
00069 
00070   if (r2 < cutoff2 && r2 > 0.01f) {
00071     float r_i = rsqrtf(r2);
00072     float r  = r2 * r_i;
00073     float hij;
00074     int dij;
00075     CalcH(r, r2, r_i, param.a_cut, inp.qi, inp.qj, hij, dij);
00076     resI.psiSum += hij;
00077     float hji;
00078     int dji;
00079     CalcH(r, r2, r_i, param.a_cut, inp.intRad0j, inp.intRadSi, hji, dji);
00080     resJ.psiSum += hji;
00081   }
00082 
00083 }

__device__ __forceinline__ void shuffleNext ( float &  w  ) 

Definition at line 19 of file CudaComputeGBISKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

00019                            {
00020   w = WARP_SHUFFLE(WARP_FULL_MASK, w, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
00021 }

__device__ __forceinline__ void writeResult ( const int  i,
const GBISResults< 3 >  res,
float *  out,
float4 *  forces 
)

Definition at line 288 of file CudaComputeGBISKernel.cu.

References x, y, and z.

00288                                                                                                                {
00289   atomicAdd(&forces[i].x, res.force.x);
00290   atomicAdd(&forces[i].y, res.force.y);
00291   atomicAdd(&forces[i].z, res.force.z);
00292 }

__device__ __forceinline__ void writeResult ( const int  i,
const GBISResults< 2 >  res,
float *  out,
float4 *  forces 
)

Definition at line 212 of file CudaComputeGBISKernel.cu.

References x, y, and z.

00212                                                                                                                {
00213   atomicAdd(&out[i], res.dEdaSum);
00214   atomicAdd(&forces[i].x, res.force.x);
00215   atomicAdd(&forces[i].y, res.force.y);
00216   atomicAdd(&forces[i].z, res.force.z);
00217 }

__device__ __forceinline__ void writeResult ( const int  i,
const GBISResults< 1 >  res,
float *  out,
float4 *  forces 
)

Definition at line 85 of file CudaComputeGBISKernel.cu.

00085                                                                                                                {
00086   atomicAdd(&out[i], res.psiSum);
00087 }


Variable Documentation

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cutoff2

Definition at line 303 of file CudaComputeGBISKernel.cu.

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float const GBISParam<phase> const float* __restrict__ const float* __restrict__ const float* __restrict__ float* __restrict__ float4* __restrict__ forces

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float const GBISParam<phase> const float* __restrict__ inp1

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float const GBISParam<phase> const float* __restrict__ const float* __restrict__ inp2

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float const GBISParam<phase> const float* __restrict__ const float* __restrict__ const float* __restrict__ inp3

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 lata

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 latb

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 latc

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float const GBISParam<phase> const float* __restrict__ const float* __restrict__ const float* __restrict__ float* __restrict__ out

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float const GBISParam<phase> param

Definition at line 303 of file CudaComputeGBISKernel.cu.

Referenced by calcGBISPhase(), compress_molecule_info(), GBISInput< 2 >::initQi(), and Molecule::Molecule().

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ patchPairs

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ tileJatomStart

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ tileLists

Definition at line 303 of file CudaComputeGBISKernel.cu.

__global__ void const TileList* __restrict__ const int* __restrict__ const PatchPairRecord* __restrict__ const float3 const float3 const float3 const float4* __restrict__ xyzq

Definition at line 303 of file CudaComputeGBISKernel.cu.

Referenced by doStreaming().


Generated on Fri Sep 22 01:17:15 2017 for NAMD by  doxygen 1.4.7