CudaComputeNonbondedKernel.cu File Reference

#include <cuda.h>
#include <cub/cub.cuh>
#include "CudaComputeNonbondedKernel.h"
#include "CudaTileListKernel.h"
#include "DeviceCUDA.h"

Go to the source code of this file.

Defines

#define OVERALLOC   1.2f
#define MAX_CONST_EXCLUSIONS   2048
#define NONBONDKERNEL_NUM_WARP   4
#define LARGE_FLOAT   (float)(1.0e10)
#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32
#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32
#define REDUCEGBISENERGYKERNEL_NUM_WARP   32
#define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING)

Functions

void NAMD_die (const char *)
template<bool doEnergy, bool doSlow>
__device__ __forceinline__
void 
calcForceEnergy (const float r2, const float qi, const float qj, const float dx, const float dy, const float dz, const int vdwtypei, const int vdwtypej, const float2 *__restrict__ vdwCoefTable, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, float3 &iforce, float3 &iforceSlow, float3 &jforce, float3 &jforceSlow, float &energyVdw, float &energyElec, float &energySlow)
template<bool doSlow>
__device__ __forceinline__
void 
storeForces (const int pos, const float3 force, const float3 forceSlow, float4 *__restrict__ devForces, float4 *__restrict__ devForcesSlow)
template<bool doSlow>
__device__ __forceinline__
void 
storeForces (const int pos, const float3 force, const float3 forceSlow, float3 *__restrict__ forces, float3 *__restrict__ forcesSlow)
template<bool doPairlist>
__device__ __forceinline__
void 
shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex, int &jexclMaxdiff, int &jexclIndex)
template<bool doPairlist>
__device__ __forceinline__
void 
shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex)
template<bool doSlow>
__device__ __forceinline__
void 
shuffleNext (float3 &jforce, float3 &jforceSlow)
__device__ __forceinline__
float 
distsq (const BoundingBox a, const float4 b)
template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ (WARPSIZE *NONBONDKERNEL_NUM_WARP, doPairlist?(10):(doEnergy?(10):(10))) nonbondedForceKernel(const int start
 cudaNonbondedTables (cudaNonbondedTables)
 doStreaming (doStreaming)

Variables

__thread DeviceCUDAdeviceCUDA
__constant__ unsigned int constExclusions [MAX_CONST_EXCLUSIONS]
__global__ void const int numTileLists
__global__ void const int
const TileList *__restrict__ 
tileLists
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ 
tileExcls
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ 
tileJatomStart
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int 
vdwCoefTableWidth
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__ 
vdwCoefTable
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ 
vdwTypes
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 
lata
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 
latb
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 
latc
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__ 
xyzq
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float 
cutoff2
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t 
vdwCoefTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t 
forceTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t 
energyTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int 
atomStorageSize
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float 
plcutoff2
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__ 
patchPairs
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ 
atomIndex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ 
exclIndexMaxDiff
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ 
overflowExclusions
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ 
tileListDepth
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__ 
tileListOrder
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ 
jtiles
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ 
tileListStat
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ 
boundingBoxes
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__ 
devForces
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ 
devForcesSlow
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ const
int 
numPatches
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ const
int unsigned int *__restrict__ 
patchNumCount
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ const
int unsigned int *__restrict__
const CudaPatchRecord *__restrict__ 
cudaPatches
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ const
int unsigned int *__restrict__
const CudaPatchRecord *__restrict__
float4 *__restrict__ 
mapForces
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ const
int unsigned int *__restrict__
const CudaPatchRecord *__restrict__
float4 *__restrict__ float4
*__restrict__ 
mapForcesSlow
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ const
int unsigned int *__restrict__
const CudaPatchRecord *__restrict__
float4 *__restrict__ float4
*__restrict__ int *__restrict__ 
mapPatchReadyQueue
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4 *__restrict__
const float cudaTextureObject_t
cudaTextureObject_t cudaTextureObject_t
const int const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const unsigned
int *__restrict__ unsigned
int *__restrict__ int *__restrict__
int *__restrict__ TileListStat
*__restrict__ const BoundingBox
*__restrict__ float4 *__restrict__
float4 *__restrict__ const
int unsigned int *__restrict__
const CudaPatchRecord *__restrict__
float4 *__restrict__ float4
*__restrict__ int *__restrict__
int *__restrict__ 
outputOrder


Define Documentation

#define CALL ( DOENERGY,
DOVIRIAL,
DOSLOW,
DOPAIRLIST,
DOSTREAMING   ) 

Value:

nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING> \
  <<< nblock, nthread, shMemSize, stream >>>  \
  (start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
    cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getVdwCoefTable(), \
    vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, \
    cudaNonbondedTables.getVdwCoefTableTex(), cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex(), \
    atomStorageSize, tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
    tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
    tlKernel.getBoundingBoxes(), d_forces, d_forcesSlow, \
    numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
    outputOrderPtr, tlKernel.getTileListVirialEnergy()); called=true

#define LARGE_FLOAT   (float)(1.0e10)

Definition at line 151 of file CudaComputeNonbondedKernel.cu.

#define MAX_CONST_EXCLUSIONS   2048

Definition at line 15 of file CudaComputeNonbondedKernel.cu.

#define NONBONDKERNEL_NUM_WARP   4

Definition at line 18 of file CudaComputeNonbondedKernel.cu.

#define OVERALLOC   1.2f

Definition at line 11 of file CudaComputeNonbondedKernel.cu.

Referenced by CudaTileListKernel::buildTileLists(), CudaTileListKernel::reSortTileLists(), and CudaComputeNonbondedKernel::updateVdwTypesExcl().

#define REDUCEGBISENERGYKERNEL_NUM_WARP   32

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().


Function Documentation

template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ ( WARPSIZE *  NONBONDKERNEL_NUM_WARP,
doPairlist?  (10):(doEnergy?(10):(10)) 
) const

template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcForceEnergy ( const float  r2,
const float  qi,
const float  qj,
const float  dx,
const float  dy,
const float  dz,
const int  vdwtypei,
const int  vdwtypej,
const float2 *__restrict__  vdwCoefTable,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
float3 &  iforce,
float3 &  iforceSlow,
float3 &  jforce,
float3 &  jforceSlow,
float &  energyVdw,
float &  energyElec,
float &  energySlow 
)

Definition at line 22 of file CudaComputeNonbondedKernel.cu.

References __ldg, f, float2::x, and float2::y.

00028                                                           {
00029 
00030   int vdwIndex = vdwtypej + vdwtypei;
00031 #if __CUDA_ARCH__ >= 350
00032   float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
00033 #else
00034   float2 ljab = tex1D<float2>(vdwCoefTableTex, vdwIndex);
00035 #endif
00036 
00037   float rinv = rsqrtf(r2);
00038   float4 ei;
00039   float4 fi = tex1D<float4>(forceTableTex, rinv);
00040   if (doEnergy) ei = tex1D<float4>(energyTableTex, rinv);
00041 
00042   float fSlow = qi * qj;
00043   float f = ljab.x * fi.z + ljab.y * fi.y + fSlow * fi.x;
00044 
00045   if (doEnergy) {
00046     energyVdw  += ljab.x * ei.z + ljab.y * ei.y;
00047     energyElec += fSlow * ei.x;
00048     if (doSlow) energySlow += fSlow * ei.w;
00049   }
00050   if (doSlow) fSlow *= fi.w;
00051 
00052   float fx = dx * f;
00053   float fy = dy * f;
00054   float fz = dz * f;
00055   iforce.x += fx;
00056   iforce.y += fy;
00057   iforce.z += fz;
00058   jforce.x -= fx;
00059   jforce.y -= fy;
00060   jforce.z -= fz;
00061 
00062   if (doSlow) {
00063     float fxSlow = dx * fSlow;
00064     float fySlow = dy * fSlow;
00065     float fzSlow = dz * fSlow;
00066     iforceSlow.x += fxSlow;
00067     iforceSlow.y += fySlow;
00068     iforceSlow.z += fzSlow;
00069     jforceSlow.x -= fxSlow;
00070     jforceSlow.y -= fySlow;
00071     jforceSlow.z -= fzSlow;
00072   }
00073 }

cudaNonbondedTables ( cudaNonbondedTables   ) 

__device__ __forceinline__ float distsq ( const BoundingBox  a,
const float4  b 
)

Definition at line 143 of file CudaComputeNonbondedKernel.cu.

References f, BoundingBox::wx, BoundingBox::wy, BoundingBox::wz, BoundingBox::x, BoundingBox::y, and BoundingBox::z.

Referenced by buildTileListsBBKernel().

00143                                                                              {
00144   float dx = max(0.0f, fabsf(a.x - b.x) - a.wx);
00145   float dy = max(0.0f, fabsf(a.y - b.y) - a.wy);
00146   float dz = max(0.0f, fabsf(a.z - b.z) - a.wz);
00147   float r2 = dx*dx + dy*dy + dz*dz;
00148   return r2;
00149 }

doStreaming ( doStreaming   ) 

Definition at line 935 of file CudaComputeNonbondedKernel.cu.

References cudaCheck.

Referenced by ComputeCUDAMgr::createCudaComputeNonbonded().

00935                     : deviceID(deviceID), cudaNonbondedTables(cudaNonbondedTables), doStreaming(doStreaming) {
00936   
00937   cudaCheck(cudaSetDevice(deviceID));
00938 
00939   overflowExclusions = NULL;
00940   overflowExclusionsSize = 0;
00941 
00942   exclIndexMaxDiff = NULL;
00943   exclIndexMaxDiffSize = 0;
00944 
00945   atomIndex = NULL;
00946   atomIndexSize = 0;
00947 
00948   vdwTypes = NULL;
00949   vdwTypesSize = 0;
00950 
00951   patchNumCount = NULL;
00952   patchNumCountSize = 0;
00953 
00954   patchReadyQueue = NULL;
00955   patchReadyQueueSize = 0;
00956 
00957 }

void NAMD_die ( const char *   ) 

Definition at line 81 of file common.C.

00083 {
00084    if ( ! err_msg ) err_msg = "(unknown error)";
00085    char *new_err_msg = new char[strlen(err_msg) + 40];
00086    sprintf(new_err_msg,"FATAL ERROR: %s\n",err_msg);
00087    CkPrintf(new_err_msg);
00088    fflush(stdout);
00089    if ( CmiNumPartitions() > 1 ) {
00090      sprintf(new_err_msg,"REPLICA %d FATAL ERROR: %s\n", CmiMyPartition(), err_msg);
00091    }
00092    CmiAbort(new_err_msg);
00093    delete [] new_err_msg;
00094 }

template<bool doSlow>
__device__ __forceinline__ void shuffleNext ( float3 &  jforce,
float3 &  jforceSlow 
)

Definition at line 127 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

00127                                                      {
00128   jforce.x = WARP_SHUFFLE(WARP_FULL_MASK, jforce.x, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
00129   jforce.y = WARP_SHUFFLE(WARP_FULL_MASK, jforce.y, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
00130   jforce.z = WARP_SHUFFLE(WARP_FULL_MASK, jforce.z, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
00131   if (doSlow) {
00132     jforceSlow.x = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.x, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
00133     jforceSlow.y = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.y, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
00134     jforceSlow.z = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.z, (threadIdx.x+1)&(WARPSIZE-1), WARPSIZE);
00135   }
00136 }

template<bool doPairlist>
__device__ __forceinline__ void shuffleNext ( float &  xyzq_j_w,
int &  vdwtypej,
int &  jatomIndex 
)

Definition at line 117 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

00117                                                                   {
00118   xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
00119   vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
00120   if (doPairlist) {
00121     jatomIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);    
00122   }
00123 }

template<bool doPairlist>
__device__ __forceinline__ void shuffleNext ( float &  xyzq_j_w,
int &  vdwtypej,
int &  jatomIndex,
int &  jexclMaxdiff,
int &  jexclIndex 
)

Definition at line 105 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

00105                                                                                                       {
00106   xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
00107   vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
00108   if (doPairlist) {
00109     jatomIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);    
00110     jexclIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jexclIndex, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
00111     jexclMaxdiff = WARP_SHUFFLE(WARP_FULL_MASK, jexclMaxdiff, (threadIdx.x+1) & (WARPSIZE-1), WARPSIZE);
00112   }
00113 }

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float3 *__restrict__  forces,
float3 *__restrict__  forcesSlow 
)

Definition at line 91 of file CudaComputeNonbondedKernel.cu.

References x, y, and z.

00092                                                                 {
00093   atomicAdd(&forces[pos].x, force.x);
00094   atomicAdd(&forces[pos].y, force.y);
00095   atomicAdd(&forces[pos].z, force.z);
00096   if (doSlow) {
00097     atomicAdd(&forcesSlow[pos].x, forceSlow.x);
00098     atomicAdd(&forcesSlow[pos].y, forceSlow.y);
00099     atomicAdd(&forcesSlow[pos].z, forceSlow.z);
00100   }
00101 }

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float4 *__restrict__  devForces,
float4 *__restrict__  devForcesSlow 
)

Definition at line 77 of file CudaComputeNonbondedKernel.cu.

References x, y, and z.

00078                                                                       {
00079   atomicAdd(&devForces[pos].x, force.x);
00080   atomicAdd(&devForces[pos].y, force.y);
00081   atomicAdd(&devForces[pos].z, force.z);
00082   if (doSlow) {
00083     atomicAdd(&devForcesSlow[pos].x, forceSlow.x);
00084     atomicAdd(&devForcesSlow[pos].y, forceSlow.y);
00085     atomicAdd(&devForcesSlow[pos].z, forceSlow.z);
00086   }
00087 }


Variable Documentation

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ atomIndex

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

Referenced by HomePatch::doAtomMigration().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

Referenced by doStreaming().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ boundingBoxes

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

Referenced by doStreaming().

__constant__ unsigned int constExclusions[MAX_CONST_EXCLUSIONS]

Definition at line 16 of file CudaComputeNonbondedKernel.cu.

Referenced by CudaComputeNonbondedKernel::bindExclusions().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ cudaPatches

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

Referenced by doStreaming().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cutoff2

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ devForces

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ devForcesSlow

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t energyTableTex

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ exclIndexMaxDiff

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t forceTableTex

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ jtiles

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

Referenced by doStreaming().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 lata

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 latb

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 latc

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ mapForces

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ mapForcesSlow

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ mapPatchReadyQueue

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ const int numPatches

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

Referenced by ParallelIOMgr::calcAtomsInEachPatch(), ComputeGlobal::ComputeGlobal(), Controller::Controller(), WorkDistrib::createAtomLists(), WorkDistrib::createHomePatches(), ProxyMgr::createProxies(), NodeProxyMgr::createProxyInfo(), doStreaming(), dumpbench(), ComputePmeMgr::initialize(), OptPmeMgr::initialize_pencils(), ComputePmeMgr::initialize_pencils(), NodeProxyMgr::NodeProxyMgr(), Node::outputPatchComputeMaps(), WorkDistrib::patchMapInit(), PatchProxyListMsg::PatchProxyListMsg(), WorkDistrib::reinitAtoms(), WorkDistrib::savePatchMap(), WorkDistrib::sendPatchMap(), Node::startup(), NamdCentLB::Strategy(), and NodeProxyMgr::~NodeProxyMgr().

__global__ void const int numTileLists

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ int* __restrict__ outputOrder

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

Referenced by doStreaming().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ overflowExclusions

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ const int unsigned int* __restrict__ patchNumCount

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ patchPairs

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ tileExcls

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ tileJatomStart

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ tileListDepth

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ tileListOrder

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ tileLists

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ tileListStat

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ vdwCoefTable

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t vdwCoefTableTex

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int vdwCoefTableWidth

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ vdwTypes

Definition at line 161 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ xyzq

Definition at line 161 of file CudaComputeNonbondedKernel.cu.


Generated on Sun Nov 19 01:17:16 2017 for NAMD by  doxygen 1.4.7