NAMD
Public Member Functions | List of all members
CudaComputeGBISKernel Class Reference

#include <CudaComputeGBISKernel.h>

Public Member Functions

 CudaComputeGBISKernel (int deviceID)
 
 ~CudaComputeGBISKernel ()
 
void updateIntRad (const int atomStorageSize, float *intRad0H, float *intRadSH, cudaStream_t stream)
 
void updateBornRad (const int atomStorageSize, float *bornRadH, cudaStream_t stream)
 
void update_dHdrPrefix (const int atomStorageSize, float *dHdrPrefixH, cudaStream_t stream)
 
void GBISphase1 (CudaTileListKernel &tlKernel, const int atomStorageSize, const float3 lata, const float3 latb, const float3 latc, const float a_cut, float *h_psiSum, cudaStream_t stream)
 
void GBISphase2 (CudaTileListKernel &tlKernel, const int atomStorageSize, const bool doEnergy, const bool doSlow, const float3 lata, const float3 latb, const float3 latc, const float r_cut, const float scaling, const float kappa, const float smoothDist, const float epsilon_p, const float epsilon_s, float4 *d_forces, float *h_dEdaSum, cudaStream_t stream)
 
void GBISphase3 (CudaTileListKernel &tlKernel, const int atomStorageSize, const float3 lata, const float3 latb, const float3 latc, const float a_cut, float4 *d_forces, cudaStream_t stream)
 

Detailed Description

Definition at line 5 of file CudaComputeGBISKernel.h.

Constructor & Destructor Documentation

CudaComputeGBISKernel::CudaComputeGBISKernel ( int  deviceID)

Definition at line 435 of file CudaComputeGBISKernel.cu.

References cudaCheck.

435  : deviceID(deviceID) {
436  cudaCheck(cudaSetDevice(deviceID));
437 
438  intRad0 = NULL;
439  intRad0Size = 0;
440 
441  intRadS = NULL;
442  intRadSSize = 0;
443 
444  psiSum = NULL;
445  psiSumSize = 0;
446 
447  bornRad = NULL;
448  bornRadSize = 0;
449 
450  dEdaSum = NULL;
451  dEdaSumSize = 0;
452 
453  dHdrPrefix = NULL;
454  dHdrPrefixSize = 0;
455 
456 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
CudaComputeGBISKernel::~CudaComputeGBISKernel ( )

Definition at line 461 of file CudaComputeGBISKernel.cu.

References cudaCheck.

461  {
462  cudaCheck(cudaSetDevice(deviceID));
463  if (intRad0 != NULL) deallocate_device<float>(&intRad0);
464  if (intRadS != NULL) deallocate_device<float>(&intRadS);
465  if (psiSum != NULL) deallocate_device<float>(&psiSum);
466  if (bornRad != NULL) deallocate_device<float>(&bornRad);
467  if (dEdaSum != NULL) deallocate_device<float>(&dEdaSum);
468  if (dHdrPrefix != NULL) deallocate_device<float>(&dHdrPrefix);
469 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95

Member Function Documentation

void CudaComputeGBISKernel::GBISphase1 ( CudaTileListKernel tlKernel,
const int  atomStorageSize,
const float3  lata,
const float3  latb,
const float3  latc,
const float  a_cut,
float *  h_psiSum,
cudaStream_t  stream 
)

Definition at line 503 of file CudaComputeGBISKernel.cu.

References GBISParam< 1 >::a_cut, cudaCheck, cutoff2, deviceCUDA, FS_MAX, GBISKERNEL_NUM_WARP, CudaTileListKernel::get_xyzq(), DeviceCUDA::getMaxNumBlocks(), CudaTileListKernel::getNumTileListsGBIS(), CudaTileListKernel::getPatchPairs(), CudaTileListKernel::getTileJatomStartGBIS(), CudaTileListKernel::getTileListsGBIS(), lata, latb, latc, stream, and WARPSIZE.

505  {
506 
507  reallocate_device<float>(&psiSum, &psiSumSize, atomStorageSize, 1.2f);
508  clear_device_array<float>(psiSum, atomStorageSize, stream);
509 
510  int nwarp = GBISKERNEL_NUM_WARP;
511  int nthread = WARPSIZE*nwarp;
512  int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getNumTileListsGBIS()-1)/nwarp+1);
513 
514  GBISParam<1> param;
515  param.a_cut = a_cut;
516 
517  float cutoff2 = (a_cut + FS_MAX)*(a_cut + FS_MAX);
518  GBIS_Kernel<false, false, 1> <<< nblock, nthread, 0, stream >>> (
519  tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(),
520  tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), cutoff2,
521  param, intRad0, intRadS, NULL, psiSum, NULL, NULL);
522 
523  cudaCheck(cudaGetLastError());
524 
525  copy_DtoH<float>(psiSum, h_psiSum, atomStorageSize, stream);
526 }
PatchPairRecord * getPatchPairs()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 lata
#define WARPSIZE
Definition: CudaUtils.h:10
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 latb
__thread cudaStream_t stream
TileList * getTileListsGBIS()
int getMaxNumBlocks()
Definition: DeviceCUDA.C:419
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
#define FS_MAX
Definition: ComputeGBIS.inl:24
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 latc
#define GBISKERNEL_NUM_WARP
void CudaComputeGBISKernel::GBISphase2 ( CudaTileListKernel tlKernel,
const int  atomStorageSize,
const bool  doEnergy,
const bool  doSlow,
const float3  lata,
const float3  latb,
const float3  latc,
const float  r_cut,
const float  scaling,
const float  kappa,
const float  smoothDist,
const float  epsilon_p,
const float  epsilon_s,
float4 *  d_forces,
float *  h_dEdaSum,
cudaStream_t  stream 
)

Definition at line 531 of file CudaComputeGBISKernel.cu.

References CALL, cudaCheck, deviceCUDA, GBISParam< 2 >::epsilon_p_i, GBISParam< 2 >::epsilon_s_i, GBISKERNEL_NUM_WARP, DeviceCUDA::getMaxNumBlocks(), CudaTileListKernel::getNumTileListsGBIS(), GBISParam< 2 >::kappa, GBISParam< 2 >::r_cut2, GBISParam< 2 >::r_cut_2, GBISParam< 2 >::r_cut_4, GBISParam< 2 >::scaling, CudaTileListKernel::setTileListVirialEnergyGBISLength(), GBISParam< 2 >::smoothDist, stream, and WARPSIZE.

536  {
537 
538  reallocate_device<float>(&dEdaSum, &dEdaSumSize, atomStorageSize, 1.2f);
539  clear_device_array<float>(dEdaSum, atomStorageSize, stream);
540 
541  if (doEnergy) {
543  }
544 
545  int nwarp = GBISKERNEL_NUM_WARP;
546  int nthread = WARPSIZE*nwarp;
547 
548  int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getNumTileListsGBIS()-1)/nwarp+1);
549 
550  GBISParam<2> param;
551  param.r_cut2 = r_cut*r_cut;
552  param.r_cut_2 = 1.f / param.r_cut2;
553  param.r_cut_4 = 4.f*param.r_cut_2*param.r_cut_2;
554  param.epsilon_s_i = 1.f / epsilon_s;
555  param.epsilon_p_i = 1.f / epsilon_p;
556  param.scaling = scaling;
557  param.kappa = kappa;
558  param.smoothDist = smoothDist;
559 
560 #define CALL(DOENERGY, DOSLOW) \
561  GBIS_Kernel<DOENERGY, DOSLOW, 2> <<< nblock, nthread, 0, stream >>> (\
562  tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), \
563  tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), param.r_cut2, \
564  param, bornRad, NULL, NULL, dEdaSum, d_forces, tlKernel.getTileListVirialEnergy())
565 
566  if (!doEnergy && !doSlow) CALL(false, false);
567  if (!doEnergy && doSlow) CALL(false, true);
568  if ( doEnergy && !doSlow) CALL(true, false);
569  if ( doEnergy && doSlow) CALL(true, true);
570 
571  cudaCheck(cudaGetLastError());
572 
573  copy_DtoH<float>(dEdaSum, h_dEdaSum, atomStorageSize, stream);
574 }
void setTileListVirialEnergyGBISLength(int len)
#define WARPSIZE
Definition: CudaUtils.h:10
__thread cudaStream_t stream
int getMaxNumBlocks()
Definition: DeviceCUDA.C:419
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
#define CALL(DOENERGY, DOVIRIAL)
#define GBISKERNEL_NUM_WARP
void CudaComputeGBISKernel::GBISphase3 ( CudaTileListKernel tlKernel,
const int  atomStorageSize,
const float3  lata,
const float3  latb,
const float3  latc,
const float  a_cut,
float4 *  d_forces,
cudaStream_t  stream 
)

Definition at line 579 of file CudaComputeGBISKernel.cu.

References GBISParam< 3 >::a_cut, cudaCheck, cutoff2, deviceCUDA, FS_MAX, GBISKERNEL_NUM_WARP, CudaTileListKernel::get_xyzq(), DeviceCUDA::getMaxNumBlocks(), CudaTileListKernel::getNumTileListsGBIS(), CudaTileListKernel::getPatchPairs(), CudaTileListKernel::getTileJatomStartGBIS(), CudaTileListKernel::getTileListsGBIS(), lata, latb, latc, and WARPSIZE.

581  {
582 
583  int nwarp = GBISKERNEL_NUM_WARP;
584  int nthread = WARPSIZE*nwarp;
585  int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getNumTileListsGBIS()-1)/nwarp+1);
586 
587  GBISParam<3> param;
588  param.a_cut = a_cut;
589 
590  float cutoff2 = (a_cut + FS_MAX)*(a_cut + FS_MAX);
591  GBIS_Kernel<false, false, 3> <<< nblock, nthread, 0, stream >>> (
592  tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(),
593  tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), cutoff2,
594  param, intRad0, intRadS, dHdrPrefix, NULL, d_forces, NULL);
595 
596  cudaCheck(cudaGetLastError());
597 }
PatchPairRecord * getPatchPairs()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 lata
#define WARPSIZE
Definition: CudaUtils.h:10
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 latb
__thread cudaStream_t stream
TileList * getTileListsGBIS()
int getMaxNumBlocks()
Definition: DeviceCUDA.C:419
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
#define FS_MAX
Definition: ComputeGBIS.inl:24
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 latc
#define GBISKERNEL_NUM_WARP
void CudaComputeGBISKernel::update_dHdrPrefix ( const int  atomStorageSize,
float *  dHdrPrefixH,
cudaStream_t  stream 
)

Definition at line 495 of file CudaComputeGBISKernel.cu.

References dHdrPrefixH, and stream.

495  {
496  reallocate_device<float>(&dHdrPrefix, &dHdrPrefixSize, atomStorageSize, 1.2f);
497  copy_HtoD<float>(dHdrPrefixH, dHdrPrefix, atomStorageSize, stream);
498 }
static __thread float * dHdrPrefixH
__thread cudaStream_t stream
void CudaComputeGBISKernel::updateBornRad ( const int  atomStorageSize,
float *  bornRadH,
cudaStream_t  stream 
)

Definition at line 487 of file CudaComputeGBISKernel.cu.

References bornRadH, and stream.

487  {
488  reallocate_device<float>(&bornRad, &bornRadSize, atomStorageSize, 1.2f);
489  copy_HtoD<float>(bornRadH, bornRad, atomStorageSize, stream);
490 }
static __thread float * bornRadH
__thread cudaStream_t stream
void CudaComputeGBISKernel::updateIntRad ( const int  atomStorageSize,
float *  intRad0H,
float *  intRadSH,
cudaStream_t  stream 
)

Definition at line 474 of file CudaComputeGBISKernel.cu.

References intRad0H, intRadSH, and stream.

475  {
476 
477  reallocate_device<float>(&intRad0, &intRad0Size, atomStorageSize, 1.2f);
478  reallocate_device<float>(&intRadS, &intRadSSize, atomStorageSize, 1.2f);
479 
480  copy_HtoD<float>(intRad0H, intRad0, atomStorageSize, stream);
481  copy_HtoD<float>(intRadSH, intRadS, atomStorageSize, stream);
482 }
__thread cudaStream_t stream
static __thread float * intRadSH
static __thread float * intRad0H

The documentation for this class was generated from the following files: