NAMD
Classes | Macros | Typedefs | Functions
CudaUtils.h File Reference
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include "HipDefines.h"
#include <iostream>

Go to the source code of this file.

Classes

struct  cudaTensor
 
struct  cudaVector
 
struct  CudaMInfo
 
struct  CudaStaticAssert< bool >
 
struct  CudaStaticAssert< true >
 
struct  CudaNBConstants
 

Macros

#define WARPSIZE   32
 
#define BOUNDINGBOXSIZE   32
 
#define NAMD_CCCL_MAJOR_VERSION   2
 
#define FORCE_ENERGY_TABLE_SIZE   4096
 
#define COPY_CUDATENSOR(S, D)
 
#define COPY_CUDAVECTOR(S, D)
 
#define PRINT_CUDATENSOR(T, SS)
 
#define ATOMIC_BINS   1
 
#define FEP_BONDED_CUDA_DEBUG
 
#define WARP_FULL_MASK   0xffffffff
 
#define WARP_SHUFFLE_XOR(MASK, VAR, LANE, SIZE)   __shfl_xor(VAR, LANE, SIZE)
 
#define WARP_SHUFFLE_UP(MASK, VAR, DELTA, SIZE)   __shfl_up(VAR, DELTA, SIZE)
 
#define WARP_SHUFFLE_DOWN(MASK, VAR, DELTA, SIZE)   __shfl_down(VAR, DELTA, SIZE)
 
#define WARP_SHUFFLE(MASK, VAR, LANE, SIZE)   __shfl(VAR, LANE, SIZE)
 
#define WARP_ALL(MASK, P)   __all(P)
 
#define WARP_ANY(MASK, P)   __any(P)
 
#define WARP_BALLOT(MASK, P)   __ballot(P)
 
#define WARP_SYNC(MASK)
 
#define BLOCK_SYNC   __syncthreads()
 
#define NAMD_WARP_SYNC(MASK)   WARP_SYNC(MASK)
 
#define cuda_static_assert(expr)   (CudaStaticAssert<(expr) != 0>())
 
#define cudaCheck(stmt)
 
#define curandCheck(stmt)
 

Typedefs

typedef unsigned int WarpMask
 
typedef double BigReal
 

Functions

void cudaDie (const char *msg, cudaError_t err=cudaSuccess)
 
void curandDie (const char *msg, int err=0)
 
void cudaNAMD_bug (const char *msg)
 
void clear_device_array_async_T (void *data, const size_t ndata, cudaStream_t stream, const size_t sizeofT)
 
void clear_device_array_T (void *data, const size_t ndata, const size_t sizeofT)
 
template<class T >
void clear_device_array (T *data, const size_t ndata, cudaStream_t stream=0)
 
template<class T >
void clear_device_array_sync (T *data, const size_t ndata)
 
void allocate_host_T (void **pp, const size_t len, const size_t sizeofT)
 
template<class T >
void allocate_host (T **pp, const size_t len)
 
void allocate_device_T (void **pp, const size_t len, const size_t sizeofT)
 
void allocate_device_T_managed (void **pp, const size_t len, const size_t sizeofT)
 
void allocate_device_T_async (void **pp, const size_t len, const size_t sizeofT, cudaStream_t stream)
 
template<class T >
void allocate_device (T **pp, const size_t len)
 
template<class T >
void allocate_device_managed (T **pp, const size_t len)
 
template<class T >
void allocate_device_async (T **pp, const size_t len, cudaStream_t stream)
 
void deallocate_device_T (void **pp)
 
void deallocate_device_T_async (void **pp, cudaStream_t stream)
 
template<class T >
void deallocate_device (T **pp)
 
template<class T >
void deallocate_device_async (T **pp, cudaStream_t stream)
 
bool reallocate_device_T (void **pp, size_t *curlen, const size_t newlen, const float fac, const size_t sizeofT)
 
template<class T >
bool reallocate_device (T **pp, size_t *curlen, const size_t newlen, const float fac=1.0f)
 
bool reallocate_host_T (void **pp, size_t *curlen, const size_t newlen, const float fac, const unsigned int flag, const size_t sizeofT)
 
template<class T >
bool reallocate_host (T **pp, size_t *curlen, const size_t newlen, const float fac=1.0f, const unsigned int flag=cudaHostAllocDefault)
 
void deallocate_host_T (void **pp)
 
template<class T >
void deallocate_host (T **pp)
 
void copy_HtoD_async_T (const void *h_array, void *d_array, size_t array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_HtoD_T (const void *h_array, void *d_array, size_t array_len, const size_t sizeofT)
 
void copy_DtoH_async_T (const void *d_array, void *h_array, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_DtoH_T (const void *d_array, void *h_array, const size_t array_len, const size_t sizeofT)
 
void copy_DtoD_async_T (const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_DtoD_T (const void *d_src, void *d_dst, const size_t array_len, const size_t sizeofT)
 
template<class T >
void copy_HtoD (const T *h_array, T *d_array, size_t array_len, cudaStream_t stream=0)
 
template<class T >
void copy_HtoD_sync (const T *h_array, T *d_array, size_t array_len)
 
template<class T >
void copy_DtoH (const T *d_array, T *h_array, const size_t array_len, cudaStream_t stream=0)
 
template<class T >
void copy_DtoH_sync (const T *d_array, T *h_array, const size_t array_len)
 
template<class T >
void copy_DtoD (const T *d_src, T *h_dst, const size_t array_len, cudaStream_t stream=0)
 
template<class T >
void copy_DtoD_sync (const T *d_src, T *h_dst, const size_t array_len)
 
void copy_PeerDtoD_async_T (const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
 
template<class T >
void copy_PeerDtoD (const int src_dev, const int dst_dev, const T *d_src, T *d_dst, const size_t array_len, cudaStream_t stream=0)
 
void copy3D_HtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
template<class T >
void copy3D_HtoD (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0)
 
void copy3D_DtoH_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
template<class T >
void copy3D_DtoH (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0)
 
void copy3D_DtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
template<class T >
void copy3D_DtoD (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0)
 
void copy3D_PeerDtoD_T (int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
template<class T >
void copy3D_PeerDtoD (int src_dev, int dst_dev, T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0)
 

Macro Definition Documentation

◆ ATOMIC_BINS

#define ATOMIC_BINS   1

Definition at line 79 of file CudaUtils.h.

Referenced by CudaComputeNonbonded::initialize().

◆ BLOCK_SYNC

#define BLOCK_SYNC   __syncthreads()

Definition at line 201 of file CudaUtils.h.

Referenced by GBIS_P1_Kernel(), GBIS_P2_Kernel(), and GBIS_P3_Kernel().

◆ BOUNDINGBOXSIZE

#define BOUNDINGBOXSIZE   32

Definition at line 18 of file CudaUtils.h.

◆ COPY_CUDATENSOR

#define COPY_CUDATENSOR (   S,
  D 
)
Value:
D.xx = S.xx; \
D.xy = S.xy; \
D.xz = S.xz; \
D.yx = S.yx; \
D.yy = S.yy; \
D.yz = S.yz; \
D.zx = S.zx; \
D.zy = S.zy; \
D.zz = S.zz

Definition at line 51 of file CudaUtils.h.

◆ COPY_CUDAVECTOR

#define COPY_CUDAVECTOR (   S,
  D 
)
Value:
D.x = S.x; \
D.y = S.y; \
D.z = S.z

Definition at line 62 of file CudaUtils.h.

◆ cuda_static_assert

#define cuda_static_assert (   expr)    (CudaStaticAssert<(expr) != 0>())

Definition at line 231 of file CudaUtils.h.

◆ cudaCheck

#define cudaCheck (   stmt)
Value:
do { \
cudaError_t err = stmt; \
if (err != cudaSuccess) { \
char msg[256]; \
sprintf(msg, "%s in file %s, function %s, line %d\n", #stmt,__FILE__,__FUNCTION__,__LINE__); \
cudaDie(msg, err); \
} \
} while(0)

Definition at line 242 of file CudaUtils.h.

Referenced by allocate_device_T(), allocate_device_T_async(), allocate_device_T_managed(), allocate_host_T(), CudaFFTCompute::backward(), bindTextureObject(), clear_device_array_async_T(), clear_device_array_T(), CudaPmeOneDevice::compute(), copy3D_DtoD_T(), copy3D_DtoH_T(), copy3D_HtoD_T(), copy3D_PeerDtoD_T(), copy_DtoD_async_T(), copy_DtoD_T(), copy_DtoH_async_T(), copy_DtoH_T(), copy_HtoD_async_T(), copy_HtoD_T(), copy_PeerDtoD_async_T(), CudaPmeRealSpaceCompute::copyAtoms(), CudaPmeTranspose::copyDataDeviceToDevice(), CudaPmeTranspose::copyDataDeviceToHost(), CudaPmeTranspose::copyDataHostToDevice(), createStream(), CudaComputeNonbonded::CudaComputeNonbonded(), CudaNonbondedTables::CudaNonbondedTables(), CudaPmeKSpaceCompute::CudaPmeKSpaceCompute(), CudaPmeOneDevice::CudaPmeOneDevice(), CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute(), CudaPmeTranspose::CudaPmeTranspose(), deallocate_device_T(), deallocate_device_T_async(), deallocate_host_T(), CudaPmeKSpaceCompute::energyAndVirialSetCallback(), CudaComputeNonbonded::finishPatches(), CudaPmeOneDevice::finishReduction(), CudaComputeNonbonded::finishReductions(), CudaFFTCompute::forward(), CudaPmeRealSpaceCompute::gatherForce(), CudaPmeRealSpaceCompute::gatherForceSetCallback(), DeviceCUDA::getMaxNumBlocks(), DeviceCUDA::getMaxNumThreads(), ComputeCUDAMgr::initialize(), DeviceCUDA::initialize(), CudaComputeNonbonded::initialize(), ComputePmeCUDADevice::initialize(), ComputePmeCUDAMgr::initialize_pencils(), CudaPmePencilXY::initializeDevice(), CudaPmePencilX::initializeDevice(), CudaPmePencilY::initializeDevice(), CudaPmePencilZ::initializeDevice(), CudaComputeNonbonded::launchWork(), reallocate_device_T(), reallocate_host_T(), CudaComputeNonbonded::reSortTileLists(), CudaPmeTranspose::setDataPtrsYZX(), CudaPmeTranspose::setDataPtrsZXY(), CudaPmeKSpaceCompute::solve(), CudaPmeRealSpaceCompute::spreadCharge(), CudaPmeTranspose::transposeXYZtoYZX(), CudaPmeTranspose::transposeXYZtoZXY(), 
ComputeLonepairsCUDA::updateAtoms(), CudaPmeRealSpaceCompute::waitGatherForceDone(), CudaPmeTranspose::waitStreamSynchronize(), writeComplexToDisk(), writeRealToDisk(), ComputePmeCUDADevice::~ComputePmeCUDADevice(), ComputePmeCUDAMgr::~ComputePmeCUDAMgr(), CudaComputeNonbonded::~CudaComputeNonbonded(), CudaFFTCompute::~CudaFFTCompute(), CudaNonbondedTables::~CudaNonbondedTables(), CudaPmeKSpaceCompute::~CudaPmeKSpaceCompute(), CudaPmeOneDevice::~CudaPmeOneDevice(), CudaPmePencilX::~CudaPmePencilX(), CudaPmePencilXY::~CudaPmePencilXY(), CudaPmePencilY::~CudaPmePencilY(), CudaPmePencilZ::~CudaPmePencilZ(), CudaPmeRealSpaceCompute::~CudaPmeRealSpaceCompute(), and CudaPmeTranspose::~CudaPmeTranspose().

◆ curandCheck

#define curandCheck (   stmt)
Value:
do { \
curandStatus_t err = stmt; \
if (err != CURAND_STATUS_SUCCESS) { \
char msg[256]; \
sprintf(msg, "%s in file %s, function %s, line %d\n", #stmt,__FILE__,__FUNCTION__,__LINE__); \
curandDie(msg, (int)err); \
} \
} while(0)

Definition at line 251 of file CudaUtils.h.

◆ FEP_BONDED_CUDA_DEBUG

#define FEP_BONDED_CUDA_DEBUG

Definition at line 136 of file CudaUtils.h.

◆ FORCE_ENERGY_TABLE_SIZE

#define FORCE_ENERGY_TABLE_SIZE   4096

Definition at line 49 of file CudaUtils.h.

Referenced by CudaNonbondedTables::CudaNonbondedTables().

◆ NAMD_CCCL_MAJOR_VERSION

#define NAMD_CCCL_MAJOR_VERSION   2

Definition at line 23 of file CudaUtils.h.

◆ NAMD_WARP_SYNC

#define NAMD_WARP_SYNC (   MASK)    WARP_SYNC(MASK)

Definition at line 206 of file CudaUtils.h.

◆ PRINT_CUDATENSOR

#define PRINT_CUDATENSOR (   T,
  SS 
)
Value:
SS << T.xx << " " << T.xy << " " << T.xz << " " << T.yx << " " << \
T.yy << " " << T.yz << " " << T.zx << " " << T.zy << " " << T.zz << \
std::endl;

Definition at line 67 of file CudaUtils.h.

◆ WARP_ALL

#define WARP_ALL (   MASK,
  P 
)    __all(P)

Definition at line 197 of file CudaUtils.h.

◆ WARP_ANY

#define WARP_ANY (   MASK,
  P 
)    __any(P)

Definition at line 198 of file CudaUtils.h.

◆ WARP_BALLOT

#define WARP_BALLOT (   MASK,
  P 
)    __ballot(P)

Definition at line 199 of file CudaUtils.h.

◆ WARP_FULL_MASK

#define WARP_FULL_MASK   0xffffffff

Definition at line 158 of file CudaUtils.h.

Referenced by GBIS_P1_Kernel(), GBIS_P2_Kernel(), and GBIS_P3_Kernel().

◆ WARP_SHUFFLE

#define WARP_SHUFFLE (   MASK,
  VAR,
  LANE,
  SIZE 
)    __shfl(VAR, LANE, SIZE)

Definition at line 187 of file CudaUtils.h.

Referenced by GBIS_P1_Kernel(), GBIS_P2_Kernel(), and GBIS_P3_Kernel().

◆ WARP_SHUFFLE_DOWN

#define WARP_SHUFFLE_DOWN (   MASK,
  VAR,
  DELTA,
  SIZE 
)    __shfl_down(VAR, DELTA, SIZE)

Definition at line 185 of file CudaUtils.h.

◆ WARP_SHUFFLE_UP

#define WARP_SHUFFLE_UP (   MASK,
  VAR,
  DELTA,
  SIZE 
)    __shfl_up(VAR, DELTA, SIZE)

Definition at line 183 of file CudaUtils.h.

◆ WARP_SHUFFLE_XOR

#define WARP_SHUFFLE_XOR (   MASK,
  VAR,
  LANE,
  SIZE 
)    __shfl_xor(VAR, LANE, SIZE)

Definition at line 181 of file CudaUtils.h.

◆ WARP_SYNC

#define WARP_SYNC (   MASK)

Definition at line 200 of file CudaUtils.h.

◆ WARPSIZE

#define WARPSIZE   32

Typedef Documentation

◆ BigReal

typedef double BigReal

Definition at line 75 of file CudaUtils.h.

◆ WarpMask

typedef unsigned int WarpMask

Definition at line 19 of file CudaUtils.h.

Function Documentation

◆ allocate_device()

template<class T >
void allocate_device ( T **  pp,
const size_t  len 
)

Definition at line 320 of file CudaUtils.h.

References allocate_device_T().

320  {
321  allocate_device_T((void **)pp, len, sizeof(T));
322 }
void allocate_device_T(void **pp, const size_t len, const size_t sizeofT)
Definition: CudaUtils.C:97

◆ allocate_device_async()

template<class T >
void allocate_device_async ( T **  pp,
const size_t  len,
cudaStream_t  stream 
)

Definition at line 330 of file CudaUtils.h.

References allocate_device_T_async().

Referenced by ComputeLonepairsCUDA::updateAtoms().

330  {
331  allocate_device_T_async((void **)pp, len, sizeof(T), stream);
332 }
void allocate_device_T_async(void **pp, const size_t len, const size_t sizeofT, cudaStream_t stream)
Definition: CudaUtils.C:105

◆ allocate_device_managed()

template<class T >
void allocate_device_managed ( T **  pp,
const size_t  len 
)

Definition at line 325 of file CudaUtils.h.

References allocate_device_T_managed().

325  {
326  allocate_device_T_managed((void **)pp, len, sizeof(T));
327 }
void allocate_device_T_managed(void **pp, const size_t len, const size_t sizeofT)
Definition: CudaUtils.C:101

◆ allocate_device_T()

void allocate_device_T ( void **  pp,
const size_t  len,
const size_t  sizeofT 
)

Definition at line 97 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_device(), allocate_device_T_async(), and bindTextureObject().

97  {
98  cudaCheck(cudaMalloc(pp, sizeofT*len));
99 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ allocate_device_T_async()

void allocate_device_T_async ( void **  pp,
const size_t  len,
const size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 105 of file CudaUtils.C.

References allocate_device_T(), and cudaCheck.

Referenced by allocate_device_async().

105  {
106 #if (CUDART_VERSION >= 11020)
107  cudaCheck(cudaMallocAsync(pp, sizeofT*len, stream));
108 #else
109  allocate_device_T(pp, len, sizeofT);
110 #endif
111 }
void allocate_device_T(void **pp, const size_t len, const size_t sizeofT)
Definition: CudaUtils.C:97
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ allocate_device_T_managed()

void allocate_device_T_managed ( void **  pp,
const size_t  len,
const size_t  sizeofT 
)

Definition at line 101 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_device_managed().

101  {
102  cudaCheck(cudaMallocManaged(pp, sizeofT*len));
103 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ allocate_host()

template<class T >
void allocate_host ( T **  pp,
const size_t  len 
)

Definition at line 305 of file CudaUtils.h.

References allocate_host_T().

305  {
306  allocate_host_T((void **)pp, len, sizeof(T));
307 }
void allocate_host_T(void **pp, const size_t len, const size_t sizeofT)
Definition: CudaUtils.C:87

◆ allocate_host_T()

void allocate_host_T ( void **  pp,
const size_t  len,
const size_t  sizeofT 
)

Definition at line 87 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_host().

87  {
88  cudaCheck(cudaMallocHost(pp, sizeofT*len));
89 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ clear_device_array()

template<class T >
void clear_device_array ( T *  data,
const size_t  ndata,
cudaStream_t  stream = 0 
)

Definition at line 288 of file CudaUtils.h.

References clear_device_array_async_T().

288  {
289  clear_device_array_async_T(data, ndata, stream, sizeof(T));
290 }
void clear_device_array_async_T(void *data, const size_t ndata, cudaStream_t stream, const size_t sizeofT)
Definition: CudaUtils.C:73

◆ clear_device_array_async_T()

void clear_device_array_async_T ( void *  data,
const size_t  ndata,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 73 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array().

73  {
74  cudaCheck(cudaMemsetAsync(data, 0, sizeofT*ndata, stream));
75 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ clear_device_array_sync()

template<class T >
void clear_device_array_sync ( T *  data,
const size_t  ndata 
)

Definition at line 293 of file CudaUtils.h.

References clear_device_array_T().

293  {
294  clear_device_array_T(data, ndata, sizeof(T));
295 }
void clear_device_array_T(void *data, const size_t ndata, const size_t sizeofT)
Definition: CudaUtils.C:77

◆ clear_device_array_T()

void clear_device_array_T ( void *  data,
const size_t  ndata,
const size_t  sizeofT 
)

Definition at line 77 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array_sync().

77  {
78  cudaCheck(cudaMemset(data, 0, sizeofT*ndata));
79 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy3D_DtoD()

template<class T >
void copy3D_DtoD ( T *  src_data,
T *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
cudaStream_t  stream = 0 
)

Definition at line 549 of file CudaUtils.h.

References copy3D_DtoD_T().

555  {
556  copy3D_DtoD_T(src_data, dst_data,
557  src_x0, src_y0, src_z0,
558  src_xsize, src_ysize,
559  dst_x0, dst_y0, dst_z0,
560  dst_xsize, dst_ysize,
561  width, height, depth,
562  sizeof(T), stream);
563 }
void copy3D_DtoD_T(void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
Definition: CudaUtils.C:319

◆ copy3D_DtoD_T()

void copy3D_DtoD_T ( void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 319 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoD().

325  {
326  cudaMemcpy3DParms parms = {0};
327 
328  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
329  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
330 
331  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
332  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
333 
334  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
335  parms.kind = cudaMemcpyDeviceToDevice;
336 
337  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
338 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy3D_DtoH()

template<class T >
void copy3D_DtoH ( T *  src_data,
T *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
cudaStream_t  stream = 0 
)

Definition at line 520 of file CudaUtils.h.

References copy3D_DtoH_T().

526  {
527  copy3D_DtoH_T(src_data, dst_data,
528  src_x0, src_y0, src_z0,
529  src_xsize, src_ysize,
530  dst_x0, dst_y0, dst_z0,
531  dst_xsize, dst_ysize,
532  width, height, depth,
533  sizeof(T), stream);
534 }
void copy3D_DtoH_T(void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
Definition: CudaUtils.C:294

◆ copy3D_DtoH_T()

void copy3D_DtoH_T ( void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 294 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoH().

300  {
301  cudaMemcpy3DParms parms = {0};
302 
303  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
304  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
305 
306  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
307  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
308 
309  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
310  parms.kind = cudaMemcpyDeviceToHost;
311 
312  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
313 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy3D_HtoD()

template<class T >
void copy3D_HtoD ( T *  src_data,
T *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
cudaStream_t  stream = 0 
)

Definition at line 491 of file CudaUtils.h.

References copy3D_HtoD_T().

497  {
498  copy3D_HtoD_T(src_data, dst_data,
499  src_x0, src_y0, src_z0,
500  src_xsize, src_ysize,
501  dst_x0, dst_y0, dst_z0,
502  dst_xsize, dst_ysize,
503  width, height, depth,
504  sizeof(T), stream);
505 }
void copy3D_HtoD_T(void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
Definition: CudaUtils.C:269

◆ copy3D_HtoD_T()

void copy3D_HtoD_T ( void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 269 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_HtoD().

275  {
276  cudaMemcpy3DParms parms = {0};
277 
278  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
279  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
280 
281  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
282  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
283 
284  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
285  parms.kind = cudaMemcpyHostToDevice;
286 
287  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
288 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy3D_PeerDtoD()

template<class T >
void copy3D_PeerDtoD ( int  src_dev,
int  dst_dev,
T *  src_data,
T *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
cudaStream_t  stream = 0 
)

Definition at line 579 of file CudaUtils.h.

References copy3D_PeerDtoD_T().

586  {
587  copy3D_PeerDtoD_T(src_dev, dst_dev,
588  src_data, dst_data,
589  src_x0, src_y0, src_z0,
590  src_xsize, src_ysize,
591  dst_x0, dst_y0, dst_z0,
592  dst_xsize, dst_ysize,
593  width, height, depth,
594  sizeof(T), stream);
595 }
void copy3D_PeerDtoD_T(int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
Definition: CudaUtils.C:344

◆ copy3D_PeerDtoD_T()

void copy3D_PeerDtoD_T ( int  src_dev,
int  dst_dev,
void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 344 of file CudaUtils.C.

References cudaCheck, and cudaDie().

Referenced by copy3D_PeerDtoD().

351  {
352 #ifdef NAMD_HIP
353 // TODO-HIP: Is a workaround possible? cudaMemcpy3D+cudaMemcpyPeer+cudaMemcpy3D
354  cudaDie("cudaMemcpy3DPeerAsync is not supported by HIP");
355 #else
356  cudaMemcpy3DPeerParms parms = {0};
357 
358  parms.srcDevice = src_dev;
359  parms.dstDevice = dst_dev;
360 
361  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
362  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
363 
364  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
365  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
366 
367  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
368 
369  cudaCheck(cudaMemcpy3DPeerAsync(&parms, stream));
370 #endif
371 }
void cudaDie(const char *msg, cudaError_t err)
Definition: CudaUtils.C:9
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy_DtoD()

template<class T >
void copy_DtoD ( const T *  d_src,
T *  h_dst,
const size_t  array_len,
cudaStream_t  stream = 0 
)

Definition at line 452 of file CudaUtils.h.

References copy_DtoD_async_T().

452  {
453  copy_DtoD_async_T(d_src, h_dst, array_len, stream, sizeof(T));
454 }
void copy_DtoD_async_T(const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
Definition: CudaUtils.C:246

◆ copy_DtoD_async_T()

void copy_DtoD_async_T ( const void *  d_src,
void *  d_dst,
const size_t  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 246 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD().

247  {
248  cudaCheck(cudaMemcpyAsync(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice, stream));
249 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy_DtoD_sync()

template<class T >
void copy_DtoD_sync ( const T *  d_src,
T *  h_dst,
const size_t  array_len 
)

Definition at line 460 of file CudaUtils.h.

References copy_DtoD_T().

460  {
461  copy_DtoD_T(d_src, h_dst, array_len, sizeof(T));
462 }
void copy_DtoD_T(const void *d_src, void *d_dst, const size_t array_len, const size_t sizeofT)
Definition: CudaUtils.C:251

◆ copy_DtoD_T()

void copy_DtoD_T ( const void *  d_src,
void *  d_dst,
const size_t  array_len,
const size_t  sizeofT 
)

Definition at line 251 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD_sync().

251  {
252  cudaCheck(cudaMemcpy(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice));
253 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy_DtoH()

template<class T >
void copy_DtoH ( const T *  d_array,
T *  h_array,
const size_t  array_len,
cudaStream_t  stream = 0 
)

Definition at line 436 of file CudaUtils.h.

References copy_DtoH_async_T().

436  {
437  copy_DtoH_async_T(d_array, h_array, array_len, stream, sizeof(T));
438 }
void copy_DtoH_async_T(const void *d_array, void *h_array, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
Definition: CudaUtils.C:233

◆ copy_DtoH_async_T()

void copy_DtoH_async_T ( const void *  d_array,
void *  h_array,
const size_t  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 233 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH().

234  {
235  cudaCheck(cudaMemcpyAsync(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost, stream));
236 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy_DtoH_sync()

template<class T >
void copy_DtoH_sync ( const T *  d_array,
T *  h_array,
const size_t  array_len 
)

Definition at line 444 of file CudaUtils.h.

References copy_DtoH_T().

444  {
445  copy_DtoH_T(d_array, h_array, array_len, sizeof(T));
446 }
void copy_DtoH_T(const void *d_array, void *h_array, const size_t array_len, const size_t sizeofT)
Definition: CudaUtils.C:238

◆ copy_DtoH_T()

void copy_DtoH_T ( const void *  d_array,
void *  h_array,
const size_t  array_len,
const size_t  sizeofT 
)

Definition at line 238 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH_sync().

238  {
239  cudaCheck(cudaMemcpy(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost));
240 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy_HtoD()

template<class T >
void copy_HtoD ( const T *  h_array,
T *  d_array,
size_t  array_len,
cudaStream_t  stream = 0 
)

Definition at line 418 of file CudaUtils.h.

References copy_HtoD_async_T().

Referenced by ComputeLonepairsCUDA::updateAtoms().

418  {
419  copy_HtoD_async_T(h_array, d_array, array_len, stream, sizeof(T));
420 }
void copy_HtoD_async_T(const void *h_array, void *d_array, size_t array_len, cudaStream_t stream, const size_t sizeofT)
Definition: CudaUtils.C:219

◆ copy_HtoD_async_T()

void copy_HtoD_async_T ( const void *  h_array,
void *  d_array,
size_t  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 219 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_HtoD().

220  {
221  cudaCheck(cudaMemcpyAsync(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice, stream));
222 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy_HtoD_sync()

template<class T >
void copy_HtoD_sync ( const T *  h_array,
T *  d_array,
size_t  array_len 
)

Definition at line 427 of file CudaUtils.h.

References copy_HtoD_T().

427  {
428  copy_HtoD_T(h_array, d_array, array_len, sizeof(T));
429 }
void copy_HtoD_T(const void *h_array, void *d_array, size_t array_len, const size_t sizeofT)
Definition: CudaUtils.C:224

◆ copy_HtoD_T()

void copy_HtoD_T ( const void *  h_array,
void *  d_array,
size_t  array_len,
const size_t  sizeofT 
)

Definition at line 224 of file CudaUtils.C.

References cudaCheck.

Referenced by bindTextureObject(), and copy_HtoD_sync().

225  {
226  cudaCheck(cudaMemcpy(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice));
227 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ copy_PeerDtoD()

template<class T >
void copy_PeerDtoD ( const int  src_dev,
const int  dst_dev,
const T *  d_src,
T *  d_dst,
const size_t  array_len,
cudaStream_t  stream = 0 
)

Definition at line 473 of file CudaUtils.h.

References copy_PeerDtoD_async_T().

474  {
475  copy_PeerDtoD_async_T(src_dev, dst_dev, d_src, d_dst, array_len, stream, sizeof(T));
476 }
void copy_PeerDtoD_async_T(const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
Definition: CudaUtils.C:259

◆ copy_PeerDtoD_async_T()

void copy_PeerDtoD_async_T ( const int  src_dev,
const int  dst_dev,
const void *  d_src,
void *  d_dst,
const size_t  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 259 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_PeerDtoD().

261  {
262  cudaCheck(cudaMemcpyPeerAsync(d_dst, dst_dev, d_src, src_dev, sizeofT*array_len, stream));
263 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ cudaDie()

void cudaDie ( const char *  msg,
cudaError_t  err = cudaSuccess 
)

Definition at line 9 of file CudaUtils.C.

References NAMD_die().

Referenced by copy3D_PeerDtoD_T(), cuda_check_pme_charges(), cuda_check_pme_forces(), and DeviceCUDA::initialize().

9  {
10  char host[128];
11  gethostname(host, 128); host[127] = 0;
12  char devstr[128] = "";
13  int devnum;
14  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
15  sprintf(devstr, " device %d", devnum);
16  }
17  cudaDeviceProp deviceProp;
18  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
19  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
20  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
21  }
22  char errmsg[1024];
23  if (err == cudaSuccess) {
24  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s)", msg, CkMyPe(), host, devstr);
25  } else {
26  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
27  }
28  NAMD_die(errmsg);
29 }
void NAMD_die(const char *err_msg)
Definition: common.C:147

◆ cudaNAMD_bug()

void cudaNAMD_bug ( const char *  msg)

Definition at line 53 of file CudaUtils.C.

References NAMD_bug().

Referenced by CudaFFTCompute::backward(), and CudaFFTCompute::forward().

53 {NAMD_bug(msg);}
void NAMD_bug(const char *err_msg)
Definition: common.C:195

◆ curandDie()

void curandDie ( const char *  msg,
int  err = 0 
)

Definition at line 31 of file CudaUtils.C.

References NAMD_die().

31  {
32  char host[128];
33  gethostname(host, 128); host[127] = 0;
34  char devstr[128] = "";
35  int devnum;
36  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
37  sprintf(devstr, " device %d", devnum);
38  }
39  cudaDeviceProp deviceProp;
40  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
41  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
42  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
43  }
44  char errmsg[1024];
45  if (err == cudaSuccess) {
46  sprintf(errmsg,"CUDA cuRAND error %s on Pe %d (%s%s)", msg, CkMyPe(), host, devstr);
47  } else {
48  sprintf(errmsg,"CUDA cuRAND error %s on Pe %d (%s%s): status value %d", msg, CkMyPe(), host, devstr, err);
49  }
50  NAMD_die(errmsg);
51 }
void NAMD_die(const char *err_msg)
Definition: common.C:147

◆ deallocate_device()

template<class T >
void deallocate_device ( T **  pp)

Definition at line 342 of file CudaUtils.h.

References deallocate_device_T().

Referenced by ComputeLonepairsCUDA::~ComputeLonepairsCUDA(), and CudaNonbondedTables::~CudaNonbondedTables().

342  {
343  deallocate_device_T((void **)pp);
344 }
void deallocate_device_T(void **pp)
Definition: CudaUtils.C:118

◆ deallocate_device_async()

template<class T >
void deallocate_device_async ( T **  pp,
cudaStream_t  stream 
)

Definition at line 346 of file CudaUtils.h.

References deallocate_device_T_async().

Referenced by ComputeLonepairsCUDA::updateAtoms().

346  {
347  deallocate_device_T_async((void **)pp, stream);
348 }
void deallocate_device_T_async(void **pp, cudaStream_t stream)
Definition: CudaUtils.C:127

◆ deallocate_device_T()

void deallocate_device_T ( void **  pp)

Definition at line 118 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_device(), and deallocate_device_T_async().

118  {
119 
120  if (*pp != NULL) {
121  cudaCheck(cudaFree((void *)(*pp)));
122  *pp = NULL;
123  }
124 
125 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ deallocate_device_T_async()

void deallocate_device_T_async ( void **  pp,
cudaStream_t  stream 
)

Definition at line 127 of file CudaUtils.C.

References cudaCheck, and deallocate_device_T().

Referenced by deallocate_device_async().

127  {
128 #if (CUDART_VERSION >= 11020)
129  if (*pp != NULL) {
130  cudaCheck(cudaFreeAsync((void *)(*pp), stream));
131  *pp = NULL;
132  }
133 #else
134  deallocate_device_T(pp);
135 #endif
136 }
void deallocate_device_T(void **pp)
Definition: CudaUtils.C:118
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ deallocate_host()

template<class T >
void deallocate_host ( T **  pp)

Definition at line 396 of file CudaUtils.h.

References deallocate_host_T().

Referenced by CudaComputeNonbonded::~CudaComputeNonbonded().

396  {
397  deallocate_host_T((void **)pp);
398 }
void deallocate_host_T(void **pp)
Definition: CudaUtils.C:142

◆ deallocate_host_T()

void deallocate_host_T ( void **  pp)

Definition at line 142 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_host().

142  {
143 
144  if (*pp != NULL) {
145  cudaCheck(cudaFreeHost((void *)(*pp)));
146  *pp = NULL;
147  }
148 
149 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ reallocate_device()

template<class T >
bool reallocate_device ( T **  pp,
size_t *  curlen,
const size_t  newlen,
const float  fac = 1.0f 
)

Definition at line 364 of file CudaUtils.h.

References reallocate_device_T().

364  {
365  return reallocate_device_T((void **)pp, curlen, newlen, fac, sizeof(T));
366 }
bool reallocate_device_T(void **pp, size_t *curlen, const size_t newlen, const float fac, const size_t sizeofT)
Definition: CudaUtils.C:161

◆ reallocate_device_T()

bool reallocate_device_T ( void **  pp,
size_t *  curlen,
const size_t  newlen,
const float  fac,
const size_t  sizeofT 
)

Definition at line 161 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_device().

161  {
162 
163  if (*pp != NULL && *curlen < newlen) {
164  cudaCheck(cudaFree((void *)(*pp)));
165  *pp = NULL;
166  }
167 
168  if (*pp == NULL) {
169  if (fac > 1.0f) {
170  *curlen = (size_t)(((double)(newlen))*(double)fac);
171  } else {
172  *curlen = newlen;
173  }
174  cudaCheck(cudaMalloc(pp, sizeofT*(*curlen)));
175  return true;
176  }
177 
178  return false;
179 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

◆ reallocate_host()

template<class T >
bool reallocate_host ( T **  pp,
size_t *  curlen,
const size_t  newlen,
const float  fac = 1.0f,
const unsigned int  flag = cudaHostAllocDefault 
)

Definition at line 384 of file CudaUtils.h.

References reallocate_host_T().

385  {
386  return reallocate_host_T((void **)pp, curlen, newlen, fac, flag, sizeof(T));
387 }
bool reallocate_host_T(void **pp, size_t *curlen, const size_t newlen, const float fac, const unsigned int flag, const size_t sizeofT)
Definition: CudaUtils.C:194

◆ reallocate_host_T()

bool reallocate_host_T ( void **  pp,
size_t *  curlen,
const size_t  newlen,
const float  fac,
const unsigned int  flag,
const size_t  sizeofT 
)

Definition at line 194 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_host().

195  {
196 
197  if (*pp != NULL && *curlen < newlen) {
198  cudaCheck(cudaFreeHost((void *)(*pp)));
199  *pp = NULL;
200  }
201 
202  if (*pp == NULL) {
203  if (fac > 1.0f) {
204  *curlen = (size_t)(((double)(newlen))*(double)fac);
205  } else {
206  *curlen = newlen;
207  }
208  cudaCheck(cudaHostAlloc(pp, sizeofT*(*curlen), flag));
209  return true;
210  }
211 
212  return false;
213 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242