NAMD
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include "HipDefines.h"
#include <iostream>
Go to the source code of this file.
Classes
struct cudaTensor
struct cudaVector
struct CudaMInfo
struct CudaStaticAssert< bool >
struct CudaStaticAssert< true >
struct CudaNBConstants
Macros
#define WARPSIZE 32
#define BOUNDINGBOXSIZE 32
#define FORCE_ENERGY_TABLE_SIZE 4096
#define COPY_CUDATENSOR(S, D)
#define COPY_CUDAVECTOR(S, D)
#define PRINT_CUDATENSOR(T, SS)
#define ATOMIC_BINS 1
#define FEP_BONDED_CUDA_DEBUG
#define WARP_FULL_MASK 0xffffffff
#define WARP_SHUFFLE_XOR(MASK, VAR, LANE, SIZE) __shfl_xor(VAR, LANE, SIZE)
#define WARP_SHUFFLE_UP(MASK, VAR, DELTA, SIZE) __shfl_up(VAR, DELTA, SIZE)
#define WARP_SHUFFLE_DOWN(MASK, VAR, DELTA, SIZE) __shfl_down(VAR, DELTA, SIZE)
#define WARP_SHUFFLE(MASK, VAR, LANE, SIZE) __shfl(VAR, LANE, SIZE)
#define WARP_ALL(MASK, P) __all(P)
#define WARP_ANY(MASK, P) __any(P)
#define WARP_BALLOT(MASK, P) __ballot(P)
#define WARP_SYNC(MASK)
#define BLOCK_SYNC __syncthreads()
#define NAMD_WARP_SYNC(MASK) WARP_SYNC(MASK)
#define cuda_static_assert(expr) (CudaStaticAssert<(expr) != 0>())
#define cudaCheck(stmt)
#define curandCheck(stmt)
Typedefs
typedef unsigned int WarpMask
typedef double BigReal
Functions | |
void | cudaDie (const char *msg, cudaError_t err=cudaSuccess) |
void | curandDie (const char *msg, int err=0) |
void | cudaNAMD_bug (const char *msg) |
void | clear_device_array_async_T (void *data, const size_t ndata, cudaStream_t stream, const size_t sizeofT) |
void | clear_device_array_T (void *data, const size_t ndata, const size_t sizeofT) |
template<class T > | |
void | clear_device_array (T *data, const size_t ndata, cudaStream_t stream=0) |
template<class T > | |
void | clear_device_array_sync (T *data, const size_t ndata) |
void | allocate_host_T (void **pp, const size_t len, const size_t sizeofT) |
template<class T > | |
void | allocate_host (T **pp, const size_t len) |
void | allocate_device_T (void **pp, const size_t len, const size_t sizeofT) |
void | allocate_device_T_managed (void **pp, const size_t len, const size_t sizeofT) |
void | allocate_device_T_async (void **pp, const size_t len, const size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | allocate_device (T **pp, const size_t len) |
template<class T > | |
void | allocate_device_managed (T **pp, const size_t len) |
template<class T > | |
void | allocate_device_async (T **pp, const size_t len, cudaStream_t stream) |
void | deallocate_device_T (void **pp) |
void | deallocate_device_T_async (void **pp, cudaStream_t stream) |
template<class T > | |
void | deallocate_device (T **pp) |
template<class T > | |
void | deallocate_device_async (T **pp, cudaStream_t stream) |
bool | reallocate_device_T (void **pp, size_t *curlen, const size_t newlen, const float fac, const size_t sizeofT) |
template<class T > | |
bool | reallocate_device (T **pp, size_t *curlen, const size_t newlen, const float fac=1.0f) |
bool | reallocate_host_T (void **pp, size_t *curlen, const size_t newlen, const float fac, const unsigned int flag, const size_t sizeofT) |
template<class T > | |
bool | reallocate_host (T **pp, size_t *curlen, const size_t newlen, const float fac=1.0f, const unsigned int flag=cudaHostAllocDefault) |
void | deallocate_host_T (void **pp) |
template<class T > | |
void | deallocate_host (T **pp) |
void | copy_HtoD_async_T (const void *h_array, void *d_array, size_t array_len, cudaStream_t stream, const size_t sizeofT) |
void | copy_HtoD_T (const void *h_array, void *d_array, size_t array_len, const size_t sizeofT) |
void | copy_DtoH_async_T (const void *d_array, void *h_array, const size_t array_len, cudaStream_t stream, const size_t sizeofT) |
void | copy_DtoH_T (const void *d_array, void *h_array, const size_t array_len, const size_t sizeofT) |
void | copy_DtoD_async_T (const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT) |
void | copy_DtoD_T (const void *d_src, void *d_dst, const size_t array_len, const size_t sizeofT) |
template<class T > | |
void | copy_HtoD (const T *h_array, T *d_array, size_t array_len, cudaStream_t stream=0) |
template<class T > | |
void | copy_HtoD_sync (const T *h_array, T *d_array, size_t array_len) |
template<class T > | |
void | copy_DtoH (const T *d_array, T *h_array, const size_t array_len, cudaStream_t stream=0) |
template<class T > | |
void | copy_DtoH_sync (const T *d_array, T *h_array, const size_t array_len) |
template<class T > | |
void | copy_DtoD (const T *d_src, T *h_dst, const size_t array_len, cudaStream_t stream=0) |
template<class T > | |
void | copy_DtoD_sync (const T *d_src, T *h_dst, const size_t array_len) |
void | copy_PeerDtoD_async_T (const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT) |
template<class T > | |
void | copy_PeerDtoD (const int src_dev, const int dst_dev, const T *d_src, T *d_dst, const size_t array_len, cudaStream_t stream=0) |
void | copy3D_HtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_HtoD (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
void | copy3D_DtoH_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_DtoH (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
void | copy3D_DtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_DtoD (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
void | copy3D_PeerDtoD_T (int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_PeerDtoD (int src_dev, int dst_dev, T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
#define ATOMIC_BINS 1 |
Definition at line 70 of file CudaUtils.h.
Referenced by CudaComputeNonbonded::initialize().
#define BLOCK_SYNC __syncthreads() |
Definition at line 192 of file CudaUtils.h.
Referenced by GBIS_P1_Kernel(), GBIS_P2_Kernel(), and GBIS_P3_Kernel().
#define BOUNDINGBOXSIZE 32 |
Definition at line 18 of file CudaUtils.h.
#define COPY_CUDATENSOR(S, D)
Definition at line 42 of file CudaUtils.h.
#define COPY_CUDAVECTOR(S, D)
Definition at line 53 of file CudaUtils.h.
#define cuda_static_assert(expr) (CudaStaticAssert<(expr) != 0>())
Definition at line 222 of file CudaUtils.h.
#define cudaCheck(stmt)
Definition at line 233 of file CudaUtils.h.
Referenced by allocate_device_T(), allocate_device_T_async(), allocate_device_T_managed(), allocate_host_T(), CudaFFTCompute::backward(), bindTextureObject(), clear_device_array_async_T(), clear_device_array_T(), CudaPmeOneDevice::compute(), copy3D_DtoD_T(), copy3D_DtoH_T(), copy3D_HtoD_T(), copy3D_PeerDtoD_T(), copy_DtoD_async_T(), copy_DtoD_T(), copy_DtoH_async_T(), copy_DtoH_T(), copy_HtoD_async_T(), copy_HtoD_T(), copy_PeerDtoD_async_T(), CudaPmeRealSpaceCompute::copyAtoms(), CudaPmeTranspose::copyDataDeviceToDevice(), CudaPmeTranspose::copyDataDeviceToHost(), CudaPmeTranspose::copyDataHostToDevice(), createStream(), CudaComputeNonbonded::CudaComputeNonbonded(), CudaNonbondedTables::CudaNonbondedTables(), CudaPmeKSpaceCompute::CudaPmeKSpaceCompute(), CudaPmeOneDevice::CudaPmeOneDevice(), CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute(), CudaPmeTranspose::CudaPmeTranspose(), deallocate_device_T(), deallocate_device_T_async(), deallocate_host_T(), CudaPmeKSpaceCompute::energyAndVirialSetCallback(), CudaComputeNonbonded::finishPatches(), CudaPmeOneDevice::finishReduction(), CudaComputeNonbonded::finishReductions(), CudaFFTCompute::forward(), CudaPmeRealSpaceCompute::gatherForce(), CudaPmeRealSpaceCompute::gatherForceSetCallback(), DeviceCUDA::getMaxNumBlocks(), DeviceCUDA::getMaxNumThreads(), ComputeCUDAMgr::initialize(), DeviceCUDA::initialize(), CudaComputeNonbonded::initialize(), ComputePmeCUDADevice::initialize(), ComputePmeCUDAMgr::initialize_pencils(), CudaPmePencilXY::initializeDevice(), CudaPmePencilX::initializeDevice(), CudaPmePencilY::initializeDevice(), CudaPmePencilZ::initializeDevice(), CudaComputeNonbonded::launchWork(), reallocate_device_T(), reallocate_host_T(), CudaComputeNonbonded::reSortTileLists(), CudaPmeTranspose::setDataPtrsYZX(), CudaPmeTranspose::setDataPtrsZXY(), CudaPmeKSpaceCompute::solve(), CudaPmeRealSpaceCompute::spreadCharge(), CudaPmeTranspose::transposeXYZtoYZX(), CudaPmeTranspose::transposeXYZtoZXY(), 
ComputeLonepairsCUDA::updateAtoms(), CudaPmeRealSpaceCompute::waitGatherForceDone(), CudaPmeTranspose::waitStreamSynchronize(), writeComplexToDisk(), writeRealToDisk(), ComputePmeCUDADevice::~ComputePmeCUDADevice(), ComputePmeCUDAMgr::~ComputePmeCUDAMgr(), CudaComputeNonbonded::~CudaComputeNonbonded(), CudaFFTCompute::~CudaFFTCompute(), CudaNonbondedTables::~CudaNonbondedTables(), CudaPmeKSpaceCompute::~CudaPmeKSpaceCompute(), CudaPmeOneDevice::~CudaPmeOneDevice(), CudaPmePencilX::~CudaPmePencilX(), CudaPmePencilXY::~CudaPmePencilXY(), CudaPmePencilY::~CudaPmePencilY(), CudaPmePencilZ::~CudaPmePencilZ(), CudaPmeRealSpaceCompute::~CudaPmeRealSpaceCompute(), and CudaPmeTranspose::~CudaPmeTranspose().
#define curandCheck(stmt)
Definition at line 242 of file CudaUtils.h.
#define FEP_BONDED_CUDA_DEBUG |
Definition at line 127 of file CudaUtils.h.
#define FORCE_ENERGY_TABLE_SIZE 4096 |
Definition at line 40 of file CudaUtils.h.
Referenced by CudaNonbondedTables::CudaNonbondedTables().
#define NAMD_WARP_SYNC(MASK) WARP_SYNC(MASK)
Definition at line 197 of file CudaUtils.h.
#define PRINT_CUDATENSOR(T, SS)
Definition at line 58 of file CudaUtils.h.
#define WARP_ALL(MASK, P) __all(P)
Definition at line 188 of file CudaUtils.h.
#define WARP_ANY(MASK, P) __any(P)
Definition at line 189 of file CudaUtils.h.
#define WARP_BALLOT(MASK, P) __ballot(P)
Definition at line 190 of file CudaUtils.h.
#define WARP_FULL_MASK 0xffffffff |
Definition at line 149 of file CudaUtils.h.
Referenced by GBIS_P1_Kernel(), GBIS_P2_Kernel(), and GBIS_P3_Kernel().
#define WARP_SHUFFLE(MASK, VAR, LANE, SIZE) __shfl(VAR, LANE, SIZE)
Definition at line 178 of file CudaUtils.h.
Referenced by GBIS_P1_Kernel(), GBIS_P2_Kernel(), and GBIS_P3_Kernel().
#define WARP_SHUFFLE_DOWN(MASK, VAR, DELTA, SIZE) __shfl_down(VAR, DELTA, SIZE)
Definition at line 176 of file CudaUtils.h.
#define WARP_SHUFFLE_UP(MASK, VAR, DELTA, SIZE) __shfl_up(VAR, DELTA, SIZE)
Definition at line 174 of file CudaUtils.h.
#define WARP_SHUFFLE_XOR(MASK, VAR, LANE, SIZE) __shfl_xor(VAR, LANE, SIZE)
Definition at line 172 of file CudaUtils.h.
#define WARP_SYNC(MASK)
Definition at line 191 of file CudaUtils.h.
#define WARPSIZE 32 |
Definition at line 17 of file CudaUtils.h.
Referenced by CudaPmeOneDevice::compute(), GBIS_P1_Kernel(), GBIS_P2_Kernel(), GBIS_P3_Kernel(), and ComputeBondedCUDAKernel::warpAlign().
typedef double BigReal |
Definition at line 66 of file CudaUtils.h.
typedef unsigned int WarpMask |
Definition at line 19 of file CudaUtils.h.
template<class T> void allocate_device(T **pp, const size_t len)
Definition at line 311 of file CudaUtils.h.
References allocate_device_T().
template<class T> void allocate_device_async(T **pp, const size_t len, cudaStream_t stream)
Definition at line 321 of file CudaUtils.h.
References allocate_device_T_async().
Referenced by ComputeLonepairsCUDA::updateAtoms().
template<class T> void allocate_device_managed(T **pp, const size_t len)
Definition at line 316 of file CudaUtils.h.
References allocate_device_T_managed().
void allocate_device_T(void **pp, const size_t len, const size_t sizeofT)
Definition at line 97 of file CudaUtils.C.
References cudaCheck.
Referenced by allocate_device(), allocate_device_T_async(), and bindTextureObject().
void allocate_device_T_async(void **pp, const size_t len, const size_t sizeofT, cudaStream_t stream)
Definition at line 105 of file CudaUtils.C.
References allocate_device_T(), and cudaCheck.
Referenced by allocate_device_async().
void allocate_device_T_managed(void **pp, const size_t len, const size_t sizeofT)
Definition at line 101 of file CudaUtils.C.
References cudaCheck.
Referenced by allocate_device_managed().
template<class T> void allocate_host(T **pp, const size_t len)
Definition at line 296 of file CudaUtils.h.
References allocate_host_T().
void allocate_host_T(void **pp, const size_t len, const size_t sizeofT)
template<class T> void clear_device_array(T *data, const size_t ndata, cudaStream_t stream = 0)
Definition at line 279 of file CudaUtils.h.
References clear_device_array_async_T().
void clear_device_array_async_T(void *data, const size_t ndata, cudaStream_t stream, const size_t sizeofT)
Definition at line 73 of file CudaUtils.C.
References cudaCheck.
Referenced by clear_device_array().
template<class T> void clear_device_array_sync(T *data, const size_t ndata)
Definition at line 284 of file CudaUtils.h.
References clear_device_array_T().
void clear_device_array_T(void *data, const size_t ndata, const size_t sizeofT)
Definition at line 77 of file CudaUtils.C.
References cudaCheck.
Referenced by clear_device_array_sync().
template<class T> void copy3D_DtoD(T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream = 0)
Definition at line 540 of file CudaUtils.h.
References copy3D_DtoD_T().
void copy3D_DtoD_T(void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
template<class T> void copy3D_DtoH(T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream = 0)
Definition at line 511 of file CudaUtils.h.
References copy3D_DtoH_T().
void copy3D_DtoH_T(void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
template<class T> void copy3D_HtoD(T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream = 0)
Definition at line 482 of file CudaUtils.h.
References copy3D_HtoD_T().
void copy3D_HtoD_T(void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
template<class T> void copy3D_PeerDtoD(int src_dev, int dst_dev, T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream = 0)
Definition at line 570 of file CudaUtils.h.
References copy3D_PeerDtoD_T().
void copy3D_PeerDtoD_T(int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
Definition at line 344 of file CudaUtils.C.
References cudaCheck, and cudaDie().
Referenced by copy3D_PeerDtoD().
template<class T> void copy_DtoD(const T *d_src, T *h_dst, const size_t array_len, cudaStream_t stream = 0)
Definition at line 443 of file CudaUtils.h.
References copy_DtoD_async_T().
void copy_DtoD_async_T(const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
template<class T> void copy_DtoD_sync(const T *d_src, T *h_dst, const size_t array_len)
Definition at line 451 of file CudaUtils.h.
References copy_DtoD_T().
void copy_DtoD_T(const void *d_src, void *d_dst, const size_t array_len, const size_t sizeofT)
template<class T> void copy_DtoH(const T *d_array, T *h_array, const size_t array_len, cudaStream_t stream = 0)
Definition at line 427 of file CudaUtils.h.
References copy_DtoH_async_T().
void copy_DtoH_async_T(const void *d_array, void *h_array, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
Definition at line 233 of file CudaUtils.C.
References cudaCheck.
Referenced by copy_DtoH().
template<class T> void copy_DtoH_sync(const T *d_array, T *h_array, const size_t array_len)
Definition at line 435 of file CudaUtils.h.
References copy_DtoH_T().
void copy_DtoH_T(const void *d_array, void *h_array, const size_t array_len, const size_t sizeofT)
template<class T> void copy_HtoD(const T *h_array, T *d_array, size_t array_len, cudaStream_t stream = 0)
Definition at line 409 of file CudaUtils.h.
References copy_HtoD_async_T().
Referenced by ComputeLonepairsCUDA::updateAtoms().
void copy_HtoD_async_T(const void *h_array, void *d_array, size_t array_len, cudaStream_t stream, const size_t sizeofT)
Definition at line 219 of file CudaUtils.C.
References cudaCheck.
Referenced by copy_HtoD().
template<class T> void copy_HtoD_sync(const T *h_array, T *d_array, size_t array_len)
Definition at line 418 of file CudaUtils.h.
References copy_HtoD_T().
void copy_HtoD_T(const void *h_array, void *d_array, size_t array_len, const size_t sizeofT)
Definition at line 224 of file CudaUtils.C.
References cudaCheck.
Referenced by bindTextureObject(), and copy_HtoD_sync().
template<class T> void copy_PeerDtoD(const int src_dev, const int dst_dev, const T *d_src, T *d_dst, const size_t array_len, cudaStream_t stream = 0)
Definition at line 464 of file CudaUtils.h.
References copy_PeerDtoD_async_T().
void copy_PeerDtoD_async_T(const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const size_t array_len, cudaStream_t stream, const size_t sizeofT)
void cudaDie(const char *msg, cudaError_t err = cudaSuccess)
Definition at line 9 of file CudaUtils.C.
References NAMD_die().
Referenced by copy3D_PeerDtoD_T(), cuda_check_pme_charges(), cuda_check_pme_forces(), and DeviceCUDA::initialize().
void cudaNAMD_bug(const char *msg)
Definition at line 53 of file CudaUtils.C.
References NAMD_bug().
Referenced by CudaFFTCompute::backward(), and CudaFFTCompute::forward().
void curandDie(const char *msg, int err = 0)
Definition at line 31 of file CudaUtils.C.
References NAMD_die().
template<class T> void deallocate_device(T **pp)
Definition at line 333 of file CudaUtils.h.
References deallocate_device_T().
Referenced by ComputeLonepairsCUDA::~ComputeLonepairsCUDA().
template<class T> void deallocate_device_async(T **pp, cudaStream_t stream)
Definition at line 337 of file CudaUtils.h.
References deallocate_device_T_async().
Referenced by ComputeLonepairsCUDA::updateAtoms().
void deallocate_device_T(void **pp)
Definition at line 118 of file CudaUtils.C.
References cudaCheck.
Referenced by deallocate_device(), and deallocate_device_T_async().
void deallocate_device_T_async(void **pp, cudaStream_t stream)
Definition at line 127 of file CudaUtils.C.
References cudaCheck, and deallocate_device_T().
Referenced by deallocate_device_async().
template<class T> void deallocate_host(T **pp)
void deallocate_host_T(void **pp)
template<class T> bool reallocate_device(T **pp, size_t *curlen, const size_t newlen, const float fac = 1.0f)
Definition at line 355 of file CudaUtils.h.
References reallocate_device_T().
bool reallocate_device_T(void **pp, size_t *curlen, const size_t newlen, const float fac, const size_t sizeofT)
Definition at line 161 of file CudaUtils.C.
References cudaCheck.
Referenced by reallocate_device().
template<class T> bool reallocate_host(T **pp, size_t *curlen, const size_t newlen, const float fac = 1.0f, const unsigned int flag = cudaHostAllocDefault)
Definition at line 375 of file CudaUtils.h.
References reallocate_host_T().
bool reallocate_host_T(void **pp, size_t *curlen, const size_t newlen, const float fac, const unsigned int flag, const size_t sizeofT)