NAMD
|
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
Go to the source code of this file.
Classes | |
struct | CudaStaticAssert< bool > |
struct | CudaStaticAssert< true > |
Macros | |
#define | WARPSIZE 32 |
#define | FORCE_ENERGY_TABLE_SIZE 4096 |
#define | WARP_FULL_MASK 0xffffffff |
#define | ATOMIC_BINS 1 |
#define | WARP_SHUFFLE_XOR(MASK, VAR, LANE, SIZE) __shfl_xor(VAR, LANE, SIZE) |
#define | WARP_SHUFFLE_UP(MASK, VAR, DELTA, SIZE) __shfl_up(VAR, DELTA, SIZE) |
#define | WARP_SHUFFLE_DOWN(MASK, VAR, DELTA, SIZE) __shfl_down(VAR, DELTA, SIZE) |
#define | WARP_SHUFFLE(MASK, VAR, LANE, SIZE) __shfl(VAR, LANE, SIZE) |
#define | WARP_ALL(MASK, P) __all(P) |
#define | WARP_ANY(MASK, P) __any(P) |
#define | WARP_BALLOT(MASK, P) __ballot(P) |
#define | WARP_SYNC(MASK) |
#define | BLOCK_SYNC __syncthreads() |
#define | cuda_static_assert(expr) (CudaStaticAssert<(expr) != 0>()) |
#define | cudaCheck(stmt) |
Typedefs | |
typedef unsigned int | WarpMask |
Functions | |
void | cudaDie (const char *msg, cudaError_t err=cudaSuccess) |
void | cudaNAMD_bug (const char *msg) |
void | clear_device_array_async_T (void *data, const int ndata, cudaStream_t stream, const size_t sizeofT) |
void | clear_device_array_T (void *data, const int ndata, const size_t sizeofT) |
template<class T > | |
void | clear_device_array (T *data, const int ndata, cudaStream_t stream=0) |
template<class T > | |
void | clear_device_array_sync (T *data, const int ndata) |
void | allocate_host_T (void **pp, const int len, const size_t sizeofT) |
template<class T > | |
void | allocate_host (T **pp, const int len) |
void | allocate_device_T (void **pp, const int len, const size_t sizeofT) |
template<class T > | |
void | allocate_device (T **pp, const int len) |
void | deallocate_device_T (void **pp) |
template<class T > | |
void | deallocate_device (T **pp) |
bool | reallocate_device_T (void **pp, int *curlen, const int newlen, const float fac, const size_t sizeofT) |
template<class T > | |
bool | reallocate_device (T **pp, int *curlen, const int newlen, const float fac=1.0f) |
bool | reallocate_host_T (void **pp, int *curlen, const int newlen, const float fac, const unsigned int flag, const size_t sizeofT) |
template<class T > | |
bool | reallocate_host (T **pp, int *curlen, const int newlen, const float fac=1.0f, const unsigned int flag=cudaHostAllocDefault) |
void | deallocate_host_T (void **pp) |
template<class T > | |
void | deallocate_host (T **pp) |
void | copy_HtoD_async_T (const void *h_array, void *d_array, int array_len, cudaStream_t stream, const size_t sizeofT) |
void | copy_HtoD_T (const void *h_array, void *d_array, int array_len, const size_t sizeofT) |
void | copy_DtoH_async_T (const void *d_array, void *h_array, const int array_len, cudaStream_t stream, const size_t sizeofT) |
void | copy_DtoH_T (const void *d_array, void *h_array, const int array_len, const size_t sizeofT) |
void | copy_DtoD_async_T (const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT) |
void | copy_DtoD_T (const void *d_src, void *d_dst, const int array_len, const size_t sizeofT) |
template<class T > | |
void | copy_HtoD (const T *h_array, T *d_array, int array_len, cudaStream_t stream=0) |
template<class T > | |
void | copy_HtoD_sync (const T *h_array, T *d_array, int array_len) |
template<class T > | |
void | copy_DtoH (const T *d_array, T *h_array, const int array_len, cudaStream_t stream=0) |
template<class T > | |
void | copy_DtoH_sync (const T *d_array, T *h_array, const int array_len) |
template<class T > | |
void | copy_DtoD (const T *d_src, T *h_dst, const int array_len, cudaStream_t stream=0) |
template<class T > | |
void | copy_DtoD_sync (const T *d_src, T *h_dst, const int array_len) |
void | copy_PeerDtoD_async_T (const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT) |
template<class T > | |
void | copy_PeerDtoD (const int src_dev, const int dst_dev, const T *d_src, T *d_dst, const int array_len, cudaStream_t stream=0) |
void | copy3D_HtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_HtoD (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
void | copy3D_DtoH_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_DtoH (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
void | copy3D_DtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_DtoD (T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
void | copy3D_PeerDtoD_T (int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream) |
template<class T > | |
void | copy3D_PeerDtoD (int src_dev, int dst_dev, T *src_data, T *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, cudaStream_t stream=0) |
#define ATOMIC_BINS 1 |
Definition at line 24 of file CudaUtils.h.
Referenced by ComputeBondedCUDAKernel::bondedForce(), bondedForcesKernel(), ComputeBondedCUDAKernel::ComputeBondedCUDAKernel(), CudaComputeNonbonded::initialize(), modifiedExclusionForcesKernel(), reduceBondedBinsKernel(), reduceGBISEnergyKernel(), reduceNonbondedBinsKernel(), reduceNonbondedVirialKernel(), CudaComputeNonbondedKernel::reduceVirialEnergy(), and reduceVirialEnergyKernel().
#define BLOCK_SYNC __syncthreads() |
Definition at line 60 of file CudaUtils.h.
#define cuda_static_assert(expr) (CudaStaticAssert<(expr) != 0>())
Definition at line 85 of file CudaUtils.h.
Referenced by reduceVariables().
#define cudaCheck(stmt)
Definition at line 95 of file CudaUtils.h.
Referenced by allocate_device_T(), allocate_host_T(), CudaFFTCompute::backward(), batchTranspose_xyz_yzx(), batchTranspose_xyz_zxy(), CudaComputeNonbondedKernel::bindExclusions(), bindTextureObject(), ComputeBondedCUDAKernel::bondedForce(), CudaTileListKernel::buildTileLists(), clear_device_array_async_T(), clear_device_array_T(), ComputeBondedCUDAKernel::ComputeBondedCUDAKernel(), copy3D_DtoD_T(), copy3D_DtoH_T(), copy3D_HtoD_T(), copy3D_PeerDtoD_T(), copy_DtoD_async_T(), copy_DtoD_T(), copy_DtoH_async_T(), copy_DtoH_T(), copy_HtoD_async_T(), copy_HtoD_T(), copy_PeerDtoD_async_T(), CudaPmeRealSpaceCompute::copyAtoms(), CudaPmeTranspose::copyDataDeviceToDevice(), CudaPmeTranspose::copyDataDeviceToHost(), CudaPmeTranspose::copyDataHostToDevice(), createStream(), CudaComputeGBISKernel::CudaComputeGBISKernel(), CudaComputeNonbonded::CudaComputeNonbonded(), CudaComputeNonbondedKernel::CudaComputeNonbondedKernel(), CudaNonbondedTables::CudaNonbondedTables(), CudaPmeKSpaceCompute::CudaPmeKSpaceCompute(), CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute(), CudaPmeTranspose::CudaPmeTranspose(), CudaTileListKernel::CudaTileListKernel(), deallocate_device_T(), deallocate_host_T(), ComputeNonbondedCUDA::doWork(), CudaPmeKSpaceCompute::energyAndVirialSetCallback(), CudaComputeNonbonded::finishReductions(), CudaTileListKernel::finishTileList(), CudaFFTCompute::forward(), gather_force(), CudaPmeRealSpaceCompute::gatherForce(), CudaPmeRealSpaceCompute::gatherForceSetCallback(), CudaComputeGBISKernel::GBISphase1(), CudaComputeGBISKernel::GBISphase2(), CudaComputeGBISKernel::GBISphase3(), DeviceCUDA::getMaxNumBlocks(), DeviceCUDA::getMaxNumThreads(), DeviceCUDA::initialize(), CudaComputeNonbonded::initialize(), ComputePmeCUDADevice::initialize(), ComputePmeCUDAMgr::initialize_pencils(), CudaPmePencilXY::initializeDevice(), CudaPmePencilX::initializeDevice(), CudaPmePencilY::initializeDevice(), CudaPmePencilZ::initializeDevice(), CudaComputeNonbonded::launchWork(), 
CudaComputeNonbondedKernel::nonbondedForce(), read_CUDA_ARCH(), reallocate_device_T(), reallocate_host_T(), CudaComputeNonbondedKernel::reduceVirialEnergy(), CudaTileListKernel::reSortTileLists(), scalar_sum(), CudaPmeTranspose::setDataPtrsYZX(), CudaPmeTranspose::setDataPtrsZXY(), CudaPmeKSpaceCompute::solve(), spread_charge(), CudaPmeRealSpaceCompute::spreadCharge(), transpose_xyz_yzx(), transpose_xyz_zxy(), CudaPmeTranspose::transposeXYZtoYZX(), CudaPmeTranspose::transposeXYZtoZXY(), CudaPmeRealSpaceCompute::waitGatherForceDone(), CudaPmeTranspose::waitStreamSynchronize(), writeComplexToDisk(), writeRealToDisk(), ComputeBondedCUDAKernel::~ComputeBondedCUDAKernel(), ComputePmeCUDADevice::~ComputePmeCUDADevice(), ComputePmeCUDAMgr::~ComputePmeCUDAMgr(), CudaComputeGBISKernel::~CudaComputeGBISKernel(), CudaComputeNonbonded::~CudaComputeNonbonded(), CudaComputeNonbondedKernel::~CudaComputeNonbondedKernel(), CudaFFTCompute::~CudaFFTCompute(), CudaNonbondedTables::~CudaNonbondedTables(), CudaPmeKSpaceCompute::~CudaPmeKSpaceCompute(), CudaPmePencilX::~CudaPmePencilX(), CudaPmePencilXY::~CudaPmePencilXY(), CudaPmePencilY::~CudaPmePencilY(), CudaPmePencilZ::~CudaPmePencilZ(), CudaPmeRealSpaceCompute::~CudaPmeRealSpaceCompute(), CudaPmeTranspose::~CudaPmeTranspose(), and CudaTileListKernel::~CudaTileListKernel().
#define FORCE_ENERGY_TABLE_SIZE 4096 |
Definition at line 19 of file CudaUtils.h.
Referenced by CudaNonbondedTables::CudaNonbondedTables(), and sampleTableTex().
#define WARP_ALL(MASK, P) __all(P)
Definition at line 56 of file CudaUtils.h.
Referenced by storeForces().
#define WARP_ANY(MASK, P) __any(P)
Definition at line 57 of file CudaUtils.h.
Referenced by void().
#define WARP_BALLOT(MASK, P) __ballot(P)
Definition at line 58 of file CudaUtils.h.
Referenced by gather_force(), repackTileListsKernel(), and storeForces().
#define WARP_FULL_MASK 0xffffffff |
Definition at line 21 of file CudaUtils.h.
Referenced by bondedForcesKernel(), buildTileListsBBKernel(), cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), cuda_pme_forces_dev(), gather_force(), GBIS_Kernel(), GBIS_P1_Kernel(), GBIS_P2_Kernel(), GBIS_P3_Kernel(), modifiedExclusionForcesKernel(), reduceVariables(), repackTileListsKernel(), scalar_sum_kernel(), shuffleNext(), GBISInput< 1 >::shuffleNext(), GBISResults< 1 >::shuffleNext(), GBISInput< 2 >::shuffleNext(), GBISResults< 2 >::shuffleNext(), GBISInput< 3 >::shuffleNext(), GBISResults< 3 >::shuffleNext(), storeForces(), and void().
#define WARP_SHUFFLE(MASK, VAR, LANE, SIZE) __shfl(VAR, LANE, SIZE)
Definition at line 54 of file CudaUtils.h.
Referenced by gather_force(), GBIS_Kernel(), GBIS_P1_Kernel(), GBIS_P2_Kernel(), GBIS_P3_Kernel(), scalar_sum_kernel(), shuffleNext(), GBISInput< 1 >::shuffleNext(), GBISResults< 1 >::shuffleNext(), GBISInput< 2 >::shuffleNext(), GBISResults< 2 >::shuffleNext(), GBISInput< 3 >::shuffleNext(), GBISResults< 3 >::shuffleNext(), storeForces(), void(), and warpAggregatingAtomicInc().
#define WARP_SHUFFLE_DOWN(MASK, VAR, DELTA, SIZE) __shfl_down(VAR, DELTA, SIZE)
Definition at line 52 of file CudaUtils.h.
#define WARP_SHUFFLE_UP(MASK, VAR, DELTA, SIZE) __shfl_up(VAR, DELTA, SIZE)
Definition at line 50 of file CudaUtils.h.
#define WARP_SHUFFLE_XOR(MASK, VAR, LANE, SIZE) __shfl_xor(VAR, LANE, SIZE)
Definition at line 48 of file CudaUtils.h.
Referenced by bondedForcesKernel(), modifiedExclusionForcesKernel(), reduceVariables(), and void().
#define WARP_SYNC(MASK)
Definition at line 59 of file CudaUtils.h.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define WARPSIZE 32 |
Definition at line 10 of file CudaUtils.h.
Referenced by ComputeBondedCUDAKernel::bondedForce(), bondedForcesKernel(), buildBoundingBoxesKernel(), CudaTileListKernel::buildTileLists(), buildTileListsBBKernel(), calcTileListPosKernel(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_nonbonded_forces(), cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), cuda_pme_forces_dev(), ComputeNonbondedCUDA::doWork(), fillSortKeys(), GBIS_Kernel(), GBIS_P1_Kernel(), GBIS_P2_Kernel(), GBIS_P3_Kernel(), CudaComputeGBISKernel::GBISphase1(), CudaComputeGBISKernel::GBISphase2(), CudaComputeGBISKernel::GBISphase3(), modifiedExclusionForcesKernel(), partition(), reduceVariables(), CudaComputeNonbondedKernel::reduceVirialEnergy(), repackTileListsKernel(), scalar_sum(), scalar_sum_kernel(), shuffleNext(), GBISInput< 1 >::shuffleNext(), GBISResults< 1 >::shuffleNext(), GBISInput< 2 >::shuffleNext(), GBISResults< 2 >::shuffleNext(), GBISInput< 3 >::shuffleNext(), GBISResults< 3 >::shuffleNext(), spread_charge(), spread_charge_kernel(), storeForces(), updatePatchesKernel(), void(), warpAggregatingAtomicInc(), and ComputeBondedCUDAKernel::warpAlign().
typedef unsigned int WarpMask |
Definition at line 11 of file CudaUtils.h.
void allocate_device | ( | T ** | pp, |
const int | len | ||
) |
Definition at line 162 of file CudaUtils.h.
References allocate_device_T().
void allocate_device_T (void **pp, const int len, const size_t sizeofT)
Definition at line 75 of file CudaUtils.C.
References cudaCheck.
Referenced by allocate_device(), and bindTextureObject().
void allocate_host | ( | T ** | pp, |
const int | len | ||
) |
Definition at line 149 of file CudaUtils.h.
References allocate_host_T().
void clear_device_array | ( | T * | data, |
const int | ndata, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 132 of file CudaUtils.h.
References clear_device_array_async_T(), and stream.
void clear_device_array_async_T | ( | void * | data, |
const int | ndata, | ||
cudaStream_t | stream, | ||
const size_t | sizeofT | ||
) |
Definition at line 51 of file CudaUtils.C.
References cudaCheck.
Referenced by clear_device_array().
void clear_device_array_sync | ( | T * | data, |
const int | ndata | ||
) |
Definition at line 137 of file CudaUtils.h.
References clear_device_array_T().
void clear_device_array_T (void *data, const int ndata, const size_t sizeofT)
Definition at line 55 of file CudaUtils.C.
References cudaCheck.
Referenced by clear_device_array_sync().
void copy3D_DtoD | ( | T * | src_data, |
T * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 376 of file CudaUtils.h.
References copy3D_DtoD_T(), and stream.
void copy3D_DtoD_T | ( | void * | src_data, |
void * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
size_t | sizeofT, | ||
cudaStream_t | stream | ||
) |
void copy3D_DtoH | ( | T * | src_data, |
T * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 347 of file CudaUtils.h.
References copy3D_DtoH_T(), and stream.
void copy3D_DtoH_T | ( | void * | src_data, |
void * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
size_t | sizeofT, | ||
cudaStream_t | stream | ||
) |
void copy3D_HtoD | ( | T * | src_data, |
T * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 318 of file CudaUtils.h.
References copy3D_HtoD_T(), and stream.
void copy3D_HtoD_T | ( | void * | src_data, |
void * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
size_t | sizeofT, | ||
cudaStream_t | stream | ||
) |
void copy3D_PeerDtoD | ( | int | src_dev, |
int | dst_dev, | ||
T * | src_data, | ||
T * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 406 of file CudaUtils.h.
References copy3D_PeerDtoD_T(), and stream.
void copy3D_PeerDtoD_T | ( | int | src_dev, |
int | dst_dev, | ||
void * | src_data, | ||
void * | dst_data, | ||
int | src_x0, | ||
int | src_y0, | ||
int | src_z0, | ||
size_t | src_xsize, | ||
size_t | src_ysize, | ||
int | dst_x0, | ||
int | dst_y0, | ||
int | dst_z0, | ||
size_t | dst_xsize, | ||
size_t | dst_ysize, | ||
size_t | width, | ||
size_t | height, | ||
size_t | depth, | ||
size_t | sizeofT, | ||
cudaStream_t | stream | ||
) |
Definition at line 315 of file CudaUtils.C.
References cudaCheck, and cudaDie().
Referenced by copy3D_PeerDtoD().
void copy_DtoD | ( | const T * | d_src, |
T * | h_dst, | ||
const int | array_len, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 279 of file CudaUtils.h.
References copy_DtoD_async_T(), and stream.
void copy_DtoD_async_T | ( | const void * | d_src, |
void * | d_dst, | ||
const int | array_len, | ||
cudaStream_t | stream, | ||
const size_t | sizeofT | ||
) |
void copy_DtoD_sync | ( | const T * | d_src, |
T * | h_dst, | ||
const int | array_len | ||
) |
Definition at line 287 of file CudaUtils.h.
References copy_DtoD_T().
void copy_DtoH | ( | const T * | d_array, |
T * | h_array, | ||
const int | array_len, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 263 of file CudaUtils.h.
References copy_DtoH_async_T(), and stream.
void copy_DtoH_async_T | ( | const void * | d_array, |
void * | h_array, | ||
const int | array_len, | ||
cudaStream_t | stream, | ||
const size_t | sizeofT | ||
) |
Definition at line 189 of file CudaUtils.C.
References cudaCheck.
Referenced by copy_DtoH().
void copy_DtoH_sync | ( | const T * | d_array, |
T * | h_array, | ||
const int | array_len | ||
) |
Definition at line 271 of file CudaUtils.h.
References copy_DtoH_T().
void copy_DtoH_T | ( | const void * | d_array, |
void * | h_array, | ||
const int | array_len, | ||
const size_t | sizeofT | ||
) |
void copy_HtoD | ( | const T * | h_array, |
T * | d_array, | ||
int | array_len, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 245 of file CudaUtils.h.
References copy_HtoD_async_T(), and stream.
void copy_HtoD_async_T | ( | const void * | h_array, |
void * | d_array, | ||
int | array_len, | ||
cudaStream_t | stream, | ||
const size_t | sizeofT | ||
) |
Definition at line 175 of file CudaUtils.C.
References cudaCheck.
Referenced by copy_HtoD().
void copy_HtoD_sync | ( | const T * | h_array, |
T * | d_array, | ||
int | array_len | ||
) |
Definition at line 254 of file CudaUtils.h.
References copy_HtoD_T().
void copy_HtoD_T (const void *h_array, void *d_array, int array_len, const size_t sizeofT)
Definition at line 180 of file CudaUtils.C.
References cudaCheck.
Referenced by bindTextureObject(), and copy_HtoD_sync().
void copy_PeerDtoD | ( | const int | src_dev, |
const int | dst_dev, | ||
const T * | d_src, | ||
T * | d_dst, | ||
const int | array_len, | ||
cudaStream_t | stream = 0 |
||
) |
Definition at line 300 of file CudaUtils.h.
References copy_PeerDtoD_async_T(), and stream.
void copy_PeerDtoD_async_T | ( | const int | src_dev, |
const int | dst_dev, | ||
const void * | d_src, | ||
void * | d_dst, | ||
const int | array_len, | ||
cudaStream_t | stream, | ||
const size_t | sizeofT | ||
) |
void cudaDie | ( | const char * | msg, |
cudaError_t | err = cudaSuccess |
||
) |
Definition at line 9 of file CudaUtils.C.
References NAMD_die().
void cudaNAMD_bug | ( | const char * | msg | ) |
Definition at line 31 of file CudaUtils.C.
References NAMD_bug().
Referenced by CudaFFTCompute::backward(), CudaFFTCompute::forward(), gather_force(), and spread_charge().
void deallocate_device | ( | T ** | pp | ) |
void deallocate_host | ( | T ** | pp | ) |
bool reallocate_device | ( | T ** | pp, |
int * | curlen, | ||
const int | newlen, | ||
const float | fac = 1.0f |
||
) |
Definition at line 191 of file CudaUtils.h.
References reallocate_device_T().
bool reallocate_device_T | ( | void ** | pp, |
int * | curlen, | ||
const int | newlen, | ||
const float | fac, | ||
const size_t | sizeofT | ||
) |
Definition at line 117 of file CudaUtils.C.
References cudaCheck.
Referenced by reallocate_device().
bool reallocate_host | ( | T ** | pp, |
int * | curlen, | ||
const int | newlen, | ||
const float | fac = 1.0f , |
||
const unsigned int | flag = cudaHostAllocDefault |
||
) |
Definition at line 211 of file CudaUtils.h.
References reallocate_host_T().
bool reallocate_host_T | ( | void ** | pp, |
int * | curlen, | ||
const int | newlen, | ||
const float | fac, | ||
const unsigned int | flag, | ||
const size_t | sizeofT | ||
) |