NAMD
Functions
CudaUtils.C File Reference
#include <stdio.h>
#include "common.h"
#include "charm++.h"
#include "CudaUtils.h"

Go to the source code of this file.

Functions

void cudaDie (const char *msg, cudaError_t err)
 
void cudaNAMD_bug (const char *msg)
 
void cuda_affinity_initialize ()
 
void clear_device_array_async_T (void *data, const int ndata, cudaStream_t stream, const size_t sizeofT)
 
void clear_device_array_T (void *data, const int ndata, const size_t sizeofT)
 
void allocate_host_T (void **pp, const int len, const size_t sizeofT)
 
void allocate_device_T (void **pp, const int len, const size_t sizeofT)
 
void deallocate_device_T (void **pp)
 
void deallocate_host_T (void **pp)
 
bool reallocate_device_T (void **pp, int *curlen, const int newlen, const float fac, const size_t sizeofT)
 
bool reallocate_host_T (void **pp, int *curlen, const int newlen, const float fac, const unsigned int flag, const size_t sizeofT)
 
void copy_HtoD_async_T (const void *h_array, void *d_array, int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_HtoD_T (const void *h_array, void *d_array, int array_len, const size_t sizeofT)
 
void copy_DtoH_async_T (const void *d_array, void *h_array, const int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_DtoH_T (const void *d_array, void *h_array, const int array_len, const size_t sizeofT)
 
void copy_DtoD_async_T (const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_DtoD_T (const void *d_src, void *d_dst, const int array_len, const size_t sizeofT)
 
void copy_PeerDtoD_async_T (const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy3D_HtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
void copy3D_DtoH_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
void copy3D_DtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
void copy3D_PeerDtoD_T (int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 

Function Documentation

void allocate_device_T ( void **  pp,
const int  len,
const size_t  sizeofT 
)

Definition at line 75 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_device(), and bindTextureObject().

75  {
76  cudaCheck(cudaMalloc(pp, sizeofT*len));
77 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void allocate_host_T ( void **  pp,
const int  len,
const size_t  sizeofT 
)

Definition at line 65 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_host().

65  {
66  cudaCheck(cudaMallocHost(pp, sizeofT*len));
67 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void clear_device_array_async_T ( void data,
const int  ndata,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 51 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array().

51  {
52  cudaCheck(cudaMemsetAsync(data, 0, sizeofT*ndata, stream));
53 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void clear_device_array_T ( void data,
const int  ndata,
const size_t  sizeofT 
)

Definition at line 55 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array_sync().

55  {
56  cudaCheck(cudaMemset(data, 0, sizeofT*ndata));
57 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy3D_DtoD_T ( void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 285 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoD().

291  {
292  cudaMemcpy3DParms parms = {0};
293 
294  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
295  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
296 
297  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
298  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
299 
300  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
301  parms.kind = cudaMemcpyDeviceToDevice;
302 
303 #ifdef NAMD_CUDA
304  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
305 #else
306  //TODO-HIP: remove ifdef when HIP implements cudaMemcpy3DAsync
307  cudaCheck(hipMemcpy3D(&parms));
308 #endif
309 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy3D_DtoH_T ( void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 255 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoH().

261  {
262  cudaMemcpy3DParms parms = {0};
263 
264  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
265  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
266 
267  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
268  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
269 
270  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
271  parms.kind = cudaMemcpyDeviceToHost;
272 
273 #ifdef NAMD_CUDA
274  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
275 #else
276  //TODO-HIP: remove ifdef when HIP implements cudaMemcpy3DAsync
277  cudaCheck(hipMemcpy3D(&parms));
278 #endif
279 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy3D_HtoD_T ( void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 225 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_HtoD().

231  {
232  cudaMemcpy3DParms parms = {0};
233 
234  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
235  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
236 
237  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
238  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
239 
240  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
241 
242  parms.kind = cudaMemcpyHostToDevice;
243 #ifdef NAMD_CUDA
244  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
245 #else
246  //TODO-HIP: remove ifdef when HIP implements cudaMemcpy3DAsync
247  cudaCheck(hipMemcpy3D(&parms));
248 #endif
249 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy3D_PeerDtoD_T ( int  src_dev,
int  dst_dev,
void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 315 of file CudaUtils.C.

References cudaCheck, and cudaDie().

Referenced by copy3D_PeerDtoD().

322  {
323 #ifdef NAMD_HIP
324 // TODO-HIP: Is a workaround possible? cudaMemcpy3D+cudaMemcpyPeer+cudaMemcpy3D
325  cudaDie("cudaMemcpy3DPeerAsync is not supported by HIP");
326 #else
327  cudaMemcpy3DPeerParms parms = {0};
328 
329  parms.srcDevice = src_dev;
330  parms.dstDevice = dst_dev;
331 
332  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
333  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
334 
335  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
336  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
337 
338  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
339 
340  cudaCheck(cudaMemcpy3DPeerAsync(&parms, stream));
341 #endif
342 }
__thread cudaStream_t stream
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy_DtoD_async_T ( const void d_src,
void d_dst,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 202 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD().

203  {
204  cudaCheck(cudaMemcpyAsync(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice, stream));
205 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy_DtoD_T ( const void d_src,
void d_dst,
const int  array_len,
const size_t  sizeofT 
)

Definition at line 207 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD_sync().

207  {
208  cudaCheck(cudaMemcpy(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice));
209 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy_DtoH_async_T ( const void d_array,
void h_array,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 189 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH().

190  {
191  cudaCheck(cudaMemcpyAsync(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost, stream));
192 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy_DtoH_T ( const void d_array,
void h_array,
const int  array_len,
const size_t  sizeofT 
)

Definition at line 194 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH_sync().

194  {
195  cudaCheck(cudaMemcpy(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost));
196 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy_HtoD_async_T ( const void h_array,
void d_array,
int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 175 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_HtoD().

176  {
177  cudaCheck(cudaMemcpyAsync(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice, stream));
178 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy_HtoD_T ( const void h_array,
void d_array,
int  array_len,
const size_t  sizeofT 
)

Definition at line 180 of file CudaUtils.C.

References cudaCheck.

Referenced by bindTextureObject(), and copy_HtoD_sync().

181  {
182  cudaCheck(cudaMemcpy(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice));
183 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void copy_PeerDtoD_async_T ( const int  src_dev,
const int  dst_dev,
const void d_src,
void d_dst,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 215 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_PeerDtoD().

217  {
218  cudaCheck(cudaMemcpyPeerAsync(d_dst, dst_dev, d_src, src_dev, sizeofT*array_len, stream));
219 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void cuda_affinity_initialize ( )

Definition at line 33 of file CudaUtils.C.

References dummy().

Referenced by all_init().

33  {
34  int devcnt = 0;
35  cudaError_t err = cudaGetDeviceCount(&devcnt);
36  if ( devcnt == 1 ) { // only one device so it must be ours
37  int *dummy;
38  if ( err == cudaSuccess ) err = cudaSetDevice(0);
39  if ( err == cudaSuccess ) err = cudaSetDeviceFlags(cudaDeviceMapHost);
40  if ( err == cudaSuccess ) err = cudaMalloc(&dummy, 4);
41  }
42  if ( err != cudaSuccess ) {
43  char host[128];
44  gethostname(host, 128); host[127] = 0;
45  fprintf(stderr,"CUDA initialization error on %s: %s\n", host, cudaGetErrorString(err));
46  }
47 }
void dummy()
void cudaDie ( const char *  msg,
cudaError_t  err 
)

Definition at line 9 of file CudaUtils.C.

References NAMD_die().

Referenced by copy3D_PeerDtoD_T(), cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), DeviceCUDA::initialize(), and read_CUDA_ARCH().

9  {
10  char host[128];
11  gethostname(host, 128); host[127] = 0;
12  char devstr[128] = "";
13  int devnum;
14  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
15  sprintf(devstr, " device %d", devnum);
16  }
17  cudaDeviceProp deviceProp;
18  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
19  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
20  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
21  }
22  char errmsg[1024];
23  if (err == cudaSuccess) {
24  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s)", msg, CkMyPe(), host, devstr);
25  } else {
26  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
27  }
28  NAMD_die(errmsg);
29 }
void NAMD_die(const char *err_msg)
Definition: common.C:85
void cudaNAMD_bug ( const char *  msg)

Definition at line 31 of file CudaUtils.C.

References NAMD_bug().

Referenced by CudaFFTCompute::backward(), CudaFFTCompute::forward(), gather_force(), and spread_charge().

31 {NAMD_bug(msg);}
void NAMD_bug(const char *err_msg)
Definition: common.C:129
void deallocate_device_T ( void **  pp)

Definition at line 84 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_device().

84  {
85 
86  if (*pp != NULL) {
87  cudaCheck(cudaFree((void *)(*pp)));
88  *pp = NULL;
89  }
90 
91 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
void deallocate_host_T ( void **  pp)

Definition at line 98 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_host().

98  {
99 
100  if (*pp != NULL) {
101  cudaCheck(cudaFreeHost((void *)(*pp)));
102  *pp = NULL;
103  }
104 
105 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
bool reallocate_device_T ( void **  pp,
int *  curlen,
const int  newlen,
const float  fac,
const size_t  sizeofT 
)

Definition at line 117 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_device().

117  {
118 
119  if (*pp != NULL && *curlen < newlen) {
120  cudaCheck(cudaFree((void *)(*pp)));
121  *pp = NULL;
122  }
123 
124  if (*pp == NULL) {
125  if (fac > 1.0f) {
126  *curlen = (int)(((double)(newlen))*(double)fac);
127  } else {
128  *curlen = newlen;
129  }
130  cudaCheck(cudaMalloc(pp, sizeofT*(*curlen)));
131  return true;
132  }
133 
134  return false;
135 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
bool reallocate_host_T ( void **  pp,
int *  curlen,
const int  newlen,
const float  fac,
const unsigned int  flag,
const size_t  sizeofT 
)

Definition at line 150 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_host().

151  {
152 
153  if (*pp != NULL && *curlen < newlen) {
154  cudaCheck(cudaFreeHost((void *)(*pp)));
155  *pp = NULL;
156  }
157 
158  if (*pp == NULL) {
159  if (fac > 1.0f) {
160  *curlen = (int)(((double)(newlen))*(double)fac);
161  } else {
162  *curlen = newlen;
163  }
164  cudaCheck(cudaHostAlloc(pp, sizeofT*(*curlen), flag));
165  return true;
166  }
167 
168  return false;
169 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95