CudaUtils.C File Reference

#include <stdio.h>
#include "common.h"
#include "charm++.h"
#include "CudaUtils.h"

Go to the source code of this file.

Functions

void cudaDie (const char *msg, cudaError_t err)
void cudaNAMD_bug (const char *msg)
void cuda_affinity_initialize ()
void clear_device_array_async_T (void *data, const int ndata, cudaStream_t stream, const size_t sizeofT)
void clear_device_array_T (void *data, const int ndata, const size_t sizeofT)
void allocate_host_T (void **pp, const int len, const size_t sizeofT)
void allocate_device_T (void **pp, const int len, const size_t sizeofT)
void deallocate_device_T (void **pp)
void deallocate_host_T (void **pp)
bool reallocate_device_T (void **pp, int *curlen, const int newlen, const float fac, const size_t sizeofT)
bool reallocate_host_T (void **pp, int *curlen, const int newlen, const float fac, const unsigned int flag, const size_t sizeofT)
void copy_HtoD_async_T (const void *h_array, void *d_array, int array_len, cudaStream_t stream, const size_t sizeofT)
void copy_HtoD_T (const void *h_array, void *d_array, int array_len, const size_t sizeofT)
void copy_DtoH_async_T (const void *d_array, void *h_array, const int array_len, cudaStream_t stream, const size_t sizeofT)
void copy_DtoH_T (const void *d_array, void *h_array, const int array_len, const size_t sizeofT)
void copy_DtoD_async_T (const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT)
void copy_DtoD_T (const void *d_src, void *d_dst, const int array_len, const size_t sizeofT)
void copy_PeerDtoD_async_T (const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT)
void copy3D_HtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
void copy3D_DtoH_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
void copy3D_DtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
void copy3D_PeerDtoD_T (int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)


Function Documentation

void allocate_device_T ( void **  pp,
const int  len,
const size_t  sizeofT 
)

Definition at line 84 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_device().

00084                                                                        {
00085   cudaCheck(cudaMalloc(pp, sizeofT*len));
00086 }

void allocate_host_T ( void **  pp,
const int  len,
const size_t  sizeofT 
)

Definition at line 74 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_host().

00074                                                                      {
00075   cudaCheck(cudaMallocHost(pp, sizeofT*len));
00076 }

void clear_device_array_async_T ( void *  data,
const int  ndata,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 60 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array().

00060                                                                                                         {
00061   cudaCheck(cudaMemsetAsync(data, 0, sizeofT*ndata, stream));
00062 }

void clear_device_array_T ( void *  data,
const int  ndata,
const size_t  sizeofT 
)

Definition at line 64 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array_sync().

00064                                                                              {
00065   cudaCheck(cudaMemset(data, 0, sizeofT*ndata));
00066 }

void copy3D_DtoD_T ( void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 284 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoD().

00290                                        {
00291   cudaMemcpy3DParms parms = {0};
00292 
00293   parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
00294   parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
00295 
00296   parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
00297   parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
00298 
00299   parms.extent = make_cudaExtent(sizeofT*width, height, depth);
00300   parms.kind = cudaMemcpyDeviceToDevice;
00301 
00302   cudaCheck(cudaMemcpy3DAsync(&parms, stream));
00303 }

void copy3D_DtoH_T ( void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 259 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoH().

00265                                        {
00266   cudaMemcpy3DParms parms = {0};
00267 
00268   parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
00269   parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
00270 
00271   parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
00272   parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
00273 
00274   parms.extent = make_cudaExtent(sizeofT*width, height, depth);
00275   parms.kind = cudaMemcpyDeviceToHost;
00276 
00277   cudaCheck(cudaMemcpy3DAsync(&parms, stream));
00278 }

void copy3D_HtoD_T ( void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 234 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_HtoD().

00240                                        {
00241   cudaMemcpy3DParms parms = {0};
00242 
00243   parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
00244   parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
00245 
00246   parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
00247   parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
00248 
00249   parms.extent = make_cudaExtent(sizeofT*width, height, depth);
00250   parms.kind = cudaMemcpyHostToDevice;
00251 
00252   cudaCheck(cudaMemcpy3DAsync(&parms, stream));
00253 }

void copy3D_PeerDtoD_T ( int  src_dev,
int  dst_dev,
void *  src_data,
void *  dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 309 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_PeerDtoD().

00316                                        {
00317   cudaMemcpy3DPeerParms parms = {0};
00318 
00319   parms.srcDevice = src_dev;
00320   parms.dstDevice = dst_dev;
00321 
00322   parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
00323   parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
00324 
00325   parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
00326   parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
00327 
00328   parms.extent = make_cudaExtent(sizeofT*width, height, depth);
00329 
00330   cudaCheck(cudaMemcpy3DPeerAsync(&parms, stream));
00331 }

void copy_DtoD_async_T ( const void *  d_src,
void *  d_dst,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 211 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD().

00212                                  {
00213   cudaCheck(cudaMemcpyAsync(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice, stream));
00214 }

void copy_DtoD_T ( const void *  d_src,
void *  d_dst,
const int  array_len,
const size_t  sizeofT 
)

Definition at line 216 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD_sync().

00216                                                                                             {
00217   cudaCheck(cudaMemcpy(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice));
00218 }

void copy_DtoH_async_T ( const void *  d_array,
void *  h_array,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 198 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH().

00199                                  {
00200   cudaCheck(cudaMemcpyAsync(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost, stream));
00201 }

void copy_DtoH_T ( const void *  d_array,
void *  h_array,
const int  array_len,
const size_t  sizeofT 
)

Definition at line 203 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH_sync().

00203                                                                                                 {
00204   cudaCheck(cudaMemcpy(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost));
00205 }

void copy_HtoD_async_T ( const void *  h_array,
void *  d_array,
int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 184 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_HtoD().

00185                                  {
00186   cudaCheck(cudaMemcpyAsync(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice, stream));
00187 }

void copy_HtoD_T ( const void *  h_array,
void *  d_array,
int  array_len,
const size_t  sizeofT 
)

Definition at line 189 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_HtoD_sync().

00190                            {
00191   cudaCheck(cudaMemcpy(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice));
00192 }

void copy_PeerDtoD_async_T ( const int  src_dev,
const int  dst_dev,
const void *  d_src,
void *  d_dst,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 224 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_PeerDtoD().

00226                         {
00227   cudaCheck(cudaMemcpyPeerAsync(d_dst, dst_dev, d_src, src_dev, sizeofT*array_len, stream));
00228 }

void cuda_affinity_initialize (  ) 

Definition at line 37 of file CudaUtils.C.

Referenced by BackEnd::init().

00037                                 {
00038   // called before Converse/Charm++ initialization so can't use Cmi/Ck utilities
00039   int devcnt = 0;
00040   cudaError_t err = cudaGetDeviceCount(&devcnt);
00041   if ( devcnt == 1 ) {  // only one device so it must be ours
00042     int *dummy;
00043     if ( err == cudaSuccess ) err = cudaSetDevice(0);
00044     if ( err == cudaSuccess ) err = cudaSetDeviceFlags(cudaDeviceMapHost);
00045     if ( err == cudaSuccess ) err = cudaMalloc(&dummy, 4);
00046   }
00047   if ( err != cudaSuccess ) {
00048 #ifdef NOHOSTNAME
00049     fprintf(stderr,"CUDA initialization error: %s\n", cudaGetErrorString(err));
00050 #else
00051     char host[128];
00052     gethostname(host, 128);  host[127] = 0;
00053     fprintf(stderr,"CUDA initialization error on %s: %s\n", host, cudaGetErrorString(err));
00054 #endif
00055   }
00056 }

void cudaDie ( const char *  msg,
cudaError_t  err 
)

Definition at line 9 of file CudaUtils.C.

References NAMD_die().

Referenced by DeviceCUDA::initialize(), and read_CUDA_ARCH().

00009                                                {
00010   char host[128];
00011 #ifdef NOHOSTNAME
00012   sprintf(host,"physical node %d", CmiPhysicalNodeID(CkMyPe()));
00013 #else
00014   gethostname(host, 128);  host[127] = 0;
00015 #endif
00016   char devstr[128] = "";
00017   int devnum;
00018   if ( cudaGetDevice(&devnum) == cudaSuccess ) {
00019     sprintf(devstr, " device %d", devnum);
00020   }
00021   cudaDeviceProp deviceProp;
00022   if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
00023     sprintf(devstr, " device %d pci %x:%x:%x", devnum,
00024       deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
00025   }
00026   char errmsg[1024];
00027   if (err == cudaSuccess) {
00028     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s)", msg, CkMyPe(), host, devstr);
00029   } else {
00030     sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));    
00031   }
00032   NAMD_die(errmsg);
00033 }

void cudaNAMD_bug ( const char *  msg  ) 

Definition at line 35 of file CudaUtils.C.

References NAMD_bug().

Referenced by CudaFFTCompute::backward(), CudaFFTCompute::forward(), gather_force(), and spread_charge().

00035 {NAMD_bug(msg);}

void deallocate_device_T ( void **  pp  ) 

Definition at line 93 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_device().

00093                                     {
00094   
00095   if (*pp != NULL) {
00096     cudaCheck(cudaFree((void *)(*pp)));
00097     *pp = NULL;
00098   }
00099 
00100 }

void deallocate_host_T ( void **  pp  ) 

Definition at line 107 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_host().

00107                                   {
00108   
00109   if (*pp != NULL) {
00110     cudaCheck(cudaFreeHost((void *)(*pp)));
00111     *pp = NULL;
00112   }
00113 
00114 }

bool reallocate_device_T ( void **  pp,
int *  curlen,
const int  newlen,
const float  fac,
const size_t  sizeofT 
)

Definition at line 126 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_device().

00126                                                                                                           {
00127 
00128   if (*pp != NULL && *curlen < newlen) {
00129     cudaCheck(cudaFree((void *)(*pp)));
00130     *pp = NULL;
00131   }
00132 
00133   if (*pp == NULL) {
00134     if (fac > 1.0f) {
00135       *curlen = (int)(((double)(newlen))*(double)fac);
00136     } else {
00137       *curlen = newlen;
00138     }
00139     cudaCheck(cudaMalloc(pp, sizeofT*(*curlen)));
00140     return true;
00141   }
00142 
00143   return false;
00144 }

bool reallocate_host_T ( void **  pp,
int *  curlen,
const int  newlen,
const float  fac,
const unsigned int  flag,
const size_t  sizeofT 
)

Definition at line 159 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_host().

00160                                                                                        {
00161 
00162   if (*pp != NULL && *curlen < newlen) {
00163     cudaCheck(cudaFreeHost((void *)(*pp)));
00164     *pp = NULL;
00165   }
00166 
00167   if (*pp == NULL) {
00168     if (fac > 1.0f) {
00169       *curlen = (int)(((double)(newlen))*(double)fac);
00170     } else {
00171       *curlen = newlen;
00172     }
00173     cudaCheck(cudaHostAlloc(pp, sizeofT*(*curlen), flag));
00174     return true;
00175   }
00176 
00177   return false;
00178 }


Generated on Sat May 26 01:17:16 2018 for NAMD by  doxygen 1.4.7