Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

CUDAUtil.cu

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2011 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: CUDAUtil.cu,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.36 $        $Date: 2011/03/09 20:07:55 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *   CUDA API wrapper for use by the CUDAAccel C++ class 
00019  ***************************************************************************/
00020 #include <string.h>
00021 #include <stdio.h>
00022 #include "CUDAKernels.h"
00023 #include "WKFThreads.h"
00024 
00025 #if defined(__cplusplus)
00026 extern "C" {
00027 #endif
00028 
00029 // report true if the driver is compatible with the runtime
00030 static int vmd_cuda_drv_runtime_compatible() {
00031 #if CUDART_VERSION >= 2020
00032   int cuda_driver_version=-1;
00033   int cuda_runtime_version=0;
00034 
00035   cudaDriverGetVersion(&cuda_driver_version);
00036   cudaRuntimeGetVersion(&cuda_runtime_version);
00037 
00038 #if 0
00039   printf("CUDA driver version: %d\n", cuda_driver_version);
00040   printf("CUDA runtime version: %d\n", cuda_runtime_version);
00041 #endif
00042 
00043   if (cuda_driver_version == 0) 
00044     return VMDCUDA_ERR_NODEVICES;
00045 
00046   if (cuda_driver_version < cuda_runtime_version)
00047     return VMDCUDA_ERR_DRVMISMATCH;
00048 #endif  
00049 
00050   return VMDCUDA_ERR_NONE;
00051 }
00052 
00053 
00054 int vmd_cuda_device_props(int dev, char *name, int namelen,
00055                           int *devmajor, int *devminor, 
00056                           unsigned long *memb, int *clockratekhz,
00057                           int *smcount, int *overlap, int *kerneltimeout,
00058                           int *canmaphostmem, int *computemode) {
00059   cudaError_t rc;
00060   cudaDeviceProp deviceProp;
00061 
00062   int vercheck;
00063   if ((vercheck = vmd_cuda_drv_runtime_compatible()) != VMDCUDA_ERR_NONE) {
00064     return vercheck;
00065   }
00066 
00067   memset(&deviceProp, 0, sizeof(cudaDeviceProp));
00068   if ((rc=cudaGetDeviceProperties(&deviceProp, dev)) != cudaSuccess) {
00069     // printf("error: %s\n", cudaGetErrorString(rc));
00070     if (rc == cudaErrorNotYetImplemented)
00071       return VMDCUDA_ERR_EMUDEVICE;
00072     return VMDCUDA_ERR_GENERAL;
00073   }
00074 
00075   if (name)
00076     strncpy(name, deviceProp.name, namelen);
00077   if (devmajor)
00078     *devmajor = deviceProp.major;
00079   if (devminor)
00080     *devminor = deviceProp.minor;
00081   if (memb)
00082     *memb = deviceProp.totalGlobalMem;
00083   if (clockratekhz)
00084     *clockratekhz = deviceProp.clockRate;
00085 #if CUDART_VERSION >= 2000
00086   if (smcount)
00087     *smcount = deviceProp.multiProcessorCount;
00088   if (overlap)
00089     *overlap = (deviceProp.deviceOverlap != 0);
00090 #else
00091   if (smcount)
00092     *smcount = -1;
00093   if (overlap)
00094     *overlap = 0; 
00095 #endif
00096 #if CUDART_VERSION >= 2010
00097   if (kerneltimeout)
00098     *kerneltimeout = (deviceProp.kernelExecTimeoutEnabled != 0);
00099 #else
00100   if (kerneltimeout)
00101     *kerneltimeout = 0;
00102 #endif
00103 #if CUDART_VERSION >= 2020
00104   if (canmaphostmem)
00105     *canmaphostmem = (deviceProp.canMapHostMemory != 0);
00106   if (computemode)
00107     *computemode = deviceProp.computeMode;
00108 #else
00109   if (canmaphostmem)
00110     *canmaphostmem = 0;
00111   if (computemode)
00112     *computemode = VMDCUDA_COMPUTEMODE_DEFAULT;
00113 #endif
00114   return VMDCUDA_ERR_NONE;
00115 }
00116 
00117 
00118 int vmd_cuda_num_devices(int *numdev) {
00119   int i;
00120   int devcount=0;
00121   int usabledevs=0;
00122   *numdev = 0;
00123 
00124   int vercheck;
00125   if ((vercheck = vmd_cuda_drv_runtime_compatible()) != VMDCUDA_ERR_NONE) {
00126     return vercheck;
00127   }
00128 
00129   if (cudaGetDeviceCount(&devcount) != cudaSuccess) {
00130     return VMDCUDA_ERR_NODEVICES;
00131   }
00132 
00133   // Do a sanity check in case we get complete gibberish back,
00134   // but no error. This can occur if we have either a driver or 
00135   // CUDA runtime that's badly mismatched.
00136   if (devcount > 100 || devcount < 0)
00137     return VMDCUDA_ERR_DRVMISMATCH;
00138 
00139   // disregard emulation mode as unusable for our purposes
00140   for (i=0; i<devcount; i++) {
00141     int devmajor, devminor, rc;
00142 
00143     rc = vmd_cuda_device_props(i, NULL, 0, &devmajor, &devminor, 
00144                                NULL, NULL, NULL, NULL, NULL, NULL, NULL);
00145 
00146     if (rc == VMDCUDA_ERR_NONE) {
00147       // Check for emulation mode devices, and ignore if found
00148       if (((devmajor >= 1) && (devminor >= 0)) &&
00149           ((devmajor != 9999) && (devminor != 9999))) {
00150         usabledevs++;
00151       }
00152     } else if (rc != VMDCUDA_ERR_EMUDEVICE) {
00153       return VMDCUDA_ERR_SOMEDEVICES;
00154     }
00155   } 
00156 
00157   *numdev = usabledevs;
00158 
00159   return VMDCUDA_ERR_NONE;
00160 }
00161 
00162 
00163 void * vmd_cuda_devpool_setdevice(void * voidparms) {
00164   int count, id, dev;
00165   cudaDeviceProp deviceProp;
00166   char *mesg;
00167   char *d_membuf;
00168   cudaError_t err;
00169 
00170   wkf_threadpool_worker_getid(voidparms, &id, &count);
00171   wkf_threadpool_worker_getdata(voidparms, (void **) &mesg);
00172   wkf_threadpool_worker_getdevid(voidparms, &dev);
00173 
00174   /* set active device */
00175   cudaSetDevice(dev);
00176 
00177 #if CUDART_VERSION >= 2000
00178   /* Query SM count and clock rate, and compute a speed scaling value */
00179   /* the current code uses a GeForce GTX 280 / Tesla C1060 as the     */
00180   /* "1.0" reference value, with 30 SMs, and a 1.3 GHz clock rate     */ 
00181   memset(&deviceProp, 0, sizeof(cudaDeviceProp));
00182   if (cudaGetDeviceProperties(&deviceProp, dev) == cudaSuccess) {
00183     float smscale = ((float) deviceProp.multiProcessorCount) / 30.0f;
00184     double clockscale = ((double) deviceProp.clockRate) / 1295000.0;
00185     float speedscale = smscale * ((float) clockscale);
00186 
00187 #if 0
00188     printf("clock rate: %lf\n", (double) deviceProp.clockRate);
00189     printf("scale: %.4f smscale: %.4f clockscale: %.4f\n", 
00190            speedscale, smscale, clockscale);  
00191 #endif
00192 
00193 #if CUDART_VERSION >= 2030
00194     if (deviceProp.canMapHostMemory != 0) {
00195 #if 0
00196       printf("Enabled mapped host memory on device[%d]\n", dev);
00197 #endif
00198 
00199       /* 
00200        * set blocking/yielding API behavior and enable mapped host memory
00201        * If this fails, then either we've got a problematic device, or 
00202        * we have already set the device flags within this thread (shouldn't
00203        * ever happen), or the device we're accessing doesn't actually support
00204        * mapped host memory (shouldn't ever happen since we check for that).
00205        */
00206       err = cudaSetDeviceFlags(cudaDeviceScheduleAuto | cudaDeviceMapHost);
00207       if (err != cudaSuccess) {
00208         printf("Warning) thread[%d] can't set GPU[%d] device flags\n", id, dev);
00209         printf("Warning) CUDA error: %s\n", cudaGetErrorString(err)); 
00210       }
00211     }
00212 #endif
00213 
00214     wkf_threadpool_worker_setdevspeed(voidparms, speedscale);
00215 
00216     /* 
00217      * Do a small 1MB device memory allocation to ensure that our context
00218      * has actually been initialized by the time we return.
00219      * If this tiny allocation fails, then something is seriously wrong
00220      * and we should mark this device as unusable for the rest of 
00221      * this VMD session.
00222      */
00223     if ((err = cudaMalloc((void **) &d_membuf, 1*1024*1024)) == cudaSuccess) {
00224       cudaFree(d_membuf); 
00225     } else {
00226       printf("Warning) thread[%d] can't init GPU[%d] found by device query\n", id, dev); 
00227       printf("Warning) CUDA error: %s\n", cudaGetErrorString(err));
00228       /* 
00229        * XXX we should mark the device unusable here so that no other code
00230        *     touchies it, but have no mechanism for doing that yet...
00231        */
00232     }
00233   }
00234 #endif
00235 
00236   if (mesg != NULL)
00237     printf("devpool thread[%d / %d], device %d message: '%s'\n", id, count, dev, mesg);
00238 
00239   return NULL;
00240 }
00241 
00242 
00243 
00244 #if defined(__cplusplus)
00245 }
00246 #endif
00247 
00248 

Generated on Sat May 26 01:47:51 2012 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002