Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

CUDAAccel.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2011 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: CUDAAccel.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.43 $       $Date: 2012/01/06 15:56:41 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *   Class to store and handle enumeration and initialization of 
00019  *   CUDA GPU accelerator devices.
00020  ***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>     // memset()
#include "config.h"     // rebuild on config changes
#include "Inform.h"
#include "ResizeArray.h"
#include "CUDAAccel.h"
#include "CUDAKernels.h"
#include "WKFThreads.h"
00029 
00030 CUDAAccel::CUDAAccel(void) {
00031   cudaavail = 0;
00032   numdevices = 0;
00033   int usabledevices = 0;
00034   cudapool=NULL;
00035 
00036   if (getenv("VMDNOCUDA") != NULL) {
00037     msgInfo << "VMDNOCUDA environment variable is set, CUDA support disabled."
00038             << sendmsg;
00039     return; 
00040   }
00041 
00042   unsigned int gpumask = 0xffffffff;
00043   const char *gpumaskstr = getenv("VMDCUDADEVICEMASK");
00044   if (gpumaskstr != NULL) {
00045     unsigned int tmp;
00046     if (sscanf(gpumaskstr, "%x", &tmp) == 1) {
00047       gpumask = tmp;
00048       msgInfo << "Using GPU device mask '"
00049               << gpumaskstr << "'" << sendmsg;
00050     } else {
00051       msgInfo << "Failed to parse CUDA GPU device mask string '" 
00052               << gpumaskstr << "'" << sendmsg;
00053     }
00054   }
00055 
00056 #if defined(VMDCUDA)
00057   int rc = 0;
00058   if ((rc=vmd_cuda_num_devices(&numdevices)) != VMDCUDA_ERR_NONE) {
00059     numdevices = 0;
00060 
00061     // Only emit error messages when there are CUDA GPUs on the machine
00062     // but that they can't be used for some reason
00063     // XXX turning this off for the time being, as some people have 
00064     //     NVIDIA drivers installed on machines with no NVIDIA GPU, as can
00065     //     happen with some distros that package the drivers by default.
00066     switch (rc) {
00067       case VMDCUDA_ERR_NODEVICES:
00068       case VMDCUDA_ERR_SOMEDEVICES:
00069 //        msgInfo << "No CUDA accelerator devices available." << sendmsg;
00070         break;
00071 
00072 #if 0
00073       case VMDCUDA_ERR_SOMEDEVICES:
00074         msgWarn << "One or more CUDA accelerators may exist but are not usable." << sendmsg; 
00075         msgWarn << "Check to make sure that GPU drivers are up to date." << sendmsg;
00076         break;
00077 #endif
00078 
00079       case VMDCUDA_ERR_DRVMISMATCH:
00080         msgWarn << "Detected a mismatch between CUDA runtime and GPU driver" << sendmsg; 
00081         msgWarn << "Check to make sure that GPU drivers are up to date." << sendmsg;
00082 //        msgInfo << "No CUDA accelerator devices available." << sendmsg;
00083         break;
00084     }
00085    
00086     return;
00087   }
00088 
00089   if (numdevices > 0) {
00090     cudaavail = 1;
00091 
00092     int i;
00093     for (i=0; i<numdevices; i++) {
00094       cudadevprops dp;
00095       memset(&dp, 0, sizeof(dp));
00096       if (!vmd_cuda_device_props(i, dp.name, sizeof(dp.name),
00097                                 &dp.major, &dp.minor,
00098                                 &dp.membytes, &dp.clockratekhz, 
00099                                 &dp.smcount, &dp.overlap,
00100                                 &dp.kernelexectimeoutenabled,
00101                                 &dp.canmaphostmem, &dp.computemode)) {
00102         dp.deviceid=i; // save the device index
00103 
00104         // Check that each GPU device has not been excluded by virtue of 
00105         // being used for display, by a GPU device mask, or by the CUDA
00106         // device mode being set to a "prohibited" status.
00107         if (!(dp.kernelexectimeoutenabled && getenv("VMDCUDANODISPLAYGPUS")) &&
00108             (gpumask & (1 << i)) && 
00109             (dp.computemode != computeModeProhibited)) {
00110           devprops.append(dp);
00111           usabledevices++;
00112         }
00113       } else {
00114         msgWarn << "  Failed to retrieve properties for CUDA accelerator " << i << sendmsg; 
00115       }
00116     }
00117   }
00118   numdevices=usabledevices;
00119 
00120   devpool_init();
00121 #endif
00122 }
00123 
00124 // destructor
00125 CUDAAccel::~CUDAAccel(void) {
00126   devpool_fini();
00127 }
00128 
00129 
00130 void CUDAAccel::devpool_init(void) {
00131   cudapool=NULL;
00132 
00133 #if defined(VMDCUDA)
00134   if (!cudaavail || numdevices == 0 || getenv("VMDNOCUDA") != NULL)
00135     return;
00136 
00137   // only use as many GPUs as CPU cores we're allowed to use
00138   int workercount=numdevices;
00139   if (workercount > wkf_thread_numprocessors())
00140     workercount=wkf_thread_numprocessors();
00141 
00142   int *devlist = new int[workercount];
00143   int i;
00144   for (i=0; i<workercount; i++) {
00145     devlist[i]=device_index(i);
00146   }
00147 
00148   msgInfo << "Creating CUDA device pool and initializing hardware..." << sendmsg;
00149   cudapool=wkf_threadpool_create(workercount, devlist);
00150   delete [] devlist;
00151 
00152   // associate each worker thread with a specific GPU
00153   if (getenv("VMDCUDAVERBOSE") != NULL)
00154     wkf_threadpool_launch(cudapool, vmd_cuda_devpool_setdevice, (void*)"VMD CUDA Dev Init", 1);
00155   else
00156     wkf_threadpool_launch(cudapool, vmd_cuda_devpool_setdevice, NULL, 1);
00157 
00158   if (!getenv("VMDNOCUDA")) {
00159     // clear all available device memory on each of the GPUs
00160     wkf_threadpool_launch(cudapool, vmd_cuda_devpool_clear_device_mem, NULL, 1);
00161   }
00162 #endif
00163 }
00164 
00165 void CUDAAccel::devpool_fini(void) {
00166   if (!cudapool)
00167     return;
00168 
00169 #if defined(VMDCUDA)
00170   devpool_wait();
00171   wkf_threadpool_destroy(cudapool);
00172 #endif
00173   cudapool=NULL;
00174 }
00175 
00176 int CUDAAccel::devpool_launch(void *fctn(void *), void *parms, int blocking) {
00177   if (!cudapool)
00178     return -1;
00179 
00180   return wkf_threadpool_launch(cudapool, fctn, parms, blocking); 
00181 }
00182 
00183 int CUDAAccel::devpool_wait(void) {
00184   if (!cudapool)
00185     return -1;
00186 
00187   return wkf_threadpool_wait(cudapool);
00188 }
00189 
00190 void CUDAAccel::print_cuda_devices(void) {
00191   if (getenv("VMDCUDANODISPLAYGPUS")) {
00192     msgInfo << "Ignoring CUDA-capable GPUs used for display" << sendmsg;
00193   }
00194 
00195   if (!cudaavail || numdevices == 0) {
00196     msgInfo << "No CUDA accelerator devices available." << sendmsg;
00197     return;
00198   }
00199 
00200   msgInfo << "Detected " << numdevices << " available CUDA " 
00201           << ((numdevices > 1) ? "accelerators:" : "accelerator:") << sendmsg;
00202   int i;
00203   for (i=0; i<numdevices; i++) {
00204     char outstr[1024];
00205     memset(outstr, 0, sizeof(outstr));
00206 
00207     // list primary GPU device attributes
00208     sprintf(outstr, "[%d] %-18s %2d SM_%d.%d @ %.2f GHz",
00209             device_index(i), device_name(i), 
00210             (device_sm_count(i) > 0) ? device_sm_count(i) : 0,
00211             device_version_major(i), device_version_minor(i),
00212             device_clock_ghz(i));
00213     msgInfo << outstr;
00214 
00215     // list memory capacity 
00216     int gpumemmb = (device_membytes(i) / (1024 * 1024));
00217     if (gpumemmb < 1000)
00218       sprintf(outstr, ", %4dMB RAM", gpumemmb);
00219     else if (gpumemmb < 10240)
00220       sprintf(outstr, ", %.1fGB RAM", gpumemmb / 1024.0);
00221     else 
00222       sprintf(outstr, ", %dGB RAM", gpumemmb / 1024);
00223 
00224     msgInfo << outstr;
00225 
00226     // list optional hardware features and configuration attributes here...
00227     if (device_computemode(i) == computeModeProhibited) {
00228       msgInfo << ", Compute Mode: Prohibited";
00229     } else {
00230       if (device_kerneltimeoutenabled(i))
00231         msgInfo << ", KTO";
00232 
00233       if (device_overlap(i))
00234         msgInfo << ", OIO";
00235 
00236       if (device_canmaphostmem(i))
00237         msgInfo << ", ZCP";
00238     }
00239 
00240     msgInfo << sendmsg; 
00241   } 
00242 }
00243 
00244 int CUDAAccel::num_devices(void) {
00245   return numdevices;
00246 }
00247 
00248 int CUDAAccel::device_index(int dev) {
00249   return devprops[dev].deviceid;
00250 }
00251 
00252 const char *CUDAAccel::device_name(int dev) {
00253   if (!cudaavail || dev < 0 || dev >= numdevices)
00254     return NULL;
00255   return devprops[dev].name; 
00256 }
00257 
00258 int CUDAAccel::device_version_major(int dev) {
00259   if (!cudaavail || dev < 0 || dev >= numdevices)
00260     return 0; 
00261   return devprops[dev].major;
00262 }
00263 
00264 int CUDAAccel::device_version_minor(int dev) {
00265   if (!cudaavail || dev < 0 || dev >= numdevices)
00266     return 0; 
00267   return devprops[dev].minor;
00268 }
00269 
00270 unsigned long CUDAAccel::device_membytes(int dev) {
00271   if (!cudaavail || dev < 0 || dev >= numdevices)
00272     return 0; 
00273   return devprops[dev].membytes;
00274 }
00275 
00276 float CUDAAccel::device_clock_ghz(int dev) {
00277   if (!cudaavail || dev < 0 || dev >= numdevices)
00278     return 0; 
00279   return (float) (devprops[dev].clockratekhz / 1000000.0);
00280 }
00281 
00282 int CUDAAccel::device_sm_count(int dev) {
00283   if (!cudaavail || dev < 0 || dev >= numdevices)
00284     return -1; 
00285   return devprops[dev].smcount;
00286 }
00287 
00288 int CUDAAccel::device_overlap(int dev) {
00289   if (!cudaavail || dev < 0 || dev >= numdevices)
00290     return -1; 
00291   return devprops[dev].overlap;
00292 }
00293 
00294 int CUDAAccel::device_kerneltimeoutenabled(int dev) {
00295   if (!cudaavail || dev < 0 || dev >= numdevices)
00296     return -1; 
00297   return devprops[dev].kernelexectimeoutenabled;
00298 }
00299 
00300 int CUDAAccel::device_canmaphostmem(int dev) {
00301   if (!cudaavail || dev < 0 || dev >= numdevices)
00302     return -1; 
00303   return devprops[dev].canmaphostmem;
00304 }
00305 
00306 int CUDAAccel::device_computemode(int dev) {
00307   if (!cudaavail || dev < 0 || dev >= numdevices)
00308     return -1; 
00309   return devprops[dev].computemode;
00310 }
00311 
00312 

Generated on Sat May 26 01:47:50 2012 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002