00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include "config.h"
00024 #include "Inform.h"
00025 #include "ResizeArray.h"
00026 #include "CUDAAccel.h"
00027 #include "CUDAKernels.h"
00028 #include "WKFThreads.h"
00029
00030 CUDAAccel::CUDAAccel(void) {
00031 cudaavail = 0;
00032 numdevices = 0;
00033 int usabledevices = 0;
00034 cudapool=NULL;
00035
00036 if (getenv("VMDNOCUDA") != NULL) {
00037 msgInfo << "VMDNOCUDA environment variable is set, CUDA support disabled."
00038 << sendmsg;
00039 return;
00040 }
00041
00042 unsigned int gpumask = 0xffffffff;
00043 const char *gpumaskstr = getenv("VMDCUDADEVICEMASK");
00044 if (gpumaskstr != NULL) {
00045 unsigned int tmp;
00046 if (sscanf(gpumaskstr, "%x", &tmp) == 1) {
00047 gpumask = tmp;
00048 msgInfo << "Using GPU device mask '"
00049 << gpumaskstr << "'" << sendmsg;
00050 } else {
00051 msgInfo << "Failed to parse CUDA GPU device mask string '"
00052 << gpumaskstr << "'" << sendmsg;
00053 }
00054 }
00055
00056 #if defined(VMDCUDA)
00057 int rc = 0;
00058 if ((rc=vmd_cuda_num_devices(&numdevices)) != VMDCUDA_ERR_NONE) {
00059 numdevices = 0;
00060
00061
00062
00063
00064
00065
00066 switch (rc) {
00067 case VMDCUDA_ERR_NODEVICES:
00068 case VMDCUDA_ERR_SOMEDEVICES:
00069
00070 break;
00071
00072 #if 0
00073 case VMDCUDA_ERR_SOMEDEVICES:
00074 msgWarn << "One or more CUDA accelerators may exist but are not usable." << sendmsg;
00075 msgWarn << "Check to make sure that GPU drivers are up to date." << sendmsg;
00076 break;
00077 #endif
00078
00079 case VMDCUDA_ERR_DRVMISMATCH:
00080 msgWarn << "Detected a mismatch between CUDA runtime and GPU driver" << sendmsg;
00081 msgWarn << "Check to make sure that GPU drivers are up to date." << sendmsg;
00082
00083 break;
00084 }
00085
00086 return;
00087 }
00088
00089 if (numdevices > 0) {
00090 cudaavail = 1;
00091
00092 int i;
00093 for (i=0; i<numdevices; i++) {
00094 cudadevprops dp;
00095 memset(&dp, 0, sizeof(dp));
00096 if (!vmd_cuda_device_props(i, dp.name, sizeof(dp.name),
00097 &dp.major, &dp.minor,
00098 &dp.membytes, &dp.clockratekhz,
00099 &dp.smcount, &dp.overlap,
00100 &dp.kernelexectimeoutenabled,
00101 &dp.canmaphostmem, &dp.computemode)) {
00102 dp.deviceid=i;
00103
00104
00105
00106
00107 if (!(dp.kernelexectimeoutenabled && getenv("VMDCUDANODISPLAYGPUS")) &&
00108 (gpumask & (1 << i)) &&
00109 (dp.computemode != computeModeProhibited)) {
00110 devprops.append(dp);
00111 usabledevices++;
00112 }
00113 } else {
00114 msgWarn << " Failed to retrieve properties for CUDA accelerator " << i << sendmsg;
00115 }
00116 }
00117 }
00118 numdevices=usabledevices;
00119
00120 devpool_init();
00121 #endif
00122 }
00123
00124
00125 CUDAAccel::~CUDAAccel(void) {
00126 devpool_fini();
00127 }
00128
00129
00130 void CUDAAccel::devpool_init(void) {
00131 cudapool=NULL;
00132
00133 #if defined(VMDCUDA)
00134 if (!cudaavail || numdevices == 0 || getenv("VMDNOCUDA") != NULL)
00135 return;
00136
00137
00138 int workercount=numdevices;
00139 if (workercount > wkf_thread_numprocessors())
00140 workercount=wkf_thread_numprocessors();
00141
00142 int *devlist = new int[workercount];
00143 int i;
00144 for (i=0; i<workercount; i++) {
00145 devlist[i]=device_index(i);
00146 }
00147
00148 msgInfo << "Creating CUDA device pool and initializing hardware..." << sendmsg;
00149 cudapool=wkf_threadpool_create(workercount, devlist);
00150 delete [] devlist;
00151
00152
00153 if (getenv("VMDCUDAVERBOSE") != NULL)
00154 wkf_threadpool_launch(cudapool, vmd_cuda_devpool_setdevice, (void*)"VMD CUDA Dev Init", 1);
00155 else
00156 wkf_threadpool_launch(cudapool, vmd_cuda_devpool_setdevice, NULL, 1);
00157
00158 if (!getenv("VMDNOCUDA")) {
00159
00160 wkf_threadpool_launch(cudapool, vmd_cuda_devpool_clear_device_mem, NULL, 1);
00161 }
00162 #endif
00163 }
00164
00165 void CUDAAccel::devpool_fini(void) {
00166 if (!cudapool)
00167 return;
00168
00169 #if defined(VMDCUDA)
00170 devpool_wait();
00171 wkf_threadpool_destroy(cudapool);
00172 #endif
00173 cudapool=NULL;
00174 }
00175
00176 int CUDAAccel::devpool_launch(void *fctn(void *), void *parms, int blocking) {
00177 if (!cudapool)
00178 return -1;
00179
00180 return wkf_threadpool_launch(cudapool, fctn, parms, blocking);
00181 }
00182
00183 int CUDAAccel::devpool_wait(void) {
00184 if (!cudapool)
00185 return -1;
00186
00187 return wkf_threadpool_wait(cudapool);
00188 }
00189
00190 void CUDAAccel::print_cuda_devices(void) {
00191 if (getenv("VMDCUDANODISPLAYGPUS")) {
00192 msgInfo << "Ignoring CUDA-capable GPUs used for display" << sendmsg;
00193 }
00194
00195 if (!cudaavail || numdevices == 0) {
00196 msgInfo << "No CUDA accelerator devices available." << sendmsg;
00197 return;
00198 }
00199
00200 msgInfo << "Detected " << numdevices << " available CUDA "
00201 << ((numdevices > 1) ? "accelerators:" : "accelerator:") << sendmsg;
00202 int i;
00203 for (i=0; i<numdevices; i++) {
00204 char outstr[1024];
00205 memset(outstr, 0, sizeof(outstr));
00206
00207
00208 sprintf(outstr, "[%d] %-18s %2d SM_%d.%d @ %.2f GHz",
00209 device_index(i), device_name(i),
00210 (device_sm_count(i) > 0) ? device_sm_count(i) : 0,
00211 device_version_major(i), device_version_minor(i),
00212 device_clock_ghz(i));
00213 msgInfo << outstr;
00214
00215
00216 int gpumemmb = (device_membytes(i) / (1024 * 1024));
00217 if (gpumemmb < 1000)
00218 sprintf(outstr, ", %4dMB RAM", gpumemmb);
00219 else if (gpumemmb < 10240)
00220 sprintf(outstr, ", %.1fGB RAM", gpumemmb / 1024.0);
00221 else
00222 sprintf(outstr, ", %dGB RAM", gpumemmb / 1024);
00223
00224 msgInfo << outstr;
00225
00226
00227 if (device_computemode(i) == computeModeProhibited) {
00228 msgInfo << ", Compute Mode: Prohibited";
00229 } else {
00230 if (device_kerneltimeoutenabled(i))
00231 msgInfo << ", KTO";
00232
00233 if (device_overlap(i))
00234 msgInfo << ", OIO";
00235
00236 if (device_canmaphostmem(i))
00237 msgInfo << ", ZCP";
00238 }
00239
00240 msgInfo << sendmsg;
00241 }
00242 }
00243
00244 int CUDAAccel::num_devices(void) {
00245 return numdevices;
00246 }
00247
00248 int CUDAAccel::device_index(int dev) {
00249 return devprops[dev].deviceid;
00250 }
00251
00252 const char *CUDAAccel::device_name(int dev) {
00253 if (!cudaavail || dev < 0 || dev >= numdevices)
00254 return NULL;
00255 return devprops[dev].name;
00256 }
00257
00258 int CUDAAccel::device_version_major(int dev) {
00259 if (!cudaavail || dev < 0 || dev >= numdevices)
00260 return 0;
00261 return devprops[dev].major;
00262 }
00263
00264 int CUDAAccel::device_version_minor(int dev) {
00265 if (!cudaavail || dev < 0 || dev >= numdevices)
00266 return 0;
00267 return devprops[dev].minor;
00268 }
00269
00270 unsigned long CUDAAccel::device_membytes(int dev) {
00271 if (!cudaavail || dev < 0 || dev >= numdevices)
00272 return 0;
00273 return devprops[dev].membytes;
00274 }
00275
00276 float CUDAAccel::device_clock_ghz(int dev) {
00277 if (!cudaavail || dev < 0 || dev >= numdevices)
00278 return 0;
00279 return (float) (devprops[dev].clockratekhz / 1000000.0);
00280 }
00281
00282 int CUDAAccel::device_sm_count(int dev) {
00283 if (!cudaavail || dev < 0 || dev >= numdevices)
00284 return -1;
00285 return devprops[dev].smcount;
00286 }
00287
00288 int CUDAAccel::device_overlap(int dev) {
00289 if (!cudaavail || dev < 0 || dev >= numdevices)
00290 return -1;
00291 return devprops[dev].overlap;
00292 }
00293
00294 int CUDAAccel::device_kerneltimeoutenabled(int dev) {
00295 if (!cudaavail || dev < 0 || dev >= numdevices)
00296 return -1;
00297 return devprops[dev].kernelexectimeoutenabled;
00298 }
00299
00300 int CUDAAccel::device_canmaphostmem(int dev) {
00301 if (!cudaavail || dev < 0 || dev >= numdevices)
00302 return -1;
00303 return devprops[dev].canmaphostmem;
00304 }
00305
00306 int CUDAAccel::device_computemode(int dev) {
00307 if (!cudaavail || dev < 0 || dev >= numdevices)
00308 return -1;
00309 return devprops[dev].computemode;
00310 }
00311
00312