
CUDAWrapNVML.cu

#include <stdio.h>
#include <stdlib.h>
#include "CUDAWrapNVML.h"
#include "cuda_runtime.h"

/*
 * Wrappers to emulate dlopen() on systems that lack it, such as Windows
 */
#if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
#include <windows.h>
static void *wrap_dlopen(const char *filename) {
  return (void *)LoadLibrary(filename);
}
static void *wrap_dlsym(void *h, const char *sym) {
  return (void *)GetProcAddress((HINSTANCE)h, sym);
}
static int wrap_dlclose(void *h) {
  /* FreeLibrary returns nonzero on success, so invert the result */
  /* to match dlclose(), which returns zero on success            */
  return (!FreeLibrary((HINSTANCE)h));
}
#else
/* assume we can use dlopen itself... */
#include <dlfcn.h>
static void *wrap_dlopen(const char *filename) {
  return dlopen(filename, RTLD_NOW);
}
static void *wrap_dlsym(void *h, const char *sym) {
  return dlsym(h, sym);
}
static int wrap_dlclose(void *h) {
  return dlclose(h);
}
#endif

#if defined(__cplusplus)
extern "C" {
#endif

wrap_nvml_handle * wrap_nvml_create() {
  int i = 0;
  wrap_nvml_handle *nvmlh = NULL;

  /*
   * We use hard-coded library installation locations for the time being.
   * It's unclear where (or whether) libnvidia-ml.so gets installed on
   * MacOS X; a deep scouring of the filesystem on one of the Mac CUDA
   * build boxes turned up nothing, so for now this won't work on OSX.
   */
#if defined(_WIN64)
  /* 64-bit Windows */
  const char *plibnvidia_ml[] = {"c:/Program Files/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
#elif defined(_WIN32) || defined(_MSC_VER)
  /* 32-bit Windows */
  const char *plibnvidia_ml[] = {"c:/Program Files (x86)/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
#elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
  /* 32-bit linux assumed; i386-linux-gnu is the Debian/Ubuntu multiarch dir */
  const char *plibnvidia_ml[] = {"/usr/lib/libnvidia-ml.so",
                                 "/usr/lib/libnvidia-ml.so.1",
                                 "/usr/lib/i386-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/i386-linux-gnu/libnvidia-ml.so.1",
                                 NULL};
#elif defined(__linux)
  /* 64-bit linux assumed */
  const char *plibnvidia_ml[] = {"/usr/lib64/libnvidia-ml.so",
                                 "/usr/lib64/libnvidia-ml.so.1",
                                 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so.1",
                                 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so",
                                 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
                                 NULL};
#else
  //#error "Unrecognized platform: need NVML DLL path for this platform..."
  const char *plibnvidia_ml[] = {NULL};
#endif

  void *nvml_dll = NULL;
  const char *libnvidia_ml = NULL;

  // allow explicit user override of path if needed
  if (getenv("LIBNVMLPATH") != NULL)
    nvml_dll = wrap_dlopen(getenv("LIBNVMLPATH"));

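  /* otherwise, walk the list of candidate paths until one of them loads */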
  int sopathidx = 0;
  libnvidia_ml = plibnvidia_ml[sopathidx];
  while ((nvml_dll == NULL) && (libnvidia_ml != NULL)) {
    nvml_dll = wrap_dlopen(libnvidia_ml);
    sopathidx++;
    libnvidia_ml = plibnvidia_ml[sopathidx];
  }
  if (nvml_dll == NULL)
    return NULL;

  nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
  if (nvmlh == NULL) {
    wrap_dlclose(nvml_dll);
    return NULL;
  }

  nvmlh->nvml_dll = nvml_dll;

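  /* Resolve each NVML entry point by name and cast it to the matching */
  /* function pointer type; the "_v2" names bind the newer NVML ABI    */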
  nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");

  nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");

  nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");

  nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");

  nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");

  nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");

  nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");

  nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");

  nvmlh->nvmlDeviceGetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int, unsigned long *))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity");

  nvmlh->nvmlDeviceSetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity");

  nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)(void))
    wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");

  if (nvmlh->nvmlInit == NULL ||
      nvmlh->nvmlShutdown == NULL ||
      nvmlh->nvmlDeviceGetCount == NULL ||
      nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
      nvmlh->nvmlDeviceGetPciInfo == NULL ||
      nvmlh->nvmlDeviceGetName == NULL ||
      nvmlh->nvmlDeviceGetTemperature == NULL ||
      nvmlh->nvmlDeviceGetFanSpeed == NULL ||
      nvmlh->nvmlDeviceGetPowerUsage == NULL ||
      nvmlh->nvmlDeviceGetCpuAffinity == NULL ||
      nvmlh->nvmlDeviceSetCpuAffinity == NULL) {
#if 0
    printf("Failed to obtain all required NVML function pointers\n");
#endif
    wrap_dlclose(nvmlh->nvml_dll);
    free(nvmlh);
    return NULL;
  }

  nvmlh->nvmlInit();
  nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);

  /* Query the CUDA device count, in case it doesn't agree with NVML,  */
  /* since CUDA only reports GPUs with compute capability above 1.0    */
  if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
#if 0
    printf("Failed to query CUDA device count!\n");
#endif
    nvmlh->nvmlShutdown();
    wrap_dlclose(nvmlh->nvml_dll);
    free(nvmlh);
    return NULL;
  }

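  /* Per-GPU state: NVML device handles, PCI identity, plus the two */
  /* index-mapping tables (NVML-to-CUDA and CUDA-to-NVML)           */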
  nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
  nvmlh->nvml_pci_domain_id = (unsigned int *) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_pci_bus_id = (unsigned int *) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_pci_device_id = (unsigned int *) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
  nvmlh->nvml_cuda_device_id = (int *) calloc(nvmlh->nvml_gpucount, sizeof(int));
  nvmlh->cuda_nvml_device_id = (int *) calloc(nvmlh->cuda_gpucount, sizeof(int));

  /* Obtain the GPU device handles we're going to need repeatedly... */
  for (i=0; i<nvmlh->nvml_gpucount; i++) {
    nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
  }

  /* Query PCI info for each NVML device, and build a table mapping */
  /* CUDA device IDs to NVML device IDs and vice versa              */
  for (i=0; i<nvmlh->nvml_gpucount; i++) {
    wrap_nvmlPciInfo_t pciinfo;
    nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
    nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
    nvmlh->nvml_pci_bus_id[i]    = pciinfo.bus;
    nvmlh->nvml_pci_device_id[i] = pciinfo.device;
  }

  /* build the mapping of NVML device IDs to CUDA IDs; -1 marks a */
  /* device with no counterpart                                   */
  for (i=0; i<nvmlh->nvml_gpucount; i++) {
    nvmlh->nvml_cuda_device_id[i] = -1;
  }
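
  /* for each CUDA device, find the NVML device at the same PCI address */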
  for (i=0; i<nvmlh->cuda_gpucount; i++) {
    cudaDeviceProp props;
    nvmlh->cuda_nvml_device_id[i] = -1;

    if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
      int j;
      for (j=0; j<nvmlh->nvml_gpucount; j++) {
        if ((nvmlh->nvml_pci_domain_id[j] == props.pciDomainID) &&
            (nvmlh->nvml_pci_bus_id[j]    == props.pciBusID) &&
            (nvmlh->nvml_pci_device_id[j] == props.pciDeviceID)) {
#if 0
          printf("CUDA GPU[%d] matches NVML GPU[%d]\n", i, j);
#endif
          nvmlh->nvml_cuda_device_id[j] = i;
          nvmlh->cuda_nvml_device_id[i] = j;
        }
      }
    }
  }

  return nvmlh;
}


int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) {
  nvmlh->nvmlShutdown();

  /* free(NULL) is a no-op, so the table pointers need no NULL checks */
  free(nvmlh->nvml_pci_domain_id);
  free(nvmlh->nvml_pci_bus_id);
  free(nvmlh->nvml_pci_device_id);
  free(nvmlh->nvml_cuda_device_id);
  free(nvmlh->cuda_nvml_device_id);
  free(nvmlh->devs);

  wrap_dlclose(nvmlh->nvml_dll);
  free(nvmlh);
  return 0;
}


int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
  *gpucount = nvmlh->nvml_gpucount;
  return 0;
}

int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
  *gpucount = nvmlh->cuda_gpucount;
  return 0;
}

int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
                           int cudaindex,
                           char *namebuf,
                           int bufsize) {
  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
    return -1;

  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
                        int cudaindex, unsigned int *tempC) {
  wrap_nvmlReturn_t rc;
  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
    return -1;

  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u /* NVML_TEMPERATURE_GPU */, tempC);
  if (rc != WRAPNVML_SUCCESS) {
    return -1;
  }

  return 0;
}


int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
                          int cudaindex, unsigned int *fanpcnt) {
  wrap_nvmlReturn_t rc;
  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
    return -1;

  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
  if (rc != WRAPNVML_SUCCESS) {
    return -1;
  }

  return 0;
}


int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
                              int cudaindex,
                              unsigned int *milliwatts) {
  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
    return -1;

  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_get_cpu_affinity(wrap_nvml_handle *nvmlh,
                               int cudaindex,
                               unsigned int cpuSetSize,
                               unsigned long *cpuSet) {
  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
    return -1;

  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceGetCpuAffinity(nvmlh->devs[gpuindex], cpuSetSize, cpuSet) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


int wrap_nvml_set_cpu_affinity(wrap_nvml_handle *nvmlh,
                               int cudaindex) {
  if (cudaindex < 0 || cudaindex >= nvmlh->cuda_gpucount)
    return -1;

  int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
  if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
    return -1;

  if (nvmlh->nvmlDeviceSetCpuAffinity(nvmlh->devs[gpuindex]) != WRAPNVML_SUCCESS)
    return -1;

  return 0;
}


#if defined(__cplusplus)
}
#endif
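
/*
 * Example usage (a minimal sketch; wrap_nvml_selftest is hypothetical and
 * not part of the original API): walk every CUDA device visible to the
 * runtime and print its name and temperature through the wrapper.
 * Guarded out with #if 0, like the debug prints above.
 */
#if 0
int wrap_nvml_selftest(void) {
  wrap_nvml_handle *nvmlh = wrap_nvml_create();
  if (nvmlh == NULL)
    return -1;  /* NVML library not found, or required symbols missing */

  int gpucount = 0;
  wrap_cuda_get_gpucount(nvmlh, &gpucount);

  for (int i=0; i<gpucount; i++) {
    char name[64];
    unsigned int tempC = 0;
    /* the accessors take CUDA device indices and map them to NVML ones */
    if (wrap_nvml_get_gpu_name(nvmlh, i, name, sizeof(name)) == 0 &&
        wrap_nvml_get_tempC(nvmlh, i, &tempC) == 0) {
      printf("CUDA GPU[%d] '%s': %u C\n", i, name, tempC);
    }
  }

  return wrap_nvml_destroy(nvmlh);
}
#endif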
