00001
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include "CUDAWrapNVML.h"
00024 #include "cuda_runtime.h"
00025
00026
00027
00028
00029 #if defined(_MSC_VER) || defined(_WIN32) || defined(_WIN64)
00030 #include <windows.h>
/*
 * Load a shared library by path (Windows implementation).
 *
 * Returns an opaque module handle, or NULL if the library could not be
 * loaded.  The ANSI entry point is called explicitly: the generic
 * LoadLibrary macro resolves to LoadLibraryW when UNICODE is defined, and
 * that variant does not accept a narrow "const char *" path.
 */
static void *wrap_dlopen(const char *filename) {
  return (void *)LoadLibraryA(filename);
}
/*
 * Look up an exported symbol in a module previously returned by
 * wrap_dlopen() (Windows implementation).  Returns NULL when the symbol
 * is not found.
 */
static void *wrap_dlsym(void *h, const char *sym) {
  HINSTANCE module = (HINSTANCE)h;
  void *address = (void *)GetProcAddress(module, sym);
  return address;
}
/*
 * Unload a module previously returned by wrap_dlopen() (Windows
 * implementation).
 *
 * FreeLibrary reports success with a nonzero value, so the result is
 * inverted to give the dlclose() convention: 0 on success, 1 on failure.
 */
static int wrap_dlclose(void *h) {
  HINSTANCE module = (HINSTANCE)h;
  if (FreeLibrary(module))
    return 0;
  return 1;
}
00041 #else
00042
00043 #include <dlfcn.h>
/*
 * Load a shared library by path (POSIX implementation).
 *
 * All symbols are resolved immediately (RTLD_NOW).  Returns the dlopen()
 * handle, or NULL if the library could not be loaded.
 */
static void *wrap_dlopen(const char *filename) {
  void *handle = dlopen(filename, RTLD_NOW);
  return handle;
}
/*
 * Look up a symbol in a library handle returned by wrap_dlopen() (POSIX
 * implementation).  Returns whatever dlsym() returns for the symbol.
 */
static void *wrap_dlsym(void *h, const char *sym) {
  void *address = dlsym(h, sym);
  return address;
}
/*
 * Release a library handle returned by wrap_dlopen() (POSIX
 * implementation).  Passes the dlclose() status straight through.
 */
static int wrap_dlclose(void *h) {
  const int status = dlclose(h);
  return status;
}
00053 #endif
00054
00055 #if defined(__cplusplus)
00056 extern "C" {
00057 #endif
00058
00059 wrap_nvml_handle * wrap_nvml_create() {
00060 int i=0;
00061 wrap_nvml_handle *nvmlh = NULL;
00062
00063
00064
00065
00066
00067
00068
00069 #if defined(_WIN64)
00070
00071 const char *plibnvidia_ml[] = {"c:/Program Files/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
00072 #elif defined(_WIN32) || defined(_MSC_VER)
00073
00074 const char *plibnvidia_ml[] = {"c:/Program Files (x86)/NVIDIA Corporation/NVSMI/nvml.dll", NULL};
00075 #elif defined(__linux) && (defined(__i386__) || defined(__ARM_ARCH_7A__))
00076
00077 const char *plibnvidia_ml[] = {"/usr/lib/libnvidia-ml.so",
00078 "/usr/lib/libnvidia-ml.so.1",
00079 "/usr/lib/x86-linux-gnu/libnvidia-ml.so",
00080 "/usr/lib/x86-linux-gnu/libnvidia-ml.so.1",
00081 NULL};
00082 #elif defined(__linux)
00083
00084 const char *plibnvidia_ml[] = {"/usr/lib64/libnvidia-ml.so",
00085 "/usr/lib64/libnvidia-ml.so.1",
00086 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so",
00087 "/usr/lib/aarch64-linux-gnu/libnvidia-ml.so.1",
00088 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so",
00089 "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
00090 NULL};
00091 #else
00092
00093 const char *plibnvidia_ml[] = {NULL};
00094 #endif
00095
00096
00097 void *nvml_dll = NULL;
00098 const char *libnvidia_ml = NULL;
00099
00100
00101 if (getenv("LIBNVMLPATH") != NULL)
00102 nvml_dll = wrap_dlopen(getenv("LIBNVMLPATH"));
00103
00104 int sopathidx = 0;
00105 libnvidia_ml = plibnvidia_ml[sopathidx];
00106 while ((nvml_dll == NULL) && (libnvidia_ml != NULL)) {
00107 nvml_dll = wrap_dlopen(libnvidia_ml);
00108 sopathidx++;
00109 libnvidia_ml = plibnvidia_ml[sopathidx];
00110 }
00111 if (nvml_dll == NULL)
00112 return NULL;
00113
00114 nvmlh = (wrap_nvml_handle *) calloc(1, sizeof(wrap_nvml_handle));
00115
00116 nvmlh->nvml_dll = nvml_dll;
00117
00118 nvmlh->nvmlInit = (wrap_nvmlReturn_t (*)(void))
00119 wrap_dlsym(nvmlh->nvml_dll, "nvmlInit");
00120
00121 nvmlh->nvmlDeviceGetCount = (wrap_nvmlReturn_t (*)(int *))
00122 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCount_v2");
00123
00124 nvmlh->nvmlDeviceGetHandleByIndex = (wrap_nvmlReturn_t (*)(int, wrap_nvmlDevice_t *))
00125 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetHandleByIndex_v2");
00126
00127 nvmlh->nvmlDeviceGetPciInfo = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, wrap_nvmlPciInfo_t *))
00128 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPciInfo");
00129
00130 nvmlh->nvmlDeviceGetName = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, char *, int))
00131 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetName");
00132
00133 nvmlh->nvmlDeviceGetTemperature = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, int, unsigned int *))
00134 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetTemperature");
00135
00136 nvmlh->nvmlDeviceGetFanSpeed = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
00137 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetFanSpeed");
00138
00139 nvmlh->nvmlDeviceGetPowerUsage = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int *))
00140 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetPowerUsage");
00141
00142 nvmlh->nvmlDeviceGetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t, unsigned int, unsigned long *))
00143 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceGetCpuAffinity");
00144
00145 nvmlh->nvmlDeviceSetCpuAffinity = (wrap_nvmlReturn_t (*)(wrap_nvmlDevice_t))
00146 wrap_dlsym(nvmlh->nvml_dll, "nvmlDeviceSetCpuAffinity");
00147
00148 nvmlh->nvmlShutdown = (wrap_nvmlReturn_t (*)())
00149 wrap_dlsym(nvmlh->nvml_dll, "nvmlShutdown");
00150
00151 if (nvmlh->nvmlInit == NULL ||
00152 nvmlh->nvmlShutdown == NULL ||
00153 nvmlh->nvmlDeviceGetCount == NULL ||
00154 nvmlh->nvmlDeviceGetHandleByIndex == NULL ||
00155 nvmlh->nvmlDeviceGetPciInfo == NULL ||
00156 nvmlh->nvmlDeviceGetName == NULL ||
00157 nvmlh->nvmlDeviceGetTemperature == NULL ||
00158 nvmlh->nvmlDeviceGetFanSpeed == NULL ||
00159 nvmlh->nvmlDeviceGetPowerUsage == NULL ||
00160 nvmlh->nvmlDeviceGetCpuAffinity == NULL ||
00161 nvmlh->nvmlDeviceSetCpuAffinity == NULL
00162 ) {
00163 #if 0
00164 printf("Failed to obtain all required NVML function pointers\n");
00165 #endif
00166 wrap_dlclose(nvmlh->nvml_dll);
00167 free(nvmlh);
00168 return NULL;
00169 }
00170
00171 nvmlh->nvmlInit();
00172 nvmlh->nvmlDeviceGetCount(&nvmlh->nvml_gpucount);
00173
00174
00175
00176 if (cudaGetDeviceCount(&nvmlh->cuda_gpucount) != cudaSuccess) {
00177 #if 0
00178 printf("Failed to query CUDA device count!\n");
00179 #endif
00180 wrap_dlclose(nvmlh->nvml_dll);
00181 free(nvmlh);
00182 return NULL;
00183 }
00184
00185 nvmlh->devs = (wrap_nvmlDevice_t *) calloc(nvmlh->nvml_gpucount, sizeof(wrap_nvmlDevice_t));
00186 nvmlh->nvml_pci_domain_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
00187 nvmlh->nvml_pci_bus_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
00188 nvmlh->nvml_pci_device_id = (unsigned int*) calloc(nvmlh->nvml_gpucount, sizeof(unsigned int));
00189 nvmlh->nvml_cuda_device_id = (int*) calloc(nvmlh->nvml_gpucount, sizeof(int));
00190 nvmlh->cuda_nvml_device_id = (int*) calloc(nvmlh->cuda_gpucount, sizeof(int));
00191
00192
00193 for (i=0; i<nvmlh->nvml_gpucount; i++) {
00194 nvmlh->nvmlDeviceGetHandleByIndex(i, &nvmlh->devs[i]);
00195 }
00196
00197
00198
00199 for (i=0; i<nvmlh->nvml_gpucount; i++) {
00200 wrap_nvmlPciInfo_t pciinfo;
00201 nvmlh->nvmlDeviceGetPciInfo(nvmlh->devs[i], &pciinfo);
00202 nvmlh->nvml_pci_domain_id[i] = pciinfo.domain;
00203 nvmlh->nvml_pci_bus_id[i] = pciinfo.bus;
00204 nvmlh->nvml_pci_device_id[i] = pciinfo.device;
00205 }
00206
00207
00208 for (i=0; i<nvmlh->nvml_gpucount; i++) {
00209 nvmlh->nvml_cuda_device_id[i] = -1;
00210 }
00211 for (i=0; i<nvmlh->cuda_gpucount; i++) {
00212 cudaDeviceProp props;
00213 nvmlh->cuda_nvml_device_id[i] = -1;
00214
00215 if (cudaGetDeviceProperties(&props, i) == cudaSuccess) {
00216 int j;
00217 for (j=0; j<nvmlh->nvml_gpucount; j++) {
00218 if ((nvmlh->nvml_pci_domain_id[j] == props.pciDomainID) &&
00219 (nvmlh->nvml_pci_bus_id[j] == props.pciBusID) &&
00220 (nvmlh->nvml_pci_device_id[j] == props.pciDeviceID)) {
00221 #if 0
00222 printf("CUDA GPU[%d] matches NVML GPU[%d]\n", i, j);
00223 #endif
00224 nvmlh->nvml_cuda_device_id[j] = i;
00225 nvmlh->cuda_nvml_device_id[i] = j;
00226 }
00227 }
00228 }
00229 }
00230
00231 return nvmlh;
00232 }
00233
00234
00235 int wrap_nvml_destroy(wrap_nvml_handle *nvmlh) {
00236 nvmlh->nvmlShutdown();
00237
00238 if (nvmlh->nvml_pci_domain_id != NULL)
00239 free(nvmlh->nvml_pci_domain_id);
00240
00241 if (nvmlh->nvml_pci_bus_id != NULL)
00242 free(nvmlh->nvml_pci_bus_id);
00243
00244 if (nvmlh->nvml_pci_device_id != NULL)
00245 free(nvmlh->nvml_pci_device_id);
00246
00247 if (nvmlh->nvml_cuda_device_id != NULL)
00248 free(nvmlh->nvml_cuda_device_id);
00249
00250 if (nvmlh->cuda_nvml_device_id != NULL)
00251 free(nvmlh->cuda_nvml_device_id);
00252
00253 if (nvmlh->devs != NULL)
00254 free(nvmlh->devs);
00255
00256 wrap_dlclose(nvmlh->nvml_dll);
00257 free(nvmlh);
00258 return 0;
00259 }
00260
00261
00262 int wrap_nvml_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
00263 *gpucount = nvmlh->nvml_gpucount;
00264 return 0;
00265 }
00266
00267 int wrap_cuda_get_gpucount(wrap_nvml_handle *nvmlh, int *gpucount) {
00268 *gpucount = nvmlh->cuda_gpucount;
00269 return 0;
00270 }
00271
00272 int wrap_nvml_get_gpu_name(wrap_nvml_handle *nvmlh,
00273 int cudaindex,
00274 char *namebuf,
00275 int bufsize) {
00276 int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
00277 if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
00278 return -1;
00279
00280 if (nvmlh->nvmlDeviceGetName(nvmlh->devs[gpuindex], namebuf, bufsize) != WRAPNVML_SUCCESS)
00281 return -1;
00282
00283 return 0;
00284 }
00285
00286
00287 int wrap_nvml_get_tempC(wrap_nvml_handle *nvmlh,
00288 int cudaindex, unsigned int *tempC) {
00289 wrap_nvmlReturn_t rc;
00290 int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
00291 if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
00292 return -1;
00293
00294 rc = nvmlh->nvmlDeviceGetTemperature(nvmlh->devs[gpuindex], 0u , tempC);
00295 if (rc != WRAPNVML_SUCCESS) {
00296 return -1;
00297 }
00298
00299 return 0;
00300 }
00301
00302
00303 int wrap_nvml_get_fanpcnt(wrap_nvml_handle *nvmlh,
00304 int cudaindex, unsigned int *fanpcnt) {
00305 wrap_nvmlReturn_t rc;
00306 int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
00307 if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
00308 return -1;
00309
00310 rc = nvmlh->nvmlDeviceGetFanSpeed(nvmlh->devs[gpuindex], fanpcnt);
00311 if (rc != WRAPNVML_SUCCESS) {
00312 return -1;
00313 }
00314
00315 return 0;
00316 }
00317
00318
00319 int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
00320 int cudaindex,
00321 unsigned int *milliwatts) {
00322 int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
00323 if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
00324 return -1;
00325
00326 if (nvmlh->nvmlDeviceGetPowerUsage(nvmlh->devs[gpuindex], milliwatts) != WRAPNVML_SUCCESS)
00327 return -1;
00328
00329 return 0;
00330 }
00331
00332
00333 int wrap_nvml_get_cpu_affinity(wrap_nvml_handle *nvmlh,
00334 int cudaindex,
00335 unsigned int cpuSetSize,
00336 unsigned long *cpuSet) {
00337 int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
00338 if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
00339 return -1;
00340
00341 if (nvmlh->nvmlDeviceGetCpuAffinity(nvmlh->devs[gpuindex], cpuSetSize, cpuSet) != WRAPNVML_SUCCESS)
00342 return -1;
00343
00344 return 0;
00345 }
00346
00347
00348 int wrap_nvml_set_cpu_affinity(wrap_nvml_handle *nvmlh,
00349 int cudaindex) {
00350 int gpuindex = nvmlh->cuda_nvml_device_id[cudaindex];
00351 if (gpuindex < 0 || gpuindex >= nvmlh->nvml_gpucount)
00352 return -1;
00353
00354 if (nvmlh->nvmlDeviceSetCpuAffinity(nvmlh->devs[gpuindex]) != WRAPNVML_SUCCESS)
00355 return -1;
00356
00357 return 0;
00358 }
00359
00360
00361 #if defined(__cplusplus)
00362 }
00363 #endif
00364
00365