00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <string.h>
00021 #include <stdio.h>
00022 #include "CUDAKernels.h"
00023 #include "WKFThreads.h"
00024
00025 #if defined(__cplusplus)
00026 extern "C" {
00027 #endif
00028
00029
00030 static int vmd_cuda_drv_runtime_compatible() {
00031 #if CUDART_VERSION >= 2020
00032 int cuda_driver_version=-1;
00033 int cuda_runtime_version=0;
00034
00035 cudaDriverGetVersion(&cuda_driver_version);
00036 cudaRuntimeGetVersion(&cuda_runtime_version);
00037
00038 #if 0
00039 printf("CUDA driver version: %d\n", cuda_driver_version);
00040 printf("CUDA runtime version: %d\n", cuda_runtime_version);
00041 #endif
00042
00043 if (cuda_driver_version == 0)
00044 return VMDCUDA_ERR_NODEVICES;
00045
00046 if (cuda_driver_version < cuda_runtime_version)
00047 return VMDCUDA_ERR_DRVMISMATCH;
00048 #endif
00049
00050 return VMDCUDA_ERR_NONE;
00051 }
00052
00053
00054 int vmd_cuda_device_props(int dev, char *name, int namelen,
00055 int *devmajor, int *devminor,
00056 unsigned long *memb, int *clockratekhz,
00057 int *smcount, int *overlap, int *kerneltimeout,
00058 int *canmaphostmem, int *computemode) {
00059 cudaError_t rc;
00060 cudaDeviceProp deviceProp;
00061
00062 int vercheck;
00063 if ((vercheck = vmd_cuda_drv_runtime_compatible()) != VMDCUDA_ERR_NONE) {
00064 return vercheck;
00065 }
00066
00067 memset(&deviceProp, 0, sizeof(cudaDeviceProp));
00068 if ((rc=cudaGetDeviceProperties(&deviceProp, dev)) != cudaSuccess) {
00069
00070 if (rc == cudaErrorNotYetImplemented)
00071 return VMDCUDA_ERR_EMUDEVICE;
00072 return VMDCUDA_ERR_GENERAL;
00073 }
00074
00075 if (name)
00076 strncpy(name, deviceProp.name, namelen);
00077 if (devmajor)
00078 *devmajor = deviceProp.major;
00079 if (devminor)
00080 *devminor = deviceProp.minor;
00081 if (memb)
00082 *memb = deviceProp.totalGlobalMem;
00083 if (clockratekhz)
00084 *clockratekhz = deviceProp.clockRate;
00085 #if CUDART_VERSION >= 2000
00086 if (smcount)
00087 *smcount = deviceProp.multiProcessorCount;
00088 if (overlap)
00089 *overlap = (deviceProp.deviceOverlap != 0);
00090 #else
00091 if (smcount)
00092 *smcount = -1;
00093 if (overlap)
00094 *overlap = 0;
00095 #endif
00096 #if CUDART_VERSION >= 2010
00097 if (kerneltimeout)
00098 *kerneltimeout = (deviceProp.kernelExecTimeoutEnabled != 0);
00099 #else
00100 if (kerneltimeout)
00101 *kerneltimeout = 0;
00102 #endif
00103 #if CUDART_VERSION >= 2020
00104 if (canmaphostmem)
00105 *canmaphostmem = (deviceProp.canMapHostMemory != 0);
00106 if (computemode)
00107 *computemode = deviceProp.computeMode;
00108 #else
00109 if (canmaphostmem)
00110 *canmaphostmem = 0;
00111 if (computemode)
00112 *computemode = VMDCUDA_COMPUTEMODE_DEFAULT;
00113 #endif
00114 return VMDCUDA_ERR_NONE;
00115 }
00116
00117
00118 int vmd_cuda_num_devices(int *numdev) {
00119 int i;
00120 int devcount=0;
00121 int usabledevs=0;
00122 *numdev = 0;
00123
00124 int vercheck;
00125 if ((vercheck = vmd_cuda_drv_runtime_compatible()) != VMDCUDA_ERR_NONE) {
00126 return vercheck;
00127 }
00128
00129 if (cudaGetDeviceCount(&devcount) != cudaSuccess) {
00130 return VMDCUDA_ERR_NODEVICES;
00131 }
00132
00133
00134
00135
00136 if (devcount > 100 || devcount < 0)
00137 return VMDCUDA_ERR_DRVMISMATCH;
00138
00139
00140 for (i=0; i<devcount; i++) {
00141 int devmajor, devminor, rc;
00142
00143 rc = vmd_cuda_device_props(i, NULL, 0, &devmajor, &devminor,
00144 NULL, NULL, NULL, NULL, NULL, NULL, NULL);
00145
00146 if (rc == VMDCUDA_ERR_NONE) {
00147
00148 if (((devmajor >= 1) && (devminor >= 0)) &&
00149 ((devmajor != 9999) && (devminor != 9999))) {
00150 usabledevs++;
00151 }
00152 } else if (rc != VMDCUDA_ERR_EMUDEVICE) {
00153 return VMDCUDA_ERR_SOMEDEVICES;
00154 }
00155 }
00156
00157 *numdev = usabledevs;
00158
00159 return VMDCUDA_ERR_NONE;
00160 }
00161
00162
00163 void * vmd_cuda_devpool_setdevice(void * voidparms) {
00164 int count, id, dev;
00165 cudaDeviceProp deviceProp;
00166 char *mesg;
00167 char *d_membuf;
00168 cudaError_t err;
00169
00170 wkf_threadpool_worker_getid(voidparms, &id, &count);
00171 wkf_threadpool_worker_getdata(voidparms, (void **) &mesg);
00172 wkf_threadpool_worker_getdevid(voidparms, &dev);
00173
00174
00175 cudaSetDevice(dev);
00176
00177 #if CUDART_VERSION >= 2000
00178
00179
00180
00181 memset(&deviceProp, 0, sizeof(cudaDeviceProp));
00182 if (cudaGetDeviceProperties(&deviceProp, dev) == cudaSuccess) {
00183 float smscale = ((float) deviceProp.multiProcessorCount) / 30.0f;
00184 double clockscale = ((double) deviceProp.clockRate) / 1295000.0;
00185 float speedscale = smscale * ((float) clockscale);
00186
00187 #if 0
00188 printf("clock rate: %lf\n", (double) deviceProp.clockRate);
00189 printf("scale: %.4f smscale: %.4f clockscale: %.4f\n",
00190 speedscale, smscale, clockscale);
00191 #endif
00192
00193 #if CUDART_VERSION >= 2030
00194 if (deviceProp.canMapHostMemory != 0) {
00195 #if 0
00196 printf("Enabled mapped host memory on device[%d]\n", dev);
00197 #endif
00198
00199
00200
00201
00202
00203
00204
00205
00206 err = cudaSetDeviceFlags(cudaDeviceScheduleAuto | cudaDeviceMapHost);
00207 if (err != cudaSuccess) {
00208 printf("Warning) thread[%d] can't set GPU[%d] device flags\n", id, dev);
00209 printf("Warning) CUDA error: %s\n", cudaGetErrorString(err));
00210 }
00211 }
00212 #endif
00213
00214 wkf_threadpool_worker_setdevspeed(voidparms, speedscale);
00215
00216
00217
00218
00219
00220
00221
00222
00223 if ((err = cudaMalloc((void **) &d_membuf, 1*1024*1024)) == cudaSuccess) {
00224 cudaFree(d_membuf);
00225 } else {
00226 printf("Warning) thread[%d] can't init GPU[%d] found by device query\n", id, dev);
00227 printf("Warning) CUDA error: %s\n", cudaGetErrorString(err));
00228
00229
00230
00231
00232 }
00233 }
00234 #endif
00235
00236 if (mesg != NULL)
00237 printf("devpool thread[%d / %d], device %d message: '%s'\n", id, count, dev, mesg);
00238
00239 return NULL;
00240 }
00241
00242
00243
00244 #if defined(__cplusplus)
00245 }
00246 #endif
00247
00248