Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

CUDAClearDevice.cu

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 2007-2009 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: CUDAClearDevice.cu,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.11 $      $Date: 2010/03/22 20:33:24 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *   CUDA utility to clear all global and constant GPU memory areas to 
00019  *   known values.
00020  *
00021  ***************************************************************************/
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <string.h>
00025 #include <cuda.h>
00026 
00027 #include "utilities.h"
00028 #include "CUDAKernels.h"
00029 
// CUERR: query the CUDA runtime's last-error state with cudaGetLastError();
// if it is anything other than cudaSuccess, print the error string together
// with the source file and line, and return NULL from the enclosing function.
// The macro expands to a complete brace-enclosed block and is invoked in this
// file without a trailing semicolon, so it can only be used inside functions
// for which "return NULL" is a valid return statement.
00030 #define CUERR { cudaError_t err; \
00031   if ((err = cudaGetLastError()) != cudaSuccess) { \
00032   printf("CUDA error: %s, %s line %d\n", cudaGetErrorString(err), __FILE__, __LINE__); \
00033   return NULL; }}
00034 
00035 // a full-sized 64-kB constant memory buffer (16384 floats) that gets
00036 // overwritten with zeros in order to clear any existing device state
00037 __constant__ static float constbuf[16384];
00038 
00039 // maximum number of global memory allocations (array slots) used when
// trying to soak up all available GPU RAM
00040 #define MAXLOOPS 16
00041 
/*
 * vmd_cuda_devpool_clear_device_mem()
 *
 * Worker routine that resets GPU memory contents to known (zero) values.
 *
 *   1. The 64-kB __constant__ buffer 'constbuf' is overwritten with zeros.
 *   2. Unless built with VMDMPI defined, up to MAXLOOPS global memory
 *      buffers are allocated (starting with 1GB requests and halving the
 *      request size after each failed cudaMalloc()), then every buffer that
 *      was obtained is zeroed with cudaMemset() and released again.
 *
 * Parameters:
 *   voidparms - opaque worker handle; it is only handed through to
 *               wkf_threadpool_worker_getid() and
 *               wkf_threadpool_worker_getdevid() to obtain the worker id,
 *               worker count, and device id used in diagnostic messages.
 *
 * Returns:
 *   Always NULL, both on success and when CUERR detects a CUDA error
 *   (in which case an error message has been printed first).
 *
 * Setting the VMDCUDAVERBOSE environment variable (to any value) enables
 * per-buffer progress messages on stdout.
 */
void * vmd_cuda_devpool_clear_device_mem(void * voidparms) {
  int id, count, dev;
  float zerobuf[16 * 1024]; // host-side zeros, same size as constbuf
  int verbose=0;

  if (getenv("VMDCUDAVERBOSE") != NULL)
    verbose=1;

  memset(zerobuf, 0, sizeof(zerobuf));

  wkf_threadpool_worker_getid(voidparms, &id, &count);
  wkf_threadpool_worker_getdevid(voidparms, &dev);

  // clear constant memory
  cudaMemcpyToSymbol(constbuf, zerobuf, sizeof(zerobuf), 0);
  CUERR

  // 
  // Allocate, clear, and deallocate all global memory we can touch
  //
  // XXX In MPI enabled builds, we skip the global memory clearing step 
  //     since multiple VMD processes may end up being mapped to the
  //     same node, sharing the same set of GPUs.  A better way of handling
  //     this would be either to perform the memory clear only on one 
  //     MPI rank per physical node, or to distribute GPUs among 
  //     VMD processes so no sharing occurs.
  //
#if !defined(VMDMPI)
  // The bookkeeping below is only needed when global memory is cleared,
  // so it lives inside the conditional region to avoid unused variables
  // in MPI builds.
  int i, bufcnt=0;
  char *bufs[MAXLOOPS];
  size_t bufszs[MAXLOOPS];
  size_t totalsz=0;
  size_t sz(1024 * 1024 * 1024); /* start with 1GB buffer size */

  // sizeof(array) clears exactly the whole array, independent of any
  // relationship between sizeof(size_t) and the element types
  memset(bufs, 0, sizeof(bufs));
  memset(bufszs, 0, sizeof(bufszs));

  for (i=0; i<MAXLOOPS; i++) {
    // Allocate the largest buffer we can get. If we fail, we reduce request
    // size to half of the previous, and try again until we reach the minimum
    // request size threshold.
    //
    // rc starts out as a failure code so that the test below is well
    // defined even if the loop condition short-circuits on the size check
    // and cudaMalloc() is never called.
    cudaError_t rc = cudaErrorMemoryAllocation;
    while ((sz > (16 * 1024 * 1024)) && 
           ((rc=cudaMalloc((void **) &bufs[i], sz)) != cudaSuccess)) {
      cudaGetLastError(); // reset error state
      sz >>= 1;
    }

    if (rc == cudaSuccess) {
      bufszs[i] = sz;
      totalsz += sz; 
      bufcnt++;
      // size_t values are cast to unsigned long and printed with %lu;
      // passing a size_t to %d is undefined behavior on 64-bit platforms
      if (verbose)
        printf("devpool thread[%d / %d], dev %d buf[%d] size: %lu\n", id, count, dev, i, (unsigned long) sz);
    } else {
      bufs[i] = NULL;
      bufszs[i] = 0;
      if (verbose)
        printf("devpool thread[%d / %d], dev %d buf[%d] failed min allocation size: %lu\n", id, count, dev, i, (unsigned long) sz);

      // terminate allocation loop early
      break;
    } 
  }

  if (verbose)
    printf("devpool thread[%d / %d], dev %d allocated %d buffers\n", id, count, dev, bufcnt);

  // zero and release every buffer we managed to obtain
  for (i=0; i<bufcnt; i++) {
    if ((bufs[i] != NULL) && (bufszs[i] > 0)) {
      cudaMemset(bufs[i], 0, bufszs[i]);
      cudaFree(bufs[i]);
      bufs[i] = NULL;
      bufszs[i] = 0;
    }
  }
  // a single check after all buffers have been released, so an error
  // report never leaves allocations behind
  CUERR

  if (verbose)
    printf("  Device %d cleared %lu MB of GPU memory\n", dev, (unsigned long) (totalsz / (1024 * 1024)));

#endif

  return NULL;
}
00125 

Generated on Sat May 26 01:47:50 2012 for VMD (current) by doxygen 1.2.14 written by Dimitri van Heesch, © 1997-2002