msmpot_cuda.h Source File

00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 2008-2009 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: msmpot_cuda.h,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.3 $      $Date: 2010/06/03 20:07:10 $
00015  *
00016  ***************************************************************************/
00017 
00018 #include "msmpot_internal.h"
00019 
00020 
00021 #ifndef MSMPOT_MSMCUDA_H
00022 #define MSMPOT_MSMCUDA_H
00023 
00024 /* CUERR(errnum): detect and report error from CUDA.
00025  * Calls cudaGetLastError(); when the result is not cudaSuccess, the macro executes "return ERROR(errnum);" in the enclosing function, so it may only be used inside a function body.
00026  * The CUDA error code itself (cuerr) is not passed on. ERROR is not defined in this header. The do { } while (0) wrapper lets the macro be used as a single statement. */
00027 #undef  CUERR
00028 #define CUERR(errnum) \
00029   do { \
00030     cudaError_t cuerr = cudaGetLastError(); \
00031     if (cuerr != cudaSuccess) { \
00032       return ERROR(errnum); \
00033     } \
00034   } while (0)
00035 
00036 
00037 /*
00038  * Keep NBRLIST_MAXLEN of 3-tuples in GPU const cache memory:
00039  *   (3 * 5333) ints  +  1 int (giving the length in use)  ==  16000 ints  ==  64000 bytes (assuming 4-byte int)
00040  */
00041 #undef  NBRLIST_MAXLEN
00042 #define NBRLIST_MAXLEN  5333
00043 
00044 
00045 #ifdef __cplusplus
00046 extern "C" {
00047 #endif
00048 
00049   struct MsmpotCuda_t {   /* bookkeeping for the CUDA short-range ("binsmall") and lattice cutoff parts */
00050     Msmpot *msmpot;       /* NOTE(review): presumably the Msmpot object this CUDA state belongs to -- confirm */
00051 
00052     /* get CUDA device info (the two fields inside #if 0 are compiled out) */
00053 #if 0
00054     struct cudaDeviceProp *dev;
00055     int ndevs;
00056 #endif
00057     int devnum;           /* device number */
00058 
00059     /* CUDA short-range part ("binsmall") */
00060     int pmx, pmy, pmz;                 /* dimensions of padded epotmap */
00061     long maxpm;                        /* allocated points for padded map */ 
00062     float *padmap;                     /* padded epotmap for CUDA grid (presumably host memory; see dev_padmap -- confirm) */
00063 
00064     float *dev_padmap;                 /* points to device memory (going by the name, the device copy of padmap) */
00065     long dev_maxpm;                    /* allocated points on device */
00066 
00067     float4 *dev_bin;                   /* points to device memory (bins stored as float4) */
00068     int dev_nbins;                     /* allocated bins on device */
00069 
00070     /* CUDA lattice cutoff */
00071     int   lk_nlevels;      /* number of levels for latcut kernel */
00072     int   lk_srad;         /* subcube radius for latcut kernel */
00073     int   lk_padding;      /* padding around internal array of subcubes */
00074     int   subcube_total;   /* total number of subcubes for compressed grids */
00075     int   block_total;     /* total number of thread blocks */
00076     /*
00077      * Naming convention for the pointers below:
00078      * host_   -->  memory allocated on host
00079      * device_ -->  global memory allocated on device
00080      */
00081     int   *host_sinfo;     /* subcube info, copied to device const mem */
00082     float *host_lfac;      /* level factors, copied to device const mem */
00083     int maxlevels;         /* NOTE(review): presumably allocated capacity, in levels, of host_sinfo / host_lfac -- confirm */
00084 
00085     float *host_wt;        /* weights, copied to device const mem */
00086     int maxwts;            /* NOTE(review): presumably allocated capacity, in weights, of host_wt -- confirm */
00087 
00088     float *host_qgrids;    /* q-grid subcubes, copied to device global mem */
00089     float *host_egrids;    /* e-grid subcubes, copied to device global mem */
00090     float *device_qgrids;  /* q-grid subcubes allocated on device */
00091     float *device_egrids;  /* e-grid subcubes allocated on device */
00092     long maxgridpts;       /* NOTE(review): presumably allocated capacity, in grid points, of the q-/e-grid buffers -- confirm */
00093 
00094   };
00094 
00095   void Msmpot_cuda_cleanup(MsmpotCuda *);  /* NOTE(review): MsmpotCuda is not declared in this header; presumably a typedef for struct MsmpotCuda_t -- confirm */
00096 
00097   int Msmpot_cuda_setup_shortrng(MsmpotCuda *);     /* setup for the short-range part, going by the name; NOTE(review): int is presumably a status code -- confirm */
00098   void Msmpot_cuda_cleanup_shortrng(MsmpotCuda *);  /* cleanup counterpart of Msmpot_cuda_setup_shortrng, going by the name */
00099 
00100   int Msmpot_cuda_setup_latcut(MsmpotCuda *);       /* setup for the lattice cutoff part, going by the name; NOTE(review): int is presumably a status code -- confirm */
00101   void Msmpot_cuda_cleanup_latcut(MsmpotCuda *);    /* cleanup counterpart of Msmpot_cuda_setup_latcut, going by the name */
00102 
00103 #ifdef __cplusplus
00104 }
00105 #endif
00106 
00107 
00108 #endif /* MSMPOT_MSMCUDA_H */