00001 /*************************************************************************** 00002 *cr 00003 *cr (C) Copyright 2008-2009 The Board of Trustees of the 00004 *cr University of Illinois 00005 *cr All Rights Reserved 00006 *cr 00007 ***************************************************************************/ 00008 00009 /*************************************************************************** 00010 * RCS INFORMATION: 00011 * 00012 * $RCSfile: msmpot_cuda.h,v $ 00013 * $Author: johns $ $Locker: $ $State: Exp $ 00014 * $Revision: 1.3 $ $Date: 2010/06/03 20:07:10 $ 00015 * 00016 ***************************************************************************/ 00017 00018 #include "msmpot_internal.h" 00019 00020 00021 #ifndef MSMPOT_MSMCUDA_H 00022 #define MSMPOT_MSMCUDA_H 00023 00024 /* 00025 * detect and report error from CUDA 00026 */ 00027 #undef CUERR 00028 #define CUERR(errnum) \ 00029 do { \ 00030 cudaError_t cuerr = cudaGetLastError(); \ 00031 if (cuerr != cudaSuccess) { \ 00032 return ERROR(errnum); \ 00033 } \ 00034 } while (0) 00035 00036 00037 /* 00038 * Keep NBRLIST_MAXLEN of 3-tuples in GPU const cache memory: 00039 * (3 * 5333) ints + 1 int (giving use length) == 64000 bytes 00040 */ 00041 #undef NBRLIST_MAXLEN 00042 #define NBRLIST_MAXLEN 5333 00043 00044 00045 #ifdef __cplusplus 00046 extern "C" { 00047 #endif 00048 00049 struct MsmpotCuda_t { 00050 Msmpot *msmpot; 00051 00052 /* get CUDA device info */ 00053 #if 0 00054 struct cudaDeviceProp *dev; 00055 int ndevs; 00056 #endif 00057 int devnum; /* device number */ 00058 00059 /* CUDA short-range part ("binsmall") */ 00060 int pmx, pmy, pmz; /* dimensions of padded epotmap */ 00061 long maxpm; /* allocated points for padded map */ 00062 float *padmap; /* padded epotmap for CUDA grid */ 00063 00064 float *dev_padmap; /* points to device memory */ 00065 long dev_maxpm; /* allocated points on device */ 00066 00067 float4 *dev_bin; /* points to device memory */ 00068 int dev_nbins; /* allocated bins on device */ 00069 00070 /* CUDA lattice cutoff */ 00071 int lk_nlevels; /* number of levels for latcut kernel */ 00072 int lk_srad; /* subcube radius for latcut kernel */ 00073 int lk_padding; /* padding around internal array of subcubes */ 00074 int subcube_total; /* total number of subcubes for compressed grids */ 00075 int block_total; /* total number of thread blocks */ 00076 /* 00077 * host_ --> memory allocated on host 00078 * device_ --> global memory allocated on device 00079 */ 00080 int *host_sinfo; /* subcube info copy to device const mem */ 00081 float *host_lfac; /* level factor copy to device const mem */ 00082 int maxlevels; 00083 00084 float *host_wt; /* weights copy to device const mem */ 00085 int maxwts; 00086 00087 float *host_qgrids; /* q-grid subcubes copy to device global mem */ 00088 float *host_egrids; /* e-grid subcubes copy to device global mem */ 00089 float *device_qgrids; /* q-grid subcubes allocate on device */ 00090 float *device_egrids; /* e-grid subcubes allocate on device */ 00091 long maxgridpts; 00092 00093 }; 00094 00095 void Msmpot_cuda_cleanup(MsmpotCuda *); 00096 00097 int Msmpot_cuda_setup_shortrng(MsmpotCuda *); 00098 void Msmpot_cuda_cleanup_shortrng(MsmpotCuda *); 00099 00100 int Msmpot_cuda_setup_latcut(MsmpotCuda *); 00101 void Msmpot_cuda_cleanup_latcut(MsmpotCuda *); 00102 00103 #ifdef __cplusplus 00104 } 00105 #endif 00106 00107 00108 #endif /* MSMPOT_MSMCUDA_H */