00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00022 #ifndef CUDAKERNELS_H
00023 #define CUDAKERNELS_H
00024
00025 #include "WKFThreads.h"
00026 /// `hz` is used as an ordinary parameter name further down in this header (see vmd_cuda_evaluate_occupancy_map).
00027 /// Remove any preprocessor macro of that name first; presumably some platform header defines one -- TODO confirm which.
00028 #undef hz
00029
00030 #if defined(__cplusplus)
00031 extern "C" {
00032 #endif
00033 /// Forward declaration only: in this header AtomSel is used solely through a pointer (gpu_ooc_bench), so its full definition is not needed.
00034 class AtomSel; // NOTE(review): `class` is not valid C, so this header is effectively C++-only despite the __cplusplus guards around extern "C"
00035 /// Integer status codes: 0 is the "no error" value and every other code is negative. Presumably returned by the int-valued vmd_cuda_* routines below -- TODO confirm.
00036 #define VMDCUDA_ERR_NONE 0 // the only non-negative code
00037 #define VMDCUDA_ERR_GENERAL -1 // presumably an unspecified failure -- confirm
00038 #define VMDCUDA_ERR_NODEVICES -2 // presumably: no CUDA devices available -- confirm
00039 #define VMDCUDA_ERR_SOMEDEVICES -3 // presumably: only a subset of devices usable -- confirm
00040 #define VMDCUDA_ERR_DRVMISMATCH -4 // presumably: driver/runtime version mismatch -- confirm
00041 #define VMDCUDA_ERR_EMUDEVICE -5 // presumably: only an emulation device found -- confirm
00042 /// Compute-mode constants with the values 0, 1 and 2.
00043 /// Presumably the values written to *computemode by vmd_cuda_device_props(), mirroring the CUDA runtime's cudaComputeMode enumeration so callers need no CUDA headers -- TODO confirm.
00044 #define VMDCUDA_COMPUTEMODE_DEFAULT 0
00045 #define VMDCUDA_COMPUTEMODE_EXCLUSIVE 1
00046 #define VMDCUDA_COMPUTEMODE_PROHIBITED 2
00047
00048 /// Device-count query: takes a pointer to an int (numdev) and returns an int.
00049 /// Presumably stores the number of usable CUDA devices in *numdev and returns one of the VMDCUDA_ERR_* codes -- TODO confirm against the implementation.
00050 /// NOTE(review): whether a NULL numdev is tolerated is not visible in this header.
00051 int vmd_cuda_num_devices(int *numdev);
00052
00053 /// Per-device property query for device index `dev`; returns an int (presumably a VMDCUDA_ERR_* status -- confirm).
00054 /// `name`/`namelen` look like a caller-provided character buffer and its capacity; NOTE(review): truncation and NUL-termination behavior are not visible here.
00055 /// Every other argument is a non-const pointer, presumably an output slot filled in by the callee; whether NULL may be passed for unwanted fields is not visible here.
00056 int vmd_cuda_device_props(int dev, char *name, int namelen,
00057 int *revmajor, int *revminor, // presumably compute-capability major/minor -- confirm
00058 unsigned long *memb, int *clockratekhz, // NOTE(review): if memb is a byte count, unsigned long is only 32 bits on LLP64 platforms -- confirm
00059 int *smcount, int *integratedgpu,
00060 int *asyncenginecount, int *kerneltimeout,
00061 int *canmaphostmem, int *computemode, // computemode: presumably one of VMDCUDA_COMPUTEMODE_* -- confirm
00062 int *spdpfpperfratio,
00063 int *pageablememaccess,
00064 int *pageablememaccessuseshostpagetables);
00065
00066 /// Peer-to-peer topology query; returns an int (presumably a VMDCUDA_ERR_* status -- confirm).
00067 /// The five int** arguments suggest the callee allocates each matrix and hands it back through the pointer; NOTE(review): ownership, element layout and dimensions (numdev x numdev?) must be confirmed in the implementation.
00068 /// numdev, p2plinkcount and islands are int* and presumably scalar outputs -- TODO confirm.
00069 int vmd_cuda_peer_matrix(int *numdev,
00070 int **p2pmat,
00071 int **p2psupp,
00072 int **p2patomics,
00073 int **p2parrays,
00074 int **perfmat,
00075 int *p2plinkcount,
00076 int *islands);
00077
00078
00079
00080
00081 /// Has the void*(void*) shape of a thread entry point and takes one opaque argument, voidparms.
00082 /// Presumably run on the workers of a wkf_threadpool_t to set host-thread CPU affinity -- TODO confirm.
00083 /// NOTE(review): what voidparms must point to and what the returned pointer means are not visible in this header.
00084 void * vmd_cuda_affinitize_threads(void * voidparms);
00085 /// Worker routine with a void*(void*) signature taking one opaque argument, voidparms.
00086 /// Presumably executed by each device-pool worker to enable peer-to-peer access between GPUs -- TODO confirm voidparms contents and return value.
00087 void * vmd_cuda_devpool_enable_P2P(void * voidparms);
00088 /// Worker routine with a void*(void*) signature taking one opaque argument, voidparms.
00089 /// Presumably binds the calling pool worker to its CUDA device; how it differs from vmd_cuda_devpool_setdevice is not visible in this header -- check the implementation.
00090 void * vmd_cuda_devpool_setdeviceonly(void * voidparms);
00091 /// Worker routine with a void*(void*) signature taking one opaque argument, voidparms.
00092 /// Presumably binds the calling pool worker to its CUDA device, possibly with extra one-time setup compared with vmd_cuda_devpool_setdeviceonly -- TODO confirm.
00093 void * vmd_cuda_devpool_setdevice(void * voidparms);
00094 /// Worker routine with a void*(void*) signature; unlike its siblings the parameter is left unnamed here. Presumably clears GPU memory on each pool worker's device -- TODO confirm.
00095 void * vmd_cuda_devpool_clear_device_mem(void *);
00096 /// Benchmark-style entry point returning an int status (meaning not visible here). Presumably measures multiply-add throughput for the numdevs devices listed in devlist -- TODO confirm.
00097 int vmd_cuda_madd_gflops(int numdevs, int *devlist, double *gflops, // gflops: presumably one result per listed device -- confirm length
00098 int testloops); // presumably the number of benchmark repetitions -- confirm
00099 /// Benchmark-style entry point returning an int status; all eight double* arguments are non-const and presumably per-device output arrays (numdevs entries each) -- TODO confirm.
00100 int vmd_cuda_bus_bw(int numdevs, int *devlist,
00101 double *hdmbsec, double *hdlatusec, // presumably host-to-device bandwidth (MB/s) and latency (us) -- confirm
00102 double *phdmbsec, double *phdlatusec, // presumably the same for pinned host memory -- confirm
00103 double *dhmbsec, double *dhlatusec, // presumably device-to-host bandwidth and latency -- confirm
00104 double *pdhmbsec, double *pdhlatusec); // presumably device-to-host with pinned host memory -- confirm
00105 /// Benchmark-style entry point returning an int status. Presumably measures device global-memory bandwidth for the numdevs devices listed in devlist -- TODO confirm.
00106 int vmd_cuda_globmem_bw(int numdevs, int *devlist,
00107 double *memsetgbsec, double *memcpygbsec); // presumably per-device memset and memcpy rates in GB/s -- confirm units and array length
00108 /// Latency measurement over a device thread pool (wkf_threadpool_t, presumably declared in WKFThreads.h); returns an int status. The five double* arguments are presumably scalar outputs -- TODO confirm units.
00109 int vmd_cuda_devpool_latency(wkf_threadpool_t *devpool, int tilesize,
00110 double *kernlaunchlatency,
00111 double *barlatency,
00112 double *cyclelatency,
00113 double *tilelatency,
00114 double *kernellatency);
00115 /// Takes only the device thread pool and returns an int; presumably a convenience wrapper that runs and reports the latency measurements -- TODO confirm in the implementation.
00116 int vmd_cuda_measure_latencies(wkf_threadpool_t *devpool);
00117 /// Benchmark entry point run on a device thread pool; returns an int status (meaning not visible here). Presumably an out-of-core trajectory I/O benchmark -- TODO confirm.
00118 int gpu_ooc_bench(wkf_threadpool_t *devpool,
00119 int nfiles, const char **trjfileset, // presumably nfiles trajectory file names -- confirm
00120 const AtomSel *sel, int first, int last, int step); // sel is read-only; first/last/step presumably select a frame range -- confirm inclusivity
00121
00122 /// Volumetric potential entry point returning an int status; presumably evaluates a Coulomb potential from `atoms` onto the grid `grideners` -- TODO confirm.
00123 /// NOTE(review): the per-atom layout of `atoms` and the index order of `grideners` are not visible in this header.
00124 int vmd_cuda_vol_cpotential(long int natoms, float* atoms, float* grideners,
00125 long int numplane, long int numcol, long int numpt, // presumably the grid extents along three axes -- confirm
00126 float gridspacing);
00127 /// Occupancy-map evaluation entry point; returns an int whose meaning is not visible in this header. Every pointer except `map` is const-qualified, i.e. read-only input.
00128 int vmd_cuda_evaluate_occupancy_map(
00129 int mx, int my, int mz, // presumably the map dimensions along x/y/z -- confirm
00130 float *map, // the only non-const pointer, so presumably the output buffer
00131 // assumes map holds mx*my*mz floats -- TODO confirm
00132
00133 float max_energy, // presumably an energy threshold -- confirm
00134 float cutoff, // presumably a cutoff distance -- confirm
00135 float hx, float hy, float hz, // presumably the map lattice spacing; the name `hz` is why this header does #undef hz
00136 float x0, float y0, float z0, // presumably the map origin -- confirm
00137 float bx_1, float by_1, float bz_1, // presumably inverse atom-bin lengths -- confirm
00138
00139 int nbx, int nby, int nbz, // presumably the number of atom bins per axis -- confirm
00140 const float *bin, // read-only bin data; length and layout not visible here
00141
00142 const float *bin_zero, // read-only; presumably `bin` offset to the origin bin -- confirm
00143
00144 int num_binoffsets, // presumably the number of entries described by binoffsets -- confirm
00145 const char *binoffsets, // read-only char array; presumably bin-neighborhood index offsets -- confirm
00146
00147
00148 int num_extras, // presumably the number of entries in `extra` -- confirm
00149 const float *extra, // read-only; presumably atoms that did not fit in the bins -- confirm
00150
00151
00152
00153 int num_vdwparms, // presumably the number of van der Waals parameter sets -- confirm
00154 const float *vdwparms, // read-only; floats per parameter set not visible here
00155
00156
00157 int num_probes, // presumably the number of probe atoms -- confirm
00158 const float *probevdwparms, // read-only; presumably per-probe van der Waals parameters -- confirm
00159
00160
00161 int num_conformers, // presumably the number of probe conformers -- confirm
00162 const float *conformers // read-only; presumably probe coordinates per conformer -- confirm layout
00163
00164 );
00165 /// Radial-distribution-function style entry point (judging by the name) run on a device thread pool; returns an int whose meaning is not visible in this header.
00166 /// NOTE(review): none of the pointer arguments is const-qualified, so which buffers are inputs and which are outputs must be confirmed in the implementation.
00167 int rdf_gpu(wkf_threadpool_t *devpool,
00168 int usepbc, // presumably a flag selecting periodic boundary conditions -- confirm
00169 int natoms1, // presumably the atom count for xyz -- confirm
00170
00171 float* xyz, // assumes natoms1 x 3 coordinates -- TODO confirm
00172
00173 int natoms2, // presumably the atom count for xyz2 -- confirm
00174
00175 float* xyz2, // assumes natoms2 x 3 coordinates -- TODO confirm
00176
00177 float* cell, // presumably the periodic cell dimensions -- confirm length
00178 float* hist, // presumably the output histogram(s) -- confirm size
00179
00180 int maxbin, // presumably the number of histogram bins -- confirm
00181 float rmin, // presumably the lower edge of the first bin -- confirm
00182 float delr); // presumably the bin width -- confirm
00183
00184
00185 #if defined(__cplusplus)
00186 }
00187 #endif
00188
00189 #endif
00190