CUDAKernels.h Source File

00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 /***************************************************************************
00009  * RCS INFORMATION:
00010  *
00011  *      $RCSfile: CUDAKernels.h,v $
00012  *      $Author: johns $        $Locker:  $             $State: Exp $
00013  *      $Revision: 1.64 $        $Date: 2020/07/03 07:12:25 $
00014  *
00015  ***************************************************************************/
00022 #ifndef CUDAKERNELS_H
00023 #define CUDAKERNELS_H
00024 
00025 #include "WKFThreads.h"
00026 
00027 /* avoid parameter name collisions with AIX5 "hz" macro */
00028 #undef hz
00029 
00030 #if defined(__cplusplus)
00031 extern "C" {
00032 #endif
00033 /* forward declaration of a C++ class: this line is C++-only syntax, so the header cannot be parsed as C despite the __cplusplus guards */
00034 class AtomSel;
00035 
00036 #define VMDCUDA_ERR_NONE          0
00037 #define VMDCUDA_ERR_GENERAL      -1
00038 #define VMDCUDA_ERR_NODEVICES    -2
00039 #define VMDCUDA_ERR_SOMEDEVICES  -3
00040 #define VMDCUDA_ERR_DRVMISMATCH  -4
00041 #define VMDCUDA_ERR_EMUDEVICE    -5
00042 
00043 /* replicate CUDA compute mode enumerations */
00044 #define VMDCUDA_COMPUTEMODE_DEFAULT     0
00045 #define VMDCUDA_COMPUTEMODE_EXCLUSIVE   1
00046 #define VMDCUDA_COMPUTEMODE_PROHIBITED  2
00047 
00048 /* 
00049  * Query the number of CUDA devices available.
00050  * NOTE(review): the count is presumably stored in *numdev and the int result is presumably a VMDCUDA_ERR_* code -- confirm. */
00051 int vmd_cuda_num_devices(int *numdev);
00052 
00053 /* 
00054  * Retrieve system and GPU device properties for device dev.
00055  * NOTE(review): the pointer arguments are presumably outputs, with name/namelen the caller's name buffer and its size -- confirm. */
00056 int vmd_cuda_device_props(int dev, char *name, int namelen,
00057                           int *revmajor, int *revminor, 
00058                           unsigned long *memb, int *clockratekhz,
00059                           int *smcount, int *integratedgpu,
00060                           int *asyncenginecount, int *kerneltimeout,
00061                           int *canmaphostmem, int *computemode, // computemode: presumably a VMDCUDA_COMPUTEMODE_* value (TODO confirm)
00062                           int *spdpfpperfratio, 
00063                           int *pageablememaccess, 
00064                           int *pageablememaccessuseshostpagetables);
00065 
00066 /*
00067  * Retrieve NVLink connectivity and topological information.
00068  * NOTE(review): the int ** arguments presumably receive callee-allocated matrices; who frees them is not visible here -- confirm. */
00069 int vmd_cuda_peer_matrix(int *numdev, 
00070                          int **p2pmat,
00071                          int **p2psupp,
00072                          int **p2patomics,
00073                          int **p2parrays,
00074                          int **perfmat,
00075                          int *p2plinkcount,
00076                          int *islands);
00077 
00078 /*
00079  * All generally available CUDA kernel launchers that require 
00080  * no particularly special handling, hardware, or context association
00081  */
00082 
00083 /* set GPU worker thread CPU affinity according to hardware topology */
00084 void * vmd_cuda_affinitize_threads(void * voidparms); // NOTE(review): what voidparms points to and what is returned are not visible here -- confirm
00085 
00086 /* enable peer-to-peer GPU memory transfer, e.g. for NVLink on DGX-2 */
00087 void * vmd_cuda_devpool_enable_P2P(void * voidparms); // NOTE(review): what voidparms points to and what is returned are not visible here -- confirm
00088 
00089 /* set device only, no other hardware initialization (contrast with vmd_cuda_devpool_setdevice, which also initializes) */
00090 void * vmd_cuda_devpool_setdeviceonly(void * voidparms);
00091 
00092 /* set device and perform various one-time hardware initialization (vmd_cuda_devpool_setdeviceonly skips the initialization) */
00093 void * vmd_cuda_devpool_setdevice(void * voidparms);
00094 /* NOTE(review): presumably clears GPU device memory; the void * parameter is unnamed here, unlike the voidparms siblings -- confirm */
00095 void * vmd_cuda_devpool_clear_device_mem(void *);
00096 /* NOTE(review): presumably benchmarks multiply-add throughput for the numdevs devices in devlist, reporting into gflops -- confirm */
00097 int vmd_cuda_madd_gflops(int numdevs, int *devlist, double *gflops, 
00098                          int testloops);
00099 /* NOTE(review): presumably measures host<->device copy bandwidth (MB/s) and latency (usec); the p* outputs look like pinned-memory variants -- confirm */
00100 int vmd_cuda_bus_bw(int numdevs, int *devlist, 
00101                     double *hdmbsec, double *hdlatusec,
00102                     double *phdmbsec, double *phdlatusec,
00103                     double *dhmbsec, double *dhlatusec,
00104                     double *pdhmbsec, double *pdhlatusec);
00105 /* NOTE(review): presumably measures device global memory memset and memcpy bandwidth in GB/s for the devices in devlist -- confirm */
00106 int vmd_cuda_globmem_bw(int numdevs, int *devlist, 
00107                         double *memsetgbsec, double *memcpygbsec);
00108 /* NOTE(review): presumably times devpool operations and reports each through its double * argument; units are not visible here -- confirm */
00109 int vmd_cuda_devpool_latency(wkf_threadpool_t *devpool, int tilesize,
00110                              double *kernlaunchlatency,
00111                              double *barlatency,
00112                              double *cyclelatency, 
00113                              double *tilelatency,
00114                              double *kernellatency);
00115 /* NOTE(review): presumably runs latency measurements on devpool; how the results are reported is not visible here -- confirm */
00116 int vmd_cuda_measure_latencies(wkf_threadpool_t *devpool);
00117 /* Takes the C++ class AtomSel, so this prototype is C++-only. NOTE(review): presumably benchmarks reading the nfiles trajectories in trjfileset -- confirm */
00118 int gpu_ooc_bench(wkf_threadpool_t *devpool, // VMD GPU worker thread pool
00119                   int nfiles, const char **trjfileset,
00120                   const AtomSel *sel, int first, int last, int step);
00121 
00122 
00123 /* NOTE(review): presumably computes a Coulomb potential map into grideners on a numplane x numcol x numpt grid; layout of atoms is not visible here -- confirm */
00124 int vmd_cuda_vol_cpotential(long int natoms, float* atoms, float* grideners, 
00125                             long int numplane, long int numcol, long int numpt, 
00126                             float gridspacing);
00127 /* Occupancy map evaluation entry point. NOTE(review): meaning of the int result is not visible here -- confirm */
00128 int vmd_cuda_evaluate_occupancy_map(
00129     int mx, int my, int mz,             // map dimensions
00130     float *map,                         // buffer space for occupancy map
00131                                         // (length mx*my*mz floats)
00132 
00133     float max_energy,                   // max energy threshold
00134     float cutoff,                       // vdw cutoff distance
00135     float hx, float hy, float hz,       // map lattice spacing (hz is why the AIX5 "hz" macro is #undef'd above)
00136     float x0, float y0, float z0,       // map origin
00137     float bx_1, float by_1, float bz_1, // inverse of atom bin lengths
00138 
00139     int nbx, int nby, int nbz,          // bin dimensions
00140     const float *bin,                   // atom bins XXX typecast to flint
00141                                         // (length BIN_SIZE*nbx*nby*nbz)
00142     const float *bin_zero,              // bin pointer shifted to origin
00143 
00144     int num_binoffsets,                 // number of offsets
00145     const char *binoffsets,             // bin neighborhood index offsets
00146                                         // (length 3*num_binoffsets)
00147 
00148     int num_extras,                     // number of extra atoms
00149     const float *extra,                 // extra atoms from overfilled bins
00150                                         // XXX typecast to flint
00151                                         // (length BIN_SLOTSIZE*num_extras)
00152 
00153     int num_vdwparms,                   // number of vdw parameter types
00154     const float *vdwparms,              // vdw parameters
00155                                         // (length 2*num_vdwparms)
00156 
00157     int num_probes,                     // number of probe atoms
00158     const float *probevdwparms,         // vdw parameters of probe atoms
00159                                         // (length 2*num_probes)
00160 
00161     int num_conformers,                 // number of conformers
00162     const float *conformers             // probe atom offsets for conformers
00163                                         // (length 3*num_probes*num_conformers)
00164     );
00165 
00166 /* NOTE(review): presumably accumulates radial distribution histograms between two atom selections; meaning of the int result is not visible here -- confirm */
00167 int rdf_gpu(wkf_threadpool_t *devpool, // GPU worker thread pool
00168             int usepbc,                // periodic or non-periodic calc.
00169             int natoms1,               // number of atoms in selection 1
00170                                        // (a scalar int; sizes xyz below)
00171             float* xyz,                // coordinates of first selection.
00172                                        // [natoms1][3]
00173             int natoms2,               // number of atoms in selection 2
00174                                        // (a scalar int; sizes xyz2 below)
00175             float* xyz2,               // coordinates of selection 2.
00176                                        // [natoms2][3]
00177             float* cell,               // the cell x y and z dimensions [3]
00178             float* hist,               // the histograms, 1 per block
00179                                        // [ncudablocks][maxbin]
00180             int maxbin,                // the number of bins in the histogram
00181             float rmin,                // the minimum value of the first bin
00182             float delr);               // the width of each bin
00183 
00184 
00185 #if defined(__cplusplus)
00186 }
00187 #endif
00188 
00189 #endif
00190