Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

ComputeNonbondedCUDAKernel.h

Go to the documentation of this file.
00001 #ifdef NAMD_CUDA
00002 // GBReal is also typedef'd in other files (per the original note); every copy must name the same type.
00003 typedef float GBReal;  // pointer element type taken by cuda_bind_GBIS_psiSum and cuda_bind_GBIS_dEdaSum
00004 
00005 void cuda_errcheck(const char *msg);  // declared only; presumably checks for a CUDA error and reports msg -- TODO confirm
00006 // When not compiled by nvcc (__CUDACC__ undefined), neutralize __align__ so the struct declarations below still parse.
00007 #ifndef __CUDACC__
00008 #undef __align__  // #undef takes only the macro name; the former "(X)" was stray tokens after the directive
00009 #define __align__(X)  // expands to nothing, so no alignment attribute is applied in this configuration
00010 #endif
00011 // Record type of the pp array passed to cuda_bind_patch_pairs().
00012 #define PATCH_PAIR_SIZE 16  // equals the struct's member count in 32-bit words: float4 (4) + 12 unsigned int
00013 #define PATCH_PAIR_USED 15  // presumably the words ahead of the trailing pad2 -- TODO confirm against the kernel
00014 
00015 struct __align__(16) patch_pair {  // size must remain a multiple of 16!
00016   float4 offset;  // NOTE(review): meaning not visible in this header -- see the code that fills patch_pair
00017   unsigned int patch1_size;
00018   unsigned int patch2_size;
00019   unsigned int patch1_force_size;  // non-fixed atoms at start of list
00020   unsigned int patch1_atom_start;
00021   unsigned int patch2_atom_start;
00022   unsigned int patch1_force_start;
00023   unsigned int block_flags_start;
00024   unsigned int virial_start;  // virial output location padded to 16
00025   unsigned int patch1_force_list_index;  // presumably an index into the force_list array -- TODO confirm
00026   unsigned int patch1_force_list_size;
00027   unsigned int patch2_force_size;  // used for fixed-atom energy
00028   unsigned int pad2;  // padding word: brings the member total to 16 32-bit words
00029 };
00030 // Record type of the fl array passed to cuda_bind_patch_pairs().
00031 #define FORCE_LIST_SIZE 8  // the struct holds 8 unsigned int members
00032 #define FORCE_LIST_USED 8  // equal to FORCE_LIST_SIZE; the struct has no padding member
00033 
00034 struct __align__(16) force_list {  // size must remain a multiple of 16!
00035   unsigned int force_list_start;  // beginning of compute output
00036   unsigned int force_list_size;  // number of computes for this patch
00037   unsigned int patch_size;  // real number of atoms in patch
00038   unsigned int patch_stride;  // padded number of atoms in patch
00039   unsigned int force_output_start;  // output array
00040   unsigned int atom_start;  // atom positions
00041   unsigned int virial_list_start;  // beginning of compute virial output
00042   unsigned int virial_output_start;  // virial output location padded to 16
00043 };
00044 // Element type of the array passed to cuda_bind_atoms(): a float3 position followed by one float charge.
00045 struct __align__(16) atom {  // size must remain a multiple of 16!
00046   float3 position;
00047   float charge;  // fourth float of the record, directly after position
00048 };
00049 // Element type of the array passed to cuda_bind_atom_params(): four int members per record.
00050 struct __align__(16) atom_param {  // size must remain a multiple of 16!
00051   int vdw_type;  // NOTE(review): not copied by the COPY_PARAM macro declared later in this header
00052   int index;
00053   int excl_index;  // its range is discussed in the comment above MAX_EXCLUSIONS in this header
00054   int excl_maxdiff;  // maxdiff == 0 -> only excluded from self
00055 };
00056 // COPY_ATOM: member-wise copy of position.x/y/z and charge. Expands to a braced block; arguments are pasted unparenthesized and repeatedly, so pass plain lvalues.
00057 #define COPY_ATOM( DEST, SOURCE ) { \
00058   DEST.position.x = SOURCE.position.x; \
00059   DEST.position.y = SOURCE.position.y; \
00060   DEST.position.z = SOURCE.position.z; \
00061   DEST.charge = SOURCE.charge; \
00062   }
00063 // COPY_PARAM: copies sqrt_epsilon, half_sigma, index, excl_index, excl_maxdiff. NOTE(review): atom_param in this header has vdw_type and no sqrt_epsilon/half_sigma, so it cannot be an argument here -- confirm the macro is still used.
00064 #define COPY_PARAM( DEST, SOURCE ) { \
00065   DEST.sqrt_epsilon = SOURCE.sqrt_epsilon; \
00066   DEST.half_sigma = SOURCE.half_sigma; \
00067   DEST.index = SOURCE.index; \
00068   DEST.excl_index = SOURCE.excl_index; \
00069   DEST.excl_maxdiff = SOURCE.excl_maxdiff; \
00070   }
00071 // COPY_ATOM_TO_SHARED: runs COPY_ATOM then COPY_PARAM, both writing into SHARED (from ATOM and PARAM respectively).
00072 #define COPY_ATOM_TO_SHARED( ATOM, PARAM, SHARED ) { \
00073     COPY_ATOM( SHARED, ATOM ) \
00074     COPY_PARAM( SHARED, PARAM ) \
00075   }
00076 // COPY_ATOM_FROM_SHARED: the reverse direction -- fills ATOM and PARAM by reading SHARED.
00077 #define COPY_ATOM_FROM_SHARED( ATOM, PARAM, SHARED ) { \
00078     COPY_ATOM( ATOM, SHARED ) \
00079     COPY_PARAM( PARAM, SHARED ) \
00080   }
00081 // Exclusion table limits, counted in 32-bit ints (2^5 bits each).
00082 // 2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
00083 // 2^26 ints * 2^5 bits = 2^31 bits = non-negative range of int excl_index
00084 #define MAX_EXCLUSIONS (1<<26)
00085 #define MAX_CONST_EXCLUSIONS 2048  // 2048 ints * 4 bytes = 8k; cache size is 8k
00086 
00087 void cuda_bind_exclusions(const unsigned int *t, int n);  // t: table of n unsigned ints; presumably n <= MAX_EXCLUSIONS -- TODO confirm
00088 // NOTE(review): the meaning of the float2 entries is not visible here; _lj_table_size is presumably the table dimension -- confirm.
00089 void cuda_bind_lj_table(const float2 *t, int _lj_table_size);
00090 // FORCE_TABLE_SIZE history: earlier candidate values are kept below as commented-out defines.
00091 // #define FORCE_TABLE_SIZE 512
00092 // maximum size of CUDA array 1D texture reference is 2^13 = 8192
00093 // #define FORCE_TABLE_SIZE 8192
00094 // despite that documented limit, older devices can only handle 4096
00095 #define FORCE_TABLE_SIZE 4096
00096 
00097 void cuda_bind_force_table(const float4 *t, const float4 *et);  // presumably FORCE_TABLE_SIZE entries each; et may be the energy table -- TODO confirm
00098 // Setup entry point. NOTE(review): what it initializes and any call-order requirement are not visible in this header.
00099 void cuda_init();
00100 // Takes the patch_pair and force_list arrays (pp/npp and fl/nfl look like pointer+count pairs) plus four size parameters.
00101 void cuda_bind_patch_pairs(const patch_pair *pp, int npp,
00102                         const force_list *fl, int nfl,
00103                         int atoms_size_p, int force_buffers_size_p,  // presumably sizes in elements -- TODO confirm units
00104                         int block_flags_size_p, int max_atoms_per_patch_p);
00105 // Binding entry points for per-atom inputs and force/virial buffers (declarations only).
00106 void cuda_bind_atom_params(const atom_param *t);  // const pointer: not modified through t
00107 
00108 void cuda_bind_atoms(const atom *a);  // const pointer: not modified through a
00109 
00110 void cuda_bind_forces(float4 *f, float4 *f_slow);  // non-const: presumably output buffers; f_slow likely pairs with doSlow -- TODO confirm
00111 
00112 void cuda_bind_virials(float *v);  // non-const: presumably an output buffer -- TODO confirm
00113 // Nonbonded force entry point. NOTE(review): the stream is taken by non-const reference; work is presumably queued on strm -- confirm.
00114 void cuda_nonbonded_forces(float3 lata, float3 latb, float3 latc,  // presumably the three lattice vectors -- TODO confirm
00115                 float cutoff2, float plcutoff2,  // presumably squared cutoff and squared pairlist cutoff -- TODO confirm
00116                 int cbegin, int ccount, int pbegin, int pcount,  // two begin/count pairs; what they index is not visible here
00117                 int doSlow, int doEnergy, int usePairlists, int savePairlists,  // int-typed option flags
00118                 cudaStream_t &strm);
00119 // cuda_GBIS_P1/P2/P3 are declared from here on; presumably the three phases of the GBIS calculation -- TODO confirm.
00120 //GBIS methods
00121 void cuda_GBIS_P1(
00122   int cbegin,  // cbegin/ccount, pbegin/pcount: same names as the begin/count parameters of cuda_nonbonded_forces
00123   int ccount,
00124   int pbegin,
00125   int pcount,
00126   float a_cut,  // NOTE(review): units and meaning of a_cut / rho_0 are not visible in this header
00127   float rho_0,
00128   float3 lata,  // lata/latb/latc: same names as the float3 parameters of cuda_nonbonded_forces
00129   float3 latb,
00130   float3 latc,
00131   cudaStream_t &strm  // stream passed by non-const reference
00132   );
00133 void cuda_GBIS_P2(  // second GBIS entry point; shares the begin/count, lattice and stream parameters of cuda_GBIS_P1
00134   int cbegin,
00135   int ccount,
00136   int pbegin,
00137   int pcount,
00138   float a_cut,
00139   float r_cut,
00140   float scaling,
00141   float kappa,
00142   float smoothDist,
00143   float epsilon_p,  // NOTE(review): presumably the two dielectric constants (epsilon_p, epsilon_s) -- TODO confirm
00144   float epsilon_s,
00145   float3 lata,
00146   float3 latb,
00147   float3 latc,
00148   int doEnergy,  // int-typed option flags, like doEnergy in cuda_nonbonded_forces
00149   int doFullElec,
00150   cudaStream_t &strm  // stream passed by non-const reference
00151   );
00152 void cuda_GBIS_P3(  // third GBIS entry point; same parameter list as cuda_GBIS_P1 plus scaling
00153   int cbegin,
00154   int ccount,
00155   int pbegin,
00156   int pcount,
00157   float a_cut,
00158   float rho_0,
00159   float scaling,  // the one parameter cuda_GBIS_P1 does not take
00160   float3 lata,
00161   float3 latb,
00162   float3 latc,
00163   cudaStream_t &strm  // stream passed by non-const reference
00164   );
00165 // Buffer binders for GBIS; the trailing "H" on several parameter names presumably marks host pointers -- TODO confirm.
00166 void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH);
00167 void cuda_bind_GBIS_energy(float *energy_gbis);
00168 void cuda_bind_GBIS_psiSum(GBReal *psiSumH);  // GBReal buffer (typedef'd to float in this header)
00169 void cuda_bind_GBIS_bornRad(float *bornRadH);
00170 void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH);  // GBReal buffer (typedef'd to float in this header)
00171 void cuda_bind_GBIS_dHdrPrefix(float *dHdrPrefixH);
00172 
00173 //end GBIS methods
00174 // NOTE(review): presumably returns nonzero once the work queued on the stream has completed -- confirm in the implementation.
00175 int cuda_stream_finished();
00176 
00177 #endif  // NAMD_CUDA
00178 

Generated on Tue May 21 04:07:15 2013 for NAMD by  doxygen 1.3.9.1