// NOTE(review): extraction-garbled fragment — the integers fused into each
// line are original-file line numbers, and interior lines (including the
// matching #endif's) are missing from this view.  Do not compile as-is.
// This appears to be member initialization in the CudaNonbondedTables
// constructor: table pointers are set to NULL and texture handles to 0 so
// that the destructor's guarded teardown (see the `!= NULL` checks below)
// is safe even if the tables are never built.
11 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 16 vdwCoefTableWidth = 0;
// Drude NbThole correction-table state (texture member exists only when
// USE_TABLE_ARRAYS is not defined).
17 #if !defined(USE_TABLE_ARRAYS) 23 drudeNbTholeTijTable = NULL;
25 numPotentialNbtholeTerms = 0;
27 exclusionTable = NULL;
// Texture object handles: 0 marks "not yet created".
30 #if !defined(USE_TABLE_ARRAYS) 31 drudeNbTholeTijTableTex = 0;
32 modifiedExclusionForceTableTex = 0;
33 modifiedExclusionEnergyTableTex = 0;
35 modifiedExclusionForceTable = NULL;
36 modifiedExclusionEnergyTable = NULL;
// NOTE(review): extraction-garbled destructor fragment (fused line numbers,
// missing interior lines).  Releases, in order: plain device tables, the
// cudaArray backing stores, the texture objects, and the remaining device
// tables.  NULL checks guard against tables that were never built.
45 if (vdwCoefTable != NULL) deallocate_device<float2>(&vdwCoefTable);
46 if (exclusionTable != NULL) deallocate_device<float4>(&exclusionTable);
47 if (r2_table != NULL) deallocate_device<float>(&r2_table);
// Texture-object path only: free cudaArrays and destroy texture handles.
50 #if !defined(USE_TABLE_ARRAYS) 53 cudaCheck(cudaFreeArray(modifiedExclusionForceArray));
54 cudaCheck(cudaFreeArray(modifiedExclusionEnergyArray));
56 cudaCheck(cudaDestroyTextureObject(forceTableTex));
57 cudaCheck(cudaDestroyTextureObject(energyTableTex));
58 cudaCheck(cudaDestroyTextureObject(modifiedExclusionForceTableTex));
59 cudaCheck(cudaDestroyTextureObject(modifiedExclusionEnergyTableTex));
// The NbThole texture is created only when Drude NbThole terms are present
// (see buildVdwCoefTable), so destroy it conditionally.
61 if (drudeNbthole)
cudaCheck(cudaDestroyTextureObject(drudeNbTholeTijTableTex));
64 if (forceTable != NULL) deallocate_device<float4>(&forceTable);
65 if (energyTable != NULL) deallocate_device<float4>(&energyTable);
66 if (modifiedExclusionForceTable != NULL) deallocate_device<float4>(&modifiedExclusionForceTable);
67 if (modifiedExclusionEnergyTable != NULL) deallocate_device<float4>(&modifiedExclusionEnergyTable);
// copyTable<T>: allocate a device buffer of `size` elements of T and fill it
// from the host table with a synchronous host-to-device copy.
// NOTE(review): the `template <typename T>` header and the lines between the
// allocation and the copy (original lines 73-76) are missing from this
// extraction.  The `update` flag presumably skips re-allocation when the
// table already exists (the update guard appears lost here) — confirm against
// the original file; unconditionally re-allocating on update would leak.
72 void copyTable(
int size, T* h_table, T*& d_table,
bool update=
false) {
75 allocate_device<T>(&d_table, size);
77 copy_HtoD_sync<T>(h_table, d_table, size);
// bindTextureObject<T> (linear-resource variant, compiled only when
// USE_TABLE_ARRAYS is not defined): allocate a device buffer, copy the host
// table into it, and create a texture object over the raw linear memory.
// readMode = cudaReadModeElementType, i.e. unfiltered element fetches.
// NOTE(review): extraction-garbled — the `template` header and several
// interior lines (original 84-85, 87-90, 92-93, 104, 108-109, likely the
// `if (!update)` guards and closing braces) are missing from this view.
81 #ifndef USE_TABLE_ARRAYS 83 void bindTextureObject(
int size, T* h_table, T*& d_table, cudaTextureObject_t& tex,
bool update=
false) {
86 allocate_device<T>(&d_table, size);
91 copy_HtoD_sync<T>(h_table, d_table, size);
// Describe the linear device buffer as a texture resource.
94 cudaResourceDesc resDesc;
95 memset(&resDesc, 0,
sizeof(resDesc));
96 resDesc.resType = cudaResourceTypeLinear;
97 resDesc.res.linear.devPtr = d_table;
98 resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
// Channel widths are chosen from sizeof(T): x always 32 bits, then y/z/w are
// enabled as T grows to 2/3/4 packed floats (float, float2, float3, float4).
99 resDesc.res.linear.desc.x =
sizeof(float)*8;
100 if (
sizeof(T) >=
sizeof(float)*2) resDesc.res.linear.desc.y =
sizeof(float)*8;
101 if (
sizeof(T) >=
sizeof(float)*3) resDesc.res.linear.desc.z =
sizeof(float)*8;
102 if (
sizeof(T) >=
sizeof(float)*4) resDesc.res.linear.desc.w =
sizeof(float)*8;
103 resDesc.res.linear.sizeInBytes = size*
sizeof(T);
105 cudaTextureDesc texDesc;
106 memset(&texDesc, 0,
sizeof(texDesc));
107 texDesc.readMode = cudaReadModeElementType;
110 cudaCheck(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
// buildVdwCoefTable: build the tsize x tsize LJ (vdw A/B) coefficient table
// and its exclusion variant on the host, optionally build the Drude NbThole
// thole_ij correction table, then upload each either as a texture object or
// as a plain device table depending on USE_TABLE_ARRAYS.  `update == true`
// refreshes existing device tables in place.
// NOTE(review): extraction-garbled — fused original line numbers; the loop
// bodies that fill `row`/`exclusionRow` (orig. 138-146), the NbThole inner
// search setup (orig. 156-162), and several braces are missing from this view.
120 void CudaNonbondedTables::buildVdwCoefTable(
bool update) {
// Guard: padded table width must cover the LJ-table dimension.
129 if ( tsize < dim )
NAMD_bug(
"CudaNonbondedTables::buildVdwCoefTable bad tsize");
131 float2 *h_vdwCoefTable =
new float2[tsize*tsize];
132 float2 *h_exclusionVdwCoefTable =
new float2[tsize*tsize];
// Fill dim x dim entries of each tsize-wide row (filling code missing here).
134 float2 *row = h_vdwCoefTable;
135 float2 *exclusionRow = h_exclusionVdwCoefTable;
136 for (
int i=0; i<dim; ++i, row += tsize, exclusionRow += tsize ) {
137 for (
int j=0; j<dim; ++j ) {
// Drude NbThole path: build the symmetric thole_ij lookup table.
// NOTE(review): "paramter" typo in the runtime log string below — left
// untouched here (runtime string; fix belongs in a code change).
147 iout <<
iINFO <<
"Building Drude NbThole correction paramter table.\n" <<
endi;
148 numPotentialNbtholeTerms = 0;
149 std::vector<float> h_drudeNbTholeTijTable(tsize * tsize);
150 float *tholeTijRow = h_drudeNbTholeTijTable.data();
154 for (
int i = 0; i < dim; ++i, tholeTijRow += tsize) {
155 for (
int j = 0; j < dim; ++j) {
// Match the (i,j) type pair against nbthole_array in either order.
163 if (((nbthole_array[k].ind1 == i) && (nbthole_array[k].ind2 == j)) ||
164 ((nbthole_array[k].ind2 == i) && (nbthole_array[k].ind1 == j))) {
166 tij = nbthole_array[k].
tholeij;
// Negative thole_ij is rejected for the GPU path (error text continues on
// missing lines; it names the offending atom types).
168 const std::string error =
169 "Negative NbThole thole_ij parameter is not allowed in GPU, " 170 "please check thole_ij between atom types " +
181 ++numPotentialNbtholeTerms;
183 tholeTijRow[j] = tij;
// Each pair is counted from both (i,j) and (j,i), hence the divide by 2.
186 iout <<
iINFO <<
"There are potentially " << numPotentialNbtholeTerms / 2 <<
" NbThole correction interactions.\n" <<
endi;
187 #if !defined(USE_TABLE_ARRAYS) 188 bindTextureObject<float>(
189 tsize*tsize, h_drudeNbTholeTijTable.data(), drudeNbTholeTijTable,
190 drudeNbTholeTijTableTex, update);
192 copyTable<float>(tsize*tsize, h_drudeNbTholeTijTable.data(), drudeNbTholeTijTable, update);
// Publish the padded row width used by kernels to index the tables.
196 vdwCoefTableWidth = tsize;
197 #if !defined(USE_TABLE_ARRAYS) 198 bindTextureObject<float2>(tsize*tsize, h_vdwCoefTable, vdwCoefTable, vdwCoefTableTex, update);
199 bindTextureObject<float2>(tsize*tsize, h_exclusionVdwCoefTable, exclusionVdwCoefTable, exclusionVdwCoefTableTex, update);
201 copyTable<float2>(tsize*tsize, h_vdwCoefTable, vdwCoefTable, update);
202 copyTable<float2>(tsize*tsize, h_exclusionVdwCoefTable, exclusionVdwCoefTable, update);
// Host staging buffers are no longer needed after upload.
205 delete[] h_vdwCoefTable;
206 delete [] h_exclusionVdwCoefTable;
// Log once per physical node (PE 0 of each node has CmiPhysicalNodeID == 0
// only on node 0 — presumably intended as "first node only"; verify).
208 if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
209 CkPrintf(
"Info: Updated CUDA LJ table with %d x %d elements.\n", dim, dim);
// NOTE(review): fragment of a separate wrapper (original line ~215) whose
// enclosing function signature is missing from this extraction; it rebuilds
// the LJ coefficient tables in update mode (refresh existing device tables).
215 buildVdwCoefTable(
true);
// buildForceAndEnergyTable<T>: convert one of NAMD's cubic-spline interaction
// tables (src_table, 4 coefficients per entry) into per-slot force and energy
// values written at stride dst_stride into dst_force/dst_energy, scaled by
// prefac.  `flip` selects reversed coefficient ordering (see the two
// assignment branches below).
// NOTE(review): extraction-garbled — parts of the signature (orig. 222,
// declared elsewhere as (tableSize, r2list, src_table, flip, prefac,
// dst_stride, dst_force, dst_energy)), the branch keywords around the
// flip/no-flip assignments, and the `diffa` computation are missing here.
221 template <
typename T>
223 const BigReal prefac,
const int dst_stride, T* dst_force, T* dst_energy) {
227 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
// Runtime endianness probe: pick which 32-bit half of each double in r2list
// holds the exponent bits, so the table index can be extracted below.
229 union {
double f;
int32 i[2]; } byte_order_test;
230 byte_order_test.f = 1.0;
231 int32 *r2iilist = (
int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
// Slot 0 is handled after the loop; slots 1..tableSize-1 map back to a
// distance r and then to a spline-table index table_i.
233 for (
int i=1; i<tableSize; ++i ) {
234 double r = ((double) tableSize) / ( (double) i + 0.5 );
235 int table_i = (r2iilist[2*i] >> 14) + r2_delta_expc;
// Out-of-range slots (condition missing in this view) are zeroed.
238 dst_force[i*dst_stride] = 0.;
239 dst_energy[i*dst_stride] = 0.;
245 BigReal table_a, table_b, table_c, table_d;
// flip: coefficients stored highest-order first.
247 table_a = src_table[4*table_i+3];
248 table_b = src_table[4*table_i+2];
249 table_c = src_table[4*table_i+1];
250 table_d = src_table[4*table_i];
// no flip: natural a,b,c,d ordering.
252 table_a = src_table[4*table_i];
253 table_b = src_table[4*table_i+1];
254 table_c = src_table[4*table_i+2];
255 table_d = src_table[4*table_i+3];
// Horner evaluation of the spline derivative (force) and value (energy)
// at offset diffa (diffa's definition is on a missing line).
258 BigReal grad = ( 3. * table_d * diffa + 2. * table_c ) * diffa + table_b;
259 dst_force[i*dst_stride] = prefac * 2. * grad;
260 BigReal ener = table_a + diffa * ( ( table_d * diffa + table_c ) * diffa + table_b);
261 dst_energy[i*dst_stride] = prefac * ener;
// Slot 0 duplicates slot 1's energy (r -> infinity limit handling).
265 dst_energy[0] = dst_energy[1*dst_stride];
// bindTextureObject<T> (cudaArray variant, texture path only): upload a
// tableSize x tableWidth host table and create a texture object over it.
// NAMD_CUDA path: back the texture with a cudaArray, clamp addressing,
// hardware linear filtering, normalized coordinates.
// Fallback path (presumably HIP): linear device memory with the last row
// duplicated once past the end, element-type reads (interpolation then done
// in the kernel) — confirm against the original file.
// NOTE(review): extraction-garbled; `if (!update)` guards, #else/#endif
// partners and closing braces are missing from this view.
268 #if !defined(USE_TABLE_ARRAYS) 269 template <
typename T>
271 cudaArray_t& array, cudaTextureObject_t& tableTex, T** d_table) {
273 #if defined(NAMD_CUDA) 275 copy_HtoD_T(h_table, *d_table, tableSize,
sizeof(T)*tableWidth);
// Channel format: one 8*sizeof(T)-bit float channel per table column (y/z/w
// enabled as tableWidth grows to 2/3/4).
277 cudaChannelFormatDesc desc;
278 memset(&desc, 0,
sizeof(desc));
279 desc.x =
sizeof(T)*8;
280 if (tableWidth >= 2) desc.y =
sizeof(T)*8;
281 if (tableWidth >= 3) desc.z =
sizeof(T)*8;
282 if (tableWidth >= 4) desc.w =
sizeof(T)*8;
283 desc.f = cudaChannelFormatKindFloat;
284 cudaCheck(cudaMallocArray(&array, &desc, tableSize, 0));
// NOTE(review): cudaMemcpyToArray is deprecated in current CUDA; consider
// cudaMemcpy2DToArray/cudaMemcpyToArrayAsync when modernizing.
285 cudaCheck(cudaMemcpyToArray(array, 0, 0, h_table, tableSize*
sizeof(T)*tableWidth, cudaMemcpyHostToDevice));
287 cudaResourceDesc resDesc;
288 memset(&resDesc, 0,
sizeof(resDesc));
289 resDesc.resType = cudaResourceTypeArray;
290 resDesc.res.array.array = array;
292 cudaTextureDesc texDesc;
293 memset(&texDesc, 0,
sizeof(texDesc));
294 texDesc.addressMode[0] = cudaAddressModeClamp;
295 texDesc.filterMode = cudaFilterModeLinear;
296 texDesc.normalizedCoords = 1;
298 cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
// Fallback: copy table, then duplicate the final row one slot past the end
// so reads at the upper boundary stay in range.
303 copy_HtoD_T(h_table, *d_table, tableSize,
sizeof(T)*tableWidth);
304 copy_HtoD_T(h_table + tableWidth * (tableSize - 1), *d_table + tableWidth * tableSize, 1,
sizeof(T)*tableWidth);
306 cudaResourceDesc resDesc;
307 memset(&resDesc, 0,
sizeof(resDesc));
308 resDesc.resType = cudaResourceTypeLinear;
309 resDesc.res.linear.devPtr = *d_table;
310 resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
311 resDesc.res.linear.desc.x =
sizeof(T)*8;
312 if (tableWidth >= 2) resDesc.res.linear.desc.y =
sizeof(T)*8;
313 if (tableWidth >= 3) resDesc.res.linear.desc.z =
sizeof(T)*8;
314 if (tableWidth >= 4) resDesc.res.linear.desc.w =
sizeof(T)*8;
// sizeInBytes counts the duplicated tail row, hence tableSize + 1.
315 resDesc.res.linear.sizeInBytes = (tableSize + 1)*
sizeof(T)*tableWidth;
317 cudaTextureDesc texDesc;
318 memset(&texDesc, 0,
sizeof(texDesc));
319 texDesc.readMode = cudaReadModeElementType;
321 cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
// buildForceAndEnergyTables: build the nonbonded force/energy lookup tables
// (and the modified-exclusion variants, plus the float4 exclusionTable) on
// the host and upload them, either as textures (bindTextureObject) or plain
// device tables (copyTable) depending on USE_TABLE_ARRAYS.
// NOTE(review): extraction-garbled — the calls into buildForceAndEnergyTable
// that fill t/et (orig. 344-356, 372-384), the exclusionTable loop header and
// r2 table setup (orig. 391-405, 410-415), `delete [] t/et`, and closing
// braces are missing from this view.
326 void CudaNonbondedTables::buildForceAndEnergyTables(
int tableSize) {
327 forceAndEnergyTableSize = tableSize;
332 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
// Precompute r^2 values per table slot; slot 0 is left unset here (loop
// starts at 1) and is handled by buildForceAndEnergyTable's slot-0 copy.
334 double* r2list =
new double[tableSize];
335 for (
int i=1; i<tableSize; ++i ) {
336 double r = ((double) tableSize) / ( (double) i + 0.5 );
337 r2list[i] = r*r + r2_delta;
// Standard force/energy tables: 4 floats per slot, filled by the (missing)
// buildForceAndEnergyTable calls, then uploaded.
342 float* t =
new float[tableSize * 4];
343 float* et =
new float[tableSize * 4];
357 #if !defined(USE_TABLE_ARRAYS) 358 bindTextureObject<float>(tableSize, 4, t, forceArray, forceTableTex, (
float **)&forceTable);
359 bindTextureObject<float>(tableSize, 4, et, energyArray, energyTableTex, (
float **)&energyTable);
361 copyTable<float4>(tableSize, (float4*)t, forceTable);
362 copyTable<float4>(tableSize, (float4*)et, energyTable);
// Modified-exclusion force/energy tables, same layout and upload scheme.
370 float* t =
new float[tableSize * 4];
371 float* et =
new float[tableSize * 4];
385 #ifndef USE_TABLE_ARRAYS 386 bindTextureObject<float>(tableSize, 4, t, modifiedExclusionForceArray, modifiedExclusionForceTableTex, (
float **)&modifiedExclusionForceTable);
387 bindTextureObject<float>(tableSize, 4, et, modifiedExclusionEnergyArray, modifiedExclusionEnergyTableTex, (
float **)&modifiedExclusionEnergyTable);
389 copyTable<float4>(tableSize, (float4*)t, modifiedExclusionForceTable);
390 copyTable<float4>(tableSize, (float4*)et, modifiedExclusionEnergyTable);
// Exclusion table: spline coefficients from corr_full_table with derivative
// weights 6/4/2/1 packed into float4 (x = highest order).
406 h_exclusionTable[i].x = 6.0*corr_full_table[4*i + 3];
407 h_exclusionTable[i].y = 4.0*corr_full_table[4*i + 2];
408 h_exclusionTable[i].z = 2.0*corr_full_table[4*i + 1];
409 h_exclusionTable[i].w = 1.0*corr_full_table[4*i + 0];
416 delete [] h_exclusionTable;
417 delete [] h_r2_table;
// Log once (condition matches buildVdwCoefTable's logging guard).
421 if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
422 CkPrintf(
"Info: Updated CUDA force table with %d elements.\n", tableSize);
static BigReal * fast_table
std::ostream & iINFO(std::ostream &s)
static BigReal * scor_table
void allocate_device_T(void **pp, const size_t len, const size_t sizeofT)
SimParameters * simParameters
void deallocate_device(T **pp)
std::ostream & endi(std::ostream &s)
static BigReal * vdwa_table
std::string atom_type_name_str(Index a) const
void bindTextureObject(int size, T *h_table, T *&d_table, cudaTextureObject_t &tex, bool update=false)
static BigReal * full_table
static BigReal * r2_table
void buildForceAndEnergyTable(const int tableSize, const double *r2list, const BigReal *src_table, const bool flip, const BigReal prefac, const int dst_stride, T *dst_force, T *dst_energy)
void NAMD_bug(const char *err_msg)
int get_table_dim() const
#define FORCE_ENERGY_TABLE_SIZE
const TableEntry * table_val(unsigned int i, unsigned int j) const
void NAMD_die(const char *err_msg)
NbtholePairValue * nbthole_array
static BigReal * slow_table
void copy_HtoD_T(const void *h_array, void *d_array, size_t array_len, const size_t sizeofT)
static BigReal * vdwb_table
static const LJTable * ljTable
static BigReal * corr_table
void copyTable(int size, T *h_table, T *&d_table, bool update=false)
CudaNonbondedTables(const int deviceID)