NAMD
Functions
CudaNonbondedTables.C File Reference
#include "Molecule.h"
#include "ComputeNonbondedUtil.h"
#include "LJTable.h"
#include "CudaUtils.h"
#include "CudaNonbondedTables.h"
#include "Node.h"
#include "Parameters.h"
#include "SimParameters.h"
#include "InfoStream.h"

Go to the source code of this file.

Functions

template<typename T >
void copyTable (int size, T *h_table, T *&d_table, bool update=false)
 
template<typename T >
void bindTextureObject (int size, T *h_table, T *&d_table, cudaTextureObject_t &tex, bool update=false)
 
template<typename T >
void buildForceAndEnergyTable (const int tableSize, const double *r2list, const BigReal *src_table, const bool flip, const BigReal prefac, const int dst_stride, T *dst_force, T *dst_energy)
 
template<typename T >
void bindTextureObject (int tableSize, int tableWidth, T *h_table, cudaArray_t &array, cudaTextureObject_t &tableTex, T **d_table)
 

Function Documentation

◆ bindTextureObject() [1/2]

template<typename T >
void bindTextureObject ( int  size,
T *  h_table,
T *&  d_table,
cudaTextureObject_t &  tex,
bool  update = false 
)

Definition at line 83 of file CudaNonbondedTables.C.

References cudaCheck.

83  {
84  // Copy to device
85  if ( ! update) {
86  allocate_device<T>(&d_table, size);
87  }
88  else {
89  cudaCheck(cudaDestroyTextureObject(tex));
90  }
91  copy_HtoD_sync<T>(h_table, d_table, size);
92 
93  // Create texture object
94  cudaResourceDesc resDesc;
95  memset(&resDesc, 0, sizeof(resDesc));
96  resDesc.resType = cudaResourceTypeLinear;
97  resDesc.res.linear.devPtr = d_table;
98  resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
99  resDesc.res.linear.desc.x = sizeof(float)*8; // bits per channel
100  if (sizeof(T) >= sizeof(float)*2) resDesc.res.linear.desc.y = sizeof(float)*8; // bits per channel
101  if (sizeof(T) >= sizeof(float)*3) resDesc.res.linear.desc.z = sizeof(float)*8; // bits per channel
102  if (sizeof(T) >= sizeof(float)*4) resDesc.res.linear.desc.w = sizeof(float)*8; // bits per channel
103  resDesc.res.linear.sizeInBytes = size*sizeof(T);
104 
105  cudaTextureDesc texDesc;
106  memset(&texDesc, 0, sizeof(texDesc));
107  texDesc.readMode = cudaReadModeElementType;
108  //texDesc.normalizedCoords = 0;
109 
110  cudaCheck(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
111 
112 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

◆ bindTextureObject() [2/2]

template<typename T >
void bindTextureObject ( int  tableSize,
int  tableWidth,
T *  h_table,
cudaArray_t &  array,
cudaTextureObject_t &  tableTex,
T **  d_table 
)

Definition at line 270 of file CudaNonbondedTables.C.

References allocate_device_T(), copy_HtoD_T(), and cudaCheck.

271  {
272 
273 #if defined(NAMD_CUDA)
274  allocate_device_T((void **)d_table, tableSize, sizeof(T)*tableWidth);
275  copy_HtoD_T(h_table, *d_table, tableSize, sizeof(T)*tableWidth);
276 
277  cudaChannelFormatDesc desc;
278  memset(&desc, 0, sizeof(desc));
279  desc.x = sizeof(T)*8;
280  if (tableWidth >= 2) desc.y = sizeof(T)*8;
281  if (tableWidth >= 3) desc.z = sizeof(T)*8;
282  if (tableWidth >= 4) desc.w = sizeof(T)*8;
283  desc.f = cudaChannelFormatKindFloat;
284  cudaCheck(cudaMallocArray(&array, &desc, tableSize, 0));
285  cudaCheck(cudaMemcpyToArray(array, 0, 0, h_table, tableSize*sizeof(T)*tableWidth, cudaMemcpyHostToDevice));
286 
287  cudaResourceDesc resDesc;
288  memset(&resDesc, 0, sizeof(resDesc));
289  resDesc.resType = cudaResourceTypeArray;
290  resDesc.res.array.array = array;
291 
292  cudaTextureDesc texDesc;
293  memset(&texDesc, 0, sizeof(texDesc));
294  texDesc.addressMode[0] = cudaAddressModeClamp;
295  texDesc.filterMode = cudaFilterModeLinear;
296  texDesc.normalizedCoords = 1;
297 
298  cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
299 #else
300  // tex1Dfetch is used in kernels, so create a linear texture
301  // The texture is 1 texel wider to simplify (and optimize) index clamping for tex1Dfetch
302  allocate_device_T((void **)d_table, tableSize + 1, sizeof(T)*tableWidth);
303  copy_HtoD_T(h_table, *d_table, tableSize, sizeof(T)*tableWidth);
304  copy_HtoD_T(h_table + tableWidth * (tableSize - 1), *d_table + tableWidth * tableSize, 1, sizeof(T)*tableWidth);
305 
306  cudaResourceDesc resDesc;
307  memset(&resDesc, 0, sizeof(resDesc));
308  resDesc.resType = cudaResourceTypeLinear;
309  resDesc.res.linear.devPtr = *d_table;
310  resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
311  resDesc.res.linear.desc.x = sizeof(T)*8;
312  if (tableWidth >= 2) resDesc.res.linear.desc.y = sizeof(T)*8;
313  if (tableWidth >= 3) resDesc.res.linear.desc.z = sizeof(T)*8;
314  if (tableWidth >= 4) resDesc.res.linear.desc.w = sizeof(T)*8;
315  resDesc.res.linear.sizeInBytes = (tableSize + 1)*sizeof(T)*tableWidth;
316 
317  cudaTextureDesc texDesc;
318  memset(&texDesc, 0, sizeof(texDesc));
319  texDesc.readMode = cudaReadModeElementType;
320 
321  cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
322 #endif
323 }
void allocate_device_T(void **pp, const size_t len, const size_t sizeofT)
Definition: CudaUtils.C:97
void copy_HtoD_T(const void *h_array, void *d_array, size_t array_len, const size_t sizeofT)
Definition: CudaUtils.C:224
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

◆ buildForceAndEnergyTable()

template<typename T >
void buildForceAndEnergyTable ( const int  tableSize,
const double *  r2list,
const BigReal src_table,
const bool  flip,
const BigReal  prefac,
const int  dst_stride,
T *  dst_force,
T *  dst_energy 
)

Definition at line 222 of file CudaNonbondedTables.C.

References ComputeNonbondedUtil::cutoff, ComputeNonbondedUtil::r2_delta, ComputeNonbondedUtil::r2_delta_exp, and ComputeNonbondedUtil::r2_table.

223  {
224 
225  const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
226  const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
227  const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
228 
229  union { double f; int32 i[2]; } byte_order_test;
230  byte_order_test.f = 1.0; // should occupy high-order bits only
231  int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
232 
233  for ( int i=1; i<tableSize; ++i ) {
234  double r = ((double) tableSize) / ( (double) i + 0.5 );
235  int table_i = (r2iilist[2*i] >> 14) + r2_delta_expc; // table_i >= 0
236 
237  if ( r > ComputeNonbondedUtil::cutoff ) {
238  dst_force[i*dst_stride] = 0.;
239  dst_energy[i*dst_stride] = 0.;
240  continue;
241  }
242 
243  BigReal diffa = r2list[i] - ComputeNonbondedUtil::r2_table[table_i];
244 
245  BigReal table_a, table_b, table_c, table_d;
246  if (flip) {
247  table_a = src_table[4*table_i+3];
248  table_b = src_table[4*table_i+2];
249  table_c = src_table[4*table_i+1];
250  table_d = src_table[4*table_i];
251  } else {
252  table_a = src_table[4*table_i];
253  table_b = src_table[4*table_i+1];
254  table_c = src_table[4*table_i+2];
255  table_d = src_table[4*table_i+3];
256  }
257 
258  BigReal grad = ( 3. * table_d * diffa + 2. * table_c ) * diffa + table_b;
259  dst_force[i*dst_stride] = prefac * 2. * grad;
260  BigReal ener = table_a + diffa * ( ( table_d * diffa + table_c ) * diffa + table_b);
261  dst_energy[i*dst_stride] = prefac * ener;
262  }
263 
264  dst_force[0] = 0.;
265  dst_energy[0] = dst_energy[1*dst_stride];
266 }
int32_t int32
Definition: common.h:38
double BigReal
Definition: common.h:123

◆ copyTable()

template<typename T >
void copyTable ( int  size,
T *  h_table,
T *&  d_table,
bool  update = false 
)

Definition at line 72 of file CudaNonbondedTables.C.

72  {
73  // Copy to device
74  if ( ! update) {
75  allocate_device<T>(&d_table, size);
76  }
77  copy_HtoD_sync<T>(h_table, d_table, size);
78 
79 }