CudaNonbondedTables.C File Reference

#include "charm++.h"
#include "NamdTypes.h"
#include "ComputeNonbondedUtil.h"
#include "LJTable.h"
#include "CudaUtils.h"
#include "CudaNonbondedTables.h"

Go to the source code of this file.

Functions

template<typename T>
void bindTextureObject (int size, T *h_table, T *&d_table, cudaTextureObject_t &tex)
template<typename T>
void buildForceAndEnergyTable (const int tableSize, const double *r2list, const BigReal *src_table, const bool flip, const BigReal prefac, const int dst_stride, T *dst_force, T *dst_energy)
template<typename T>
void bindTextureObject (int tableSize, int tableWidth, T *h_table, cudaArray_t &array, cudaTextureObject_t &tableTex)


Function Documentation

template<typename T>
void bindTextureObject ( int  tableSize,
int  tableWidth,
T *  h_table,
cudaArray_t &  array,
cudaTextureObject_t &  tableTex 
)

Definition at line 165 of file CudaNonbondedTables.C.

References cudaCheck.

00166                                                      {
00167 
00168   cudaChannelFormatDesc desc;
00169   memset(&desc, 0, sizeof(desc));
00170   desc.x = sizeof(T)*8;
00171   if (tableWidth >= 2) desc.y = sizeof(T)*8;
00172   if (tableWidth >= 3) desc.z = sizeof(T)*8;
00173   if (tableWidth >= 4) desc.w = sizeof(T)*8;
00174   desc.f = cudaChannelFormatKindFloat;
00175   cudaCheck(cudaMallocArray(&array, &desc, tableSize, 1));
00176   cudaCheck(cudaMemcpyToArray(array, 0, 0, h_table, tableSize*sizeof(T)*tableWidth, cudaMemcpyHostToDevice));
00177 
00178   cudaResourceDesc resDesc;
00179   memset(&resDesc, 0, sizeof(resDesc));
00180   resDesc.resType = cudaResourceTypeArray;
00181   resDesc.res.array.array = array;
00182 
00183   cudaTextureDesc texDesc;
00184   memset(&texDesc, 0, sizeof(texDesc));
00185   texDesc.addressMode[0] = cudaAddressModeClamp;
00186   texDesc.filterMode = cudaFilterModeLinear;
00187   texDesc.normalizedCoords = 1;
00188 
00189   cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
00190 }

template<typename T>
void bindTextureObject ( int  size,
T *  h_table,
T *&  d_table,
cudaTextureObject_t &  tex 
)

Definition at line 50 of file CudaNonbondedTables.C.

References cudaCheck.

00050                                                                                     {
00051   // Copy to device
00052   allocate_device<T>(&d_table, size);
00053   copy_HtoD_sync<T>(h_table, d_table, size);
00054 
00055   // Create texture object
00056   cudaResourceDesc resDesc;
00057   memset(&resDesc, 0, sizeof(resDesc));
00058   resDesc.resType = cudaResourceTypeLinear;
00059   resDesc.res.linear.devPtr = d_table;
00060   resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
00061   resDesc.res.linear.desc.x = sizeof(float)*8; // bits per channel
00062   if (sizeof(T) >= sizeof(float)*2) resDesc.res.linear.desc.y = sizeof(float)*8; // bits per channel
00063   if (sizeof(T) >= sizeof(float)*3) resDesc.res.linear.desc.z = sizeof(float)*8; // bits per channel
00064   if (sizeof(T) >= sizeof(float)*4) resDesc.res.linear.desc.w = sizeof(float)*8; // bits per channel
00065   resDesc.res.linear.sizeInBytes = size*sizeof(T);
00066 
00067   cudaTextureDesc texDesc;
00068   memset(&texDesc, 0, sizeof(texDesc));
00069   texDesc.readMode = cudaReadModeElementType;
00070 
00071   cudaCheck(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
00072 }

template<typename T>
void buildForceAndEnergyTable ( const int  tableSize,
const double *  r2list,
const BigReal src_table,
const bool  flip,
const BigReal  prefac,
const int  dst_stride,
T *  dst_force,
T *  dst_energy 
)

Definition at line 118 of file CudaNonbondedTables.C.

References ComputeNonbondedUtil::cutoff, diffa, f, ComputeNonbondedUtil::r2_delta, ComputeNonbondedUtil::r2_delta_exp, ComputeNonbondedUtil::r2_table, and table_i.

00119                                                                            {
00120 
00121   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00122   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00123   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00124 
00125   union { double f; int32 i[2]; } byte_order_test;
00126   byte_order_test.f = 1.0;  // should occupy high-order bits only
00127   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00128 
00129   for ( int i=1; i<tableSize; ++i ) {
00130     double r = ((double) tableSize) / ( (double) i + 0.5 );
00131     int table_i = (r2iilist[2*i] >> 14) + r2_delta_expc;  // table_i >= 0
00132 
00133     if ( r > ComputeNonbondedUtil::cutoff ) {
00134       dst_force[i*dst_stride] = 0.;
00135       dst_energy[i*dst_stride] = 0.;
00136       continue;
00137     }
00138 
00139     BigReal diffa = r2list[i] - ComputeNonbondedUtil::r2_table[table_i];
00140 
00141     BigReal table_a, table_b, table_c, table_d;
00142     if (flip) {
00143       table_a = src_table[4*table_i+3];
00144       table_b = src_table[4*table_i+2];
00145       table_c = src_table[4*table_i+1];
00146       table_d = src_table[4*table_i];
00147     } else {
00148       table_a = src_table[4*table_i];
00149       table_b = src_table[4*table_i+1];
00150       table_c = src_table[4*table_i+2];
00151       table_d = src_table[4*table_i+3];
00152     }
00153 
00154     BigReal grad = ( 3. * table_d * diffa + 2. * table_c ) * diffa + table_b;
00155     dst_force[i*dst_stride] = prefac * 2. * grad;
00156     BigReal ener = table_a + diffa * ( ( table_d * diffa + table_c ) * diffa + table_b);
00157     dst_energy[i*dst_stride] = prefac * ener;
00158   }
00159 
00160   dst_force[0] = 0.;
00161   dst_energy[0] = dst_energy[1*dst_stride];
00162 }


Generated on Tue Nov 21 01:17:16 2017 for NAMD by  doxygen 1.4.7