// --- CudaNonbondedTables constructor (fragment; extraction-garbled) ---
// NOTE(review): the leading integers on these lines are original file line
// numbers fused in by a bad extraction, and several original lines between
// them are missing (e.g. 9-12, 15-20, 22-23, 27).  Only the visible
// zero-initializations are documented here; do not infer the elided members.
// Visible behavior: null out device-table pointers and texture-object handles
// so the destructor's `!= NULL` guards are safe even if tables are never built.
8 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 13 vdwCoefTableWidth = 0;
// Texture-object members exist only when table arrays are not used.
14 #if !defined(USE_TABLE_ARRAYS) 21 exclusionTable = NULL;
24 #if !defined(USE_TABLE_ARRAYS) 25 modifiedExclusionForceTableTex = 0;
// cudaTextureObject_t is an integer handle; 0 marks "not created".
26 modifiedExclusionEnergyTableTex = 0;
// Device-memory table pointers start NULL until buildForceAndEnergyTables().
28 modifiedExclusionForceTable = NULL;
29 modifiedExclusionEnergyTable = NULL;
// --- CudaNonbondedTables destructor (fragment; extraction-garbled) ---
// Releases every device-side resource the table builders allocated.
// NOTE(review): lines 41, 43-44, 47, 52-53 of the original are elided here;
// presumably they contain matching #if/#endif structure and additional frees —
// confirm against the full file before editing.
// Free linear device buffers; guards make destruction of a never-built
// table object a no-op (pointers are NULL-initialized in the constructor).
38 if (vdwCoefTable != NULL) deallocate_device<float2>(&vdwCoefTable);
39 if (exclusionTable != NULL) deallocate_device<float4>(&exclusionTable);
40 if (r2_table != NULL) deallocate_device<float>(&r2_table);
// Texture-backed resources exist only in the non-USE_TABLE_ARRAYS build:
// first the cudaArrays backing the modified-exclusion textures...
42 #if !defined(USE_TABLE_ARRAYS) 45 cudaCheck(cudaFreeArray(modifiedExclusionForceArray));
46 cudaCheck(cudaFreeArray(modifiedExclusionEnergyArray));
// ...then the texture objects themselves (destroy before/independent of the
// underlying storage per the CUDA runtime texture-object API).
48 cudaCheck(cudaDestroyTextureObject(forceTableTex));
49 cudaCheck(cudaDestroyTextureObject(energyTableTex));
50 cudaCheck(cudaDestroyTextureObject(modifiedExclusionForceTableTex));
51 cudaCheck(cudaDestroyTextureObject(modifiedExclusionEnergyTableTex));
// Linear-memory force/energy tables (used by both build flavors).
54 if (forceTable != NULL) deallocate_device<float4>(&forceTable);
55 if (energyTable != NULL) deallocate_device<float4>(&energyTable);
56 if (modifiedExclusionForceTable != NULL) deallocate_device<float4>(&modifiedExclusionForceTable);
57 if (modifiedExclusionEnergyTable != NULL) deallocate_device<float4>(&modifiedExclusionEnergyTable);
// --- copyTable<T> (fragment; extraction-garbled) ---
// Uploads a host table to device memory: allocates d_table (size elements)
// and does a synchronous host-to-device copy.
// NOTE(review): original lines 63-64 and 66 are elided; given the `update`
// parameter, the allocation on line 65 is presumably guarded by
// `if (!update)` so repeated calls reuse the existing buffer — confirm.
62 void copyTable(
int size, T* h_table, T*& d_table,
bool update=
false) {
65 allocate_device<T>(&d_table, size);
67 copy_HtoD_sync<T>(h_table, d_table, size);
// --- bindTextureObject<T>, linear-memory flavor (fragment; extraction-garbled)
// Compiled only when textures are used (#ifndef USE_TABLE_ARRAYS).
// Allocates a device buffer, copies the host table into it, then creates a
// texture object over that linear memory with element-type reads
// (cudaReadModeElementType, no filtering/normalization set here).
// NOTE(review): original lines 74-75, 77-80, 82-83, 98-99 are elided; the
// `update` parameter suggests allocation and texture creation are guarded by
// `if (!update)` in the missing lines — confirm against the full file.
71 #ifndef USE_TABLE_ARRAYS 73 void bindTextureObject(
int size, T* h_table, T*& d_table, cudaTextureObject_t& tex,
bool update=
false) {
76 allocate_device<T>(&d_table, size);
81 copy_HtoD_sync<T>(h_table, d_table, size);
// Describe the linear device buffer as the texture's resource.
84 cudaResourceDesc resDesc;
85 memset(&resDesc, 0,
sizeof(resDesc));
86 resDesc.resType = cudaResourceTypeLinear;
87 resDesc.res.linear.devPtr = d_table;
88 resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
// Channel widths (bits) are enabled per float component of T, so T may be
// float, float2, float3, or float4 — the size checks switch on y/z/w.
89 resDesc.res.linear.desc.x =
sizeof(float)*8;
90 if (
sizeof(T) >=
sizeof(float)*2) resDesc.res.linear.desc.y =
sizeof(float)*8;
91 if (
sizeof(T) >=
sizeof(float)*3) resDesc.res.linear.desc.z =
sizeof(float)*8;
92 if (
sizeof(T) >=
sizeof(float)*4) resDesc.res.linear.desc.w =
sizeof(float)*8;
93 resDesc.res.linear.sizeInBytes = size*
sizeof(T);
// Element-type reads: kernels fetch raw table entries, no HW interpolation.
95 cudaTextureDesc texDesc;
96 memset(&texDesc, 0,
sizeof(texDesc));
97 texDesc.readMode = cudaReadModeElementType;
100 cudaCheck(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
// --- CudaNonbondedTables::buildVdwCoefTable (fragment; extraction-garbled)
// Builds the Lennard-Jones coefficient tables (regular + exclusion variants)
// as tsize x tsize float2 grids on the host, uploads them to the device
// (texture-backed or plain copy depending on USE_TABLE_ARRAYS), then frees
// the host staging buffers.
// NOTE(review): original lines 111-116, 118, 125-132, 137, 140-141, 144,
// 147-151 are elided — in particular the loop body that fills `row` and
// `exclusionRow` and the derivation of `dim`/`tsize` are not visible here.
110 void CudaNonbondedTables::buildVdwCoefTable(
bool update) {
// Sanity check: the allocated table edge must cover the LJ type dimension.
117 if ( tsize < dim )
NAMD_bug(
"CudaNonbondedTables::buildVdwCoefTable bad tsize");
// Host staging buffers for the full tsize*tsize grids.
119 float2 *h_vdwCoefTable =
new float2[tsize*tsize];
120 float2 *h_exclusionVdwCoefTable =
new float2[tsize*tsize];
121 float2 *row = h_vdwCoefTable;
122 float2 *exclusionRow = h_exclusionVdwCoefTable;
// Fill dim x dim entries row by row (advancing by the padded stride tsize);
// the per-entry computation (original lines 125-132) is elided.
123 for (
int i=0; i<dim; ++i, row += tsize, exclusionRow += tsize ) {
124 for (
int j=0; j<dim; ++j ) {
133 vdwCoefTableWidth = tsize;
// Upload: texture objects in the texture build, plain device copies otherwise.
134 #if !defined(USE_TABLE_ARRAYS) 135 bindTextureObject<float2>(tsize*tsize, h_vdwCoefTable, vdwCoefTable, vdwCoefTableTex, update);
136 bindTextureObject<float2>(tsize*tsize, h_exclusionVdwCoefTable, exclusionVdwCoefTable, exclusionVdwCoefTableTex, update);
138 copyTable<float2>(tsize*tsize, h_vdwCoefTable, vdwCoefTable, update);
139 copyTable<float2>(tsize*tsize, h_exclusionVdwCoefTable, exclusionVdwCoefTable, update);
// Host staging is no longer needed after the device upload.
142 delete[] h_vdwCoefTable;
143 delete [] h_exclusionVdwCoefTable;
// Log once per physical node (PE 0 of each node) to avoid duplicate output.
145 if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
146 CkPrintf(
"Info: Updated CUDA LJ table with %d x %d elements.\n", dim, dim);
// NOTE(review): line 152 belongs to a separate method (presumably
// updateVdwCoefTable) whose signature is elided; it rebuilds in update mode.
152 buildVdwCoefTable(
true);
// --- buildForceAndEnergyTable<T> template (fragment; extraction-garbled)
// Converts a cubic-spline coefficient table (src_table, 4 coefficients per
// bin) into per-distance force and energy samples written into strided
// destination arrays (dst_force/dst_energy, stride dst_stride).
// NOTE(review): the signature line (original 159) and lines 161-163, 165,
// 169, 173-174, 177-181, 183, 188, 193-194, 199-201 are elided; parameter
// roles below are inferred from the visible uses — confirm.
158 template <
typename T>
160 const BigReal prefac,
const int dst_stride, T* dst_force, T* dst_energy) {
// Exponent rebias for extracting the spline bin from a double's exponent
// field (1023 = IEEE-754 double exponent bias).
164 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
// Runtime endianness probe: selects which 32-bit half of each double in
// r2list holds the sign/exponent word.
166 union {
double f;
int32 i[2]; } byte_order_test;
167 byte_order_test.f = 1.0;
168 int32 *r2iilist = (
int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
// i=0 is handled separately (see line 202); bins run 1..tableSize-1.
170 for (
int i=1; i<tableSize; ++i ) {
171 double r = ((double) tableSize) / ( (double) i + 0.5 );
// Table bin = exponent bits of r2 (shifted past the stored-word mantissa
// bits) plus the rebias constant computed above.
172 int table_i = (r2iilist[2*i] >> 14) + r2_delta_expc;
// Out-of-range entries (guarding condition elided at lines 173-174)
// are zeroed rather than extrapolated.
175 dst_force[i*dst_stride] = 0.;
176 dst_energy[i*dst_stride] = 0.;
182 BigReal table_a, table_b, table_c, table_d;
// Two coefficient orderings, reversed relative to each other; the selecting
// condition (presumably the `flip` argument, per the outline at file end)
// sits in the elided lines 183/188.
184 table_a = src_table[4*table_i+3];
185 table_b = src_table[4*table_i+2];
186 table_c = src_table[4*table_i+1];
187 table_d = src_table[4*table_i];
189 table_a = src_table[4*table_i];
190 table_b = src_table[4*table_i+1];
191 table_c = src_table[4*table_i+2];
192 table_d = src_table[4*table_i+3];
// Horner evaluation: grad = derivative of the cubic at offset diffa
// (diffa's definition is elided), energy = the cubic itself.
195 BigReal grad = ( 3. * table_d * diffa + 2. * table_c ) * diffa + table_b;
196 dst_force[i*dst_stride] = prefac * 2. * grad;
197 BigReal ener = table_a + diffa * ( ( table_d * diffa + table_c ) * diffa + table_b);
198 dst_energy[i*dst_stride] = prefac * ener;
// Clamp the r->infinity end: entry 0 duplicates entry 1 so texture reads at
// the table origin stay finite (matching line elided for dst_force).
202 dst_energy[0] = dst_energy[1*dst_stride];
// --- bindTextureObject<T>, cudaArray flavor (fragment; extraction-garbled)
// Texture-build-only overload used for the interpolated force/energy tables.
// Two backends:
//   NAMD_CUDA: copies the table into a cudaArray and creates a normalized,
//     linearly-filtered, clamped texture (hardware interpolation).
//   else (HIP path, presumably): linear device memory with element reads,
//     plus one duplicated guard row past the end (see line 241).
// NOTE(review): lines 207, 209, 211, 213, 223, 228, 234, 236-239, 242, 253,
// 257, 259-262 are elided, including the signature's parameter list head and
// the #else/#endif structure — confirm branch boundaries in the full file.
205 #if !defined(USE_TABLE_ARRAYS) 206 template <
typename T>
208 cudaArray_t& array, cudaTextureObject_t& tableTex, T** d_table) {
// CUDA path: also keep a linear copy in *d_table (tableSize rows of
// tableWidth components each) alongside the cudaArray.
210 #if defined(NAMD_CUDA) 212 copy_HtoD_T(h_table, *d_table, tableSize,
sizeof(T)*tableWidth);
// Channel format: one float-width channel per table component (up to 4).
214 cudaChannelFormatDesc desc;
215 memset(&desc, 0,
sizeof(desc));
216 desc.x =
sizeof(T)*8;
217 if (tableWidth >= 2) desc.y =
sizeof(T)*8;
218 if (tableWidth >= 3) desc.z =
sizeof(T)*8;
219 if (tableWidth >= 4) desc.w =
sizeof(T)*8;
220 desc.f = cudaChannelFormatKindFloat;
221 cudaCheck(cudaMallocArray(&array, &desc, tableSize, 0));
// NOTE(review): cudaMemcpyToArray is deprecated (CUDA >= 10.1); consider
// cudaMemcpy2DToArray when this file can be edited as a whole.
222 cudaCheck(cudaMemcpyToArray(array, 0, 0, h_table, tableSize*
sizeof(T)*tableWidth, cudaMemcpyHostToDevice));
224 cudaResourceDesc resDesc;
225 memset(&resDesc, 0,
sizeof(resDesc));
226 resDesc.resType = cudaResourceTypeArray;
227 resDesc.res.array.array = array;
// Normalized coordinates + linear filtering: kernels sample the table with
// hardware interpolation; clamp keeps edge reads in range.
229 cudaTextureDesc texDesc;
230 memset(&texDesc, 0,
sizeof(texDesc));
231 texDesc.addressMode[0] = cudaAddressModeClamp;
232 texDesc.filterMode = cudaFilterModeLinear;
233 texDesc.normalizedCoords = 1;
235 cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
// Non-CUDA path: linear memory only.  The extra copy at line 241 duplicates
// the last row into slot tableSize, presumably so software interpolation can
// read index i+1 at the top edge without going out of bounds — confirm.
240 copy_HtoD_T(h_table, *d_table, tableSize,
sizeof(T)*tableWidth);
241 copy_HtoD_T(h_table + tableWidth * (tableSize - 1), *d_table + tableWidth * tableSize, 1,
sizeof(T)*tableWidth);
243 cudaResourceDesc resDesc;
244 memset(&resDesc, 0,
sizeof(resDesc));
245 resDesc.resType = cudaResourceTypeLinear;
246 resDesc.res.linear.devPtr = *d_table;
247 resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
248 resDesc.res.linear.desc.x =
sizeof(T)*8;
249 if (tableWidth >= 2) resDesc.res.linear.desc.y =
sizeof(T)*8;
250 if (tableWidth >= 3) resDesc.res.linear.desc.z =
sizeof(T)*8;
251 if (tableWidth >= 4) resDesc.res.linear.desc.w =
sizeof(T)*8;
// +1: account for the duplicated guard row appended above.
252 resDesc.res.linear.sizeInBytes = (tableSize + 1)*
sizeof(T)*tableWidth;
254 cudaTextureDesc texDesc;
255 memset(&texDesc, 0,
sizeof(texDesc));
// Element reads here (vs. filtered reads on the CUDA path): interpolation,
// if any, must be done in the kernel.
256 texDesc.readMode = cudaReadModeElementType;
258 cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
// --- CudaNonbondedTables::buildForceAndEnergyTables (fragment; garbled) ---
// Builds all distance-indexed force/energy lookup tables: computes the r2
// sample list, fills host staging buffers (the filling calls, original lines
// ~281-293 and ~309-321, are elided), uploads them as textures or plain
// device tables, builds the exclusion coefficient table, and logs completion.
// NOTE(review): large spans are elided (265-268, 270, 275-278, 281-293, 297,
// 300-306, 309-321, 325, 328-342, 347-352, 355-357, 360+); comments below
// cover only the visible lines.
263 void CudaNonbondedTables::buildForceAndEnergyTables(
int tableSize) {
264 forceAndEnergyTableSize = tableSize;
// Same exponent rebias as buildForceAndEnergyTable<T> (double bias 1023).
269 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
// r2list[i] = r^2 + r2_delta for r sampled as tableSize/(i+0.5);
// index 0 is left unset here (handled specially downstream).
271 double* r2list =
new double[tableSize];
272 for (
int i=1; i<tableSize; ++i ) {
273 double r = ((double) tableSize) / ( (double) i + 0.5 );
274 r2list[i] = r*r + r2_delta;
// Host staging for the standard force/energy tables, 4 floats per entry
// (packed as float4 on upload).
279 float* t =
new float[tableSize * 4];
280 float* et =
new float[tableSize * 4];
294 #if !defined(USE_TABLE_ARRAYS) 295 bindTextureObject<float>(tableSize, 4, t, forceArray, forceTableTex, (
float **)&forceTable);
296 bindTextureObject<float>(tableSize, 4, et, energyArray, energyTableTex, (
float **)&energyTable);
298 copyTable<float4>(tableSize, (float4*)t, forceTable);
299 copyTable<float4>(tableSize, (float4*)et, energyTable);
// Second pair of staging buffers: modified-exclusion force/energy tables.
// NOTE(review): `t`/`et` are re-declared; the delete[] of the first pair
// presumably sits in the elided lines 300-306 — confirm (leak otherwise).
307 float* t =
new float[tableSize * 4];
308 float* et =
new float[tableSize * 4];
322 #ifndef USE_TABLE_ARRAYS 323 bindTextureObject<float>(tableSize, 4, t, modifiedExclusionForceArray, modifiedExclusionForceTableTex, (
float **)&modifiedExclusionForceTable);
324 bindTextureObject<float>(tableSize, 4, et, modifiedExclusionEnergyArray, modifiedExclusionEnergyTableTex, (
float **)&modifiedExclusionEnergyTable);
326 copyTable<float4>(tableSize, (float4*)t, modifiedExclusionForceTable);
327 copyTable<float4>(tableSize, (float4*)et, modifiedExclusionEnergyTable);
// Exclusion table entries: cubic spline coefficients from corr_full_table
// scaled by 6/4/2/1 — presumably pre-scaled for the kernel's evaluation
// scheme (e.g. derivative factors folded in); confirm against the kernel.
343 h_exclusionTable[i].x = 6.0*corr_full_table[4*i + 3];
344 h_exclusionTable[i].y = 4.0*corr_full_table[4*i + 2];
345 h_exclusionTable[i].z = 2.0*corr_full_table[4*i + 1];
346 h_exclusionTable[i].w = 1.0*corr_full_table[4*i + 0];
// Release host staging after device upload (upload calls elided, 347-352).
353 delete [] h_exclusionTable;
354 delete [] h_r2_table;
// Log once per physical node, as in buildVdwCoefTable.
358 if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
359 CkPrintf(
"Info: Updated CUDA force table with %d elements.\n", tableSize);
static BigReal * fast_table
static BigReal * scor_table
void allocate_device_T(void **pp, const size_t len, const size_t sizeofT)
static BigReal * vdwa_table
void bindTextureObject(int size, T *h_table, T *&d_table, cudaTextureObject_t &tex, bool update=false)
static BigReal * full_table
static BigReal * r2_table
void buildForceAndEnergyTable(const int tableSize, const double *r2list, const BigReal *src_table, const bool flip, const BigReal prefac, const int dst_stride, T *dst_force, T *dst_energy)
void NAMD_bug(const char *err_msg)
int get_table_dim() const
#define FORCE_ENERGY_TABLE_SIZE
const TableEntry * table_val(unsigned int i, unsigned int j) const
static BigReal * slow_table
void copy_HtoD_T(const void *h_array, void *d_array, size_t array_len, const size_t sizeofT)
static BigReal * vdwb_table
static const LJTable * ljTable
static BigReal * corr_table
void copyTable(int size, T *h_table, T *&d_table, bool update=false)
CudaNonbondedTables(const int deviceID)