CudaNonbondedTables.C

Go to the documentation of this file.
00001 #include "charm++.h"
00002 #include "NamdTypes.h"
00003 #include "ComputeNonbondedUtil.h"
00004 #include "LJTable.h"
00005 #include "CudaUtils.h"
00006 #include "CudaNonbondedTables.h"
00007 
00008 #ifdef NAMD_CUDA
00009 
00010 CudaNonbondedTables::CudaNonbondedTables(const int deviceID) : deviceID(deviceID) {
00011 
00012   vdwCoefTable = NULL;
00013   vdwCoefTableWidth = 0;
00014   vdwCoefTableTex = 0;  
00015 
00016   forceTableTex = 0;
00017   energyTableTex = 0;
00018 
00019   exclusionTable = NULL;
00020   r2_table = NULL;
00021   exclusionTableTex = 0;
00022   r2_table_tex = 0;
00023 
00024   modifiedExclusionForceTableTex = 0;
00025   modifiedExclusionEnergyTableTex = 0;
00026 
00027   cudaCheck(cudaSetDevice(deviceID));
00028   buildForceAndEnergyTables(4096);
00029   buildVdwCoefTable();
00030 }
00031 
00032 CudaNonbondedTables::~CudaNonbondedTables() {
00033   cudaCheck(cudaSetDevice(deviceID));
00034   if (vdwCoefTable != NULL) deallocate_device<float2>(&vdwCoefTable);
00035   if (exclusionTable != NULL) deallocate_device<float4>(&exclusionTable);
00036   if (r2_table != NULL) deallocate_device<float>(&r2_table);
00037 
00038   cudaCheck(cudaFreeArray(forceArray));
00039   cudaCheck(cudaFreeArray(energyArray));
00040 
00041   cudaCheck(cudaDestroyTextureObject(vdwCoefTableTex));
00042   cudaCheck(cudaDestroyTextureObject(forceTableTex));
00043   cudaCheck(cudaDestroyTextureObject(energyTableTex));
00044 
00045   cudaCheck(cudaDestroyTextureObject(exclusionTableTex));
00046   cudaCheck(cudaDestroyTextureObject(r2_table_tex));
00047 }
00048 
00049 template <typename T>
00050 void bindTextureObject(int size, T* h_table, T*& d_table, cudaTextureObject_t& tex) {
00051   // Copy to device
00052   allocate_device<T>(&d_table, size);
00053   copy_HtoD_sync<T>(h_table, d_table, size);
00054 
00055   // Create texture object
00056   cudaResourceDesc resDesc;
00057   memset(&resDesc, 0, sizeof(resDesc));
00058   resDesc.resType = cudaResourceTypeLinear;
00059   resDesc.res.linear.devPtr = d_table;
00060   resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
00061   resDesc.res.linear.desc.x = sizeof(float)*8; // bits per channel
00062   if (sizeof(T) >= sizeof(float)*2) resDesc.res.linear.desc.y = sizeof(float)*8; // bits per channel
00063   if (sizeof(T) >= sizeof(float)*3) resDesc.res.linear.desc.z = sizeof(float)*8; // bits per channel
00064   if (sizeof(T) >= sizeof(float)*4) resDesc.res.linear.desc.w = sizeof(float)*8; // bits per channel
00065   resDesc.res.linear.sizeInBytes = size*sizeof(T);
00066 
00067   cudaTextureDesc texDesc;
00068   memset(&texDesc, 0, sizeof(texDesc));
00069   texDesc.readMode = cudaReadModeElementType;
00070 
00071   cudaCheck(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
00072 }
00073 
00074 //
00075 // Builds the VdW Lennard-Jones coefficient table. Swiped from ComputeNonbondedCUDA.C
00076 // NOTE: Can only be called once
00077 //
00078 void CudaNonbondedTables::buildVdwCoefTable() {
00079 
00080   const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00081   const int dim = ljTable->get_table_dim();
00082 
00083   // round dim up to odd multiple of 16
00084   int tsize = (((dim+16+31)/32)*32)-16;
00085   if ( tsize < dim ) NAMD_bug("CudaNonbondedTables::buildVdwCoefTable bad tsize");
00086 
00087   float2 *h_vdwCoefTable = new float2[tsize*tsize];
00088   float2 *h_exclusionVdwCoefTable = new float2[tsize*tsize];
00089   float2 *row = h_vdwCoefTable;
00090   float2 *exclusionRow = h_exclusionVdwCoefTable;
00091   for ( int i=0; i<dim; ++i, row += tsize, exclusionRow += tsize ) {
00092     for ( int j=0; j<dim; ++j ) {
00093       const LJTable::TableEntry *e = ljTable->table_val(i,j);
00094       row[j].x = e->A * ComputeNonbondedUtil::scaling;
00095       row[j].y = e->B * ComputeNonbondedUtil::scaling;
00096       exclusionRow[j].x = ((e+1)->A - e->A)* ComputeNonbondedUtil::scaling;
00097       exclusionRow[j].y = ((e+1)->B - e->B)* ComputeNonbondedUtil::scaling;
00098     }
00099   }
00100 
00101   vdwCoefTableWidth = tsize;
00102 
00103   bindTextureObject<float2>(tsize*tsize, h_vdwCoefTable, vdwCoefTable, vdwCoefTableTex);
00104   bindTextureObject<float2>(tsize*tsize, h_exclusionVdwCoefTable, exclusionVdwCoefTable, exclusionVdwCoefTableTex);
00105 
00106   delete [] h_vdwCoefTable;
00107   delete [] h_exclusionVdwCoefTable;
00108 
00109   if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
00110     CkPrintf("Info: Updated CUDA LJ table with %d x %d elements.\n", dim, dim);
00111   }
00112 }
00113 
00114 //
00115 // Builds force and energy tables. Swiped from ComputeNonbondedCUDA.C
00116 //
00117 template <typename T>
00118 void buildForceAndEnergyTable(const int tableSize, const double* r2list, const BigReal* src_table, const bool flip,
00119   const BigReal prefac, const int dst_stride, T* dst_force, T* dst_energy) {
00120 
00121   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00122   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00123   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00124 
00125   union { double f; int32 i[2]; } byte_order_test;
00126   byte_order_test.f = 1.0;  // should occupy high-order bits only
00127   int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00128 
00129   for ( int i=1; i<tableSize; ++i ) {
00130     double r = ((double) tableSize) / ( (double) i + 0.5 );
00131     int table_i = (r2iilist[2*i] >> 14) + r2_delta_expc;  // table_i >= 0
00132 
00133     if ( r > ComputeNonbondedUtil::cutoff ) {
00134       dst_force[i*dst_stride] = 0.;
00135       dst_energy[i*dst_stride] = 0.;
00136       continue;
00137     }
00138 
00139     BigReal diffa = r2list[i] - ComputeNonbondedUtil::r2_table[table_i];
00140 
00141     BigReal table_a, table_b, table_c, table_d;
00142     if (flip) {
00143       table_a = src_table[4*table_i+3];
00144       table_b = src_table[4*table_i+2];
00145       table_c = src_table[4*table_i+1];
00146       table_d = src_table[4*table_i];
00147     } else {
00148       table_a = src_table[4*table_i];
00149       table_b = src_table[4*table_i+1];
00150       table_c = src_table[4*table_i+2];
00151       table_d = src_table[4*table_i+3];
00152     }
00153 
00154     BigReal grad = ( 3. * table_d * diffa + 2. * table_c ) * diffa + table_b;
00155     dst_force[i*dst_stride] = prefac * 2. * grad;
00156     BigReal ener = table_a + diffa * ( ( table_d * diffa + table_c ) * diffa + table_b);
00157     dst_energy[i*dst_stride] = prefac * ener;
00158   }
00159 
00160   dst_force[0] = 0.;
00161   dst_energy[0] = dst_energy[1*dst_stride];
00162 }
00163 
00164 template <typename T>
00165 void bindTextureObject(int tableSize, int tableWidth, T* h_table,
00166   cudaArray_t& array, cudaTextureObject_t& tableTex) {
00167 
00168   cudaChannelFormatDesc desc;
00169   memset(&desc, 0, sizeof(desc));
00170   desc.x = sizeof(T)*8;
00171   if (tableWidth >= 2) desc.y = sizeof(T)*8;
00172   if (tableWidth >= 3) desc.z = sizeof(T)*8;
00173   if (tableWidth >= 4) desc.w = sizeof(T)*8;
00174   desc.f = cudaChannelFormatKindFloat;
00175   cudaCheck(cudaMallocArray(&array, &desc, tableSize, 1));
00176   cudaCheck(cudaMemcpyToArray(array, 0, 0, h_table, tableSize*sizeof(T)*tableWidth, cudaMemcpyHostToDevice));
00177 
00178   cudaResourceDesc resDesc;
00179   memset(&resDesc, 0, sizeof(resDesc));
00180   resDesc.resType = cudaResourceTypeArray;
00181   resDesc.res.array.array = array;
00182 
00183   cudaTextureDesc texDesc;
00184   memset(&texDesc, 0, sizeof(texDesc));
00185   texDesc.addressMode[0] = cudaAddressModeClamp;
00186   texDesc.filterMode = cudaFilterModeLinear;
00187   texDesc.normalizedCoords = 1;
00188 
00189   cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
00190 }
00191 
00192 void CudaNonbondedTables::buildForceAndEnergyTables(int tableSize) {
00193 
00194   // Build r2list
00195   const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00196   const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00197   const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00198 
00199   double* r2list = new double[tableSize];  // double to match cpu code
00200   for ( int i=1; i<tableSize; ++i ) {
00201     double r = ((double) tableSize) / ( (double) i + 0.5 );
00202     r2list[i] = r*r + r2_delta;
00203   }
00204 
00205   // Non-bonded
00206   {
00207     float4* t = new float4[tableSize];
00208     float4* et = new float4[tableSize];
00209 
00210     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::fast_table, false, 1.0,
00211       4, &t[0].x, &et[0].x);
00212 
00213     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwb_table, false, -1.0,
00214       4, &t[0].y, &et[0].y);
00215 
00216     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwa_table, false, 1.0,
00217       4, &t[0].z, &et[0].z);
00218 
00219     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::scor_table, false, 1.0,
00220       4, &t[0].w, &et[0].w);
00221 
00222     bindTextureObject<float>(tableSize, 4, (float *)t, forceArray, forceTableTex);
00223     bindTextureObject<float>(tableSize, 4, (float *)et, energyArray, energyTableTex);
00224     delete [] t;
00225     delete [] et;
00226   }
00227 
00228   // Modified exclusions
00229   {
00230     float4* t = new float4[tableSize];
00231     float4* et = new float4[tableSize];
00232 
00233     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::fast_table, false, -1.0,
00234       4, &t[0].x, &et[0].x);
00235 
00236     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwb_table, false, -1.0,
00237       4, &t[0].y, &et[0].y);
00238 
00239     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwa_table, false, 1.0,
00240       4, &t[0].z, &et[0].z);
00241 
00242     buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::slow_table, true, -1.0,
00243       4, &t[0].w, &et[0].w);
00244     // delete [] flip_slow_table;
00245 
00246     bindTextureObject<float>(tableSize, 4, (float *)t, modifiedExclusionForceArray, modifiedExclusionForceTableTex);
00247     bindTextureObject<float>(tableSize, 4, (float *)et, modifiedExclusionEnergyArray, modifiedExclusionEnergyTableTex);
00248     delete [] t;
00249     delete [] et;
00250   }
00251 
00252   // Exclusions
00253   {
00254     BigReal* corr_full_table = new BigReal[ComputeNonbondedUtil::table_length*4];
00255     for (int i=0;i < ComputeNonbondedUtil::table_length*4;i++) {
00256       corr_full_table[i] =  ComputeNonbondedUtil::corr_table[i] - ComputeNonbondedUtil::full_table[i];
00257     }
00258 
00259     float4* h_exclusionTable = new float4[ComputeNonbondedUtil::table_length];
00260     float* h_r2_table = new float[ComputeNonbondedUtil::table_length];
00261     for (int i=0;i < ComputeNonbondedUtil::table_length;i++) {
00262       h_exclusionTable[i].x = 6.0*corr_full_table[4*i + 3];
00263       h_exclusionTable[i].y = 4.0*corr_full_table[4*i + 2];
00264       h_exclusionTable[i].z = 2.0*corr_full_table[4*i + 1];
00265       h_exclusionTable[i].w = 1.0*corr_full_table[4*i + 0];
00266       h_r2_table[i] = ComputeNonbondedUtil::r2_table[i];
00267     }
00268 
00269     bindTextureObject<float>(ComputeNonbondedUtil::table_length, h_r2_table, r2_table, r2_table_tex);
00270     bindTextureObject<float4>(ComputeNonbondedUtil::table_length, h_exclusionTable, exclusionTable, exclusionTableTex);
00271 
00272     delete [] h_exclusionTable;
00273     delete [] h_r2_table;
00274 
00275   }
00276 
00277   if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
00278     CkPrintf("Info: Updated CUDA force table with %d elements.\n", tableSize);
00279   }
00280 
00281   delete [] r2list;
00282 }
00283 
00284 #endif // NAMD_CUDA

Generated on Thu Sep 21 01:17:12 2017 for NAMD by  doxygen 1.4.7