ComputeBondedCUDAKernel Class Reference

#include <ComputeBondedCUDAKernel.h>

Classes

struct  BondedVirial
 

Public Types

enum  {
  energyIndex_BOND =0, energyIndex_ANGLE, energyIndex_DIHEDRAL, energyIndex_IMPROPER,
  energyIndex_ELECT, energyIndex_LJ, energyIndex_ELECT_SLOW, energyIndex_CROSSTERM,
  normalVirialIndex_XX, normalVirialIndex_XY, normalVirialIndex_XZ, normalVirialIndex_YX,
  normalVirialIndex_YY, normalVirialIndex_YZ, normalVirialIndex_ZX, normalVirialIndex_ZY,
  normalVirialIndex_ZZ, nbondVirialIndex_XX, nbondVirialIndex_XY, nbondVirialIndex_XZ,
  nbondVirialIndex_YX, nbondVirialIndex_YY, nbondVirialIndex_YZ, nbondVirialIndex_ZX,
  nbondVirialIndex_ZY, nbondVirialIndex_ZZ, slowVirialIndex_XX, slowVirialIndex_XY,
  slowVirialIndex_XZ, slowVirialIndex_YX, slowVirialIndex_YY, slowVirialIndex_YZ,
  slowVirialIndex_ZX, slowVirialIndex_ZY, slowVirialIndex_ZZ, amdDiheVirialIndex_XX,
  amdDiheVirialIndex_XY, amdDiheVirialIndex_XZ, amdDiheVirialIndex_YX, amdDiheVirialIndex_YY,
  amdDiheVirialIndex_YZ, amdDiheVirialIndex_ZX, amdDiheVirialIndex_ZY, amdDiheVirialIndex_ZZ,
  energies_virials_SIZE
}
 

Public Member Functions

 ComputeBondedCUDAKernel (int deviceID, CudaNonbondedTables &cudaNonbondedTables)
 
 ~ComputeBondedCUDAKernel ()
 
void update (const int numBondsIn, const int numAnglesIn, const int numDihedralsIn, const int numImpropersIn, const int numModifiedExclusionsIn, const int numExclusionsIn, const int numCrosstermsIn, const char *h_tupleData, cudaStream_t stream)
 
void setupBondValues (int numBondValues, CudaBondValue *h_bondValues)
 
void setupAngleValues (int numAngleValues, CudaAngleValue *h_angleValues)
 
void setupDihedralValues (int numDihedralValues, CudaDihedralValue *h_dihedralValues)
 
void setupImproperValues (int numImproperValues, CudaDihedralValue *h_improperValues)
 
void setupCrosstermValues (int numCrosstermValues, CudaCrosstermValue *h_crosstermValues)
 
int getForceStride (const int atomStorageSize)
 
int getForceSize (const int atomStorageSize)
 
int getAllForceSize (const int atomStorageSize, const bool doSlow)
 
void bondedForce (const double scale14, const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, const float r2_delta, const int r2_delta_expc, const float4 *h_xyzq, FORCE_TYPE *h_forces, double *h_energies, cudaStream_t stream)
 

Static Public Member Functions

static int warpAlign (const int n)
 

Detailed Description

Definition at line 54 of file ComputeBondedCUDAKernel.h.
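
Typical host-side use follows a setup/update/force pattern: the parameter tables are uploaded once, the packed tuple lists are refreshed whenever the tuple decomposition changes, and bondedForce() is launched every timestep. The sketch below is illustrative only; the host buffer names (h_bondValues, h_tupleData, h_xyzq, h_forces, h_energies_virials) and counts are assumptions standing in for whatever the calling compute object provides.

// Illustrative driver sketch (not the actual ComputeBondedCUDA host code).
ComputeBondedCUDAKernel bondedKernel(deviceID, cudaNonbondedTables);

// One-time, synchronous upload of the parameter tables.
bondedKernel.setupBondValues(numBondValues, h_bondValues);
bondedKernel.setupAngleValues(numAngleValues, h_angleValues);
bondedKernel.setupDihedralValues(numDihedralValues, h_dihedralValues);
bondedKernel.setupImproperValues(numImproperValues, h_improperValues);
bondedKernel.setupCrosstermValues(numCrosstermValues, h_crosstermValues);

// Whenever the tuple lists change, re-upload the packed tuple data.
bondedKernel.update(numBonds, numAngles, numDihedrals, numImpropers,
                    numModifiedExclusions, numExclusions, numCrossterms,
                    h_tupleData, stream);

// Every timestep: launch the bonded kernels and copy forces/energies back.
bondedKernel.bondedForce(scale14, atomStorageSize, doEnergy, doVirial, doSlow,
                         lata, latb, latc, cutoff2, r2_delta, r2_delta_expc,
                         h_xyzq, h_forces, h_energies_virials, stream);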

Member Enumeration Documentation

anonymous enum
Enumerator
energyIndex_BOND 
energyIndex_ANGLE 
energyIndex_DIHEDRAL 
energyIndex_IMPROPER 
energyIndex_ELECT 
energyIndex_LJ 
energyIndex_ELECT_SLOW 
energyIndex_CROSSTERM 
normalVirialIndex_XX 
normalVirialIndex_XY 
normalVirialIndex_XZ 
normalVirialIndex_YX 
normalVirialIndex_YY 
normalVirialIndex_YZ 
normalVirialIndex_ZX 
normalVirialIndex_ZY 
normalVirialIndex_ZZ 
nbondVirialIndex_XX 
nbondVirialIndex_XY 
nbondVirialIndex_XZ 
nbondVirialIndex_YX 
nbondVirialIndex_YY 
nbondVirialIndex_YZ 
nbondVirialIndex_ZX 
nbondVirialIndex_ZY 
nbondVirialIndex_ZZ 
slowVirialIndex_XX 
slowVirialIndex_XY 
slowVirialIndex_XZ 
slowVirialIndex_YX 
slowVirialIndex_YY 
slowVirialIndex_YZ 
slowVirialIndex_ZX 
slowVirialIndex_ZY 
slowVirialIndex_ZZ 
amdDiheVirialIndex_XX 
amdDiheVirialIndex_XY 
amdDiheVirialIndex_XZ 
amdDiheVirialIndex_YX 
amdDiheVirialIndex_YY 
amdDiheVirialIndex_YZ 
amdDiheVirialIndex_ZX 
amdDiheVirialIndex_ZY 
amdDiheVirialIndex_ZZ 
energies_virials_SIZE 

Definition at line 58 of file ComputeBondedCUDAKernel.h.
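
These enumerators index the flat energies_virials array (energies_virials_SIZE doubles per atomic bin) that bondedForce() reduces on the device and copies back to the host when doEnergy or doVirial is set. A hedged read-back sketch, assuming h_energies_virials is the host array passed to bondedForce() and the stream has already been synchronized:

// Illustrative only: pick out reduced terms after cudaStreamSynchronize(stream).
double bondEnergy = h_energies_virials[ComputeBondedCUDAKernel::energyIndex_BOND];
double dihedralE  = h_energies_virials[ComputeBondedCUDAKernel::energyIndex_DIHEDRAL];
double virialXX   = h_energies_virials[ComputeBondedCUDAKernel::normalVirialIndex_XX];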

Constructor & Destructor Documentation

ComputeBondedCUDAKernel::ComputeBondedCUDAKernel ( int  deviceID,
CudaNonbondedTables &  cudaNonbondedTables 
)

Definition at line 1826 of file ComputeBondedCUDAKernel.cu.

References ATOMIC_BINS, cudaCheck, and energies_virials_SIZE.

1826  :
1827 deviceID(deviceID), cudaNonbondedTables(cudaNonbondedTables) {
1828 
1829  cudaCheck(cudaSetDevice(deviceID));
1830 
1831  tupleData = NULL;
1832  tupleDataSize = 0;
1833 
1834  numBonds = 0;
1835  numAngles = 0;
1836  numDihedrals = 0;
1837  numImpropers = 0;
1838  numModifiedExclusions = 0;
1839  numExclusions = 0;
1840  numCrossterms = 0;
1841 
1842  bondValues = NULL;
1843  angleValues = NULL;
1844  dihedralValues = NULL;
1845  improperValues = NULL;
1846  crosstermValues = NULL;
1847 
1848  xyzq = NULL;
1849  xyzqSize = 0;
1850 
1851  forces = NULL;
1852  forcesSize = 0;
1853 
1854  forceList = NULL;
1855  forceListStarts = NULL;
1856  forceListNexts = NULL;
1857  forceListSize = 0;
1858  forceListStartsSize = 0;
1859  forceListNextsSize = 0;
1860  allocate_device<int>(&forceListCounter, 1);
1861 
1862  allocate_device<double>(&energies_virials, ATOMIC_BINS * energies_virials_SIZE);
1863 }
ComputeBondedCUDAKernel::~ComputeBondedCUDAKernel ( )

Definition at line 1868 of file ComputeBondedCUDAKernel.cu.

References cudaCheck.

1868  {
1869  cudaCheck(cudaSetDevice(deviceID));
1870 
1871  deallocate_device<double>(&energies_virials);
1872  // deallocate_device<BondedVirial>(&virial);
1873 
1874  if (tupleData != NULL) deallocate_device<char>(&tupleData);
1875  if (xyzq != NULL) deallocate_device<float4>(&xyzq);
1876  if (forces != NULL) deallocate_device<FORCE_TYPE>(&forces);
1877 
1878  if (forceList != NULL) deallocate_device<FORCE_TYPE>(&forceList);
1879  if (forceListCounter != NULL) deallocate_device<int>(&forceListCounter);
1880  if (forceListStarts != NULL) deallocate_device<int>(&forceListStarts);
1881  if (forceListNexts != NULL) deallocate_device<int>(&forceListNexts);
1882 
1883  if (bondValues != NULL) deallocate_device<CudaBondValue>(&bondValues);
1884  if (angleValues != NULL) deallocate_device<CudaAngleValue>(&angleValues);
1885  if (dihedralValues != NULL) deallocate_device<CudaDihedralValue>(&dihedralValues);
1886  if (improperValues != NULL) deallocate_device<CudaDihedralValue>(&improperValues);
1887  if (crosstermValues != NULL) deallocate_device<CudaCrosstermValue>(&crosstermValues);
1888 }

Member Function Documentation

void ComputeBondedCUDAKernel::bondedForce ( const double  scale14,
const int  atomStorageSize,
const bool  doEnergy,
const bool  doVirial,
const bool  doSlow,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
const float  r2_delta,
const int  r2_delta_expc,
const float4 *  h_xyzq,
FORCE_TYPE *  h_forces,
double *  h_energies,
cudaStream_t  stream 
)

Definition at line 2023 of file ComputeBondedCUDAKernel.cu.

References ATOMIC_BINS, BONDEDFORCESKERNEL_NUM_WARP, CALL, cudaCheck, deviceCUDA, energies_virials_SIZE, getAllForceSize(), getForceSize(), getForceStride(), DeviceCUDA::getMaxNumBlocks(), stream, and WARPSIZE.

2030  {
2031 
2032  int forceStorageSize = getAllForceSize(atomStorageSize, true);
2033  int forceCopySize = getAllForceSize(atomStorageSize, doSlow);
2034  int forceStride = getForceStride(atomStorageSize);
2035 
2036  int forceSize = getForceSize(atomStorageSize);
2037  int startNbond = forceSize;
2038  int startSlow = 2*forceSize;
2039 
2040  // Re-allocate coordinate and force arrays if necessary
2041  reallocate_device<float4>(&xyzq, &xyzqSize, atomStorageSize, 1.4f);
2042  reallocate_device<FORCE_TYPE>(&forces, &forcesSize, forceStorageSize, 1.4f);
2043 
2044 #if !defined(USE_BONDED_FORCE_ATOMIC_STORE)
2045  // function stores
2046  // numBonds bondForce 2
2047  // numAngles angleForce 3
2048  // numDihedrals diheForce 4
2049  // numImpropers diheForce 4
2050  // numExclusions exclusionForce 2
2051  // numCrossterms crosstermForce 8
2052  // numModifiedExclusions modifiedExclusionForce 4
2053  int listSize = 3 * (numBonds * 2 + numAngles * 3 + numDihedrals * 4 + numImpropers * 4 + numExclusions * 2 + numCrossterms * 8 + numModifiedExclusions * 4);
2054  reallocate_device<FORCE_TYPE>(&forceList, &forceListSize, listSize, 1.4f);
2055  reallocate_device<int>(&forceListNexts, &forceListNextsSize, listSize, 1.4f);
2056  reallocate_device<int>(&forceListStarts, &forceListStartsSize, 3 * atomStorageSize, 1.4f);
2057  int* forceListStartsNbond = forceListStarts + atomStorageSize;
2058  int* forceListStartsSlow = forceListStarts + 2 * atomStorageSize;
2059 
2060  clear_device_array<int>(forceListCounter, 1, stream);
2061  cudaCheck(cudaMemsetAsync(forceListStarts, -1, sizeof(int) * 3 * atomStorageSize, stream));
2062 #else
2063  int* forceListStartsNbond = NULL;
2064  int* forceListStartsSlow = NULL;
2065 #endif
2066 
2067  // Copy coordinates to device
2068  copy_HtoD<float4>(h_xyzq, xyzq, atomStorageSize, stream);
2069 
2070  // Clear force array
2071 #if defined(USE_BONDED_FORCE_ATOMIC_STORE)
2072  clear_device_array<FORCE_TYPE>(forces, forceCopySize, stream);
2073 #endif
2074  if (doEnergy || doVirial) {
2075  clear_device_array<double>(energies_virials, ATOMIC_BINS * energies_virials_SIZE, stream);
2076  }
2077 
2078  float one_scale14 = (float)(1.0 - scale14);
2079 
2080  // If doSlow = false, these exclusions are not calculated
2081  int numExclusionsDoSlow = doSlow ? numExclusions : 0;
2082 
2083  int nthread = BONDEDFORCESKERNEL_NUM_WARP * WARPSIZE;
2084 
2085  int numBondsTB = (numBonds + nthread - 1)/nthread;
2086  int numAnglesTB = (numAngles + nthread - 1)/nthread;
2087  int numDihedralsTB = (numDihedrals + nthread - 1)/nthread;
2088  int numImpropersTB = (numImpropers + nthread - 1)/nthread;
2089  int numExclusionsTB= (numExclusionsDoSlow + nthread - 1)/nthread;
2090  int numCrosstermsTB= (numCrossterms + nthread - 1)/nthread;
2091 
2092  int nblock = numBondsTB + numAnglesTB + numDihedralsTB + numImpropersTB +
2093  numExclusionsTB + numCrosstermsTB;
2094  int shmem_size = 0;
2095 
2096  // printf("%d %d %d %d %d %d nblock %d\n",
2097  // numBonds, numAngles, numDihedrals, numImpropers, numModifiedExclusions, numExclusions, nblock);
2098 
2099  int max_nblock = deviceCUDA->getMaxNumBlocks();
2100 
2101  int start = 0;
2102  while (start < nblock)
2103  {
2104  int nleft = nblock - start;
2105  int nblock_use = min(max_nblock, nleft);
2106 
2107 
2108 #ifdef NAMD_HIP
2109 #define NONBONDEDTABLES cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable()
2110 #else
2111 #define NONBONDEDTABLES cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
2112 cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex()
2113 #endif
2114 
2115 #ifdef NAMD_HIP
2116 #define CALL(DOENERGY, DOVIRIAL) \
2117  bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> <<< nblock_use, nthread, shmem_size, stream >>> \
2118  (start, numBonds, bonds, bondValues, \
2119  numAngles, angles, angleValues, \
2120  numDihedrals, dihedrals, dihedralValues, \
2121  numImpropers, impropers, improperValues, \
2122  numExclusionsDoSlow, exclusions, \
2123  numCrossterms, crossterms, crosstermValues, \
2124  cutoff2, \
2125  r2_delta, r2_delta_expc, \
2126  cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable() , \
2127  xyzq, forceStride, \
2128  lata, latb, latc, \
2129  forces, &forces[startSlow], \
2130  forceList, forceListCounter, forceListStarts, forceListStartsSlow, forceListNexts, \
2131  energies_virials);
2132 #else
2133 #define CALL(DOENERGY, DOVIRIAL) \
2134  bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> <<< nblock_use, nthread, shmem_size, stream >>> \
2135  (start, numBonds, bonds, bondValues, \
2136  numAngles, angles, angleValues, \
2137  numDihedrals, dihedrals, dihedralValues, \
2138  numImpropers, impropers, improperValues, \
2139  numExclusionsDoSlow, exclusions, \
2140  numCrossterms, crossterms, crosstermValues, \
2141  cutoff2, \
2142  r2_delta, r2_delta_expc, \
2143  cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable() , \
2144  cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex() , \
2145  xyzq, forceStride, \
2146  lata, latb, latc, \
2147  forces, &forces[startSlow], \
2148  forceList, forceListCounter, forceListStarts, forceListStartsSlow, forceListNexts, \
2149  energies_virials);
2150 #endif
2151 
2152  if (!doEnergy && !doVirial) CALL(0, 0);
2153  if (!doEnergy && doVirial) CALL(0, 1);
2154  if (doEnergy && !doVirial) CALL(1, 0);
2155  if (doEnergy && doVirial) CALL(1, 1);
2156 
2157 #undef CALL
2158  cudaCheck(cudaGetLastError());
2159 
2160  start += nblock_use;
2161  }
2162 
2164  nblock = (numModifiedExclusions + nthread - 1)/nthread;
2165 
2166  bool doElect = (one_scale14 == 0.0f) ? false : true;
2167 
2168  start = 0;
2169  while (start < nblock)
2170  {
2171  int nleft = nblock - start;
2172  int nblock_use = min(max_nblock, nleft);
2173 
2174 #define CALL(DOENERGY, DOVIRIAL, DOELECT) \
2175  modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
2176  <<< nblock_use, nthread, shmem_size, stream >>> (\
2177  start, numModifiedExclusions, modifiedExclusions, \
2178  doSlow, one_scale14, cutoff2, \
2179  cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
2180  cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
2181  cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
2182  xyzq, forceStride, lata, latb, latc, \
2183  &forces[startNbond], &forces[startSlow], \
2184  forceList, forceListCounter, forceListStartsNbond, forceListStartsSlow, forceListNexts, \
2185  energies_virials);
2186 
2187 
2188  if (!doEnergy && !doVirial && !doElect) CALL(0, 0, 0);
2189  if (!doEnergy && doVirial && !doElect) CALL(0, 1, 0);
2190  if (doEnergy && !doVirial && !doElect) CALL(1, 0, 0);
2191  if (doEnergy && doVirial && !doElect) CALL(1, 1, 0);
2192 
2193  if (!doEnergy && !doVirial && doElect) CALL(0, 0, 1);
2194  if (!doEnergy && doVirial && doElect) CALL(0, 1, 1);
2195  if (doEnergy && !doVirial && doElect) CALL(1, 0, 1);
2196  if (doEnergy && doVirial && doElect) CALL(1, 1, 1);
2197 
2198 #undef CALL
2199  cudaCheck(cudaGetLastError());
2200 
2201  start += nblock_use;
2202  }
2203 #if !defined(USE_BONDED_FORCE_ATOMIC_STORE)
2205  nblock = (atomStorageSize + nthread - 1)/nthread;
2206 
2207  start = 0;
2208  while (start < nblock)
2209  {
2210  int nleft = nblock - start;
2211  int nblock_use = min(max_nblock, nleft);
2212 
2213  // cudaCheck(hipDeviceSynchronize());
2214  // auto t0 = std::chrono::high_resolution_clock::now();
2215 
2216  gatherBondedForcesKernel<FORCE_TYPE><<<nblock_use, nthread, 0, stream>>>(
2217  start, atomStorageSize, forceStride,
2218  forceList, forceListStarts, forceListNexts,
2219  forces);
2220  gatherBondedForcesKernel<FORCE_TYPE><<<nblock_use, nthread, 0, stream>>>(
2221  start, atomStorageSize, forceStride,
2222  forceList, forceListStartsNbond, forceListNexts,
2223  &forces[startNbond]);
2224  if (doSlow) {
2225  gatherBondedForcesKernel<FORCE_TYPE><<<nblock_use, nthread, 0, stream>>>(
2226  start, atomStorageSize, forceStride,
2227  forceList, forceListStartsSlow, forceListNexts,
2228  &forces[startSlow]);
2229  }
2230  cudaCheck(cudaGetLastError());
2231 
2232  // cudaCheck(hipStreamSynchronize(stream));
2233  // auto t1 = std::chrono::high_resolution_clock::now();
2234  // std::chrono::duration<double> diff1 = t1 - t0;
2235  // std::cout << "gatherBondedForcesKernel";
2236  // std::cout << " " << std::setprecision(3) << diff1.count() * 1e3 << " ms" << std::endl;
2237 
2238  start += nblock_use;
2239  }
2240 #endif
2241 
2242  copy_DtoH<FORCE_TYPE>(forces, h_forces, forceCopySize, stream);
2243  if (doEnergy || doVirial) {
2244  if (ATOMIC_BINS > 1) {
2245  // Reduce energies_virials[ATOMIC_BINS][energies_virials_SIZE] in-place (results are in energies_virials[0])
2246  reduceBondedBinsKernel<<<energies_virials_SIZE, ATOMIC_BINS, 0, stream>>>(energies_virials);
2247  }
2248  copy_DtoH<double>(energies_virials, h_energies_virials, energies_virials_SIZE, stream);
2249  }
2250 
2251 }
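The host buffers must be sized to match what the kernel copies back: h_forces receives getAllForceSize(atomStorageSize, doSlow) elements, laid out as up to three consecutive blocks of getForceSize(atomStorageSize) entries (normal, nonbonded-modified, slow), and the energies/virials buffer receives energies_virials_SIZE doubles. A hedged sizing sketch; the use of std::vector is illustrative, as NAMD normally uses pinned host allocations:

// Illustrative host-side sizing; allocation scheme is an assumption.
int forceCopySize = bondedKernel.getAllForceSize(atomStorageSize, doSlow);
std::vector<FORCE_TYPE> h_forces(forceCopySize);
std::vector<double> h_energies_virials(ComputeBondedCUDAKernel::energies_virials_SIZE);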
int ComputeBondedCUDAKernel::getAllForceSize ( const int  atomStorageSize,
const bool  doSlow 
)

Definition at line 2003 of file ComputeBondedCUDAKernel.cu.

References getForceSize().

Referenced by bondedForce().

2003  {
2004 
2005  int forceSize = getForceSize(atomStorageSize);
2006 
2007  if (numModifiedExclusions > 0 || numExclusions > 0) {
2008  if (doSlow) {
2009  // All three force arrays [normal, nbond, slow]
2010  forceSize *= 3;
2011  } else {
2012  // Two force arrays [normal, nbond]
2013  forceSize *= 2;
2014  }
2015  }
2016 
2017  return forceSize;
2018 }
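For example, assuming USE_STRIDED_FORCE is not defined (so getForceSize() returns 3*atomStorageSize): with atomStorageSize = 1000 and exclusions of either kind present, getAllForceSize(1000, true) returns 9000 and getAllForceSize(1000, false) returns 6000; with no exclusions at all, both return 3000.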
int ComputeBondedCUDAKernel::getForceSize ( const int  atomStorageSize)

Definition at line 1992 of file ComputeBondedCUDAKernel.cu.

References getForceStride().

Referenced by bondedForce(), and getAllForceSize().

1992  {
1993 #ifdef USE_STRIDED_FORCE
1994  return (3*getForceStride(atomStorageSize));
1995 #else
1996  return (3*atomStorageSize);
1997 #endif
1998 }
int ComputeBondedCUDAKernel::getForceStride ( const int  atomStorageSize)

Definition at line 1980 of file ComputeBondedCUDAKernel.cu.

References FORCE_TYPE.

Referenced by bondedForce(), and getForceSize().

1980  {
1981 #ifdef USE_STRIDED_FORCE
1982  // Align stride to 256 bytes
1983  return ((atomStorageSize*sizeof(FORCE_TYPE) - 1)/256 + 1)*256/sizeof(FORCE_TYPE);
1984 #else
1985  return 1;
1986 #endif
1987 }
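With USE_STRIDED_FORCE defined, the stride is the smallest multiple of 256 bytes that holds atomStorageSize force components, expressed in elements. For example, assuming sizeof(FORCE_TYPE) == 8 (an assumption; FORCE_TYPE is a build-time macro), atomStorageSize = 1000 gives ((1000*8 - 1)/256 + 1)*256/8 = 32*256/8 = 1024, so each of the x, y, z component arrays is padded from 1000 to 1024 elements.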
void ComputeBondedCUDAKernel::setupAngleValues ( int  numAngleValues,
CudaAngleValue *  h_angleValues 
)

Definition at line 1895 of file ComputeBondedCUDAKernel.cu.

1895  {
1896  allocate_device<CudaAngleValue>(&angleValues, numAngleValues);
1897  copy_HtoD_sync<CudaAngleValue>(h_angleValues, angleValues, numAngleValues);
1898 }
void ComputeBondedCUDAKernel::setupBondValues ( int  numBondValues,
CudaBondValue *  h_bondValues 
)

Definition at line 1890 of file ComputeBondedCUDAKernel.cu.

1890  {
1891  allocate_device<CudaBondValue>(&bondValues, numBondValues);
1892  copy_HtoD_sync<CudaBondValue>(h_bondValues, bondValues, numBondValues);
1893 }
void ComputeBondedCUDAKernel::setupCrosstermValues ( int  numCrosstermValues,
CudaCrosstermValue *  h_crosstermValues 
)

Definition at line 1910 of file ComputeBondedCUDAKernel.cu.

1910  {
1911  allocate_device<CudaCrosstermValue>(&crosstermValues, numCrosstermValues);
1912  copy_HtoD_sync<CudaCrosstermValue>(h_crosstermValues, crosstermValues, numCrosstermValues);
1913 }
void ComputeBondedCUDAKernel::setupDihedralValues ( int  numDihedralValues,
CudaDihedralValue *  h_dihedralValues 
)

Definition at line 1900 of file ComputeBondedCUDAKernel.cu.

1900  {
1901  allocate_device<CudaDihedralValue>(&dihedralValues, numDihedralValues);
1902  copy_HtoD_sync<CudaDihedralValue>(h_dihedralValues, dihedralValues, numDihedralValues);
1903 }
void ComputeBondedCUDAKernel::setupImproperValues ( int  numImproperValues,
CudaDihedralValue *  h_improperValues 
)

Definition at line 1905 of file ComputeBondedCUDAKernel.cu.

1905  {
1906  allocate_device<CudaDihedralValue>(&improperValues, numImproperValues);
1907  copy_HtoD_sync<CudaDihedralValue>(h_improperValues, improperValues, numImproperValues);
1908 }
void ComputeBondedCUDAKernel::update ( const int  numBondsIn,
const int  numAnglesIn,
const int  numDihedralsIn,
const int  numImpropersIn,
const int  numModifiedExclusionsIn,
const int  numExclusionsIn,
const int  numCrosstermsIn,
const char *  h_tupleData,
cudaStream_t  stream 
)

Definition at line 1918 of file ComputeBondedCUDAKernel.cu.

References stream, and warpAlign().

1927  {
1928 
1929  numBonds = numBondsIn;
1930  numAngles = numAnglesIn;
1931  numDihedrals = numDihedralsIn;
1932  numImpropers = numImpropersIn;
1933  numModifiedExclusions = numModifiedExclusionsIn;
1934  numExclusions = numExclusionsIn;
1935  numCrossterms = numCrosstermsIn;
1936 
1937  int numBondsWA = warpAlign(numBonds);
1938  int numAnglesWA = warpAlign(numAngles);
1939  int numDihedralsWA = warpAlign(numDihedrals);
1940  int numImpropersWA = warpAlign(numImpropers);
1941  int numModifiedExclusionsWA = warpAlign(numModifiedExclusions);
1942  int numExclusionsWA = warpAlign(numExclusions);
1943  int numCrosstermsWA = warpAlign(numCrossterms);
1944 
1945  int sizeTot = numBondsWA*sizeof(CudaBond) + numAnglesWA*sizeof(CudaAngle) +
1946  numDihedralsWA*sizeof(CudaDihedral) + numImpropersWA*sizeof(CudaDihedral) +
1947  numModifiedExclusionsWA*sizeof(CudaExclusion) + numExclusionsWA*sizeof(CudaExclusion) +
1948  numCrosstermsWA*sizeof(CudaCrossterm);
1949 
1950  reallocate_device<char>(&tupleData, &tupleDataSize, sizeTot, 1.4f);
1951  copy_HtoD<char>(h_tupleData, tupleData, sizeTot, stream);
1952 
1953  // Setup pointers
1954  int pos = 0;
1955  bonds = (CudaBond *)&tupleData[pos];
1956  pos += numBondsWA*sizeof(CudaBond);
1957 
1958  angles = (CudaAngle* )&tupleData[pos];
1959  pos += numAnglesWA*sizeof(CudaAngle);
1960 
1961  dihedrals = (CudaDihedral* )&tupleData[pos];
1962  pos += numDihedralsWA*sizeof(CudaDihedral);
1963 
1964  impropers = (CudaDihedral* )&tupleData[pos];
1965  pos += numImpropersWA*sizeof(CudaDihedral);
1966 
1967  modifiedExclusions = (CudaExclusion* )&tupleData[pos];
1968  pos += numModifiedExclusionsWA*sizeof(CudaExclusion);
1969 
1970  exclusions = (CudaExclusion* )&tupleData[pos];
1971  pos += numExclusionsWA*sizeof(CudaExclusion);
1972 
1973  crossterms = (CudaCrossterm* )&tupleData[pos];
1974  pos += numCrosstermsWA*sizeof(CudaCrossterm);
1975 }
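
The h_tupleData buffer passed to update() must be packed in the same order and with the same warp-aligned padding that the code above uses to set up the device-side pointers. A hedged sketch of the matching host-side offset computation follows; the actual packing is done elsewhere in NAMD, and this only mirrors the layout shown above:

// Offsets into h_tupleData, mirroring update(); warpAlign() pads each count to a full warp.
size_t pos = 0;
size_t bondsOffset              = pos; pos += ComputeBondedCUDAKernel::warpAlign(numBonds) * sizeof(CudaBond);
size_t anglesOffset             = pos; pos += ComputeBondedCUDAKernel::warpAlign(numAngles) * sizeof(CudaAngle);
size_t dihedralsOffset          = pos; pos += ComputeBondedCUDAKernel::warpAlign(numDihedrals) * sizeof(CudaDihedral);
size_t impropersOffset          = pos; pos += ComputeBondedCUDAKernel::warpAlign(numImpropers) * sizeof(CudaDihedral);
size_t modifiedExclusionsOffset = pos; pos += ComputeBondedCUDAKernel::warpAlign(numModifiedExclusions) * sizeof(CudaExclusion);
size_t exclusionsOffset         = pos; pos += ComputeBondedCUDAKernel::warpAlign(numExclusions) * sizeof(CudaExclusion);
size_t crosstermsOffset         = pos; pos += ComputeBondedCUDAKernel::warpAlign(numCrossterms) * sizeof(CudaCrossterm);
size_t sizeTot = pos;  // total number of bytes update() expects in h_tupleData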
static int ComputeBondedCUDAKernel::warpAlign ( const int  n)
inline static

Definition at line 161 of file ComputeBondedCUDAKernel.h.

References WARPSIZE.

Referenced by update().

161 {return ((n + WARPSIZE - 1)/WARPSIZE)*WARPSIZE;}
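
For example, with WARPSIZE = 32, warpAlign(70) = ((70 + 31)/32)*32 = 96, while warpAlign(64) stays 64; the rounding only pads counts that do not already fill a whole warp.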

The documentation for this class was generated from the following files:

ComputeBondedCUDAKernel.h
ComputeBondedCUDAKernel.cu