NAMD
CudaPmeOneDevice Class Reference

#include <CudaPmeSolverUtil.h>

Classes

struct  EnergyVirial
 

Public Member Functions

 CudaPmeOneDevice (PmeGrid pmeGrid_, int deviceID_, int deviceIndex_)
 
 ~CudaPmeOneDevice ()
 
void compute (const Lattice &lattice, int doEnergyVirial, int step)
 
void finishReduction (bool doEnergyVirial)
 
int getShiftedGrid (const double x, const int grid)
 
int computeSharedMemoryPatchLevelSpreadCharge (const int numThreads, const int3 patchGridDim, const int order)
 
int computeSharedMemoryPatchLevelGatherForce (const int numThreads, const int3 patchGridDim, const int order)
 
void checkPatchLevelSimParamCompatibility (const int order, const bool periodicY, const bool periodicZ)
 
void checkPatchLevelDeviceCompatibility ()
 
void checkPatchLevelLatticeCompatibilityAndComputeOffsets (const Lattice &lattice, const int numPatches, const CudaLocalRecord *localRecords, double3 *patchMin, double3 *patchMax, double3 *awayDists)
 

Public Attributes

PmeGrid pmeGrid
 
int deviceID
 
int deviceIndex
 
cudaStream_t stream
 
int natoms
 
size_t num_used_grids
 
float4 * d_atoms
 
int * d_partition
 
float3 * d_forces
 
float * d_scaling_factors
 
cudaTextureObject_t * gridTexObjArrays
 
float * d_grids
 
float2 * d_trans
 
size_t gridsize
 
size_t transize
 
cufftHandle * forwardPlans
 
cufftHandle * backwardPlans
 
float * d_bm1
 
float * d_bm2
 
float * d_bm3
 
double kappa
 
EnergyVirial * d_energyVirials
 
EnergyVirial * h_energyVirials
 
bool self_energy_alch_first_time
 
bool force_scaling_alch_first_time
 
double * d_selfEnergy
 
double * d_selfEnergy_FEP
 
double * d_selfEnergy_TI_1
 
double * d_selfEnergy_TI_2
 
double selfEnergy
 
double selfEnergy_FEP
 
double selfEnergy_TI_1
 
double selfEnergy_TI_2
 
int m_step
 
PatchLevelPmeData patchLevelPmeData
 
Lattice currentLattice
 

Detailed Description

PME for the single-GPU case, where data persists on the GPU. Calls the real-space, FFT, and k-space parts; receives atom and charge data as a float4 * allocated on the device; returns force data as a float3 * allocated on the device; and returns the energy and virial allocated on the device.

Definition at line 209 of file CudaPmeSolverUtil.h.
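
A minimal usage sketch (not taken from the NAMD sources; the surrounding setup, the energy-output policy, and the device-side fill of d_atoms are assumptions for illustration) showing how a caller might drive this class each timestep:

    PmeGrid grid;            // K1, K2, K3, order, ... filled in by the caller
    Lattice lattice;         // current periodic cell
    CudaPmeOneDevice pme(grid, /* deviceID */ 0, /* deviceIndex */ 0);

    for (int step = firstStep; step < lastStep; ++step) {
      // d_atoms is expected to already hold float4 positions/charges on the device,
      // e.g. written there by the patch/atom-migration code through PatchData.
      const int doEnergyVirial = (step % energyOutputFreq == 0);  // assumed output policy
      pme.compute(lattice, doEnergyVirial, step);  // spread, FFTs, scalar sum, gather
      pme.finishReduction(doEnergyVirial);         // sync stream, accumulate energy/virial
      // slow (reciprocal-space) forces are now available in pme.d_forces on the device
    }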

Constructor & Destructor Documentation

◆ CudaPmeOneDevice()

CudaPmeOneDevice::CudaPmeOneDevice ( PmeGrid  pmeGrid_,
int  deviceID_,
int  deviceIndex_ 
)

Definition at line 1300 of file CudaPmeSolverUtil.C.

References SimParameters::alchFepOn, SimParameters::alchGetNumOfPMEGrids(), SimParameters::alchOn, SimParameters::alchThermIntOn, backwardPlans, checkPatchLevelDeviceCompatibility(), checkPatchLevelSimParamCompatibility(), compute_b_moduli(), cudaCheck, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_partition, d_scaling_factors, d_selfEnergy, d_selfEnergy_FEP, d_selfEnergy_TI_1, d_selfEnergy_TI_2, d_trans, deviceID, deviceIndex, forwardPlans, gridsize, gridTexObjArrays, h_energyVirials, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::molecule, natoms, num_used_grids, Molecule::numAtoms, Node::Object(), PmeGrid::order, order, pmeGrid, Node::simParameters, stream, and transize.

1304  :
1305  pmeGrid(pmeGrid_), deviceID(deviceID_), deviceIndex(deviceIndex_),
1306  natoms(0), d_atoms(0), d_forces(0),
1307  d_grids(0), gridsize(0),
1308  d_trans(0), transize(0),
1309  d_bm1(0), d_bm2(0), d_bm3(0),
1314 {
1315 // fprintf(stderr, "CudaPmeOneDevice constructor START ******************************************\n");
1316  const SimParameters& sim_params = *(Node::Object()->simParameters);
1318  // Determine how many grids we need for the alchemical route
1319  if (sim_params.alchOn) {
1320  num_used_grids = sim_params.alchGetNumOfPMEGrids();
1321  } else {
1322  num_used_grids = 1;
1323  }
1324  cudaCheck(cudaSetDevice(deviceID));
1325 
1326  // Check to see if the simulation and device is compatible with patch-level kernels. The results
1327  // will be worked in the PatchLevelPmeData field
1328  checkPatchLevelSimParamCompatibility(pmeGrid.order, true /* periodic Y */, true /* periodic Z */);
 1329  checkPatchLevelDeviceCompatibility();
 1330 
1331  // create our own CUDA stream
1332 #if CUDA_VERSION >= 5050 || defined(NAMD_HIP)
1333  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1334  int leastPriority, greatestPriority;
1335  cudaCheck(cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority));
1336  cudaCheck(cudaStreamCreateWithPriority(&stream, cudaStreamDefault, greatestPriority));
1337 #else
1338  cudaCheck(cudaStreamCreate(&stream));
1339 #endif
1340 
1341  allocate_host<EnergyVirial>(&h_energyVirials, num_used_grids);
1342  allocate_device<EnergyVirial>(&d_energyVirials, num_used_grids);
1343  allocate_device<float>(&d_scaling_factors, num_used_grids);
1344  allocate_device<double>(&d_selfEnergy, 1);
1345  if (sim_params.alchFepOn) {
1346  allocate_device<double>(&d_selfEnergy_FEP, 1);
1347  } else {
1348  d_selfEnergy_FEP = NULL;
1349  }
1350  if (sim_params.alchThermIntOn) {
1351  allocate_device<double>(&d_selfEnergy_TI_1, 1);
1352  allocate_device<double>(&d_selfEnergy_TI_2, 1);
1353  } else {
1354  d_selfEnergy_TI_1 = NULL;
1355  d_selfEnergy_TI_2 = NULL;
1356  }
1357 
1358  // create device buffer space for atom positions and forces
1359  // to be accessed externally through PatchData
1360  allocate_device<float4>(&d_atoms, num_used_grids * natoms);
1361  allocate_device<float3>(&d_forces, num_used_grids * natoms);
1362  if (sim_params.alchOn) {
1363  allocate_device<int>(&d_partition, natoms);
1364  } else {
1365  d_partition = NULL;
1366  }
1367 #ifdef NODEGROUP_FORCE_REGISTER
1368  DeviceData& devData = cpdata.ckLocalBranch()->devData[deviceIndex];
1369  devData.s_datoms = (CudaAtom *) (d_atoms);
1370  devData.f_slow = (CudaForce *) (d_forces);
1371  devData.f_slow_size = natoms;
1372  devData.s_datoms_partition = d_partition;
1373 #endif
1374  int k1 = pmeGrid.K1;
1375  int k2 = pmeGrid.K2;
1376  int k3 = pmeGrid.K3;
1377  int order = pmeGrid.order;
1378  gridsize = k1 * k2 * k3;
1379  transize = (k1/2 + 1) * k2 * k3;
1380 
1381 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1382 
1383  // set up cufft
1384  forwardPlans = new cufftHandle[num_used_grids];
1385  backwardPlans = new cufftHandle[num_used_grids];
1386  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1387  cufftCheck(cufftPlan3d(&(forwardPlans[iGrid]), k3, k2, k1, CUFFT_R2C));
1388  cufftCheck(cufftPlan3d(&(backwardPlans[iGrid]), k3, k2, k1, CUFFT_C2R));
1389  cufftCheck(cufftSetStream(forwardPlans[iGrid], stream));
1390  cufftCheck(cufftSetStream(backwardPlans[iGrid], stream));
1391  }
1392 #endif
1393 
1394 #ifdef NAMD_CUDA
1395  cudaDeviceProp deviceProp;
1396  cudaCheck(cudaGetDeviceProperties(&deviceProp, deviceID));
1397  const int texture_alignment = int(deviceProp.textureAlignment);
1398  // d_grids and d_grids + N * gridsize will be used as device pointers for ::cudaResourceDesc::res::linear::devPtr
1399  // check if (d_grids + N * gridsize) is an address aligned to ::cudaDeviceProp::textureAlignment
1400  // which is required by cudaCreateTextureObject()
1401  // or maybe I should use cudaMallocPitch()?
1402  if ((gridsize % texture_alignment) != 0) {
1403  // if it is not aligned, padding is required
1404  gridsize = (int(gridsize / texture_alignment) + 1) * texture_alignment;
1405  }
1406  // Is it necesary to align transize too?
1407 // if ((transize % texture_alignment) != 0) {
1408 // // if it is not aligned, padding is required
1409 // transize = (int(transize / texture_alignment) + 1) * texture_alignment;
1410 // }
1411  allocate_device<float>(&d_grids, num_used_grids * gridsize);
1412  allocate_device<float2>(&d_trans, num_used_grids * transize);
1413  gridTexObjArrays = new cudaTextureObject_t[num_used_grids];
1414  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1415  // set up texture object
1416  cudaResourceDesc resDesc;
1417  memset(&resDesc, 0, sizeof(resDesc));
1418  resDesc.resType = cudaResourceTypeLinear;
1419  resDesc.res.linear.devPtr = (void*)(d_grids + iGrid * (size_t)gridsize);
1420  resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
1421  resDesc.res.linear.desc.x = sizeof(float)*8;
1422  resDesc.res.linear.sizeInBytes = gridsize*sizeof(float);
1423  cudaTextureDesc texDesc;
1424  memset(&texDesc, 0, sizeof(texDesc));
1425  texDesc.readMode = cudaReadModeElementType;
1426  cudaCheck(cudaCreateTextureObject(&(gridTexObjArrays[iGrid]), &resDesc, &texDesc, NULL));
1427  }
1428 #else
1429  allocate_device<float>(&d_grids, num_used_grids * gridsize);
1430  allocate_device<float2>(&d_trans, num_used_grids * transize);
1431 #endif
1432  // calculate prefactors
1433  double *bm1 = new double[k1];
1434  double *bm2 = new double[k2];
1435  double *bm3 = new double[k3];
1436  // Use compute_b_moduli from PmeKSpace.C
1437  extern void compute_b_moduli(double *bm, int k, int order);
1438  compute_b_moduli(bm1, k1, order);
1439  compute_b_moduli(bm2, k2, order);
1440  compute_b_moduli(bm3, k3, order);
1441 
1442  // allocate space for and copy prefactors onto GPU
1443  float *bm1f = new float[k1];
1444  float *bm2f = new float[k2];
1445  float *bm3f = new float[k3];
1446  for (int i=0; i < k1; i++) bm1f[i] = (float) bm1[i];
1447  for (int i=0; i < k2; i++) bm2f[i] = (float) bm2[i];
1448  for (int i=0; i < k3; i++) bm3f[i] = (float) bm3[i];
1449  allocate_device<float>(&d_bm1, k1);
1450  allocate_device<float>(&d_bm2, k2);
1451  allocate_device<float>(&d_bm3, k3);
1452  copy_HtoD_sync<float>(bm1f, d_bm1, k1);
1453  copy_HtoD_sync<float>(bm2f, d_bm2, k2);
1454  copy_HtoD_sync<float>(bm3f, d_bm3, k3);
1455  delete [] bm1f;
1456  delete [] bm2f;
1457  delete [] bm3f;
1458  delete [] bm1;
1459  delete [] bm2;
1460  delete [] bm3;
1461 
1462  cudaCheck(cudaStreamSynchronize(stream));
1463 
1464 // fprintf(stderr, "CudaPmeOneDevice constructor END ********************************************\n");
1465 }
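
To make the texture-alignment padding above concrete (grid dimensions and alignment value assumed for illustration): a 48 x 48 x 48 grid gives gridsize = 110592 floats, which is already a multiple of a textureAlignment of 512, so no padding is applied; a 50 x 50 x 50 grid gives gridsize = 125000, which would be rounded up to (int(125000 / 512) + 1) * 512 = 125440 before d_grids is allocated as num_used_grids * gridsize floats.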

◆ ~CudaPmeOneDevice()

CudaPmeOneDevice::~CudaPmeOneDevice ( )

Definition at line 1467 of file CudaPmeSolverUtil.C.

References backwardPlans, cudaCheck, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_partition, PatchLevelPmeData::d_patchGridOffsets, d_scaling_factors, d_selfEnergy, d_selfEnergy_FEP, d_selfEnergy_TI_1, d_selfEnergy_TI_2, d_trans, forwardPlans, gridTexObjArrays, h_energyVirials, PatchLevelPmeData::h_patchGridOffsets, num_used_grids, patchLevelPmeData, and stream.

1467  {
1468  deallocate_device<float4>(&d_atoms);
1469  deallocate_device<float3>(&d_forces);
1470  deallocate_device<float2>(&d_trans);
1471  deallocate_device<float>(&d_grids);
1472  deallocate_host<EnergyVirial>(&h_energyVirials);
1473  deallocate_device<EnergyVirial>(&d_energyVirials);
1474  deallocate_device<float>(&d_scaling_factors);
1475 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1476  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1477  cufftCheck(cufftDestroy(forwardPlans[iGrid]));
1478  cufftCheck(cufftDestroy(backwardPlans[iGrid]));
1479 #if defined(NAMD_CUDA) // only CUDA uses texture objects
1480  cudaCheck(cudaDestroyTextureObject(gridTexObjArrays[iGrid]));
1481 #endif
1482  }
1483 
1484  if (patchLevelPmeData.h_patchGridOffsets != nullptr) {
1485  deallocate_host<int3>(&patchLevelPmeData.h_patchGridOffsets);
1486  }
1487  if (patchLevelPmeData.d_patchGridOffsets != nullptr) {
1488  deallocate_device<int3>(&patchLevelPmeData.d_patchGridOffsets);
1489  }
1490 
1491  delete[] forwardPlans;
1492  delete[] backwardPlans;
1493 #if defined(NAMD_CUDA) // only CUDA uses texture objects
1494  delete[] gridTexObjArrays;
1495 #endif
1496 
1497 
1498 #endif
1499  deallocate_device<double>(&d_selfEnergy);
1500  if (d_partition != NULL) deallocate_device<int>(&d_partition);
1501  if (d_selfEnergy_FEP != NULL) deallocate_device<double>(&d_selfEnergy_FEP);
1502  if (d_selfEnergy_TI_1 != NULL) deallocate_device<double>(&d_selfEnergy_TI_1);
1503  if (d_selfEnergy_TI_2 != NULL) deallocate_device<double>(&d_selfEnergy_TI_2);
1504  deallocate_device<float>(&d_bm1);
1505  deallocate_device<float>(&d_bm2);
1506  deallocate_device<float>(&d_bm3);
1507  cudaCheck(cudaStreamDestroy(stream));
1508 }

Member Function Documentation

◆ checkPatchLevelDeviceCompatibility()

void CudaPmeOneDevice::checkPatchLevelDeviceCompatibility ( )

Definition at line 2033 of file CudaPmeSolverUtil.C.

References computeSharedMemoryPatchLevelGatherForce(), computeSharedMemoryPatchLevelSpreadCharge(), PatchLevelPmeData::deviceCompatible, deviceID, PatchLevelPmeData::deviceMaxSharedBytes, PatchLevelPmeData::gatherForceSharedBytes, PatchLevelPmeData::kNumThreads, PatchLevelPmeData::kPatchGridDim, PatchLevelPmeData::kPatchGridDimPad, patchLevelPmeData, and PatchLevelPmeData::spreadChargeSharedBytes.

Referenced by CudaPmeOneDevice().

2033  {
2034  cudaDeviceGetAttribute(&patchLevelPmeData.deviceMaxSharedBytes, cudaDevAttrMaxSharedMemoryPerBlockOptin, deviceID);
2035 
2036  const int3 constexprPatchGridDim = make_int3(
2040 
2043  constexprPatchGridDim, 8 /* order */);
2046  constexprPatchGridDim, 8 /* order */);
2047 
2051 }

◆ checkPatchLevelLatticeCompatibilityAndComputeOffsets()

void CudaPmeOneDevice::checkPatchLevelLatticeCompatibilityAndComputeOffsets ( const Lattice lattice,
const int  numPatches,
const CudaLocalRecord localRecords,
double3 *  patchMin,
double3 *  patchMax,
double3 *  awayDists 
)

Definition at line 2053 of file CudaPmeSolverUtil.C.

References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), Lattice::c(), Lattice::c_r(), currentLattice, PatchLevelPmeData::d_patchGridOffsets, PatchLevelPmeData::deviceCompatible, getShiftedGrid(), PatchLevelPmeData::h_patchGridOffsets, Lattice::isEqual(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PatchLevelPmeData::kPatchGridDim, PatchLevelPmeData::latticeCompatible, PatchLevelPmeData::localRecords, PatchLevelPmeData::numPatches, Node::Object(), PmeGrid::order, order, PatchLevelPmeData::patchGridDim, patchLevelPmeData, pmeGrid, Lattice::scale(), Node::simParameters, simParams, PatchLevelPmeData::simulationCompatible, Vector::unit(), Lattice::unscale(), and Vector::x.

2055  {
2056 
2057  patchLevelPmeData.localRecords = localRecords;
2058 
2059  // If the simulation isn't compatible or the device isn't compatible then no point in checking
2060  // patch sizes
2062 
2063  patchLevelPmeData.numPatches = numPatches;
2064 
2065  if (patchLevelPmeData.h_patchGridOffsets == nullptr) {
2066  allocate_host<int3>(&patchLevelPmeData.h_patchGridOffsets, numPatches);
2067  }
2068  if (patchLevelPmeData.d_patchGridOffsets == nullptr) {
2069  allocate_device<int3>(&patchLevelPmeData.d_patchGridOffsets, numPatches);
2070  }
2071 
2073  const int order = pmeGrid.order;
2074 
2075  // We only need to recompute the grid offsets if the lattice has changed
2076  if (!lattice.isEqual(currentLattice)) {
2077  currentLattice = lattice;
2078 
2079  double sysdima = currentLattice.a_r().unit() * currentLattice.a();
2080  double sysdimb = currentLattice.b_r().unit() * currentLattice.b();
2081  double sysdimc = currentLattice.c_r().unit() * currentLattice.c();
2082 
2083  patchLevelPmeData.patchGridDim = make_int3(0,0,0);
2084 
2085  for (int i = 0; i < numPatches; i++) {
2086  double3 pmin = currentLattice.unscale(patchMin[i]);
2087  double3 pmax = currentLattice.unscale(patchMax[i]);
2088  double3 width = pmax - pmin;
2089 
2090  // Logic copied from margin violation check
2091  double3 marginVal;
2092  marginVal.x = 0.5 * (awayDists[i].x - simParams->cutoff / sysdima);
2093  marginVal.y = 0.5 * (awayDists[i].y - simParams->cutoff / sysdimb);
2094  marginVal.z = 0.5 * (awayDists[i].z - simParams->cutoff / sysdimc);
2095  marginVal = currentLattice.unscale(marginVal);
2096 
2097  double3 minAtom = pmin - marginVal;
2098  double3 maxAtom = pmax + marginVal;
2099 
2100  double3 minScaled = currentLattice.scale(minAtom);
2101  double3 maxScaled = currentLattice.scale(maxAtom);
2102 
2103  int3 gridMin;
2104  gridMin.x = getShiftedGrid(minScaled.x, pmeGrid.K1);
2105  gridMin.y = getShiftedGrid(minScaled.y, pmeGrid.K2);
2106  gridMin.z = getShiftedGrid(minScaled.z, pmeGrid.K3);
2107 
2108  int3 gridMax;
2109  gridMax.x = getShiftedGrid(maxScaled.x, pmeGrid.K1);
2110  gridMax.y = getShiftedGrid(maxScaled.y, pmeGrid.K2);
2111  gridMax.z = getShiftedGrid(maxScaled.z, pmeGrid.K3);
2112 
2113  int3 gridWidth;
2114  gridWidth.x = gridMax.x - gridMin.x + order;
2115  gridWidth.y = gridMax.y - gridMin.y + order;
2116  gridWidth.z = gridMax.z - gridMin.z + order;
2117 
2119  patchLevelPmeData.patchGridDim.x = std::max(patchLevelPmeData.patchGridDim.x, gridWidth.x);
2120  patchLevelPmeData.patchGridDim.y = std::max(patchLevelPmeData.patchGridDim.y, gridWidth.y);
2121  patchLevelPmeData.patchGridDim.z = std::max(patchLevelPmeData.patchGridDim.z, gridWidth.z);
2122  }
2124  numPatches, nullptr);
2125  cudaStreamSynchronize(nullptr);
2126  const int maxGridPoints = patchLevelPmeData.patchGridDim.x *
 2127  patchLevelPmeData.patchGridDim.y * patchLevelPmeData.patchGridDim.z;
 2128 
2133  }
2134 }

◆ checkPatchLevelSimParamCompatibility()

void CudaPmeOneDevice::checkPatchLevelSimParamCompatibility ( const int  order,
const bool  periodicY,
const bool  periodicZ 
)

Definition at line 2022 of file CudaPmeSolverUtil.C.

References deviceCUDA, DeviceCUDA::getNumDevice(), order, patchLevelPmeData, and PatchLevelPmeData::simulationCompatible.

Referenced by CudaPmeOneDevice().

2022  {
2023  bool use = true;
2024  use = use && (order == 8);
2025  use = use && (periodicY);
2026  use = use && (periodicZ);
2027 
2028  use = use && (deviceCUDA->getNumDevice() == 1); // This is only supported for single GPU currently
2029 
 2030  patchLevelPmeData.simulationCompatible = use;
 2031 }

◆ compute()

void CudaPmeOneDevice::compute ( const Lattice lattice,
int  doEnergyVirial,
int  step 
)

Definition at line 1510 of file CudaPmeSolverUtil.C.

References Lattice::a_r(), SimParameters::alchOn, Lattice::b_r(), backwardPlans, Lattice::c_r(), compute_selfEnergy(), cudaCheck, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_selfEnergy, d_trans, deviceID, SimParameters::firstTimestep, forwardPlans, gather_force(), gridsize, gridTexObjArrays, h_energyVirials, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, kappa, m_step, natoms, num_used_grids, Node::Object(), PmeGrid::order, order, patchLevelPmeData, pmeGrid, scalar_sum(), selfEnergy, Node::simParameters, spread_charge(), spread_charge_v2(), stream, transize, Lattice::volume(), WARPSIZE, Vector::x, Vector::y, and Vector::z.

1522  {
1523 // fprintf(stderr, "CudaPmeOneDevice compute ****************************************************\n");
1524  int k1 = pmeGrid.K1;
1525  int k2 = pmeGrid.K2;
1526  int k3 = pmeGrid.K3;
1527  int order = pmeGrid.order;
1528  double volume = lattice.volume();
1529  Vector a_r = lattice.a_r();
1530  Vector b_r = lattice.b_r();
1531  Vector c_r = lattice.c_r();
1532  float arx = a_r.x;
1533  float ary = a_r.y;
1534  float arz = a_r.z;
1535  float brx = b_r.x;
1536  float bry = b_r.y;
1537  float brz = b_r.z;
1538  float crx = c_r.x;
1539  float cry = c_r.y;
1540  float crz = c_r.z;
1541  m_step = step;
1542 
1543  //JM: actually necessary if you reserve a PME device!
1544  cudaCheck(cudaSetDevice(deviceID));
1545  const SimParameters& sim_params = *(Node::Object()->simParameters);
1546 
1547  // clear force array
1548  //fprintf(stderr, "Calling clear_device_array on d_force\n");
1549  clear_device_array<float3>(d_forces, num_used_grids * natoms, stream);
1550  // clear grid
1551  //fprintf(stderr, "Calling clear_device_array on d_grid\n");
1552  clear_device_array<float>(d_grids, num_used_grids * gridsize, stream);
1553  clear_device_array<float2>(d_trans, num_used_grids * transize, stream);
1554 
1555  // Clear energy and virial array if needed
1556  if (doEnergyVirial) {
1557  // clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
1558  clear_device_array<EnergyVirial>(d_energyVirials, num_used_grids * 1, stream);
1559  const bool updateSelfEnergy = (step == sim_params.firstTimestep) || (selfEnergy == 0);
1560  if (updateSelfEnergy && (sim_params.alchOn == false)) {
1561  clear_device_array<double>(d_selfEnergy, 1, stream);
1562  // calculate self energy term if not yet done
 1563  selfEnergy = compute_selfEnergy(d_selfEnergy, d_atoms, natoms,
 1564  kappa, stream);
1565  //fprintf(stderr, "selfEnergy = %12.8f\n", selfEnergy);
1566  }
1567  /* the self energy depends on the scaling factor, or lambda
1568  * the cases when self energy will be changed:
1569  * 1. If alchLambdaFreq > 0, we will have a linear scaling of lambda. Lambda is changed EVERY STEP!
1570  * 2. In most cases, users will not use alchLambdaFreq > 0, but simulations may enter another lambda-window by using TCL scripts.
1571  * in summary, the self energy will be not changed unless lambda is changed.
1572  * so calcSelfEnergyAlch() would compare lambda of current step with the one from last step.
1573  * only if lambda is changed, the calcSelfEnergyFEPKernel or calcSelfEnergyTIKernel will be executed again.
1574  */
1575  if (sim_params.alchOn) calcSelfEnergyAlch(m_step);
1576  }
1577 
1578 #if 0
1579 
1580  spread_charge(d_atoms, natoms, k1, k2, k3, k1, k2, k3,
1581  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1582  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1583  d_grid, order, stream);
1584 #else
1585  const int order3 = ((order*order*order-1)/WARPSIZE + 1)*WARPSIZE;
1586  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
 1587  spread_charge_v2(patchLevelPmeData,
 1588  d_atoms + iGrid * natoms, natoms, k1, k2, k3,
1589  float(k1), (float)k2, (float)k3, order3,
1590  k1, k2, k3,
1591  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1592  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1593  d_grids + iGrid * gridsize, order, stream);
1594  }
1595 
1596 #endif
1597  //cudaCheck(cudaStreamSynchronize(stream));
1598 
1599  // forward FFT
1600  //fprintf(stderr, "Calling cufftExecR2C\n");
1601  //cufftCheck(cufftExecR2C(forwardPlan, (cufftReal *)d_grid,
1602  // (cufftComplex *)d_tran));
1603 
1604  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1605  cufftCheck(cufftExecR2C(forwardPlans[iGrid],
1606  (cufftReal *)(d_grids + iGrid * gridsize),
1607  (cufftComplex *)(d_trans + iGrid * transize)));
1608  }
1609 
1610  //cudaCheck(cudaStreamSynchronize(stream));
1611 
1612  // reciprocal space calculation
1613  //fprintf(stderr, "Calling scalar_sum\n");
1614  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1615  scalar_sum(true /* Perm_cX_Y_Z */, k1, k2, k3, (k1/2 + 1), k2, k3,
1616  kappa, arx, ary, arz, brx, bry, brz, crx, cry, crz, volume,
1617  d_bm1, d_bm2, d_bm3, 0 /* jBlock */, 0 /* kBlock */,
1618  (bool) doEnergyVirial, &(d_energyVirials[iGrid].energy),
1619  d_energyVirials[iGrid].virial, d_trans + iGrid * transize, stream);
1620  }
1621  //scalar_sum(true /* Perm_cX_Y_Z */, k1, k2, k3, (k1/2 + 1), k2, k3,
1622  // kappa, arx, ary, arz, brx, bry, brz, crx, cry, crz, volume,
1623  // d_bm1, d_bm2, d_bm3, 0 /* jBlock */, 0 /* kBlock */,
1624  // (bool) doEnergyVirial, &(d_energyVirial->energy),
1625  // d_energyVirial->virial, d_tran, stream);
1626  //cudaCheck(cudaStreamSynchronize(stream));
1627 
1628  // backward FFT
1629  //fprintf(stderr, "Calling cufftExecC2R\n");
1630  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1631  cufftCheck(cufftExecC2R(backwardPlans[iGrid],
1632  (cufftComplex *)(d_trans + iGrid * transize),
1633  (cufftReal *)(d_grids + iGrid * gridsize)));
1634  }
1635 
1636  //cufftCheck(cufftExecC2R(backwardPlan, (cufftComplex *)d_tran,
1637  // (cufftReal *)d_grid));
1638  //cudaCheck(cudaStreamSynchronize(stream));
1639 
1640  // gather force from grid to atoms
1641  // missing cudaTextureObject_t below works for __CUDA_ARCH__ >= 350
1642  //fprintf(stderr, "Calling gather_force\n");
1643  for (unsigned int iGrid = 0; iGrid < num_used_grids; ++iGrid) {
 1644  gather_force(patchLevelPmeData,
 1645  &(d_atoms[iGrid * natoms]), natoms, k1, k2, k3, k1, k2, k3,
1646  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1647  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1648  d_grids + iGrid * gridsize, order, d_forces + iGrid * natoms,
1649 #ifdef NAMD_CUDA
1650  gridTexObjArrays[iGrid] /* cudaTextureObject_t */,
1651 #endif
1652  stream);
1653  }
1654 
1655  //gather_force(d_atoms, natoms, k1, k2, k3, k1, k2, k3,
1656  // k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1657  // true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1658  // d_grid, order, d_force, gridTexObj /* cudaTextureObject_t */,
1659  // stream);
1660  //cudaCheck(cudaStreamSynchronize(stream));
1661 
1662  // Copy energy and virial to host if needed
1663  if (doEnergyVirial) {
1664  //fprintf(stderr, "Calling copy_DtoH on d_energyVirial\n");
1665  copy_DtoH<EnergyVirial>(d_energyVirials, h_energyVirials,
1667  //cudaCheck(cudaEventRecord(copyEnergyVirialEvent, stream));
1668  //cudaCheck(cudaStreamSynchronize(stream));
1669  }
1670 
1671  // XXX debugging, quick test for borked forces
1672  //clear_device_array<float3>(d_force, natoms, stream);
1673  if (sim_params.alchOn) {
1674  scaleAndMergeForce(m_step);
1675  }
1676 }
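
In summary, for each of the num_used_grids charge grids compute() performs, on the class's CUDA stream: clear d_forces, d_grids, and d_trans; optionally recompute the Ewald self energy (plus the alchemical self energies via calcSelfEnergyAlch); spread charges onto the real-space grid with spread_charge_v2; run the forward R2C FFT; apply the reciprocal-space scalar_sum using the B-spline moduli d_bm1/d_bm2/d_bm3; run the backward C2R FFT; gather_force back into per-atom float3 forces; and, if doEnergyVirial is set, copy the per-grid EnergyVirial results to h_energyVirials. With alchOn, the per-grid forces are finally scaled and merged by scaleAndMergeForce.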

◆ computeSharedMemoryPatchLevelGatherForce()

int CudaPmeOneDevice::computeSharedMemoryPatchLevelGatherForce ( const int  numThreads,
const int3  patchGridDim,
const int  order 
)

Definition at line 2012 of file CudaPmeSolverUtil.C.

References PatchLevelPmeData::kThetaPad, and order.

Referenced by checkPatchLevelDeviceCompatibility().

2013  {
2014 
2015  const int gridBytes = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
2016  const int thetaBytes = (numThreads + PatchLevelPmeData::kThetaPad) * order *
2017  2 /* theta and dtheta */ * sizeof(float);
2018 
2019  return gridBytes + thetaBytes;
2020 }

◆ computeSharedMemoryPatchLevelSpreadCharge()

int CudaPmeOneDevice::computeSharedMemoryPatchLevelSpreadCharge ( const int  numThreads,
const int3  patchGridDim,
const int  order 
)

Definition at line 2001 of file CudaPmeSolverUtil.C.

References PatchLevelPmeData::kDim, PatchLevelPmeData::kThetaPad, and order.

Referenced by checkPatchLevelDeviceCompatibility().

2002  {
2003 
2004  const int gridBytes = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
2005  const int thetaBytes = PatchLevelPmeData::kDim * (numThreads + PatchLevelPmeData::kThetaPad) *
2006  order * sizeof(float);
2007  const int indexBytes = numThreads * sizeof(char4);
2008 
2009  return gridBytes + thetaBytes + indexBytes;
2010 }
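
As a worked example of the two shared-memory sizing formulas (all constants assumed for illustration, since kDim, kThetaPad, and the thread count are compile-time constants in PatchLevelPmeData): with a 24 x 24 x 24 patch grid, 512 threads, kThetaPad = 0, kDim = 3, and order = 8, computeSharedMemoryPatchLevelSpreadCharge() returns 24^3 * 4 + 3 * 512 * 8 * 4 + 512 * 4 = 55296 + 49152 + 2048 = 106496 bytes, while computeSharedMemoryPatchLevelGatherForce() returns 24^3 * 4 + 512 * 8 * 2 * 4 = 55296 + 32768 = 88064 bytes. checkPatchLevelDeviceCompatibility() can then check these requests against the device's cudaDevAttrMaxSharedMemoryPerBlockOptin limit.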

◆ finishReduction()

void CudaPmeOneDevice::finishReduction ( bool  doEnergyVirial)

Definition at line 1680 of file CudaPmeSolverUtil.C.

References SimParameters::alchFepOn, SimParameters::alchOn, SimParameters::alchThermIntOn, cudaCheck, deviceID, CudaPmeOneDevice::EnergyVirial::energy, h_energyVirials, m_step, Node::Object(), PatchData::reduction, REDUCTION_ELECT_ENERGY_SLOW, REDUCTION_ELECT_ENERGY_SLOW_F, REDUCTION_ELECT_ENERGY_SLOW_TI_1, REDUCTION_ELECT_ENERGY_SLOW_TI_2, selfEnergy, selfEnergy_FEP, selfEnergy_TI_1, selfEnergy_TI_2, Node::simParameters, stream, and CudaPmeOneDevice::EnergyVirial::virial.

1682  {
1683  cudaCheck(cudaStreamSynchronize(stream));
1684  if(doEnergyVirial){
1685  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1686  PatchData* patchData = cpdata.ckLocalBranch();
1687  NodeReduction *reduction = patchData->reduction;
1688  cudaCheck(cudaSetDevice(deviceID));
1689  double virial[9];
1690  double energy, energy_F, energy_TI_1, energy_TI_2;
1691  const SimParameters& sim_params = *(Node::Object()->simParameters);
1692  if (sim_params.alchOn) {
1693  if (sim_params.alchFepOn) {
1694  scaleAndComputeFEPEnergyVirials(h_energyVirials, m_step, energy, energy_F, virial);
1695  energy += selfEnergy;
1696  energy_F += selfEnergy_FEP;
1697  }
1698  if (sim_params.alchThermIntOn) {
1699  scaleAndComputeTIEnergyVirials(h_energyVirials, m_step, energy, energy_TI_1, energy_TI_2, virial);
1700  energy += selfEnergy;
1701  energy_TI_1 += selfEnergy_TI_1;
1702  energy_TI_2 += selfEnergy_TI_2;
1703  }
1704  } else {
1705  virial[0] = h_energyVirials[0].virial[0];
1706  virial[1] = h_energyVirials[0].virial[1];
1707  virial[2] = h_energyVirials[0].virial[2];
1708  virial[3] = h_energyVirials[0].virial[1];
1709  virial[4] = h_energyVirials[0].virial[3];
1710  virial[5] = h_energyVirials[0].virial[4];
1711  virial[6] = h_energyVirials[0].virial[2];
1712  virial[7] = h_energyVirials[0].virial[4];
1713  virial[8] = h_energyVirials[0].virial[5];
1714  energy = h_energyVirials[0].energy + selfEnergy;
1715  }
1716  #if 0
1717  fprintf(stderr, "PME ENERGY = %g %g\n", h_energyVirials[0].energy, selfEnergy );
1718  fprintf(stderr, "PME VIRIAL =\n"
1719  " %g %g %g\n %g %g %g\n %g %g %g\n",
1720  virial[0], virial[1], virial[2], virial[3], virial[4],
1721  virial[5], virial[6], virial[7], virial[8]);
1722  #endif
1723  (*reduction)[REDUCTION_VIRIAL_SLOW_XX] += virial[0];
1724  (*reduction)[REDUCTION_VIRIAL_SLOW_XY] += virial[1];
1725  (*reduction)[REDUCTION_VIRIAL_SLOW_XZ] += virial[2];
1726  (*reduction)[REDUCTION_VIRIAL_SLOW_YX] += virial[3];
1727  (*reduction)[REDUCTION_VIRIAL_SLOW_YY] += virial[4];
1728  (*reduction)[REDUCTION_VIRIAL_SLOW_YZ] += virial[5];
1729  (*reduction)[REDUCTION_VIRIAL_SLOW_ZX] += virial[6];
1730  (*reduction)[REDUCTION_VIRIAL_SLOW_ZY] += virial[7];
1731  (*reduction)[REDUCTION_VIRIAL_SLOW_ZZ] += virial[8];
1732  (*reduction)[REDUCTION_ELECT_ENERGY_SLOW] += energy;
1733  if (sim_params.alchFepOn) {
1734  (*reduction)[REDUCTION_ELECT_ENERGY_SLOW_F] += energy_F;
1735  }
1736  if (sim_params.alchThermIntOn) {
1737  (*reduction)[REDUCTION_ELECT_ENERGY_SLOW_TI_1] += energy_TI_1;
1738  (*reduction)[REDUCTION_ELECT_ENERGY_SLOW_TI_2] += energy_TI_2;
1739  }
1740  }
1741 }

◆ getShiftedGrid()

int CudaPmeOneDevice::getShiftedGrid ( const double  x,
const int  grid 
)

Definition at line 1995 of file CudaPmeSolverUtil.C.

Referenced by checkPatchLevelLatticeCompatibilityAndComputeOffsets().

1995  {
1996  double w = x + 0.5;
1997  double gw = w * grid;
1998  return floor(gw);
1999 }
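
For example, with the implementation shown, getShiftedGrid(0.0, 64) returns floor((0.0 + 0.5) * 64) = 32 and getShiftedGrid(-0.5, 64) returns 0: a scaled coordinate in [-0.5, 0.5) is shifted into [0, 1) and mapped to an integer grid index in [0, grid).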

Member Data Documentation

◆ backwardPlans

cufftHandle* CudaPmeOneDevice::backwardPlans

Definition at line 238 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ currentLattice

Lattice CudaPmeOneDevice::currentLattice

◆ d_atoms

float4* CudaPmeOneDevice::d_atoms

Definition at line 219 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm1

float* CudaPmeOneDevice::d_bm1

Definition at line 241 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm2

float* CudaPmeOneDevice::d_bm2

Definition at line 242 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm3

float* CudaPmeOneDevice::d_bm3

Definition at line 243 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_energyVirials

EnergyVirial* CudaPmeOneDevice::d_energyVirials

Definition at line 251 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_forces

float3* CudaPmeOneDevice::d_forces

Definition at line 221 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_grids

float* CudaPmeOneDevice::d_grids

On-device grid of charge before the forward FFT (R->C), then grid of potential after the backward FFT (C->R).

Definition at line 227 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_partition

int* CudaPmeOneDevice::d_partition

Definition at line 220 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_scaling_factors

float* CudaPmeOneDevice::d_scaling_factors

Definition at line 222 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy

double* CudaPmeOneDevice::d_selfEnergy

Definition at line 256 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_FEP

double* CudaPmeOneDevice::d_selfEnergy_FEP

Definition at line 257 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_TI_1

double* CudaPmeOneDevice::d_selfEnergy_TI_1

Definition at line 258 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_TI_2

double* CudaPmeOneDevice::d_selfEnergy_TI_2

Definition at line 259 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_trans

float2* CudaPmeOneDevice::d_trans

On-device complex-valued data holding the FFT-transformed grid.

Definition at line 231 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ deviceID

int CudaPmeOneDevice::deviceID

◆ deviceIndex

int CudaPmeOneDevice::deviceIndex

Definition at line 213 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice().

◆ force_scaling_alch_first_time

bool CudaPmeOneDevice::force_scaling_alch_first_time

Definition at line 255 of file CudaPmeSolverUtil.h.

◆ forwardPlans

cufftHandle* CudaPmeOneDevice::forwardPlans

Definition at line 237 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ gridsize

size_t CudaPmeOneDevice::gridsize

Definition at line 233 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().

◆ gridTexObjArrays

cudaTextureObject_t* CudaPmeOneDevice::gridTexObjArrays

Definition at line 224 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ h_energyVirials

EnergyVirial* CudaPmeOneDevice::h_energyVirials

Definition at line 252 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), finishReduction(), and ~CudaPmeOneDevice().

◆ kappa

double CudaPmeOneDevice::kappa

Definition at line 245 of file CudaPmeSolverUtil.h.

Referenced by compute().

◆ m_step

int CudaPmeOneDevice::m_step

Definition at line 264 of file CudaPmeSolverUtil.h.

Referenced by compute(), and finishReduction().

◆ natoms

int CudaPmeOneDevice::natoms

Definition at line 216 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().

◆ num_used_grids

size_t CudaPmeOneDevice::num_used_grids

Definition at line 217 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ patchLevelPmeData

PatchLevelPmeData CudaPmeOneDevice::patchLevelPmeData

◆ pmeGrid

PmeGrid CudaPmeOneDevice::pmeGrid

◆ self_energy_alch_first_time

bool CudaPmeOneDevice::self_energy_alch_first_time

Definition at line 254 of file CudaPmeSolverUtil.h.

◆ selfEnergy

double CudaPmeOneDevice::selfEnergy

Definition at line 260 of file CudaPmeSolverUtil.h.

Referenced by compute(), and finishReduction().

◆ selfEnergy_FEP

double CudaPmeOneDevice::selfEnergy_FEP

Definition at line 261 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ selfEnergy_TI_1

double CudaPmeOneDevice::selfEnergy_TI_1

Definition at line 262 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ selfEnergy_TI_2

double CudaPmeOneDevice::selfEnergy_TI_2

Definition at line 263 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ stream

cudaStream_t CudaPmeOneDevice::stream

Definition at line 214 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), finishReduction(), and ~CudaPmeOneDevice().

◆ transize

size_t CudaPmeOneDevice::transize

Definition at line 234 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().


The documentation for this class was generated from the following files: CudaPmeSolverUtil.h and CudaPmeSolverUtil.C.