NAMD
CudaPmeOneDevice Class Reference

#include <CudaPmeSolverUtil.h>

Classes

struct  EnergyVirial
 

Public Member Functions

 CudaPmeOneDevice (PmeGrid pmeGrid_, int deviceID_, int deviceIndex_)
 
 ~CudaPmeOneDevice ()
 
void compute (const Lattice &lattice, int doEnergyVirial, int step)
 
void finishReduction (bool doEnergyVirial)
 
int getShiftedGrid (const double x, const int grid)
 
int computeSharedMemoryPatchLevelSpreadCharge (const int numThreads, const int3 patchGridDim, const int order)
 
int computeSharedMemoryPatchLevelGatherForce (const int numThreads, const int3 patchGridDim, const int order)
 
void checkPatchLevelSimParamCompatibility (const int order, const bool periodicY, const bool periodicZ)
 
void checkPatchLevelDeviceCompatibility ()
 
void checkPatchLevelLatticeCompatibilityAndComputeOffsets (const Lattice &lattice, const int numPatches, const CudaLocalRecord *localRecords, double3 *patchMin, double3 *patchMax, double3 *awayDists)
 

Public Attributes

PmeGrid pmeGrid
 
int deviceID
 
int deviceIndex
 
cudaStream_t stream
 
int natoms
 
size_t num_used_grids
 
float4 * d_atoms
 
int * d_partition
 
float3 * d_forces
 
float * d_scaling_factors
 
cudaTextureObject_t * gridTexObjArrays
 
float * d_grids
 
float2 * d_trans
 
size_t gridsize
 
size_t transize
 
cufftHandle * forwardPlans
 
cufftHandle * backwardPlans
 
float * d_bm1
 
float * d_bm2
 
float * d_bm3
 
double kappa
 
EnergyVirial * d_energyVirials
 
EnergyVirial * h_energyVirials
 
bool self_energy_alch_first_time
 
bool force_scaling_alch_first_time
 
double * d_selfEnergy
 
double * d_selfEnergy_FEP
 
double * d_selfEnergy_TI_1
 
double * d_selfEnergy_TI_2
 
double selfEnergy
 
double selfEnergy_FEP
 
double selfEnergy_TI_1
 
double selfEnergy_TI_2
 
int m_step
 
PatchLevelPmeData patchLevelPmeData
 
Lattice currentLattice
 

Detailed Description

PME for the single-GPU case, where data persists on the GPU. Runs the real-space, FFT, and k-space parts; receives atom and charge data as a float4 * allocated on the device; returns force data as a float3 * allocated on the device; and returns the energy and virial allocated on the device.

Definition at line 209 of file CudaPmeSolverUtil.h.
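A minimal usage sketch (not from the NAMD sources; it relies only on the constructor, compute(), and finishReduction() signatures documented on this page, and assumes the device-resident atom buffer has already been filled through the usual PatchData path):

    #include "CudaPmeSolverUtil.h"

    // Drive one PME evaluation per timestep on a single GPU (sketch).
    void runPmeStep(CudaPmeOneDevice &pme, const Lattice &lattice, int step, bool wantEnergyVirial) {
      // Charge spreading, forward FFT, k-space sum, backward FFT, and force
      // gathering are all enqueued on pme.stream inside compute().
      pme.compute(lattice, wantEnergyVirial ? 1 : 0, step);
      // Synchronizes the stream and submits energy/virial to the reduction system.
      pme.finishReduction(wantEnergyVirial);
    }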

Constructor & Destructor Documentation

◆ CudaPmeOneDevice()

CudaPmeOneDevice::CudaPmeOneDevice ( PmeGrid  pmeGrid_,
int  deviceID_,
int  deviceIndex_ 
)

Definition at line 1300 of file CudaPmeSolverUtil.C.

References SimParameters::alchFepOn, SimParameters::alchGetNumOfPMEGrids(), SimParameters::alchOn, SimParameters::alchThermIntOn, backwardPlans, checkPatchLevelDeviceCompatibility(), checkPatchLevelSimParamCompatibility(), compute_b_moduli(), cudaCheck, SimParameters::CUDASOAintegrateMode, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_partition, d_scaling_factors, d_selfEnergy, d_selfEnergy_FEP, d_selfEnergy_TI_1, d_selfEnergy_TI_2, d_trans, deviceID, deviceIndex, forwardPlans, gridsize, gridTexObjArrays, h_energyVirials, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::molecule, NAMD_bug(), natoms, num_used_grids, Molecule::numAtoms, Node::Object(), ReductionMgr::Object(), PmeGrid::order, order, pmeGrid, REDUCTIONS_GPURESIDENT, Node::simParameters, stream, transize, and ReductionMgr::willSubmit().

1304  :
1305  pmeGrid(pmeGrid_), deviceID(deviceID_), deviceIndex(deviceIndex_),
1306  natoms(0), d_atoms(0), d_forces(0),
1307  d_grids(0), gridsize(0),
1308  d_trans(0), transize(0),
1309  d_bm1(0), d_bm2(0), d_bm3(0),
1314 {
1315 // fprintf(stderr, "CudaPmeOneDevice constructor START ******************************************\n");
1316  const SimParameters& sim_params = *(Node::Object()->simParameters);
1318  // Determine how many grids we need for the alchemical route
1319  if (sim_params.alchOn) {
1320  num_used_grids = sim_params.alchGetNumOfPMEGrids();
1321  } else {
1322  num_used_grids = 1;
1323  }
1324  cudaCheck(cudaSetDevice(deviceID));
1325 
1326  // Check to see if the simulation and device are compatible with patch-level kernels. The results
1327  // will be stored in the PatchLevelPmeData field
1328  checkPatchLevelSimParamCompatibility(pmeGrid.order, true /* periodic Y */, true /* periodic Z */);
1330 
1331  if (!sim_params.CUDASOAintegrateMode) {
1332  NAMD_bug("CudaPmeOneDevice requires GPU-resident mode");
1333  }
1334  reductionGpuResident = ReductionMgr::Object()->willSubmit(REDUCTIONS_GPURESIDENT);
1335 
1336  // create our own CUDA stream
1337 #if CUDA_VERSION >= 5050 || defined(NAMD_HIP)
1338  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1339  int leastPriority, greatestPriority;
1340  cudaCheck(cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority));
1341  cudaCheck(cudaStreamCreateWithPriority(&stream, cudaStreamDefault, greatestPriority));
1342 #else
1343  cudaCheck(cudaStreamCreate(&stream));
1344 #endif
1345 
1346  allocate_host<EnergyVirial>(&h_energyVirials, num_used_grids);
1347  allocate_device<EnergyVirial>(&d_energyVirials, num_used_grids);
1348  allocate_device<float>(&d_scaling_factors, num_used_grids);
1349  allocate_device<double>(&d_selfEnergy, 1);
1350  if (sim_params.alchFepOn) {
1351  allocate_device<double>(&d_selfEnergy_FEP, 1);
1352  } else {
1353  d_selfEnergy_FEP = NULL;
1354  }
1355  if (sim_params.alchThermIntOn) {
1356  allocate_device<double>(&d_selfEnergy_TI_1, 1);
1357  allocate_device<double>(&d_selfEnergy_TI_2, 1);
1358  } else {
1359  d_selfEnergy_TI_1 = NULL;
1360  d_selfEnergy_TI_2 = NULL;
1361  }
1362 
1363  // create device buffer space for atom positions and forces
1364  // to be accessed externally through PatchData
1365  allocate_device<float4>(&d_atoms, num_used_grids * natoms);
1366  allocate_device<float3>(&d_forces, num_used_grids * natoms);
1367  if (sim_params.alchOn) {
1368  allocate_device<int>(&d_partition, natoms);
1369  } else {
1370  d_partition = NULL;
1371  }
1372 #ifdef NODEGROUP_FORCE_REGISTER
1373  DeviceData& devData = cpdata.ckLocalBranch()->devData[deviceIndex];
1374  devData.s_datoms = (CudaAtom *) (d_atoms);
1375  devData.f_slow = (CudaForce *) (d_forces);
1376  devData.f_slow_size = natoms;
1377  devData.s_datoms_partition = d_partition;
1378 #endif
1379  int k1 = pmeGrid.K1;
1380  int k2 = pmeGrid.K2;
1381  int k3 = pmeGrid.K3;
1382  int order = pmeGrid.order;
1383  gridsize = k1 * k2 * k3;
1384  transize = (k1/2 + 1) * k2 * k3;
1385 
1386 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1387 
1388  // set up cufft
1389  forwardPlans = new cufftHandle[num_used_grids];
1390  backwardPlans = new cufftHandle[num_used_grids];
1391  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1392  cufftCheck(cufftPlan3d(&(forwardPlans[iGrid]), k3, k2, k1, CUFFT_R2C));
1393  cufftCheck(cufftPlan3d(&(backwardPlans[iGrid]), k3, k2, k1, CUFFT_C2R));
1394  cufftCheck(cufftSetStream(forwardPlans[iGrid], stream));
1395  cufftCheck(cufftSetStream(backwardPlans[iGrid], stream));
1396  }
1397 #endif
1398 
1399 #ifdef NAMD_CUDA
1400  cudaDeviceProp deviceProp;
1401  cudaCheck(cudaGetDeviceProperties(&deviceProp, deviceID));
1402  const int texture_alignment = int(deviceProp.textureAlignment);
1403  // d_grids and d_grids + N * gridsize will be used as device pointers for ::cudaResourceDesc::res::linear::devPtr
1404  // check if (d_grids + N * gridsize) is an address aligned to ::cudaDeviceProp::textureAlignment
1405  // which is required by cudaCreateTextureObject()
1406  // or maybe I should use cudaMallocPitch()?
1407  if ((gridsize % texture_alignment) != 0) {
1408  // if it is not aligned, padding is required
1409  gridsize = (int(gridsize / texture_alignment) + 1) * texture_alignment;
1410  }
1411  // Is it necessary to align transize too?
1412 // if ((transize % texture_alignment) != 0) {
1413 // // if it is not aligned, padding is required
1414 // transize = (int(transize / texture_alignment) + 1) * texture_alignment;
1415 // }
1416  allocate_device<float>(&d_grids, num_used_grids * gridsize);
1417  allocate_device<float2>(&d_trans, num_used_grids * transize);
1418  gridTexObjArrays = new cudaTextureObject_t[num_used_grids];
1419  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1420  // set up texture object
1421  cudaResourceDesc resDesc;
1422  memset(&resDesc, 0, sizeof(resDesc));
1423  resDesc.resType = cudaResourceTypeLinear;
1424  resDesc.res.linear.devPtr = (void*)(d_grids + iGrid * (size_t)gridsize);
1425  resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
1426  resDesc.res.linear.desc.x = sizeof(float)*8;
1427  resDesc.res.linear.sizeInBytes = gridsize*sizeof(float);
1428  cudaTextureDesc texDesc;
1429  memset(&texDesc, 0, sizeof(texDesc));
1430  texDesc.readMode = cudaReadModeElementType;
1431  cudaCheck(cudaCreateTextureObject(&(gridTexObjArrays[iGrid]), &resDesc, &texDesc, NULL));
1432  }
1433 #else
1434  allocate_device<float>(&d_grids, num_used_grids * gridsize);
1435  allocate_device<float2>(&d_trans, num_used_grids * transize);
1436 #endif
1437  // calculate prefactors
1438  double *bm1 = new double[k1];
1439  double *bm2 = new double[k2];
1440  double *bm3 = new double[k3];
1441  // Use compute_b_moduli from PmeKSpace.C
1442  extern void compute_b_moduli(double *bm, int k, int order);
1443  compute_b_moduli(bm1, k1, order);
1444  compute_b_moduli(bm2, k2, order);
1445  compute_b_moduli(bm3, k3, order);
1446 
1447  // allocate space for and copy prefactors onto GPU
1448  float *bm1f = new float[k1];
1449  float *bm2f = new float[k2];
1450  float *bm3f = new float[k3];
1451  for (int i=0; i < k1; i++) bm1f[i] = (float) bm1[i];
1452  for (int i=0; i < k2; i++) bm2f[i] = (float) bm2[i];
1453  for (int i=0; i < k3; i++) bm3f[i] = (float) bm3[i];
1454  allocate_device<float>(&d_bm1, k1);
1455  allocate_device<float>(&d_bm2, k2);
1456  allocate_device<float>(&d_bm3, k3);
1457  copy_HtoD_sync<float>(bm1f, d_bm1, k1);
1458  copy_HtoD_sync<float>(bm2f, d_bm2, k2);
1459  copy_HtoD_sync<float>(bm3f, d_bm3, k3);
1460  delete [] bm1f;
1461  delete [] bm2f;
1462  delete [] bm3f;
1463  delete [] bm1;
1464  delete [] bm2;
1465  delete [] bm3;
1466 
1467  cudaCheck(cudaStreamSynchronize(stream));
1468 
1469 // fprintf(stderr, "CudaPmeOneDevice constructor END ********************************************\n");
1470 }
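Two details in the FFT setup above are easy to misread: cufftPlan3d() takes its dimensions from slowest- to fastest-varying, so passing (k3, k2, k1) describes a grid stored with K1 contiguous, and the R2C transform keeps only K1/2 + 1 complex values along that contiguous dimension, which is why transize is (k1/2 + 1) * k2 * k3. A standalone sketch of the same plan setup, using the cuFFT calls and error-check macro shown above:

    cufftHandle plan;
    cufftCheck(cufftPlan3d(&plan, k3, k2, k1, CUFFT_R2C)); // slowest ... fastest; k1 is contiguous
    cufftCheck(cufftSetStream(plan, stream));              // execute asynchronously on the PME stream
    // Per-grid complex footprint: (k1/2 + 1) * k2 * k3 float2 elements (transize above).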

◆ ~CudaPmeOneDevice()

CudaPmeOneDevice::~CudaPmeOneDevice ( )

Definition at line 1472 of file CudaPmeSolverUtil.C.

References backwardPlans, cudaCheck, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_partition, PatchLevelPmeData::d_patchGridOffsets, d_scaling_factors, d_selfEnergy, d_selfEnergy_FEP, d_selfEnergy_TI_1, d_selfEnergy_TI_2, d_trans, forwardPlans, gridTexObjArrays, h_energyVirials, PatchLevelPmeData::h_patchGridOffsets, num_used_grids, patchLevelPmeData, and stream.

1472  {
1473  deallocate_device<float4>(&d_atoms);
1474  deallocate_device<float3>(&d_forces);
1475  deallocate_device<float2>(&d_trans);
1476  deallocate_device<float>(&d_grids);
1477  deallocate_host<EnergyVirial>(&h_energyVirials);
1478  deallocate_device<EnergyVirial>(&d_energyVirials);
1479  deallocate_device<float>(&d_scaling_factors);
1480 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1481  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1482  cufftCheck(cufftDestroy(forwardPlans[iGrid]));
1483  cufftCheck(cufftDestroy(backwardPlans[iGrid]));
1484 #if defined(NAMD_CUDA) // only CUDA uses texture objects
1485  cudaCheck(cudaDestroyTextureObject(gridTexObjArrays[iGrid]));
1486 #endif
1487  }
1488 
1489  if (patchLevelPmeData.h_patchGridOffsets != nullptr) {
1490  deallocate_host<int3>(&patchLevelPmeData.h_patchGridOffsets);
1491  }
1492  if (patchLevelPmeData.d_patchGridOffsets != nullptr) {
1493  deallocate_device<int3>(&patchLevelPmeData.d_patchGridOffsets);
1494  }
1495 
1496  delete[] forwardPlans;
1497  delete[] backwardPlans;
1498 #if defined(NAMD_CUDA) // only CUDA uses texture objects
1499  delete[] gridTexObjArrays;
1500 #endif
1501 
1502 
1503 #endif
1504  deallocate_device<double>(&d_selfEnergy);
1505  if (d_partition != NULL) deallocate_device<int>(&d_partition);
1506  if (d_selfEnergy_FEP != NULL) deallocate_device<double>(&d_selfEnergy_FEP);
1507  if (d_selfEnergy_TI_1 != NULL) deallocate_device<double>(&d_selfEnergy_TI_1);
1508  if (d_selfEnergy_TI_2 != NULL) deallocate_device<double>(&d_selfEnergy_TI_2);
1509  deallocate_device<float>(&d_bm1);
1510  deallocate_device<float>(&d_bm2);
1511  deallocate_device<float>(&d_bm3);
1512  cudaCheck(cudaStreamDestroy(stream));
1513 
1514  if (reductionGpuResident) {
1515  delete reductionGpuResident;
1516  }
1517 }

Member Function Documentation

◆ checkPatchLevelDeviceCompatibility()

void CudaPmeOneDevice::checkPatchLevelDeviceCompatibility ( )

Definition at line 2042 of file CudaPmeSolverUtil.C.

References computeSharedMemoryPatchLevelGatherForce(), computeSharedMemoryPatchLevelSpreadCharge(), PatchLevelPmeData::deviceCompatible, deviceID, PatchLevelPmeData::deviceMaxSharedBytes, PatchLevelPmeData::gatherForceSharedBytes, PatchLevelPmeData::kNumThreads, PatchLevelPmeData::kPatchGridDim, PatchLevelPmeData::kPatchGridDimPad, patchLevelPmeData, and PatchLevelPmeData::spreadChargeSharedBytes.

Referenced by CudaPmeOneDevice().

2042  {
2043  cudaDeviceGetAttribute(&patchLevelPmeData.deviceMaxSharedBytes, cudaDevAttrMaxSharedMemoryPerBlockOptin, deviceID);
2044 
2045  const int3 constexprPatchGridDim = make_int3(
2049 
2052  constexprPatchGridDim, 8 /* order */);
2055  constexprPatchGridDim, 8 /* order */);
2056 
2060 }
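Lines 2045-2059 are partially elided above; based on the References list they size the shared memory needed by the two patch-level kernels and record whether the device can supply it. A hedged sketch of that comparison, written as it would appear inside this member function (local variable names are illustrative, not NAMD's):

    int maxSharedBytes = 0;
    cudaDeviceGetAttribute(&maxSharedBytes, cudaDevAttrMaxSharedMemoryPerBlockOptin, deviceID);
    const int spreadBytes = computeSharedMemoryPatchLevelSpreadCharge(
        PatchLevelPmeData::kNumThreads, constexprPatchGridDim, 8 /* order */);
    const int gatherBytes = computeSharedMemoryPatchLevelGatherForce(
        PatchLevelPmeData::kNumThreads, constexprPatchGridDim, 8 /* order */);
    // The patch-level path is usable only if both kernels fit in the opt-in limit.
    patchLevelPmeData.deviceCompatible =
        (spreadBytes <= maxSharedBytes) && (gatherBytes <= maxSharedBytes);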

◆ checkPatchLevelLatticeCompatibilityAndComputeOffsets()

void CudaPmeOneDevice::checkPatchLevelLatticeCompatibilityAndComputeOffsets ( const Lattice lattice,
const int  numPatches,
const CudaLocalRecord localRecords,
double3 *  patchMin,
double3 *  patchMax,
double3 *  awayDists 
)

Definition at line 2062 of file CudaPmeSolverUtil.C.

References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), Lattice::c(), Lattice::c_r(), currentLattice, PatchLevelPmeData::d_patchGridOffsets, PatchLevelPmeData::deviceCompatible, getShiftedGrid(), PatchLevelPmeData::h_patchGridOffsets, Lattice::isEqual(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PatchLevelPmeData::kPatchGridDim, PatchLevelPmeData::latticeCompatible, PatchLevelPmeData::localRecords, PatchLevelPmeData::numPatches, Node::Object(), PmeGrid::order, order, PatchLevelPmeData::patchGridDim, patchLevelPmeData, pmeGrid, Lattice::scale(), Node::simParameters, simParams, PatchLevelPmeData::simulationCompatible, Vector::unit(), Lattice::unscale(), and Vector::x.

2064  {
2065 
2066  patchLevelPmeData.localRecords = localRecords;
2067 
2068  // If the simulation isn't compatible or the device isn't compatible then no point in checking
2069  // patch sizes
2071 
2072  patchLevelPmeData.numPatches = numPatches;
2073 
2074  if (patchLevelPmeData.h_patchGridOffsets == nullptr) {
2075  allocate_host<int3>(&patchLevelPmeData.h_patchGridOffsets, numPatches);
2076  }
2077  if (patchLevelPmeData.d_patchGridOffsets == nullptr) {
2078  allocate_device<int3>(&patchLevelPmeData.d_patchGridOffsets, numPatches);
2079  }
2080 
2082  const int order = pmeGrid.order;
2083 
2084  // We only need to recompute the grid offsets if the lattice has changed
2085  if (!lattice.isEqual(currentLattice)) {
2086  currentLattice = lattice;
2087 
2088  double sysdima = currentLattice.a_r().unit() * currentLattice.a();
2089  double sysdimb = currentLattice.b_r().unit() * currentLattice.b();
2090  double sysdimc = currentLattice.c_r().unit() * currentLattice.c();
2091 
2092  patchLevelPmeData.patchGridDim = make_int3(0,0,0);
2093 
2094  for (int i = 0; i < numPatches; i++) {
2095  double3 pmin = currentLattice.unscale(patchMin[i]);
2096  double3 pmax = currentLattice.unscale(patchMax[i]);
2097  double3 width = pmax - pmin;
2098 
2099  // Logic copied from margin violation check
2100  double3 marginVal;
2101  marginVal.x = 0.5 * (awayDists[i].x - simParams->cutoff / sysdima);
2102  marginVal.y = 0.5 * (awayDists[i].y - simParams->cutoff / sysdimb);
2103  marginVal.z = 0.5 * (awayDists[i].z - simParams->cutoff / sysdimc);
2104  marginVal = currentLattice.unscale(marginVal);
2105 
2106  double3 minAtom = pmin - marginVal;
2107  double3 maxAtom = pmax + marginVal;
2108 
2109  double3 minScaled = currentLattice.scale(minAtom);
2110  double3 maxScaled = currentLattice.scale(maxAtom);
2111 
2112  int3 gridMin;
2113  gridMin.x = getShiftedGrid(minScaled.x, pmeGrid.K1);
2114  gridMin.y = getShiftedGrid(minScaled.y, pmeGrid.K2);
2115  gridMin.z = getShiftedGrid(minScaled.z, pmeGrid.K3);
2116 
2117  int3 gridMax;
2118  gridMax.x = getShiftedGrid(maxScaled.x, pmeGrid.K1);
2119  gridMax.y = getShiftedGrid(maxScaled.y, pmeGrid.K2);
2120  gridMax.z = getShiftedGrid(maxScaled.z, pmeGrid.K3);
2121 
2122  int3 gridWidth;
2123  gridWidth.x = gridMax.x - gridMin.x + order;
2124  gridWidth.y = gridMax.y - gridMin.y + order;
2125  gridWidth.z = gridMax.z - gridMin.z + order;
2126 
2128  patchLevelPmeData.patchGridDim.x = std::max(patchLevelPmeData.patchGridDim.x, gridWidth.x);
2129  patchLevelPmeData.patchGridDim.y = std::max(patchLevelPmeData.patchGridDim.y, gridWidth.y);
2130  patchLevelPmeData.patchGridDim.z = std::max(patchLevelPmeData.patchGridDim.z, gridWidth.z);
2131  }
2133  numPatches, nullptr);
2134  cudaStreamSynchronize(nullptr);
2135  const int maxGridPoints = patchLevelPmeData.patchGridDim.x *
2137 
2142  }
2143 }
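A quick illustration of the per-patch extent computed above (not NAMD code): with order = 8, a patch whose minimum and maximum scaled x coordinates shift to grid indices 10 and 18 needs

    int gridWidthX = 18 - 10 + 8;   // = 16 grid points along x

because each atom's charge is spread over 'order' grid points starting at its shifted index, and patchGridDim keeps the maximum such width over all patches.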

◆ checkPatchLevelSimParamCompatibility()

void CudaPmeOneDevice::checkPatchLevelSimParamCompatibility ( const int  order,
const bool  periodicY,
const bool  periodicZ 
)

Definition at line 2031 of file CudaPmeSolverUtil.C.

References deviceCUDA, DeviceCUDA::getNumDevice(), order, patchLevelPmeData, and PatchLevelPmeData::simulationCompatible.

Referenced by CudaPmeOneDevice().

2031  {
2032  bool use = true;
2033  use = use && (order == 8);
2034  use = use && (periodicY);
2035  use = use && (periodicZ);
2036 
2037  use = use && (deviceCUDA->getNumDevice() == 1); // This is only supported for single GPU currently
2038 
2040 }
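The final statement of this function (line 2039) is elided above; given that the References list names PatchLevelPmeData::simulationCompatible, it presumably reduces to:

    patchLevelPmeData.simulationCompatible = use;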

◆ compute()

void CudaPmeOneDevice::compute ( const Lattice lattice,
int  doEnergyVirial,
int  step 
)

Definition at line 1519 of file CudaPmeSolverUtil.C.

References Lattice::a_r(), SimParameters::alchOn, Lattice::b_r(), backwardPlans, Lattice::c_r(), compute_selfEnergy(), cudaCheck, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_selfEnergy, d_trans, deviceID, SimParameters::firstTimestep, forwardPlans, gather_force(), gridsize, gridTexObjArrays, h_energyVirials, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, kappa, m_step, natoms, num_used_grids, Node::Object(), PmeGrid::order, order, patchLevelPmeData, pmeGrid, scalar_sum(), selfEnergy, Node::simParameters, spread_charge(), spread_charge_v2(), stream, transize, Lattice::volume(), WARPSIZE, Vector::x, Vector::y, and Vector::z.

1530  {
1531 // fprintf(stderr, "CudaPmeOneDevice compute ****************************************************\n");
1532  int k1 = pmeGrid.K1;
1533  int k2 = pmeGrid.K2;
1534  int k3 = pmeGrid.K3;
1535  int order = pmeGrid.order;
1536  double volume = lattice.volume();
1537  Vector a_r = lattice.a_r();
1538  Vector b_r = lattice.b_r();
1539  Vector c_r = lattice.c_r();
1540  float arx = a_r.x;
1541  float ary = a_r.y;
1542  float arz = a_r.z;
1543  float brx = b_r.x;
1544  float bry = b_r.y;
1545  float brz = b_r.z;
1546  float crx = c_r.x;
1547  float cry = c_r.y;
1548  float crz = c_r.z;
1549  m_step = step;
1550 
1551  //JM: actually necessary if you reserve a PME device!
1552  cudaCheck(cudaSetDevice(deviceID));
1553  const SimParameters& sim_params = *(Node::Object()->simParameters);
1554 
1555  // clear force array
1556  //fprintf(stderr, "Calling clear_device_array on d_force\n");
1557  clear_device_array<float3>(d_forces, num_used_grids * natoms, stream);
1558  // clear grid
1559  //fprintf(stderr, "Calling clear_device_array on d_grid\n");
1560  clear_device_array<float>(d_grids, num_used_grids * gridsize, stream);
1561  clear_device_array<float2>(d_trans, num_used_grids * transize, stream);
1562 
1563  // Clear energy and virial array if needed
1564  if (doEnergyVirial) {
1565  // clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
1566  clear_device_array<EnergyVirial>(d_energyVirials, num_used_grids * 1, stream);
1567  const bool updateSelfEnergy = (step == sim_params.firstTimestep) || (selfEnergy == 0);
1568  if (updateSelfEnergy && (sim_params.alchOn == false)) {
1569  clear_device_array<double>(d_selfEnergy, 1, stream);
1570  // calculate self energy term if not yet done
1572  kappa, stream);
1573  //fprintf(stderr, "selfEnergy = %12.8f\n", selfEnergy);
1574  }
1575  /* the self energy depends on the scaling factor, or lambda
1576  * the cases when self energy will be changed:
1577  * 1. If alchLambdaFreq > 0, we will have a linear scaling of lambda. Lambda is changed EVERY STEP!
1578  * 2. In most cases, users will not use alchLambdaFreq > 0, but simulations may enter another lambda-window by using TCL scripts.
1579  * in summary, the self energy will not be changed unless lambda is changed.
1580  * so calcSelfEnergyAlch() would compare lambda of current step with the one from last step.
1581  * only if lambda is changed, the calcSelfEnergyFEPKernel or calcSelfEnergyTIKernel will be executed again.
1582  */
1583  if (sim_params.alchOn) calcSelfEnergyAlch(m_step);
1584  }
1585 
1586 #if 0
1587 
1588  spread_charge(d_atoms, natoms, k1, k2, k3, k1, k2, k3,
1589  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1590  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1591  d_grid, order, stream);
1592 #else
1593  const int order3 = ((order*order*order-1)/WARPSIZE + 1)*WARPSIZE;
1594  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1596  d_atoms + iGrid * natoms, natoms, k1, k2, k3,
1597  float(k1), (float)k2, (float)k3, order3,
1598  k1, k2, k3,
1599  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1600  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1601  d_grids + iGrid * gridsize, order, stream);
1602  }
1603 
1604 #endif
1605  //cudaCheck(cudaStreamSynchronize(stream));
1606 
1607  // forward FFT
1608  //fprintf(stderr, "Calling cufftExecR2C\n");
1609  //cufftCheck(cufftExecR2C(forwardPlan, (cufftReal *)d_grid,
1610  // (cufftComplex *)d_tran));
1611 
1612  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1613  cufftCheck(cufftExecR2C(forwardPlans[iGrid],
1614  (cufftReal *)(d_grids + iGrid * gridsize),
1615  (cufftComplex *)(d_trans + iGrid * transize)));
1616  }
1617 
1618  //cudaCheck(cudaStreamSynchronize(stream));
1619 
1620  // reciprocal space calculation
1621  //fprintf(stderr, "Calling scalar_sum\n");
1622  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1623  scalar_sum(true /* Perm_cX_Y_Z */, k1, k2, k3, (k1/2 + 1), k2, k3,
1624  kappa, arx, ary, arz, brx, bry, brz, crx, cry, crz, volume,
1625  d_bm1, d_bm2, d_bm3, 0 /* jBlock */, 0 /* kBlock */,
1626  (bool) doEnergyVirial, &(d_energyVirials[iGrid].energy),
1627  d_energyVirials[iGrid].virial, d_trans + iGrid * transize, stream);
1628  }
1629  //scalar_sum(true /* Perm_cX_Y_Z */, k1, k2, k3, (k1/2 + 1), k2, k3,
1630  // kappa, arx, ary, arz, brx, bry, brz, crx, cry, crz, volume,
1631  // d_bm1, d_bm2, d_bm3, 0 /* jBlock */, 0 /* kBlock */,
1632  // (bool) doEnergyVirial, &(d_energyVirial->energy),
1633  // d_energyVirial->virial, d_tran, stream);
1634  //cudaCheck(cudaStreamSynchronize(stream));
1635 
1636  // backward FFT
1637  //fprintf(stderr, "Calling cufftExecC2R\n");
1638  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1639  cufftCheck(cufftExecC2R(backwardPlans[iGrid],
1640  (cufftComplex *)(d_trans + iGrid * transize),
1641  (cufftReal *)(d_grids + iGrid * gridsize)));
1642  }
1643 
1644  //cufftCheck(cufftExecC2R(backwardPlan, (cufftComplex *)d_tran,
1645  // (cufftReal *)d_grid));
1646  //cudaCheck(cudaStreamSynchronize(stream));
1647 
1648  // gather force from grid to atoms
1649  // missing cudaTextureObject_t below works for __CUDA_ARCH__ >= 350
1650  //fprintf(stderr, "Calling gather_force\n");
1651  for (unsigned int iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1653  &(d_atoms[iGrid * natoms]), natoms, k1, k2, k3, k1, k2, k3,
1654  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1655  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1656  d_grids + iGrid * gridsize, order, d_forces + iGrid * natoms,
1657 #ifdef NAMD_CUDA
1658  gridTexObjArrays[iGrid] /* cudaTextureObject_t */,
1659 #endif
1660  stream);
1661  }
1662 
1663  //gather_force(d_atoms, natoms, k1, k2, k3, k1, k2, k3,
1664  // k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1665  // true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1666  // d_grid, order, d_force, gridTexObj /* cudaTextureObject_t */,
1667  // stream);
1668  //cudaCheck(cudaStreamSynchronize(stream));
1669 
1670  // Copy energy and virial to host if needed
1671  if (doEnergyVirial) {
1672  //fprintf(stderr, "Calling copy_DtoH on d_energyVirial\n");
1673  copy_DtoH<EnergyVirial>(d_energyVirials, h_energyVirials,
1675  //cudaCheck(cudaEventRecord(copyEnergyVirialEvent, stream));
1676  //cudaCheck(cudaStreamSynchronize(stream));
1677  }
1678 
1679  // XXX debugging, quick test for borked forces
1680  //clear_device_array<float3>(d_force, natoms, stream);
1681  if (sim_params.alchOn) {
1682  scaleAndMergeForce(m_step);
1683  }
1684 }
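For context, the term compute_selfEnergy() evaluates on the device is the standard Ewald self-interaction correction, E_self = -(kappa / sqrt(pi)) * sum_i q_i^2 (up to NAMD's electrostatic unit conversion), which is later added to the reciprocal-space energy in finishReduction(). A host-side sketch of the same sum, assuming charges are packed in the w component of the float4 atom array:

    #include <cmath>

    double hostSelfEnergy(const float4 *atoms, int n, double kappa) {
      double q2 = 0.0;
      for (int i = 0; i < n; ++i)
        q2 += double(atoms[i].w) * double(atoms[i].w);   // sum of squared charges
      return -kappa / std::sqrt(M_PI) * q2;              // Ewald self-energy correction
    }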

◆ computeSharedMemoryPatchLevelGatherForce()

int CudaPmeOneDevice::computeSharedMemoryPatchLevelGatherForce ( const int  numThreads,
const int3  patchGridDim,
const int  order 
)

Definition at line 2021 of file CudaPmeSolverUtil.C.

References PatchLevelPmeData::kThetaPad, and order.

Referenced by checkPatchLevelDeviceCompatibility().

2022  {
2023 
2024  const int gridBytes = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
2025  const int thetaBytes = (numThreads + PatchLevelPmeData::kThetaPad) * order *
2026  2 /* theta and dtheta */ * sizeof(float);
2027 
2028  return gridBytes + thetaBytes;
2029 }

◆ computeSharedMemoryPatchLevelSpreadCharge()

int CudaPmeOneDevice::computeSharedMemoryPatchLevelSpreadCharge ( const int  numThreads,
const int3  patchGridDim,
const int  order 
)

Definition at line 2010 of file CudaPmeSolverUtil.C.

References PatchLevelPmeData::kDim, PatchLevelPmeData::kThetaPad, and order.

Referenced by checkPatchLevelDeviceCompatibility().

2011  {
2012 
2013  const int gridBytes = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
2014  const int thetaBytes = PatchLevelPmeData::kDim * (numThreads + PatchLevelPmeData::kThetaPad) *
2015  order * sizeof(float);
2016  const int indexBytes = numThreads * sizeof(char4);
2017 
2018  return gridBytes + thetaBytes + indexBytes;
2019 }

◆ finishReduction()

void CudaPmeOneDevice::finishReduction ( bool  doEnergyVirial)

Definition at line 1688 of file CudaPmeSolverUtil.C.

References SimParameters::alchFepOn, SimParameters::alchOn, SimParameters::alchThermIntOn, cudaCheck, deviceID, CudaPmeOneDevice::EnergyVirial::energy, h_energyVirials, SubmitReduction::item(), m_step, Node::Object(), REDUCTION_ELECT_ENERGY_SLOW, REDUCTION_ELECT_ENERGY_SLOW_F, REDUCTION_ELECT_ENERGY_SLOW_TI_1, REDUCTION_ELECT_ENERGY_SLOW_TI_2, selfEnergy, selfEnergy_FEP, selfEnergy_TI_1, selfEnergy_TI_2, Node::simParameters, stream, SubmitReduction::submit(), and CudaPmeOneDevice::EnergyVirial::virial.

1690  {
1691  cudaCheck(cudaStreamSynchronize(stream));
1692  SubmitReduction *reduction = getCurrentReduction();
1693  if(doEnergyVirial){
1694  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1695  PatchData* patchData = cpdata.ckLocalBranch();
1696  cudaCheck(cudaSetDevice(deviceID));
1697  double virial[9];
1698  double energy, energy_F, energy_TI_1, energy_TI_2;
1699  const SimParameters& sim_params = *(Node::Object()->simParameters);
1700  if (sim_params.alchOn) {
1701  if (sim_params.alchFepOn) {
1702  scaleAndComputeFEPEnergyVirials(h_energyVirials, m_step, energy, energy_F, virial);
1703  energy += selfEnergy;
1704  energy_F += selfEnergy_FEP;
1705  }
1706  if (sim_params.alchThermIntOn) {
1707  scaleAndComputeTIEnergyVirials(h_energyVirials, m_step, energy, energy_TI_1, energy_TI_2, virial);
1708  energy += selfEnergy;
1709  energy_TI_1 += selfEnergy_TI_1;
1710  energy_TI_2 += selfEnergy_TI_2;
1711  }
1712  } else {
1713  virial[0] = h_energyVirials[0].virial[0];
1714  virial[1] = h_energyVirials[0].virial[1];
1715  virial[2] = h_energyVirials[0].virial[2];
1716  virial[3] = h_energyVirials[0].virial[1];
1717  virial[4] = h_energyVirials[0].virial[3];
1718  virial[5] = h_energyVirials[0].virial[4];
1719  virial[6] = h_energyVirials[0].virial[2];
1720  virial[7] = h_energyVirials[0].virial[4];
1721  virial[8] = h_energyVirials[0].virial[5];
1722  energy = h_energyVirials[0].energy + selfEnergy;
1723  }
1724  #if 0
1725  fprintf(stderr, "PME ENERGY = %g %g\n", h_energyVirials[0].energy, selfEnergy );
1726  fprintf(stderr, "PME VIRIAL =\n"
1727  " %g %g %g\n %g %g %g\n %g %g %g\n",
1728  virial[0], virial[1], virial[2], virial[3], virial[4],
1729  virial[5], virial[6], virial[7], virial[8]);
1730  #endif
1731  reduction->item(REDUCTION_VIRIAL_SLOW_XX) += virial[0];
1732  reduction->item(REDUCTION_VIRIAL_SLOW_XY) += virial[1];
1733  reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += virial[2];
1734  reduction->item(REDUCTION_VIRIAL_SLOW_YX) += virial[3];
1735  reduction->item(REDUCTION_VIRIAL_SLOW_YY) += virial[4];
1736  reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += virial[5];
1737  reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += virial[6];
1738  reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += virial[7];
1739  reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += virial[8];
1740  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += energy;
1741  if (sim_params.alchFepOn) {
1742  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += energy_F;
1743  }
1744  if (sim_params.alchThermIntOn) {
1745  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_1) += energy_TI_1;
1746  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_2) += energy_TI_2;
1747  }
1748  }
1749  reduction->submit();
1750 }
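The expansion above implies that the packed 6-element virial in EnergyVirial is stored as (xx, xy, xz, yy, yz, zz) and mirrored into a symmetric 3x3 tensor; written out as a standalone helper (a sketch equivalent to the index mapping in the listing):

    void expandVirial(const double packed[6], double full[9]) {
      full[0] = packed[0]; full[1] = packed[1]; full[2] = packed[2];   // xx xy xz
      full[3] = packed[1]; full[4] = packed[3]; full[5] = packed[4];   // yx yy yz
      full[6] = packed[2]; full[7] = packed[4]; full[8] = packed[5];   // zx zy zz
    }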

◆ getShiftedGrid()

int CudaPmeOneDevice::getShiftedGrid ( const double  x,
const int  grid 
)

Definition at line 2004 of file CudaPmeSolverUtil.C.

Referenced by checkPatchLevelLatticeCompatibilityAndComputeOffsets().

2004  {
2005  double w = x + 0.5;
2006  double gw = w * grid;
2007  return floor(gw);
2008 }
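A worked example of the mapping from a scaled fractional coordinate (roughly in [-0.5, 0.5)) to a grid index:

    getShiftedGrid( 0.25, 64);   // w = 0.75, gw = 48.0  -> returns 48
    getShiftedGrid(-0.30, 64);   // w = 0.20, gw = 12.8  -> returns 12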

Member Data Documentation

◆ backwardPlans

cufftHandle* CudaPmeOneDevice::backwardPlans

Definition at line 238 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ currentLattice

Lattice CudaPmeOneDevice::currentLattice

◆ d_atoms

float4* CudaPmeOneDevice::d_atoms

Definition at line 219 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm1

float* CudaPmeOneDevice::d_bm1

Definition at line 241 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm2

float* CudaPmeOneDevice::d_bm2

Definition at line 242 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm3

float* CudaPmeOneDevice::d_bm3

Definition at line 243 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_energyVirials

EnergyVirial* CudaPmeOneDevice::d_energyVirials

Definition at line 251 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_forces

float3* CudaPmeOneDevice::d_forces

Definition at line 221 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_grids

float* CudaPmeOneDevice::d_grids

On-device grid of charge before the forward FFT (R->C), then grid of potential after the backward FFT (C->R).

Definition at line 227 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_partition

int* CudaPmeOneDevice::d_partition

Definition at line 220 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_scaling_factors

float* CudaPmeOneDevice::d_scaling_factors

Definition at line 222 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy

double* CudaPmeOneDevice::d_selfEnergy

Definition at line 256 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_FEP

double* CudaPmeOneDevice::d_selfEnergy_FEP

Definition at line 257 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_TI_1

double* CudaPmeOneDevice::d_selfEnergy_TI_1

Definition at line 258 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_TI_2

double* CudaPmeOneDevice::d_selfEnergy_TI_2

Definition at line 259 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_trans

float2* CudaPmeOneDevice::d_trans

On-device complex grid holding the FFT transform of the charge grid.

Definition at line 231 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ deviceID

int CudaPmeOneDevice::deviceID

◆ deviceIndex

int CudaPmeOneDevice::deviceIndex

Definition at line 213 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice().

◆ force_scaling_alch_first_time

bool CudaPmeOneDevice::force_scaling_alch_first_time

Definition at line 255 of file CudaPmeSolverUtil.h.

◆ forwardPlans

cufftHandle* CudaPmeOneDevice::forwardPlans

Definition at line 237 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ gridsize

size_t CudaPmeOneDevice::gridsize

Definition at line 233 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().

◆ gridTexObjArrays

cudaTextureObject_t* CudaPmeOneDevice::gridTexObjArrays

Definition at line 224 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ h_energyVirials

EnergyVirial* CudaPmeOneDevice::h_energyVirials

Definition at line 252 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), finishReduction(), and ~CudaPmeOneDevice().

◆ kappa

double CudaPmeOneDevice::kappa

Definition at line 245 of file CudaPmeSolverUtil.h.

Referenced by compute().

◆ m_step

int CudaPmeOneDevice::m_step

Definition at line 264 of file CudaPmeSolverUtil.h.

Referenced by compute(), and finishReduction().

◆ natoms

int CudaPmeOneDevice::natoms

Definition at line 216 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().

◆ num_used_grids

size_t CudaPmeOneDevice::num_used_grids

Definition at line 217 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ patchLevelPmeData

PatchLevelPmeData CudaPmeOneDevice::patchLevelPmeData

◆ pmeGrid

PmeGrid CudaPmeOneDevice::pmeGrid

◆ self_energy_alch_first_time

bool CudaPmeOneDevice::self_energy_alch_first_time

Definition at line 254 of file CudaPmeSolverUtil.h.

◆ selfEnergy

double CudaPmeOneDevice::selfEnergy

Definition at line 260 of file CudaPmeSolverUtil.h.

Referenced by compute(), and finishReduction().

◆ selfEnergy_FEP

double CudaPmeOneDevice::selfEnergy_FEP

Definition at line 261 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ selfEnergy_TI_1

double CudaPmeOneDevice::selfEnergy_TI_1

Definition at line 262 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ selfEnergy_TI_2

double CudaPmeOneDevice::selfEnergy_TI_2

Definition at line 263 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ stream

cudaStream_t CudaPmeOneDevice::stream

Definition at line 214 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), finishReduction(), and ~CudaPmeOneDevice().

◆ transize

size_t CudaPmeOneDevice::transize

Definition at line 234 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().


The documentation for this class was generated from the following files:

CudaPmeSolverUtil.h
CudaPmeSolverUtil.C