NAMD
|
Go to the source code of this file.
Macros | |
#define | warps_per_block 8 |
#define | atoms_per_warp 4 |
#define | atoms_per_block (atoms_per_warp * warps_per_block) |
#define | BSPLINE_DEFS |
#define | WARP (threadIdx.x>>5) |
#define | THREAD (threadIdx.x&31) |
#define | FX bspline_factors[threadIdx.x>>5][0] |
#define | FY bspline_factors[threadIdx.x>>5][1] |
#define | FZ bspline_factors[threadIdx.x>>5][2] |
#define | DFX bspline_dfactors[threadIdx.x>>5][0] |
#define | DFY bspline_dfactors[threadIdx.x>>5][1] |
#define | DFZ bspline_dfactors[threadIdx.x>>5][2] |
#define | FX2 bspline_2factors[threadIdx.x>>5][0] |
#define | FY2 bspline_2factors[threadIdx.x>>5][1] |
#define | FZ2 bspline_2factors[threadIdx.x>>5][2] |
#define | ATOM atoms[threadIdx.x>>5].a2d[i_atom] |
#define | AX atoms[threadIdx.x>>5].a2d[i_atom][0] |
#define | AY atoms[threadIdx.x>>5].a2d[i_atom][1] |
#define | AZ atoms[threadIdx.x>>5].a2d[i_atom][2] |
#define | AQ atoms[threadIdx.x>>5].a2d[i_atom][3] |
#define | AI atoms[threadIdx.x>>5].a2d[i_atom][4] |
#define | AJ atoms[threadIdx.x>>5].a2d[i_atom][5] |
#define | AK atoms[threadIdx.x>>5].a2d[i_atom][6] |
#define | CALL(ORDER) |
#define | CALL(ORDER) |
#define | cudaFuncSetSharedMemConfig(X, Y) dummy() |
#define | CALL(ORDER) |
Functions | |
void | NAMD_die (const char *) |
void | cuda_init_bspline_coeffs (float **c, float **dc, int order) |
template<int order> | |
__global__ void | cuda_pme_charges_batched_dev (const float *__restrict__ coeffs, float *const *__restrict__ q_arr, int *__restrict__ f_arr, int *__restrict__ fz_arr, float **__restrict__ a_data_ptr, int *n_atoms_ptr, int *K1_ptr, int *K2_ptr, int *K3_ptr) |
template<int order> | |
__global__ void | cuda_pme_charges_dev (const float *__restrict__ coeffs, float *const *__restrict__ q_arr, int *__restrict__ f_arr, int *__restrict__ fz_arr, float *__restrict__ a_data, int n_atoms, int K1, int K2, int K3) |
template<int order> | |
__global__ void | cuda_pme_forces_dev (const float *__restrict__ coeffs, float *const *__restrict__ q_arr, float *const *__restrict__ afn_g, int K1, int K2, int K3) |
if (!nblocks) return | |
CALL (4) | |
else | CALL (6) |
else | CALL (8) |
else | NAMD_die ("unsupported PMEInterpOrder") |
if ((!nblocksX)||(!numPatches)) return | |
void | dummy () |
if ((!nblocks)||(!dimy)) return | |
Variables | |
static const float | order_4_coeffs [4][4] |
static const float | order_6_coeffs [6][6] |
static const float | order_8_coeffs [8][8] |
CUDA_PME_CHARGES_PROTOTYPE int | nblocks = (n_atoms + atoms_per_block - 1) / atoms_per_block |
CUDA_PME_CHARGES_BATCHED_PROTOTYPE | |
dim3 | gridSize |
gridSize | x = nblocksX |
gridSize | y = numPatches |
gridSize | z = 1 |
CUDA_PME_FORCES_PROTOTYPE | |
#define AI atoms[threadIdx.x>>5].a2d[i_atom][4] |
Definition at line 123 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define AJ atoms[threadIdx.x>>5].a2d[i_atom][5] |
Definition at line 124 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define AK atoms[threadIdx.x>>5].a2d[i_atom][6] |
Definition at line 125 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define AQ atoms[threadIdx.x>>5].a2d[i_atom][3] |
Definition at line 122 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define ATOM atoms[threadIdx.x>>5].a2d[i_atom] |
Definition at line 118 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define atoms_per_block (atoms_per_warp * warps_per_block) |
Definition at line 26 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define atoms_per_warp 4 |
Definition at line 25 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define AX atoms[threadIdx.x>>5].a2d[i_atom][0] |
Definition at line 119 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_dev().
#define AY atoms[threadIdx.x>>5].a2d[i_atom][1] |
Definition at line 120 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_dev().
#define AZ atoms[threadIdx.x>>5].a2d[i_atom][2] |
Definition at line 121 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_dev().
#define BSPLINE_DEFS |
Definition at line 90 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define CALL | ( | ORDER | ) |
Definition at line 453 of file ComputePmeCUDAKernel.cu.
#define CALL | ( | ORDER | ) |
Definition at line 453 of file ComputePmeCUDAKernel.cu.
#define CALL | ( | ORDER | ) |
Definition at line 453 of file ComputePmeCUDAKernel.cu.
Definition at line 445 of file ComputePmeCUDAKernel.cu.
#define DFX bspline_dfactors[threadIdx.x>>5][0] |
Definition at line 110 of file ComputePmeCUDAKernel.cu.
#define DFY bspline_dfactors[threadIdx.x>>5][1] |
Definition at line 111 of file ComputePmeCUDAKernel.cu.
#define DFZ bspline_dfactors[threadIdx.x>>5][2] |
Definition at line 112 of file ComputePmeCUDAKernel.cu.
#define FX bspline_factors[threadIdx.x>>5][0] |
Definition at line 107 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), and cuda_pme_charges_dev().
#define FX2 bspline_2factors[threadIdx.x>>5][0] |
Definition at line 114 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_forces_dev().
#define FY bspline_factors[threadIdx.x>>5][1] |
Definition at line 108 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), and cuda_pme_charges_dev().
#define FY2 bspline_2factors[threadIdx.x>>5][1] |
Definition at line 115 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_forces_dev().
#define FZ bspline_factors[threadIdx.x>>5][2] |
Definition at line 109 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), and cuda_pme_charges_dev().
#define FZ2 bspline_2factors[threadIdx.x>>5][2] |
Definition at line 116 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_forces_dev().
#define THREAD (threadIdx.x&31) |
Definition at line 106 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define WARP (threadIdx.x>>5) |
Definition at line 105 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
#define warps_per_block 8 |
Definition at line 24 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_pme_charges_batched_dev(), cuda_pme_charges_dev(), and cuda_pme_forces_dev().
CALL | ( | 4 | ) |
else CALL | ( | 6 | ) |
else CALL | ( | 8 | ) |
void cuda_init_bspline_coeffs | ( | float ** | c, |
float ** | dc, | ||
int | order | ||
) |
Definition at line 47 of file ComputePmeCUDAKernel.cu.
References NAMD_die(), order, order_4_coeffs, order_6_coeffs, and order_8_coeffs.
Referenced by ComputePmeMgr::initialize_computes().
__global__ void cuda_pme_charges_batched_dev | ( | const float *__restrict__ | coeffs, |
float *const *__restrict__ | q_arr, | ||
int *__restrict__ | f_arr, | ||
int *__restrict__ | fz_arr, | ||
float **__restrict__ | a_data_ptr, | ||
int * | n_atoms_ptr, | ||
int * | K1_ptr, | ||
int * | K2_ptr, | ||
int * | K3_ptr | ||
) |
Definition at line 130 of file ComputePmeCUDAKernel.cu.
References AI, AJ, AK, AQ, ATOM, atoms, atoms_per_block, atoms_per_warp, BSPLINE_DEFS, FX, FY, FZ, order, THREAD, WARP, WARP_FULL_MASK, WARP_SYNC, warps_per_block, and WARPSIZE.
__global__ void cuda_pme_charges_dev | ( | const float *__restrict__ | coeffs, |
float *const *__restrict__ | q_arr, | ||
int *__restrict__ | f_arr, | ||
int *__restrict__ | fz_arr, | ||
float *__restrict__ | a_data, | ||
int | n_atoms, | ||
int | K1, | ||
int | K2, | ||
int | K3 | ||
) |
Definition at line 205 of file ComputePmeCUDAKernel.cu.
References AI, AJ, AK, AQ, ATOM, atoms, atoms_per_block, atoms_per_warp, AX, AY, AZ, BSPLINE_DEFS, FX, FY, FZ, order, THREAD, WARP, WARP_FULL_MASK, WARP_SYNC, warps_per_block, and WARPSIZE.
__global__ void cuda_pme_forces_dev | ( | const float *__restrict__ | coeffs, |
float *const *__restrict__ | q_arr, | ||
float *const *__restrict__ | afn_g, | ||
int | K1, | ||
int | K2, | ||
int | K3 | ||
) |
Definition at line 296 of file ComputePmeCUDAKernel.cu.
References AI, AJ, AK, AQ, ATOM, atoms, atoms_per_block, atoms_per_warp, BSPLINE_DEFS, FX2, FY2, FZ2, order, THREAD, WARP, WARP_FULL_MASK, WARP_SYNC, warps_per_block, WARPSIZE, float2::x, and float2::y.
void dummy | ( | ) |
Definition at line 444 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_affinity_initialize(), CollectionMaster::enqueueForces(), CollectionMaster::enqueueVelocities(), and DeviceCUDA::initialize().
if | ( | ! | nblocks | ) |
if | ( | (!nblocksX)||(!numPatches) | ) |
if | ( | (!nblocks)||(!dimy) | ) |
else NAMD_die | ( | "unsupported PMEInterpOrder" | ) |
CUDA_PME_CHARGES_BATCHED_PROTOTYPE |
Definition at line 425 of file ComputePmeCUDAKernel.cu.
CUDA_PME_FORCES_PROTOTYPE |
Definition at line 449 of file ComputePmeCUDAKernel.cu.
dim3 gridSize |
Definition at line 428 of file ComputePmeCUDAKernel.cu.
CUDA_PME_CHARGES_PROTOTYPE int nblocks = (n_atoms + atoms_per_block - 1) / atoms_per_block |
Definition at line 412 of file ComputePmeCUDAKernel.cu.
|
static |
Definition at line 29 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_init_bspline_coeffs().
|
static |
Definition at line 33 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_init_bspline_coeffs().
|
static |
Definition at line 39 of file ComputePmeCUDAKernel.cu.
Referenced by cuda_init_bspline_coeffs().
gridSize x = nblocksX |
Definition at line 429 of file ComputePmeCUDAKernel.cu.
Referenced by AVector::AVector(), buildSortKeys(), calc_theta_dtheta(), PmeRealSpaceCompute::calcGridCoord(), ComputeQMMgr::calcMOPAC(), ComputeQMMgr::calcORCA(), ComputeQMMgr::calcUSR(), eABF1D::calpmf(), GridforceFullSubGrid::compute_b(), compute_b_spline(), GridforceFullBaseGrid::compute_VdV(), BondElem::computeForce(), GromacsPairElem::computeForce(), ComputeSphericalBC::doForce(), ComputeCylindricalBC::doForce(), HomePatch::doGroupSizeCheck(), ComputeFmmSerial::doWork(), ComputeMsmSerial::doWork(), ComputeFullDirect::doWork(), ComputeExt::doWork(), ComputeMsm::doWork(), ComputeGBISser::doWork(), ComputeGlobal::doWork(), ComputeEwald::doWork(), ComputeQM::doWork(), dumpbench(), PmeAtomFiler::fileAtoms(), gather_force(), GBIS_P2_Kernel(), GBIS_P3_Kernel(), PmePencilXY::initBlockSizes(), PmePencilX::initBlockSizes(), ComputePmeMgr::initialize(), CudaPmePencilXY::initializeDevice(), CudaPmePencilX::initializeDevice(), minHeap::insert(), maxHeap::insert(), MatrixFitRMS(), Controller::minimize(), Node::Node(), obj_transabout(), obj_transvec(), obj_transvecinv(), pe_sortop_coord_x::operator()(), ComputeQM::processFullQM(), ComputeQMMgr::procQMRes(), ComputePmeMgr::procUntrans(), HashPool< T >::push_back(), Parameters::read_energy_type_cubspline(), eABF1D::readfile(), recursive_bisect_coord(), ComputePmeMgr::recvArrays(), ComputeExtMgr::recvCoord(), ComputePmeCUDAMgr::recvPencils(), ComputeGlobal::recvResults(), sampleTableTex(), Matrix4::scale(), scale_coordinates(), ComputeNonbondedUtil::select(), ComputePmeMgr::sendTransSubset(), ComputePmeMgr::sendUntransSubset(), AVector::Set(), PDB::set_all_positions(), settle1(), spread_charge_kernel(), Tcl_loadCoords(), ProxyCombinedResultMsg::toRaw(), Matrix4Symmetry::translate(), Matrix4TMD::translate(), Matrix4::translate(), ParallelIOMgr::updateMolInfo(), void(), writeComplexToDisk(), writeHostComplexToDisk(), and writeResult().
gridSize y = numPatches |
Definition at line 430 of file ComputePmeCUDAKernel.cu.
Referenced by AVector::AVector(), buildSortKeys(), calc_theta_dtheta(), PmeRealSpaceCompute::calcGridCoord(), ComputeQMMgr::calcMOPAC(), ComputeQMMgr::calcORCA(), ComputeQMMgr::calcUSR(), eABF1D::calpmf(), GridforceFullSubGrid::compute_b(), compute_b_spline(), GridforceFullBaseGrid::compute_VdV(), HomePatch::doGroupSizeCheck(), dumpbench(), PmeAtomFiler::fileAtoms(), gather_force(), GBIS_P2_Kernel(), GBIS_P3_Kernel(), PmePencilY::initBlockSizes(), ComputePmeCUDADevice::initialize(), ComputePmeMgr::initialize(), CudaPmePencilY::initializeDevice(), ComputePmeCUDADevice::initializePatches(), mollify(), Node::Node(), obj_transabout(), obj_transvec(), obj_transvecinv(), pe_sortop_coord_y::operator()(), ComputeQMMgr::procQMRes(), eABF1D::readfile(), recursive_bisect_coord(), ComputePmeMgr::recvArrays(), ComputePmeCUDADevice::recvAtoms(), ComputePmeCUDADevice::recvAtomsFromNeighbor(), ComputeExtMgr::recvCoord(), ComputePmeCUDADevice::recvForcesFromNeighbor(), ComputePmeCUDAMgr::recvPencils(), Matrix4::scale(), scale_coordinates(), ComputePmeCUDADevice::sendAtomsToNeighbors(), ComputePmeCUDADevice::sendForcesToNeighbors(), AVector::Set(), PDB::set_all_positions(), spread_charge_kernel(), Tcl_loadCoords(), ProxyCombinedResultMsg::toRaw(), Matrix4Symmetry::translate(), Matrix4TMD::translate(), Matrix4::translate(), ParallelIOMgr::updateMolInfo(), void(), writeComplexToDisk(), writeHostComplexToDisk(), and writeResult().
gridSize z = 1 |
Definition at line 431 of file ComputePmeCUDAKernel.cu.
Referenced by AVector::AVector(), calc_theta_dtheta(), PmeRealSpaceCompute::calcGridCoord(), ComputeQMMgr::calcMOPAC(), ComputeQMMgr::calcORCA(), ComputeQMMgr::calcUSR(), GridforceFullSubGrid::compute_b(), compute_b_spline(), GridforceFullBaseGrid::compute_VdV(), HomePatch::doGroupSizeCheck(), dumpbench(), PmeAtomFiler::fileAtoms(), gather_force(), GBIS_P2_Kernel(), GBIS_P3_Kernel(), Molecule::get_go_force2(), Molecule::get_gro_force2(), HomePatch::hardWallDrude(), PmePencilZ::initBlockSizes(), ComputePmeCUDADevice::initialize(), ComputePmeMgr::initialize(), CudaPmePencilZ::initializeDevice(), ComputePmeCUDADevice::initializePatches(), Node::Node(), obj_transabout(), obj_transvec(), obj_transvecinv(), ComputeQMMgr::procQMRes(), HomePatch::rattle1old(), ComputePmeMgr::recvArrays(), ComputePmeCUDADevice::recvAtoms(), ComputePmeCUDADevice::recvAtomsFromNeighbor(), ComputeExtMgr::recvCoord(), ComputePmeCUDADevice::recvForcesFromNeighbor(), ComputePmeCUDAMgr::recvPencils(), Matrix4::scale(), scale_coordinates(), ComputePmeCUDADevice::sendAtomsToNeighbors(), ComputePmeCUDADevice::sendForcesToNeighbors(), AVector::Set(), PDB::set_all_positions(), spread_charge_kernel(), Sequencer::submitHalfstep(), Sequencer::submitReductions(), Random::sum_of_squared_gaussians(), Tcl_loadCoords(), ProxyCombinedResultMsg::toRaw(), Matrix4Symmetry::translate(), Matrix4TMD::translate(), Matrix4::translate(), ParallelIOMgr::updateMolInfo(), void(), and writeResult().