1 #include "CudaGlobalMasterClientKernel.h"
3 #include <hipcub/hipcub.hpp>
9 #if __CUDACC_VER_MAJOR__ >= 11
10 #include <cub/cub.cuh>
12 #include <namd_cub/cub.cuh>
16 #if defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)
18 #define ATOM_BLOCKS 128
/**
 * Accumulate a GlobalMaster client's contribution to the virial tensor and
 * the net external force from per-atom applied forces.
 *
 * Data layout (SoA): d_pos and d_applied_force hold x at [i], y at
 * [i + stride], z at [i + 2*stride] for atom i.
 * Launch: 1-D grid of ATOM_BLOCKS-thread blocks covering numAtoms
 * (the host wrapper below uses ceil-div).
 * Output: block partial sums are atomically folded into *d_virial and
 * *d_extForce; the caller is responsible for zeroing those accumulators
 * before the launch. Requires SM60+ for atomicAdd(double*).
 *
 * NOTE(review): the tail of the original signature was not visible; the
 * d_virial/d_extForce parameter types are taken from the host call site —
 * confirm against the declaration in the header.
 */
__global__ void clientVirialAndExtForceKernel(double *d_pos,
                                              double *d_applied_force,
                                              size_t numAtoms, size_t stride,
                                              cudaTensor *d_virial,
                                              Vector *d_extForce) {
  const int i = threadIdx.x + blockIdx.x * blockDim.x;
  cudaTensor virial = {0};
  double3 f = {0, 0, 0};
  double3 pos = {0, 0, 0};
  // Bounds guard: grid-tail threads contribute zeros but must still reach
  // the block-wide reductions below (no early return before a barrier).
  if (static_cast<size_t>(i) < numAtoms) {
    f.x = d_applied_force[i];
    f.y = d_applied_force[i + stride];
    f.z = d_applied_force[i + 2 * stride];
    pos.x = d_pos[i];
    pos.y = d_pos[i + stride];
    pos.z = d_pos[i + 2 * stride];
    // Per-atom virial contribution is the outer product f ⊗ r.
    virial.xx = f.x * pos.x;
    virial.xy = f.x * pos.y;
    virial.xz = f.x * pos.z;
    virial.yx = f.y * pos.x;
    virial.yy = f.y * pos.y;
    virial.yz = f.y * pos.z;
    virial.zx = f.z * pos.x;
    virial.zy = f.z * pos.y;
    virial.zz = f.z * pos.z;
  }
  typedef cub::BlockReduce<BigReal, ATOM_BLOCKS> BlockReduce;
  __shared__ typename BlockReduce::TempStorage temp_storage;
  // temp_storage is reused by every reduction below; CUB requires a
  // __syncthreads() between successive uses of the same temp storage,
  // otherwise a fast thread can overwrite it while others still read.
  virial.xx = BlockReduce(temp_storage).Sum(virial.xx);
  __syncthreads();
  virial.xy = BlockReduce(temp_storage).Sum(virial.xy);
  __syncthreads();
  virial.xz = BlockReduce(temp_storage).Sum(virial.xz);
  __syncthreads();
  virial.yx = BlockReduce(temp_storage).Sum(virial.yx);
  __syncthreads();
  virial.yy = BlockReduce(temp_storage).Sum(virial.yy);
  __syncthreads();
  virial.yz = BlockReduce(temp_storage).Sum(virial.yz);
  __syncthreads();
  virial.zx = BlockReduce(temp_storage).Sum(virial.zx);
  __syncthreads();
  virial.zy = BlockReduce(temp_storage).Sum(virial.zy);
  __syncthreads();
  virial.zz = BlockReduce(temp_storage).Sum(virial.zz);
  __syncthreads();
  f.x = BlockReduce(temp_storage).Sum(f.x);
  __syncthreads();
  f.y = BlockReduce(temp_storage).Sum(f.y);
  __syncthreads();
  f.z = BlockReduce(temp_storage).Sum(f.z);
  // After Sum(), only thread 0 holds the block total; one atomic per block
  // per component folds it into the global accumulators.
  if (threadIdx.x == 0) {
    atomicAdd(&(d_virial->xx), virial.xx);
    atomicAdd(&(d_virial->xy), virial.xy);
    atomicAdd(&(d_virial->xz), virial.xz);
    atomicAdd(&(d_virial->yx), virial.yx);
    atomicAdd(&(d_virial->yy), virial.yy);
    atomicAdd(&(d_virial->yz), virial.yz);
    atomicAdd(&(d_virial->zx), virial.zx);
    atomicAdd(&(d_virial->zy), virial.zy);
    atomicAdd(&(d_virial->zz), virial.zz);
    atomicAdd(&(d_extForce->x), f.x);
    atomicAdd(&(d_extForce->y), f.y);
    atomicAdd(&(d_extForce->z), f.z);
  }
}
/**
 * Host-side launcher for clientVirialAndExtForceKernel.
 *
 * Accumulates the client virial and net external force into *d_virial and
 * *d_extForce (device pointers; caller must zero them beforehand).
 * All pointers are device memory; the launch is asynchronous on `stream`
 * and errors surface at the caller's next CUDA error check.
 * The SoA stride equals numAtoms (components packed x-block, y-block,
 * z-block).
 */
void clientVirialAndExtForce(double *d_pos, double *d_applied_force,
                             size_t numAtoms, cudaTensor *d_virial,
                             Vector *d_extForce, cudaStream_t stream) {
  // A zero-block launch is an invalid configuration
  // (cudaErrorInvalidConfiguration), so bail out early on empty input.
  if (numAtoms == 0)
    return;
  const int grid = (numAtoms + ATOM_BLOCKS - 1) / ATOM_BLOCKS; // ceil-div
  clientVirialAndExtForceKernel<<<grid, ATOM_BLOCKS, 0, stream>>>(
      d_pos, d_applied_force, numAtoms, numAtoms, d_virial, d_extForce);
}
97 #endif // defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)