CudaGlobalMasterClientKernel.cu
#include "CudaGlobalMasterClientKernel.h"
#ifdef NAMD_HIP
#include <hipcub/hipcub.hpp>
#define cub hipcub
#endif // NAMD_HIP

#ifdef NAMD_CUDA
#include <cuda.h>
#if __CUDACC_VER_MAJOR__ >= 11
#include <cub/cub.cuh>
#else
#include <namd_cub/cub.cuh>
#endif
#endif // NAMD_CUDA

#if defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)

// Number of threads per block for the per-atom reduction kernel below.
#define ATOM_BLOCKS 128

// Accumulates this client's contribution to the virial tensor and the net
// external force from the applied forces. Positions and forces use an SOA
// layout: x components at [i], y at [i + stride], z at [i + 2 * stride].
__global__ void clientVirialAndExtForceKernel(double *d_pos,
                                              double *d_applied_force,
                                              size_t numAtoms, size_t stride,
                                              cudaTensor *d_virial,
                                              Vector *d_extForce) {
  const int i = threadIdx.x + blockIdx.x * blockDim.x;
  cudaTensor virial = {0};
  double3 f = {0, 0, 0};
  double3 pos = {0, 0, 0};
  if (i < numAtoms) {
    f.x = d_applied_force[i];
    f.y = d_applied_force[i + stride];
    f.z = d_applied_force[i + 2 * stride];
    pos.x = d_pos[i];
    pos.y = d_pos[i + stride];
    pos.z = d_pos[i + 2 * stride];
    // Per-atom virial contribution: outer product of force and position.
    virial.xx = f.x * pos.x;
    virial.xy = f.x * pos.y;
    virial.xz = f.x * pos.z;
    virial.yx = f.y * pos.x;
    virial.yy = f.y * pos.y;
    virial.yz = f.y * pos.z;
    virial.zx = f.z * pos.x;
    virial.zy = f.z * pos.y;
    virial.zz = f.z * pos.z;
  }
  __syncthreads();
  // Reduce each tensor component (and the force) across the block. All
  // reductions share one TempStorage buffer, so consecutive uses are
  // separated by __syncthreads().
  typedef cub::BlockReduce<BigReal, ATOM_BLOCKS> BlockReduce;
  __shared__ typename BlockReduce::TempStorage temp_storage;
  virial.xx = BlockReduce(temp_storage).Sum(virial.xx);
  __syncthreads();
  virial.xy = BlockReduce(temp_storage).Sum(virial.xy);
  __syncthreads();
  virial.xz = BlockReduce(temp_storage).Sum(virial.xz);
  __syncthreads();
  virial.yx = BlockReduce(temp_storage).Sum(virial.yx);
  __syncthreads();
  virial.yy = BlockReduce(temp_storage).Sum(virial.yy);
  __syncthreads();
  virial.yz = BlockReduce(temp_storage).Sum(virial.yz);
  __syncthreads();
  virial.zx = BlockReduce(temp_storage).Sum(virial.zx);
  __syncthreads();
  virial.zy = BlockReduce(temp_storage).Sum(virial.zy);
  __syncthreads();
  virial.zz = BlockReduce(temp_storage).Sum(virial.zz);
  __syncthreads();
  f.x = BlockReduce(temp_storage).Sum(f.x);
  __syncthreads();
  f.y = BlockReduce(temp_storage).Sum(f.y);
  __syncthreads();
  f.z = BlockReduce(temp_storage).Sum(f.z);
  __syncthreads();
  // Thread 0 now holds the block-wide sums; fold them into the global
  // accumulators atomically, since blocks run concurrently.
  if (threadIdx.x == 0) {
    atomicAdd(&(d_virial->xx), virial.xx);
    atomicAdd(&(d_virial->xy), virial.xy);
    atomicAdd(&(d_virial->xz), virial.xz);
    atomicAdd(&(d_virial->yx), virial.yx);
    atomicAdd(&(d_virial->yy), virial.yy);
    atomicAdd(&(d_virial->yz), virial.yz);
    atomicAdd(&(d_virial->zx), virial.zx);
    atomicAdd(&(d_virial->zy), virial.zy);
    atomicAdd(&(d_virial->zz), virial.zz);
    atomicAdd(&(d_extForce->x), f.x);
    atomicAdd(&(d_extForce->y), f.y);
    atomicAdd(&(d_extForce->z), f.z);
  }
}

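// --- Illustrative reference (a minimal sketch, not part of NAMD) ---
// A host-side restatement of what the kernel above accumulates, assuming
// the same SOA layout: the virial is the sum over atoms of the outer
// product of applied force and position, W_ab = sum_i f_i[a] * r_i[b],
// and the external force is the plain sum of the applied forces. The
// function name and signature here are hypothetical, for exposition only.
#if 0
static void referenceVirialAndExtForce(const double *pos,
                                       const double *applied_force,
                                       size_t numAtoms, size_t stride,
                                       cudaTensor &virial, Vector &extForce) {
  for (size_t i = 0; i < numAtoms; ++i) {
    const double fx = applied_force[i];
    const double fy = applied_force[i + stride];
    const double fz = applied_force[i + 2 * stride];
    const double px = pos[i];
    const double py = pos[i + stride];
    const double pz = pos[i + 2 * stride];
    virial.xx += fx * px; virial.xy += fx * py; virial.xz += fx * pz;
    virial.yx += fy * px; virial.yy += fy * py; virial.yz += fy * pz;
    virial.zx += fz * px; virial.zy += fz * py; virial.zz += fz * pz;
    extForce.x += fx; extForce.y += fy; extForce.z += fz;
  }
}
#endif
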
// Host-side launcher. The SOA stride equals numAtoms; the kernel adds its
// block sums into d_virial and d_extForce with atomicAdd, so the caller
// decides when to zero the accumulators.
void clientVirialAndExtForce(double *d_pos, double *d_applied_force,
                             size_t numAtoms, cudaTensor *d_virial,
                             Vector *d_extForce, cudaStream_t stream) {
  const int grid = (numAtoms + ATOM_BLOCKS - 1) / ATOM_BLOCKS;
  clientVirialAndExtForceKernel<<<grid, ATOM_BLOCKS, 0, stream>>>(
      d_pos, d_applied_force, numAtoms, numAtoms, d_virial, d_extForce);
}

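// --- Usage sketch (a minimal example under assumptions, not part of NAMD) ---
// One way the launcher might be driven in isolation. The accumulators are
// zeroed first because the kernel adds into them atomically. In NAMD proper,
// buffer ownership and zeroing policy belong to the CudaGlobalMaster client
// code, so treat this only as the shape of the call; exampleLaunch and its
// parameters are hypothetical.
#if 0
static void exampleLaunch(double *d_pos, double *d_applied_force,
                          size_t numAtoms, cudaStream_t stream) {
  cudaTensor *d_virial;
  Vector *d_extForce;
  cudaMalloc((void **)&d_virial, sizeof(cudaTensor));
  cudaMalloc((void **)&d_extForce, sizeof(Vector));
  cudaMemsetAsync(d_virial, 0, sizeof(cudaTensor), stream);
  cudaMemsetAsync(d_extForce, 0, sizeof(Vector), stream);
  clientVirialAndExtForce(d_pos, d_applied_force, numAtoms, d_virial,
                          d_extForce, stream);
  cudaStreamSynchronize(stream);
  cudaFree(d_virial);
  cudaFree(d_extForce);
}
#endif
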
#endif // defined(NAMD_CUDA) && defined(NODEGROUP_FORCE_REGISTER)