#ifndef COM_CUDA_KERNEL_H
#define COM_CUDA_KERNEL_H

#ifdef NAMD_CUDA
#if __CUDACC_VER_MAJOR__ >= 11
#include <cub/cub.cuh>
#else
#include <namd_cub/cub.cuh>
#endif
#else // NAMD_HIP
#include <hip/hip_runtime.h>
#include <hipcub/hipcub.hpp>
#define cub hipcub // map cub:: onto hipCUB for HIP builds
#endif

// assumed header name; declares NAMD_HOST_DEVICE Position reverse_transform(Position data, const Transform &t) const
#include "Lattice.h"

#ifdef NODEGROUP_FORCE_REGISTER

/*! Compute the center of mass (COM) of a single group of atoms. */
template <int T_BLOCK_SIZE>
__global__ static void computeCOMKernel(
  const int numAtoms,                  // assumed parameter: number of atoms in the group
  const double inv_group_mass,         // 1 / (total mass of the group)
  const Lattice lat,                   // assumed parameter: lattice used to unwrap coordinates
  const float*  __restrict mass,
  const double* __restrict pos_x,
  const double* __restrict pos_y,
  const double* __restrict pos_z,
  const char3*  __restrict transform,  // periodic-image shifts of each atom
  const int*    __restrict atomsSOAIndex, // SOA index of each atom in the group
  double3*      __restrict d_curCM,    // device scratch: accumulated mass-weighted sum
  double3*      __restrict curCM,      // output: current center of mass of the group
  unsigned int* __restrict tbcatomic)  // thread-block counter used to detect the last block
{
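  /* Three phases (descriptive summary of the code below):
   *   1. each thread accumulates the mass-weighted position of one atom,
   *   2. a block-wide cub reduction sums the partial COM within each block,
   *   3. thread 0 of each block adds its partial sum to global scratch memory, and the
   *      last block to finish scales the total by inv_group_mass to produce the COM. */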
  int tid = threadIdx.x + blockIdx.x * blockDim.x;
  int totaltb = gridDim.x;
  bool isLastBlockDone = false;

  double3 cm = {0, 0, 0};
  if (tid < numAtoms) {
    int SOAindex = atomsSOAIndex[tid];

    double m = mass[SOAindex]; // cast the float mass to double before accumulating
    double3 pos;
    pos.x = pos_x[SOAindex];
    pos.y = pos_y[SOAindex];
    pos.z = pos_z[SOAindex];

    // unwrap the coordinate across periodic boundaries before computing the COM
    char3 t = transform[SOAindex];
    pos = lat.reverse_transform(pos, t);

    // mass-weighted contribution of this atom
    cm.x = pos.x * m;
    cm.y = pos.y * m;
    cm.z = pos.z * m;
  }
  // block-wide reduction of the partial sums; the shared temp_storage is reused
  // for each component, so the block must synchronize between reductions
  typedef cub::BlockReduce<double, T_BLOCK_SIZE> BlockReduce;
  __shared__ typename BlockReduce::TempStorage temp_storage;
  cm.x = BlockReduce(temp_storage).Sum(cm.x);
  __syncthreads();
  cm.y = BlockReduce(temp_storage).Sum(cm.y);
  __syncthreads();
  cm.z = BlockReduce(temp_storage).Sum(cm.z);
  __syncthreads();
  // thread 0 of each block adds the block's partial sum to global scratch memory
  if (threadIdx.x == 0) {
    atomicAdd(&(d_curCM->x), cm.x);
    atomicAdd(&(d_curCM->y), cm.y);
    atomicAdd(&(d_curCM->z), cm.z);

    // make the partial sums visible before signalling completion of this block
    __threadfence();
    unsigned int value = atomicInc(&tbcatomic[0], totaltb);
    isLastBlockDone = (value == (totaltb - 1));
  }
  __syncthreads();
  // the last block to finish scales the accumulated sum into the center of mass
  if (isLastBlockDone) {
    if (threadIdx.x == 0) {
      curCM->x = d_curCM->x * inv_group_mass;
      curCM->y = d_curCM->y * inv_group_mass;
      curCM->z = d_curCM->z * inv_group_mass;
      // reset the scratch accumulator and the block counter for the next invocation
      d_curCM->x = 0.0;
      d_curCM->y = 0.0;
      d_curCM->z = 0.0;
      tbcatomic[0] = 0;
      __threadfence();
    }
  }
}
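/* Illustrative host-side launch sketch (not part of this header). Names such as
 * numAtoms, groupMass, lattice, stream, and the d_* buffers are placeholders for
 * the caller's data, and this assumes d_curCM_scratch and d_tbcatomic are zeroed
 * before the first launch; the kernel resets them itself after each reduction.
 * The template argument must match the launch block size.
 *
 *   constexpr int blockSize = 128;
 *   const int numBlocks = (numAtoms + blockSize - 1) / blockSize;
 *   computeCOMKernel<blockSize><<<numBlocks, blockSize, 0, stream>>>(
 *       numAtoms, 1.0 / groupMass, lattice,
 *       d_mass, d_pos_x, d_pos_y, d_pos_z,
 *       d_transform, d_atomsSOAIndex,
 *       d_curCM_scratch, d_curCM_out, d_tbcatomic);
 */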

/*! Compute the centers of mass of two atom groups in a single launch. */
template <int T_BLOCK_SIZE>
__global__ static void compute2COMKernel(
  const int numGroup1Atoms,            // number of atoms in group 1
  const int totalNumRestrained,        // total number of atoms in both groups
  const double inv_group1_mass,        // 1 / (total mass of group 1)
  const double inv_group2_mass,        // 1 / (total mass of group 2)
  const Lattice lat,                   // assumed parameter: lattice used to unwrap coordinates
  const float*  __restrict mass,
  const double* __restrict pos_x,
  const double* __restrict pos_y,
  const double* __restrict pos_z,
  const char3*  __restrict transform,
  const int*    __restrict groupAtomsSOAIndex,
  double3*      __restrict d_curCM1,   // device scratch for group 1
  double3*      __restrict d_curCM2,   // device scratch for group 2
  double3*      __restrict curCM1,     // output: center of mass of group 1
  double3*      __restrict curCM2,     // output: center of mass of group 2
  unsigned int* __restrict tbcatomic)  // thread-block counter used to detect the last block
{
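  /* Index layout assumed by this kernel: groupAtomsSOAIndex holds the SOA indices
   * of both groups back to back, group 1 in [0, numGroup1Atoms) and group 2 in
   * [numGroup1Atoms, totalNumRestrained). */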
  int tid = threadIdx.x + blockIdx.x * blockDim.x;
  int totaltb = gridDim.x;
  bool isLastBlockDone = false;

  double3 com1 = {0, 0, 0};
  double3 com2 = {0, 0, 0};
  if (tid < totalNumRestrained) {
    int SOAindex = groupAtomsSOAIndex[tid];

    double m = mass[SOAindex]; // cast the float mass to double before accumulating
    double3 pos;
    pos.x = pos_x[SOAindex];
    pos.y = pos_y[SOAindex];
    pos.z = pos_z[SOAindex];

    // unwrap the coordinate across periodic boundaries before computing the COM
    char3 t = transform[SOAindex];
    pos = lat.reverse_transform(pos, t);

    if (tid < numGroup1Atoms) {
      // this thread handles a group 1 atom
      com1.x = pos.x * m;
      com1.y = pos.y * m;
      com1.z = pos.z * m;
    } else {
      // this thread handles a group 2 atom
      com2.x = pos.x * m;
      com2.y = pos.y * m;
      com2.z = pos.z * m;
    }
  }
  // block-wide reduction of both groups' partial sums; the shared temp_storage is
  // reused for every component, so the block must synchronize between reductions
  typedef cub::BlockReduce<double, T_BLOCK_SIZE> BlockReduce;
  __shared__ typename BlockReduce::TempStorage temp_storage;
  com1.x = BlockReduce(temp_storage).Sum(com1.x);
  __syncthreads();
  com1.y = BlockReduce(temp_storage).Sum(com1.y);
  __syncthreads();
  com1.z = BlockReduce(temp_storage).Sum(com1.z);
  __syncthreads();
  com2.x = BlockReduce(temp_storage).Sum(com2.x);
  __syncthreads();
  com2.y = BlockReduce(temp_storage).Sum(com2.y);
  __syncthreads();
  com2.z = BlockReduce(temp_storage).Sum(com2.z);
  __syncthreads();
  // thread 0 of each block adds the block's partial sums to global scratch memory
  if (threadIdx.x == 0) {
    atomicAdd(&(d_curCM1->x), com1.x);
    atomicAdd(&(d_curCM1->y), com1.y);
    atomicAdd(&(d_curCM1->z), com1.z);
    atomicAdd(&(d_curCM2->x), com2.x);
    atomicAdd(&(d_curCM2->y), com2.y);
    atomicAdd(&(d_curCM2->z), com2.z);

    // make the partial sums visible before signalling completion of this block
    __threadfence();
    unsigned int value = atomicInc(&tbcatomic[0], totaltb);
    isLastBlockDone = (value == (totaltb - 1));
  }
  __syncthreads();
  // the last block to finish scales the accumulated sums into the centers of mass
  if (isLastBlockDone) {
    if (threadIdx.x == 0) {
      curCM1->x = d_curCM1->x * inv_group1_mass;
      curCM1->y = d_curCM1->y * inv_group1_mass;
      curCM1->z = d_curCM1->z * inv_group1_mass;
      curCM2->x = d_curCM2->x * inv_group2_mass;
      curCM2->y = d_curCM2->y * inv_group2_mass;
      curCM2->z = d_curCM2->z * inv_group2_mass;
      // reset the scratch accumulators and the block counter for the next invocation
      d_curCM1->x = 0.0;
      d_curCM1->y = 0.0;
      d_curCM1->z = 0.0;
      d_curCM2->x = 0.0;
      d_curCM2->y = 0.0;
      d_curCM2->z = 0.0;
      tbcatomic[0] = 0;
      __threadfence();
    }
  }
}

#endif // NODEGROUP_FORCE_REGISTER
#endif // COM_CUDA_KERNEL_H