NAMD
SortAtoms.C
Go to the documentation of this file.
1 
7 #include "SortAtoms.h"
8 #include "NamdTypes.h"
9 #include <algorithm>
10 #include "CudaUtils.h"
11 
12 // #include "charm++.h"
13 #if !defined(NAMD_HIP) && !defined(NAMD_CUDA)
14 #define WARPSIZE 32
15 #endif
16 
17 
18 struct sortop_base {
19  const FullAtom * const a;
20  sortop_base(const FullAtom* atoms) : a(atoms) { }
21 };
22 
23 struct sortop_x : public sortop_base {
24  sortop_x(const FullAtom* atoms) : sortop_base(atoms) { }
25  bool operator() (int i, int j) const {
26  return ( a[i].position.x < a[j].position.x );
27  }
28 };
29 
30 struct sortop_y : public sortop_base {
31  sortop_y(const FullAtom* atoms) : sortop_base(atoms) { }
32  bool operator() (int i, int j) const {
33  return ( a[i].position.y < a[j].position.y );
34  }
35 };
36 
37 struct sortop_z : public sortop_base {
38  sortop_z(const FullAtom* atoms) : sortop_base(atoms) { }
39  bool operator() (int i, int j) const {
40  return ( a[i].position.z < a[j].position.z );
41  }
42 };
43 
44 
45 static void partition(int *order, const FullAtom *atoms, int begin, int end) {
46 
47  // Applies orthogonal recursive bisection with splittings limited
48  // to multiples of WARPSIZE for warps and a final split on multiples of WARPSIZE/2.
49 
50 #ifdef NAMD_AVXTILES
51 #define PARTWIDTH 16
52 #else
53 #define PARTWIDTH 32
54 #endif
55 
56  int split;
57  // must be a multiple of 32 or 16 between begin and end to split at
58  if ( begin/PARTWIDTH < (end-1)/PARTWIDTH ) {
59  // find a multiple of 32 near the median
60  split = ((begin + end + PARTWIDTH) / (PARTWIDTH*2)) * PARTWIDTH;
61  } else if ( begin/(PARTWIDTH/2) < (end-1)/(PARTWIDTH/2) ) {
62  // find a multiple of 16 near the median
63  split = ((begin + end + (PARTWIDTH/2)) / PARTWIDTH) * (PARTWIDTH/2);
64  } else {
65  return;
66  }
67 
68  BigReal xmin, ymin, zmin, xmax, ymax, zmax;
69  {
70  const Position &pos = atoms[order[begin]].position;
71  xmin = pos.x;
72  ymin = pos.y;
73  zmin = pos.z;
74  xmax = pos.x;
75  ymax = pos.y;
76  zmax = pos.z;
77  }
78  for ( int i=begin+1; i<end; ++i ) {
79  const Position &pos = atoms[order[i]].position;
80  if ( pos.x < xmin ) { xmin = pos.x; }
81  if ( pos.y < ymin ) { ymin = pos.y; }
82  if ( pos.z < zmin ) { zmin = pos.z; }
83  if ( pos.x > xmax ) { xmax = pos.x; }
84  if ( pos.y > ymax ) { ymax = pos.y; }
85  if ( pos.z > zmax ) { zmax = pos.z; }
86  }
87  xmax -= xmin;
88  ymax -= ymin;
89  zmax -= zmin;
90 
91 #define NTH_ELEMENT(BEGIN,SPLIT,END,OP) std::nth_element(BEGIN,SPLIT,END,OP)
92 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(__GNUC_PATCHLEVEL__)
93 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 2
94 #define NTH_ELEMENT(BEGIN,SPLIT,END,OP) std::sort(BEGIN,END,OP)
95 #warning gcc 4.8.2 std::nth_element would segfault (see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800)
96 #endif
97 #endif
98 
99  if ( xmax >= ymax && xmax >= zmax ) {
100  NTH_ELEMENT(order+begin, order+split, order+end, sortop_x(atoms));
101  } else if ( ymax >= xmax && ymax >= zmax ) {
102  NTH_ELEMENT(order+begin, order+split, order+end, sortop_y(atoms));
103  } else {
104  NTH_ELEMENT(order+begin, order+split, order+end, sortop_z(atoms));
105  }
106 
107  if ( split & PARTWIDTH/2 ) return;
108 
109 #undef PARTWIDTH
110 
111  // recursively partition before and after split
112  partition(order, atoms, begin, split);
113  partition(order, atoms, split, end);
114 
115 }
116 
117 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(__GNUC_PATCHLEVEL__)
118 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 2
119 // #error gcc 4.8.2 std::nth_element would segfault (see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800)
120 #endif
121 #endif
122 
123 void sortAtomsForCUDA(int *order, const FullAtom *atoms, int nfree, int n) {
124 
125  // partition free atoms
126  // CkPrintf("%d %d\n", 0, nfree);
127  partition(order, atoms, 0, nfree);
128 
129  // partition fixed atoms
130  // CkPrintf("%d %d\n", nfree, n);
131  partition(order, atoms, nfree, n);
132 
133 }
134 
135 void sortAtomsForPatches(int *order, int *breaks,
136  const FullAtom *atoms, int nmgrps, int natoms,
137  int ni, int nj, int nk) {
138 
139 //CkPrintf("sorting %d atoms in %d groups to %d x %d x %d\n",
140 // natoms, nmgrps, nk, nj, ni);
141  std::sort(order, order+nmgrps, sortop_z(atoms));
142  int pid = 0;
143  int ibegin = 0;
144  int nai = 0;
145  for ( int ip=0; ip < ni; ++ip ) {
146  int naj = nai;
147  int targi = nai + (natoms - nai - 1) / (ni - ip) + 1;
148  int iend;
149  for ( iend=ibegin; iend<nmgrps; ++iend ) {
150  int mgs = atoms[order[iend]].migrationGroupSize;
151  if (nai + mgs <= targi) nai += mgs;
152  else break;
153  }
154 //CkPrintf(" Z %d %d (%d) %d\n", ibegin, iend, iend-ibegin, nai);
155  std::sort(order+ibegin, order+iend, sortop_y(atoms));
156  int jbegin = ibegin;
157  for ( int jp=0; jp < nj; ++jp ) {
158  int nak = naj;
159  int targj = naj + (nai - naj - 1) / (nj - jp) + 1;
160  int jend;
161  for ( jend=jbegin; jend<iend; ++jend ) {
162  int mgs = atoms[order[jend]].migrationGroupSize;
163  if (naj + mgs <= targj) naj += mgs;
164  else break;
165  }
166 
167 //CkPrintf(" Y %d %d (%d) %d\n", jbegin, jend, jend-jbegin, naj);
168  std::sort(order+jbegin, order+jend, sortop_x(atoms));
169  int kbegin = jbegin;
170  for ( int kp=0; kp < nk; ++kp ) {
171  int targk = nak + (naj - nak - 1) / (nk - kp) + 1;
172  int kend;
173  for ( kend=kbegin; kend<jend; ++kend ) {
174  int mgs = atoms[order[kend]].migrationGroupSize;
175  if (nak + mgs <= targk) nak += mgs;
176  else break;
177 //CkPrintf(" atom %d %d %.2f\n", atoms[order[kend]].id, mgs,
178 // atoms[order[kend]].position.x);
179  }
180 //CkPrintf(" X %d %d (%d) %d\n", kbegin, kend, kend-kbegin, nak);
181  breaks[pid++] = kend;
182  kbegin = kend;
183  }
184  jbegin = jend;
185  }
186  ibegin = iend;
187  }
188 
189 }
190 
191 
192 //
193 // begin SOA
194 //
195 
196 struct sortop_SOA {
197  const double * const a;
198  sortop_SOA(const double *r) : a(r) { }
199  bool operator() (int i, int j) const {
200  return ( a[i] < a[j] );
201  }
202 };
203 
204 #define NORMAL_SPLIT 32
205 #ifdef NAMD_HIP
206 #define FINAL_SPLIT 16
207 #else
208 #define FINAL_SPLIT 8
209 #endif
210 static void partition_SOA(
211  int * __restrict order,
212  const double * __restrict ax,
213  const double * __restrict ay,
214  const double * __restrict az,
215  int begin, int end
216  ) {
217 
218  // Applies orthogonal recursive bisection with splittings limited
219  // to multiples of 32 for warps and a final split on multiples of 16.
220  int split = -1;
221  // must be a multiple of 32 or 16 between begin and end to split at
222  int split_factor = NORMAL_SPLIT;
223 #ifdef NAMD_HIP
224  if ( begin/NORMAL_SPLIT < (end-1)/NORMAL_SPLIT ) {
225  // find a multiple of 32 near the median
226  split = ((begin + end + NORMAL_SPLIT) / (NORMAL_SPLIT*2)) * NORMAL_SPLIT;
227  } else if ( begin/(NORMAL_SPLIT/2) < (end-1)/(NORMAL_SPLIT/2) ) {
228  // find a multiple of 16 near the median
229  split = ((begin + end + (NORMAL_SPLIT/2)) / NORMAL_SPLIT) * NORMAL_SPLIT/2;
230  } else {
231  return;
232  }
233 #else
234  while (split == -1) {
235  if ( begin/split_factor < (end-1)/split_factor ) {
236  split = ((begin + end + split_factor) / (split_factor*2)) * split_factor;
237  }
238  split_factor /= 2;
239  if (split_factor == 1) return;
240  }
241 #endif
242 
243  BigReal xmin, ymin, zmin, xmax, ymax, zmax;
244  {
245 #if 0
246  const Position &pos = atoms[order[begin]].position;
247  xmin = pos.x;
248  ymin = pos.y;
249  zmin = pos.z;
250  xmax = pos.x;
251  ymax = pos.y;
252  zmax = pos.z;
253 #else
254  int i = order[begin];
255  xmin = ax[i];
256  xmax = ax[i];
257  ymin = ay[i];
258  ymax = ay[i];
259  zmin = az[i];
260  zmax = az[i];
261 #endif
262  }
263  for ( int i=begin+1; i<end; ++i ) {
264 #if 0
265  const Position &pos = atoms[order[i]].position;
266  if ( pos.x < xmin ) { xmin = pos.x; }
267  if ( pos.y < ymin ) { ymin = pos.y; }
268  if ( pos.z < zmin ) { zmin = pos.z; }
269  if ( pos.x > xmax ) { xmax = pos.x; }
270  if ( pos.y > ymax ) { ymax = pos.y; }
271  if ( pos.z > zmax ) { zmax = pos.z; }
272 #else
273  int j = order[i];
274  if ( ax[j] < xmin ) { xmin = ax[j]; }
275  if ( ax[j] > xmax ) { xmax = ax[j]; }
276  if ( ay[j] < ymin ) { ymin = ay[j]; }
277  if ( ay[j] > ymax ) { ymax = ay[j]; }
278  if ( az[j] < zmin ) { zmin = az[j]; }
279  if ( az[j] > zmax ) { zmax = az[j]; }
280 #endif
281  }
282  xmax -= xmin;
283  ymax -= ymin;
284  zmax -= zmin;
285 
286 #undef NTH_ELEMENT
287 #define NTH_ELEMENT(BEGIN,SPLIT,END,OP) std::nth_element(BEGIN,SPLIT,END,OP)
288 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(__GNUC_PATCHLEVEL__)
289 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 2
290 #define NTH_ELEMENT(BEGIN,SPLIT,END,OP) std::sort(BEGIN,END,OP)
291 #warning gcc 4.8.2 std::nth_element would segfault (see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800)
292 #endif
293 #endif
294 
295  if ( xmax >= ymax && xmax >= zmax ) {
296  NTH_ELEMENT(order+begin, order+split, order+end, sortop_SOA(ax));
297  } else if ( ymax >= xmax && ymax >= zmax ) {
298  NTH_ELEMENT(order+begin, order+split, order+end, sortop_SOA(ay));
299  } else {
300  NTH_ELEMENT(order+begin, order+split, order+end, sortop_SOA(az));
301  }
302 
303  if ( split & FINAL_SPLIT ) return;
304 
305  // recursively partition before and after split
306  partition_SOA(order, ax, ay, az, begin, split);
307  partition_SOA(order, ax, ay, az, split, end);
308 
309 }
310 
311 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(__GNUC_PATCHLEVEL__)
312 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 2
313 // #error gcc 4.8.2 std::nth_element would segfault (see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800)
314 #endif
315 #endif
316 
318  int * __restrict order,
319  int * __restrict unorder,
320  const double * __restrict ax,
321  const double * __restrict ay,
322  const double * __restrict az,
323  int nfree, int n
324  ) {
325 
326  // partition free atoms
327  // CkPrintf("%d %d\n", 0, nfree);
328  partition_SOA(order, ax, ay, az, 0, nfree);
329 
330  // partition fixed atoms
331  // CkPrintf("%d %d\n", nfree, n);
332  partition_SOA(order, ax, ay, az, nfree, n);
333 
334  // determine mapping to unsort atoms
335  for (int i=0; i < n; i++) {
336  unorder[order[i]] = i;
337  }
338 }
339 
340 //
341 // end SOA
342 //
sortop_x(const FullAtom *atoms)
Definition: SortAtoms.C:24
static void partition_SOA(int *__restrict order, const double *__restrict ax, const double *__restrict ay, const double *__restrict az, int begin, int end)
Definition: SortAtoms.C:210
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
Definition: Vector.h:72
BigReal z
Definition: Vector.h:74
const FullAtom *const a
Definition: SortAtoms.C:19
Position position
Definition: NamdTypes.h:77
sortop_y(const FullAtom *atoms)
Definition: SortAtoms.C:31
bool operator()(int i, int j) const
Definition: SortAtoms.C:39
void sortAtomsForCUDA(int *order, const FullAtom *atoms, int nfree, int n)
Definition: SortAtoms.C:123
bool operator()(int i, int j) const
Definition: SortAtoms.C:32
sortop_base(const FullAtom *atoms)
Definition: SortAtoms.C:20
#define NTH_ELEMENT(BEGIN, SPLIT, END, OP)
#define order
Definition: PmeRealSpace.C:235
int32 migrationGroupSize
Definition: NamdTypes.h:220
#define FINAL_SPLIT
Definition: SortAtoms.C:208
#define PARTWIDTH
BigReal x
Definition: Vector.h:74
void sortAtomsForCUDA_SOA(int *__restrict order, int *__restrict unorder, const double *__restrict ax, const double *__restrict ay, const double *__restrict az, int nfree, int n)
Definition: SortAtoms.C:317
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:74
bool operator()(int i, int j) const
Definition: SortAtoms.C:25
#define NORMAL_SPLIT
Definition: SortAtoms.C:204
BigReal y
Definition: Vector.h:74
bool operator()(int i, int j) const
Definition: SortAtoms.C:199
const double *const a
Definition: SortAtoms.C:197
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
Definition: SortAtoms.C:135
double BigReal
Definition: common.h:123
sortop_z(const FullAtom *atoms)
Definition: SortAtoms.C:38
sortop_SOA(const double *r)
Definition: SortAtoms.C:198