NAMD
SortAtoms.C
Go to the documentation of this file.
1 
7 #include "SortAtoms.h"
8 #include "NamdTypes.h"
9 #include <algorithm>
10 #include "CudaUtils.h"
11 
12 // #include "charm++.h"
13 
14 
15 struct sortop_base {
16  const FullAtom * const a;
17  sortop_base(const FullAtom* atoms) : a(atoms) { }
18 };
19 
20 struct sortop_x : public sortop_base {
21  sortop_x(const FullAtom* atoms) : sortop_base(atoms) { }
22  bool operator() (int i, int j) const {
23  return ( a[i].position.x < a[j].position.x );
24  }
25 };
26 
27 struct sortop_y : public sortop_base {
28  sortop_y(const FullAtom* atoms) : sortop_base(atoms) { }
29  bool operator() (int i, int j) const {
30  return ( a[i].position.y < a[j].position.y );
31  }
32 };
33 
34 struct sortop_z : public sortop_base {
35  sortop_z(const FullAtom* atoms) : sortop_base(atoms) { }
36  bool operator() (int i, int j) const {
37  return ( a[i].position.z < a[j].position.z );
38  }
39 };
40 //This function is only needed for GPU or AVX-512 builds
41 #if (defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_AVXTILES))
42 static void partition(int *order, const FullAtom *atoms, int begin, int end) {
43 
44  // Applies orthogonal recursive bisection with splittings limited
45  // to multiples of 32 for warps and a final split on multiples of 16.
46 
47 #ifdef NAMD_AVXTILES
48 #define WARPSIZE 16
49 #endif
50 
51  int split;
52  // must be a multiple of WARPSIZE/2 between begin and end to split at
53  if ( begin/WARPSIZE < (end-1)/WARPSIZE ) {
54  // find a multiple of WARPSIZE near the median
55  split = ((begin + end + WARPSIZE) / (WARPSIZE*2)) * WARPSIZE;
56  } else if ( begin/(WARPSIZE/2) < (end-1)/(WARPSIZE/2) ) {
57  // find a multiple of WARPSIZE/2 near the median
58  split = ((begin + end + WARPSIZE/2) / WARPSIZE) * (WARPSIZE/2);
59  } else {
60  return;
61  }
62 
63  BigReal xmin, ymin, zmin, xmax, ymax, zmax;
64  {
65  const Position &pos = atoms[order[begin]].position;
66  xmin = pos.x;
67  ymin = pos.y;
68  zmin = pos.z;
69  xmax = pos.x;
70  ymax = pos.y;
71  zmax = pos.z;
72  }
73  for ( int i=begin+1; i<end; ++i ) {
74  const Position &pos = atoms[order[i]].position;
75  if ( pos.x < xmin ) { xmin = pos.x; }
76  if ( pos.y < ymin ) { ymin = pos.y; }
77  if ( pos.z < zmin ) { zmin = pos.z; }
78  if ( pos.x > xmax ) { xmax = pos.x; }
79  if ( pos.y > ymax ) { ymax = pos.y; }
80  if ( pos.z > zmax ) { zmax = pos.z; }
81  }
82  xmax -= xmin;
83  ymax -= ymin;
84  zmax -= zmin;
85 
86 #define NTH_ELEMENT(BEGIN,SPLIT,END,OP) std::nth_element(BEGIN,SPLIT,END,OP)
87 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(__GNUC_PATCHLEVEL__)
88 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 2
89 #define NTH_ELEMENT(BEGIN,SPLIT,END,OP) std::sort(BEGIN,END,OP)
90 #warning gcc 4.8.2 std::nth_element would segfault (see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800)
91 #endif
92 #endif
93 
94  if ( xmax >= ymax && xmax >= zmax ) {
95  NTH_ELEMENT(order+begin, order+split, order+end, sortop_x(atoms));
96  } else if ( ymax >= xmax && ymax >= zmax ) {
97  NTH_ELEMENT(order+begin, order+split, order+end, sortop_y(atoms));
98  } else {
99  NTH_ELEMENT(order+begin, order+split, order+end, sortop_z(atoms));
100  }
101 
102  if ( split & (WARPSIZE/2) ) return;
103 
104  // recursively partition before and after split
105  partition(order, atoms, begin, split);
106  partition(order, atoms, split, end);
107 
108 }
109 
110 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(__GNUC_PATCHLEVEL__)
111 #if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && __GNUC_PATCHLEVEL__ == 2
112 // #error gcc 4.8.2 std::nth_element would segfault (see http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58800)
113 #endif
114 #endif
115 
116 void sortAtomsForCUDA(int *order, const FullAtom *atoms, int nfree, int n) {
117 
118  // partition free atoms
119  // CkPrintf("%d %d\n", 0, nfree);
120  partition(order, atoms, 0, nfree);
121 
122  // partition fixed atoms
123  // CkPrintf("%d %d\n", nfree, n);
124  partition(order, atoms, nfree, n);
125 
126 }
127 #endif //End of GPU-specific code.
128 
129 void sortAtomsForPatches(int *order, int *breaks,
130  const FullAtom *atoms, int nmgrps, int natoms,
131  int ni, int nj, int nk) {
132 
133 //CkPrintf("sorting %d atoms in %d groups to %d x %d x %d\n",
134 // natoms, nmgrps, nk, nj, ni);
135  std::sort(order, order+nmgrps, sortop_z(atoms));
136  int pid = 0;
137  int ibegin = 0;
138  int nai = 0;
139  for ( int ip=0; ip < ni; ++ip ) {
140  int naj = nai;
141  int targi = nai + (natoms - nai - 1) / (ni - ip) + 1;
142  int iend;
143  for ( iend=ibegin; iend<nmgrps; ++iend ) {
144  int mgs = atoms[order[iend]].migrationGroupSize;
145  if (nai + mgs <= targi) nai += mgs;
146  else break;
147  }
148 //CkPrintf(" Z %d %d (%d) %d\n", ibegin, iend, iend-ibegin, nai);
149  std::sort(order+ibegin, order+iend, sortop_y(atoms));
150  int jbegin = ibegin;
151  for ( int jp=0; jp < nj; ++jp ) {
152  int nak = naj;
153  int targj = naj + (nai - naj - 1) / (nj - jp) + 1;
154  int jend;
155  for ( jend=jbegin; jend<iend; ++jend ) {
156  int mgs = atoms[order[jend]].migrationGroupSize;
157  if (naj + mgs <= targj) naj += mgs;
158  else break;
159  }
160 
161 //CkPrintf(" Y %d %d (%d) %d\n", jbegin, jend, jend-jbegin, naj);
162  std::sort(order+jbegin, order+jend, sortop_x(atoms));
163  int kbegin = jbegin;
164  for ( int kp=0; kp < nk; ++kp ) {
165  int targk = nak + (naj - nak - 1) / (nk - kp) + 1;
166  int kend;
167  for ( kend=kbegin; kend<jend; ++kend ) {
168  int mgs = atoms[order[kend]].migrationGroupSize;
169  if (nak + mgs <= targk) nak += mgs;
170  else break;
171 //CkPrintf(" atom %d %d %.2f\n", atoms[order[kend]].id, mgs,
172 // atoms[order[kend]].position.x);
173  }
174 //CkPrintf(" X %d %d (%d) %d\n", kbegin, kend, kend-kbegin, nak);
175  breaks[pid++] = kend;
176  kbegin = kend;
177  }
178  jbegin = jend;
179  }
180  ibegin = iend;
181  }
182 
183 }
184 
sortop_x(const FullAtom *atoms)
Definition: SortAtoms.C:21
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:42
Definition: Vector.h:64
static __thread atom * atoms
BigReal z
Definition: Vector.h:66
const FullAtom *const a
Definition: SortAtoms.C:16
bool operator()(int i, int j) const
Definition: SortAtoms.C:36
Position position
Definition: NamdTypes.h:53
sortop_y(const FullAtom *atoms)
Definition: SortAtoms.C:28
void sortAtomsForCUDA(int *order, const FullAtom *atoms, int nfree, int n)
Definition: SortAtoms.C:116
#define WARPSIZE
Definition: CudaUtils.h:10
sortop_base(const FullAtom *atoms)
Definition: SortAtoms.C:17
#define NTH_ELEMENT(BEGIN, SPLIT, END, OP)
#define order
Definition: PmeRealSpace.C:235
BigReal x
Definition: Vector.h:66
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:73
int migrationGroupSize
Definition: NamdTypes.h:117
BlockRadixSort::TempStorage sort
BigReal y
Definition: Vector.h:66
bool operator()(int i, int j) const
Definition: SortAtoms.C:22
bool operator()(int i, int j) const
Definition: SortAtoms.C:29
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
Definition: SortAtoms.C:129
double BigReal
Definition: common.h:114
sortop_z(const FullAtom *atoms)
Definition: SortAtoms.C:35