NAMD
AlgRecBisection.C
Go to the documentation of this file.
1 
7 #include "common.h"
8 #include "InfoStream.h"
9 #include "AlgRecBisection.h"
10 
11 //#define DEBUG
12 
13 //
14 // load balancing computes using recursion of bisection algorithm
15 //
16 //
18  processorInfo *processorArray, int nComps,
19  int nPatches, int nPes) :
20  Rebalancer(computeArray, patchArray,
21  processorArray, nComps,
22  nPatches, nPes)
23 {
24 strategyName = "Rob";
25 strategy();
26 }
27 
28 
29 void AlgRecBisection::rec_divide(int n, Partition &p)
30 {
31  int i;
32  int midpos;
33  int n1, n2;
34  double load1, currentload;
35  int maxdir, count;
36  Partition p1, p2;
37 
38 #ifdef DEBUG
39  CmiPrintf("rec_divide: partition n:%d count:%d load:%f (%d %d %d, %d %d %d)\n", n, p.count, p.load, p.origin[0], p.origin[1], p.origin[2], p.corner[0], p.corner[1], p.corner[2]);
40 #endif
41 
42  if (n==1) {
43  partitions[currentp++] = p;
44  return;
45  }
46 /*
47  if (p.origin.x==p.corner.x && p.origin.y==p.corner.y && p.origin.z==p.corner.z)
48  NAMD_die("AlgRecBisection failed in recursion.\n");
49 */
50 
51  n2 = n/2;
52  n1 = n-n2;
53 
54  load1 = (1.0*n1/n) * p.load;
55 
56  p1 = p;
57  p1.refno = ++refno;
58  p2 = p;
59  p2.refno = ++refno;
60 
61  // determine the best division direction
62  int maxSpan=-1;
63  maxdir = XDIR;
64  for (i=XDIR; i<=ZDIR; i++) {
65  int myspan = p.corner[i] - p.origin[i];
66  if (myspan > maxSpan) {
67  maxdir = i;
68  maxSpan = myspan;
69  }
70  }
71 
72  // other two dimensions
73  int dir2 = (maxdir+1)%3;
74  int dir3 = (maxdir+2)%3;
75 
76  currentload = 0.0;
77  count = 0;
78  midpos = p.origin[maxdir];
79  for (i=0; i<numComputes; i++) {
80  // not belong to this partition
81  if (computeLoad[vArray[maxdir][i].id].refno != p.refno) continue;
82  if (vArray[maxdir][i].v<p.origin[maxdir]) continue;
83  if (vArray[maxdir][i].v>p.corner[maxdir]) break;
84 
85  int cid = vArray[maxdir][i].id; // this compute ID
86  // check if this compute is within the partition
87  if ( computeLoad[cid].v[dir2] >= p.origin[dir2] &&
88  computeLoad[cid].v[dir2] <= p.corner[dir2] &&
89  computeLoad[cid].v[dir3] >= p.origin[dir3] &&
90  computeLoad[cid].v[dir3] <= p.corner[dir3] ) {
91  // this compute is set to the first partition
92  if (currentload <= load1) {
93  computeLoad[cid].refno = p1.refno;
94  currentload += computeLoad[cid].load;
95  count ++;
96  midpos = computeLoad[cid].v[maxdir];
97  }
98  else { // or the next partition
99 /*
100  double c = currentload + computeLoad[cid].load;
101  if (c - load1 < load1 - currentload) {
102  computeLoad[cid].refno = p1.refno;
103  currentload = c;
104  count ++;
105  midpos = computeLoad[cid].v[maxdir];
106  }
107  else
108 */
109  computeLoad[cid].refno = p2.refno;
110  }
111  }
112  }
113 // CmiPrintf("X:cur:%d, prev:%d load:%f %f\n", cur, prev, currentload, prevload);
114 #ifdef DEBUG
115  CmiPrintf("DIR:%d %d load:%f\n", maxdir, midpos, currentload);
116 #endif
117 
118 
119  p1.corner[maxdir] = midpos;
120  p2.origin[maxdir] = midpos;
121 
122  p1.load = currentload;
123  p1.count = count;
124  p2.load = p.load - p1.load;
125  p2.count = p.count - p1.count;
126 #ifdef DEBUG
127  CmiPrintf("p1: n:%d count:%d load:%f\n", n1, p1.count, p1.load);
128  CmiPrintf("p2: n:%d count:%d load:%f\n", n2, p2.count, p2.load);
129 #endif
130  CmiAssert(! (p1 == p2));
131  rec_divide(n1, p1);
132  rec_divide(n2, p2);
133 }
134 
135 void AlgRecBisection::setVal(int x, int y, int z)
136 {
137  int i;
138  for (i=0; i<numComputes; i++) {
139  computeLoad[i].tv = 1000000*computeLoad[i].v[x]+
140  1000*computeLoad[i].v[y]+
141  computeLoad[i].v[z];
142  }
143 #if 0
144  CmiPrintf("original:%d\n", x);
145  for (i=0; i<numComputes; i++)
146  CmiPrintf("%d ", computeLoad[i].tv);
147  CmiPrintf("\n");
148 #endif
149 }
150 
151 int AlgRecBisection::sort_partition(int x, int p, int r)
152 {
153  int mid = computeLoad[vArray[x][p].id].tv;
154  int i= p;
155  int j= r;
156  while (1) {
157  while (computeLoad[vArray[x][j].id].tv > mid && j>i) j--;
158  while (computeLoad[vArray[x][i].id].tv < mid && i<j) i++;
159  if (i<j) {
160  if (computeLoad[vArray[x][i].id].tv == computeLoad[vArray[x][j].id].tv)
161  {
162  if (computeLoad[vArray[x][i].id].tv != mid) NAMD_die("my god!\n");
163  if (i-p < r-j) i++;
164  else j--;
165  continue;
166  }
167  VecArray tmp = vArray[x][i];
168  vArray[x][i] = vArray[x][j];
169  vArray[x][j] = tmp;
170  }
171  else
172  return j;
173  }
174 }
175 
176 void AlgRecBisection::qsort(int x, int p, int r)
177 {
178  if (p<r) {
179  int q = sort_partition(x, p, r);
180 //CmiPrintf("midpoint: %d %d %d\n", p,q,r);
181  qsort(x, p, q-1);
182  qsort(x, q+1, r);
183  }
184 }
185 
186 void AlgRecBisection::quicksort(int x)
187 {
188  int y = (x+1)%3;
189  int z = (x+2)%3;
190  setVal(x, y, z);
191  qsort(x, 0, numComputes-1);
192 
193 #if 0
194  CmiPrintf("result:%d\n", x);
195  for (int i=0; i<numComputes; i++)
196  CmiPrintf("%d ", computeLoad[vArray[x][i].id].tv);
197  CmiPrintf("\n");
198 #endif
199 }
200 
201 void AlgRecBisection::mapPartitionsToNodes()
202 {
203  int i,j,k;
204 #if 0
205  for (i=0; i<P; i++) partitions[i].node = i;
206 #else
207  PatchMap *patchMap = PatchMap::Object();
208 
209  int **pool = new int *[P];
210  for (i=0; i<P; i++) pool[i] = new int[P];
211  for (i=0; i<P; i++) for (j=0; j<P; j++) pool[i][j] = 0;
212 
213  // sum up the number of nodes that patches of computes are on
214  for (i=0; i<numComputes; i++)
215  {
216  for (j=0; j<P; j++)
217  if (computeLoad[i].refno == partitions[j].refno)
218  {
219  //int node1 = patchMap->node(computes[i].patch1);
220  //int node2 = patchMap->node(computes[i].patch2);
221  int node1 = patches[computes[i].patch1].processor;
222  int node2 = patches[computes[i].patch2].processor;
223  pool[j][node1]++;
224  pool[j][node2]++;
225  }
226  }
227 #ifdef DEBUG
228  for (i=0; i<P; i++) {
229  for (j=0; j<P; j++) CmiPrintf("%d ", pool[i][j]);
230  CmiPrintf("\n");
231  }
232 #endif
233  while (1)
234  {
235  int index=-1, node=0, eager=-1;
236  for (j=0; j<P; j++) {
237  if (partitions[j].node != -1) continue;
238  int wantmost=-1, maxnodes=-1;
239  for (k=0; k<P; k++) if (pool[j][k] > maxnodes && !partitions[k].mapped) {wantmost=k; maxnodes = pool[j][k];}
240  if (maxnodes > eager) {
241  index = j; node = wantmost; eager = maxnodes;
242  }
243  }
244  if (eager == -1) break;
245  partitions[index].node = node;
246  partitions[node].mapped = 1;
247  }
248 
249  for (i=0; i<P; i++) delete [] pool[i];
250  delete [] pool;
251 #endif
252 
253  CmiPrintf("partitions to nodes mapping: ");
254  for (i=0; i<P; i++) CmiPrintf("%d ", partitions[i].node);
255  CmiPrintf("\n");
256 }
257 
258 void AlgRecBisection::strategy()
259 {
260  int i,j;
261 
262  PatchMap *patchMap = PatchMap::Object();
263 
264  // create computeLoad and calculate tentative computes coordinates
265  computeLoad = new ComputeLoad[numComputes];
266  for (i=XDIR; i<=ZDIR; i++) vArray[i] = new VecArray[numComputes];
267 
268  CmiPrintf("AlgRecBisection: numComputes:%d\n", numComputes);
269 
270  int size_x = patchMap->gridsize_a();
271  int size_y = patchMap->gridsize_b();
272  int size_z = patchMap->gridsize_c();
273 
274  // v[0] = XDIR v[1] = YDIR v[2] = ZDIR
275  // vArray[XDIR] is an array holding the x vector for all computes
276  for (i=0; i<numComputes; i++) {
277  int pid1 = computes[i].patch1;
278  int pid2 = computes[i].patch2;
279  computeLoad[i].id = i;
280  int a1 = patchMap->index_a(pid1);
281  int b1 = patchMap->index_b(pid1);
282  int c1 = patchMap->index_c(pid1);
283  int a2 = patchMap->index_a(pid2);
284  int b2 = patchMap->index_b(pid2);
285  int c2 = patchMap->index_c(pid1);
286  computeLoad[i].v[0] = a1 + a2;
287  computeLoad[i].v[1] = b1 + b2;
288  computeLoad[i].v[2] = c1 + c2;
289 // CmiPrintf("(%d %d %d)", computeLoad[i].x, computeLoad[i].y, computeLoad[i].z);
290  computeLoad[i].load = computes[i].load;
291  computeLoad[i].refno = 0;
292 
293  for (j=XDIR; j<=ZDIR; j++) {
294  vArray[j][i].id = i;
295  vArray[j][i].v = computeLoad[i].v[j];
296  }
297  }
298 // CmiPrintf("\n");
299 
300  double t = CmiWallTimer();
301 
302  quicksort(XDIR);
303  quicksort(YDIR);
304  quicksort(ZDIR);
305  CmiPrintf("qsort time: %f\n", CmiWallTimer() - t);
306 
307  npartition = P;
308  partitions = new Partition[npartition];
309 
310  top_partition.origin[XDIR] = 0;
311  top_partition.origin[YDIR] = 0;
312  top_partition.origin[ZDIR] = 0;
313  top_partition.corner[XDIR] = 2*(size_x-1);
314  top_partition.corner[YDIR] = 2*(size_y-1);
315  top_partition.corner[ZDIR] = 2*(size_z-1);
316 
317  top_partition.refno = 0;
318  top_partition.load = 0.0;
319  top_partition.count = numComputes;
320  for (i=0; i<numComputes; i++) top_partition.load += computes[i].load;
321 
322  currentp = 0;
323  refno = 0;
324 
325  // recursively divide
326  rec_divide(npartition, top_partition);
327 
328  CmiPrintf("After partitioning: \n");
329  for (i=0; i<P; i++) {
330  CmiPrintf("[%d] (%d,%d,%d) (%d,%d,%d) load:%f count:%d\n", i, partitions[i].origin[0], partitions[i].origin[1], partitions[i].origin[2], partitions[i].corner[0], partitions[i].corner[1], partitions[i].corner[2], partitions[i].load, partitions[i].count);
331  }
332 
333  for (i=0; i<numComputes; i++) computes[i].processor=-1;
334 
335  // mapping partitions to nodes
336  mapPartitionsToNodes();
337 
338  // this is for debugging
339  int *num = new int[P];
340  for (i=0; i<P; i++) num[i] = 0;
341 
342  for (i=0; i<numComputes; i++)
343  {
344  for (j=0; j<P; j++)
345  if (computeLoad[i].refno == partitions[j].refno)
346  { computes[computeLoad[i].id].processor = partitions[j].node; num[j]++; break; }
347  }
348 
349  for (i=0; i<P; i++)
350  if (num[i] != partitions[i].count)
351  NAMD_die("AlgRecBisection: Compute counts don't agree!\n");
352 
353  delete [] num;
354 
355  for (i=0; i<numComputes; i++) {
356  if (computes[i].processor == -1) NAMD_bug("AlgRecBisection failure!\n");
357  }
358 
359 
360  delete [] computeLoad;
361  for (i=0; i<3; i++) delete [] vArray[i];
362  delete [] partitions;
363 
364  // use refinement
365  for (i=0; i<numComputes; i++)
366  assign((computeInfo *) &(computes[i]),
367  (processorInfo *) &(processors[computes[i].processor]));
368 
369  printLoads();
370 
371 #if 1
372  multirefine();
373 #else
374  printSummary();
375 #endif
376 
377  printLoads();
378 
379  CmiPrintf("AlgRecBisection finished time: %f\n", CmiWallTimer() - t);
380 
381 }
382 
383 
BlockLoad::TempStorage load
int numComputes
Definition: Rebalancer.h:138
int patch1
Definition: elements.h:23
computeInfo * computes
Definition: Rebalancer.h:128
int gridsize_c(void) const
Definition: PatchMap.h:66
static PatchMap * Object()
Definition: PatchMap.h:27
int index_a(int pid) const
Definition: PatchMap.h:86
void assign(computeInfo *c, processorInfo *pRec)
Definition: Rebalancer.C:402
int processor
Definition: elements.h:24
processorInfo * processors
Definition: Rebalancer.h:130
void printLoads(int phase=0)
Definition: Rebalancer.C:874
int gridsize_a(void) const
Definition: PatchMap.h:64
void multirefine(double overload_start=1.02)
Definition: Rebalancer.C:784
int patch2
Definition: elements.h:23
void NAMD_bug(const char *err_msg)
Definition: common.C:129
void printSummary()
Definition: Rebalancer.C:975
gridSize z
int index_b(int pid) const
Definition: PatchMap.h:87
void NAMD_die(const char *err_msg)
Definition: common.C:85
int index_c(int pid) const
Definition: PatchMap.h:88
double load
Definition: elements.h:15
patchInfo * patches
Definition: Rebalancer.h:129
gridSize y
int processor
Definition: elements.h:31
gridSize x
AlgRecBisection(computeInfo *computeArray, patchInfo *patchArray, processorInfo *processorArray, int nComps, int nPatches, int nPes)
int gridsize_b(void) const
Definition: PatchMap.h:65
const char * strategyName
Definition: Rebalancer.h:127
for(int i=0;i< n1;++i)