NAMD
RecBisection.C
Go to the documentation of this file.
1 
7 #include <math.h>
8 #include <stdlib.h>
9 
10 #include "RecBisection.h"
11 #include "PatchMap.inl"
12 #include "Patch.h"
13 #include "PatchMgr.h"
14 
15 /* ********************************************************************* */
16 /* Constructor for the RecBisection Class */
17 /* ********************************************************************* */
18 RecBisection::RecBisection(int numpartitions, PatchMap *thePatchMap)
19 {
20  patchMap = thePatchMap;
21  npartition = numpartitions;
22  numPatches = patchMap->numPatches();
23  partitions = new Partition[npartition];
24  patchload = new PatchLoad[numPatches];
25  currentp = 0;
26 
27  if ( partitions == NULL ||
28  patchload == NULL )
29  {
30  NAMD_die("memory allocation failed in RecBisection::RecBisection");
31  }
32 
33 
34  // the cost coeffiencients that is used to compute the load introduced
35  // to the processor by a patch
36 
37  c_local0 = 1;
38  c_local1 = 0.015;
39  c_edge0 = 0.001;
40  c_edge1 = 0.001;
41  c_icompute0 = 0.001;
42  c_icompute1 = 0.000035;
43 
44  //c_local0 = 1.0;
45  //c_local1 = 0.;
46  //c_edge0 = 0.;
47  //c_edge1 = 0.;
48  //c_icompute0 = 0.;
49  //c_icompute1 = 0.;
50 }
51 
52 
53 /* ********************************************************************* */
54 /* Destructor for the RecBisection Class */
55 /* ********************************************************************* */
57 {
58  delete [] partitions;
59  delete [] patchload;
60 }
61 
62 
63 
64 /* *********************************************************************** */
65 /* This is a recursive function to partition a 3-D mesh into n cubical */
66 /* subpartitions with approximately equal loads. */
67 /* */
68 /* Input n : total number of subpartitions that the partition "p" has to */
69 /* be divided. Its value is any integer > 0 */
70 /* p : the current partition */
71 /* */
72 /* The partition p is divided into two partitions (say p1 and p2) such */
73 /* that a) p1 and p2 equally loaded b) p1 and p2 further to be partitioned */
74 /* ino n1 and n2 subpartitions. */
75 /* If n is even then n1=n2=n/2. Otherwise n1 is one more than n2. */
76 /* */
77 /* Since the subpartitions are rectangualr prisms (not an artibrary shape),*/
78 /* it is not always possible to find a bisection point where the load */
79 /* is equally divided. */
80 /* The following strategy is used to get a good partitioning: */
81 /* We divide the initial partition along x, y and z directions tentatively */
82 /* and choose the one that gives the best division */
83 /* *********************************************************************** */
84 
85 void RecBisection::rec_divide(int n, const Partition &p)
86 {
87  int i=0,j=0,k=0; // general purpose index vars
88  int posi[3],posj[3],posk[3]; // division points along x,y,z direction
89  int mindir; // the best direction
90  int n1, n2; // number of subpartitions in p1 and p2
91  int p1_empty[3], p2_empty[3]; // true if a subpartition is empty
92  float load1; // desired load of the first subpartition
93  float prevload,currentload;
94  float loadarray[3]; // actual loads of p1 (for each x,y,z
95  // division)
96  float diff; // load diffenrence (actual - desired)
97  float mindiff; // minimum difference (among directions)
98  Partition p1; // first subpartition
99  Partition p2; // second subpartition
100 
101 
102  if (n==1)
103  {
104  // no further subdivision
105  // record teh partition p as a final partition
106  partitions[currentp++] = p;
107  return;
108  }
109 
110  // calculate division ratio: 1/2 iff n is even, otherwise
111  // first partition has more load
112 
113  n2 = n/2;
114  n1 = n-n2;
115 
116  load1 = ( (float) n1/(float) n ) * p.load;
117 
118  for(i=XDIR; i<=ZDIR; i++) {p1_empty[i] = p2_empty[i] = 0;}
119 
120  p1 = p;
121  p2 = p;
122 
123  // now try dividing along the x,y,z directions
124 
125  // try x-axis
126  currentload = 0.0;
127  i = p.origin.x;
128  while(currentload < load1 && i<= p.corner.x)
129  {
130  prevload = currentload;
131  for(j=p.origin.y; j<=p.corner.y; j++)
132  for(k=p.origin.z; k<=p.corner.z; k++)
133  currentload += patchload[patchMap->pid(i,j,k)].total;
134  if (currentload > load1)
135  if ( prev_better(prevload,currentload,load1) )
136  {currentload=prevload; break;}
137  i++;
138  }
139  posi[XDIR] = i; posj[XDIR] = j; posk[XDIR] = k;
140  loadarray[XDIR] = currentload;
141  if (i == p.origin.x) p1_empty[XDIR] = 1;
142  if (i > p.corner.x) p2_empty[XDIR] = 1;
143 
144 
145  // z axis
146  currentload = 0.0;
147  k = p.origin.z;
148  while(currentload < load1 && k <= p.corner.z)
149  {
150  prevload = currentload;
151  for(i=p.origin.x; i<=p.corner.x; i++)
152  for(j=p.origin.y; j<=p.corner.y; j++)
153  currentload += patchload[patchMap->pid(i,j,k)].total;
154  if (currentload > load1)
155  if ( prev_better(prevload,currentload,load1) )
156  {currentload=prevload; break;}
157  k++;
158  }
159 
160  posi[ZDIR] = i; posj[ZDIR] = j; posk[ZDIR] = k;
161  loadarray[ZDIR] = currentload;
162  if (k == p.origin.z) p1_empty[ZDIR] = 1;
163  if (k > p.corner.z) p2_empty[ZDIR] = 1;
164 
165 
166  // y axis
167  currentload = 0.0;
168  j = p.origin.y;
169  while(currentload < load1 && j <= p.corner.y)
170  {
171  prevload = currentload;
172  for(i=p.origin.x; i<=p.corner.x; i++)
173  for(k=p.origin.z; k<=p.corner.z; k++)
174  currentload += patchload[patchMap->pid(i,j,k)].total;
175  if (currentload > load1)
176  if ( prev_better(prevload,currentload,load1) )
177  {currentload=prevload; break;}
178  j++;
179  }
180  posi[YDIR] = i; posj[YDIR] = j; posk[YDIR] = k;
181  loadarray[YDIR] = currentload;
182  if (j == p.origin.y) p1_empty[YDIR] = 1;
183  if (j > p.corner.y) p2_empty[YDIR] = 1;
184 
185  // determine the best division direction
186  mindiff = load1;
187  mindir = -1;
188  for(i=XDIR; i<=ZDIR; i++) {
189  diff = load1 - loadarray[i];
190  if (diff < 0.0) diff = -diff;
191  if (mindiff >= diff) {mindiff = diff; mindir = i;}
192  }
193 
194  // always divide along x or y if possible
195  int lx = p.corner.x - p.origin.x + 1;
196  if ( n >= 2*lx || lx >= 2*n || n > 10 || lx > 10 ) {
197  int n2x = n * (p.load - loadarray[XDIR]) / p.load + 0.5;
198  if ( lx > 1 && n2x > 0 && n2x < n ) mindir = XDIR;
199  }
200 
201  // revise n1 and n2 based on selected splitting dimension
202  n2 = n * (p.load - loadarray[mindir]) / p.load + 0.5;
203  if ( n2 < 1 ) n2 = 1;
204  if ( n2 >= n ) n2 = n-1;
205  n1 = n-n2;
206 
207  // divide along mindir
208  switch (mindir) {
209  case XDIR: p1.corner.x = posi[XDIR] - 1;
210  p2.origin.x = posi[XDIR];
211  break;
212  case YDIR: p1.corner.y = posj[YDIR] - 1;
213  p2.origin.y = posj[YDIR];
214  break;
215  case ZDIR: p1.corner.z = posk[ZDIR] - 1;
216  p2.origin.z = posk[ZDIR];
217  break;
218  default: NAMD_bug("RecBisection failing horribly!");
219  }
220  p1.load = loadarray[mindir];
221  p2.load = p.load - p1.load;
222  if (!p1_empty[mindir]) rec_divide(n1,p1);
223  if (!p2_empty[mindir]) rec_divide(n2,p2);
224 }
225 
226 
227 /* ************************************************************************ */
228 /* Compute the initial overhead/load of each patch to the processor. The */
229 /* load of patches are computed as follows: Each patch has a fixed cost and */
230 /* a variable cost depending of number atoms it has; c_local0 and c_local1 */
231 /* respectively. Secondly, due to interaction with each patch, the patch */
232 /* incurs some load determined by c_edge0 and c_edge1 cost parameters. */
233 /* Finally, each patch has load due to computing forces between its certain */
234 /* neighbours, with cost coefficients c_icompute0 and c_icompute1 */
235 /* ************************************************************************ */
236 
237 void RecBisection::compute_patch_load()
238 {
239  int i,nix,neighbour;
240  int numAtoms, numFixed, numNeighAtoms, numNeighFixed;
241  float total_icompute;
242 
243  for(i=0; i<numPatches; i++) {
244 #ifdef MEM_OPT_VERSION
245  numAtoms = patchMap->numAtoms(i);
246  numFixed = patchMap->numFixedAtoms(i);
247 #else
248  numAtoms = patchMap->patch(i)->getNumAtoms();
249  numFixed = patchMap->patch(i)->getNumFixedAtoms();
250 #endif
251  patchload[i].total = 0.0;
252  patchload[i].edge = 0.0;
253 
254  patchload[i].local = c_local0 + c_local1 * numAtoms;
255  patchload[i].local += c_icompute0 +
256  c_icompute1*(numAtoms*numAtoms-numFixed*numFixed);
257 
258 
259  total_icompute = 0.0;
260 
262  int nNeighbors = patchMap->oneAwayNeighbors(i,neighbors);
263 
264  for(nix=0; nix<nNeighbors; nix++) {
265  neighbour = neighbors[nix];
266 #ifdef MEM_OPT_VERSION
267  numNeighAtoms = patchMap->numAtoms(neighbour);
268  numNeighFixed = patchMap->numFixedAtoms(neighbour);
269 #else
270  numNeighAtoms = patchMap->patch(neighbour)->getNumAtoms();
271  numNeighFixed = patchMap->patch(neighbour)->getNumFixedAtoms();
272 #endif
273  patchload[i].icompute[nix] =
274  c_icompute0 +
275  c_icompute1*(numNeighAtoms*numAtoms-numNeighFixed*numFixed);
276  total_icompute += patchload[i].icompute[nix];
277  patchload[i].edge += c_edge0 + c_edge1 * numNeighAtoms;
278  }
279  patchload[i].total+=patchload[i].local+total_icompute+patchload[i].edge;
280  }
281 }
282 
283 
284 
285 
286 /* ********************************************************************* */
287 /* Partitions a 3D space. First a recursive algorithm divides the initial */
288 /* space into rectangular prisms with approximately equal loads. */
289 /* Then these rectangular prisms ar emodified to firther increase the */
290 /* load balance and reduce communication cost */
291 /* ********************************************************************* */
292 
293 int RecBisection::partition(int *dest_arr)
294 {
295  int i;
296 
297  top_partition.origin.x = 0;
298  top_partition.origin.y = 0;
299  top_partition.origin.z = 0;
300  top_partition.corner.x = patchMap->gridsize_a()-1;
301  top_partition.corner.y = patchMap->gridsize_b()-1;
302  top_partition.corner.z = patchMap->gridsize_c()-1;
303  top_partition.load = 0.0;
304 
305  // calculate estimated computational load due to each patch
306  compute_patch_load();
307 
308  for(i=0; i<numPatches; i++) top_partition.load += patchload[i].total;
309 
310  // divide into rectangular prisms with load as equal as possible
311  rec_divide(npartition,top_partition);
312 
313  if (currentp != npartition)
314  return 0;
315  else {
316  if (dest_arr==NULL)
317  assignNodes();
318  else
319  assign_nodes_arr(dest_arr);
320  }
321 
322  return 1;
323 }
324 
325 
326 /* ********************************************************************* */
327 /* partitioning done, update PatchDistib data structure to assign the */
328 /* patches to nodes. (patches in partition i is assigned to node i) */
329 /* ********************************************************************* */
330 
331 void RecBisection::assignNodes()
332 {
333  int i,j,k,pix;
334  Partition p;
335 
336  for(pix=0; pix<npartition; pix++)
337  {
338  p = partitions[pix];
339  for(i=p.origin.x; i<=p.corner.x; i++)
340  for(j=p.origin.y; j<=p.corner.y; j++)
341  for(k=p.origin.z; k<=p.corner.z; k++)
342  patchMap->assignNode(patchMap->pid(i,j,k),pix);
343  }
344 
345  int npatches = patchMap->numPatches();
346  for (int pid=0; pid<npatches; ++pid )
347  patchMap->assignBaseNode(pid);
348 }
349 
350 /* ********************************************************************* */
351 /* partitioning done, save results to an array, rather than updating */
352 /* the PatchDistib data structure. Otherwise, thist is identical to */
353 /* assignNodes() */
354 /* ********************************************************************* */
355 
356 void RecBisection::assign_nodes_arr(int *dest_arr)
357 {
358  int i,j,k,pix;
359  Partition p;
360 
361  for(pix=0; pix<npartition; pix++)
362  {
363  p = partitions[pix];
364  for(i=p.origin.x; i<=p.corner.x; i++)
365  for(j=p.origin.y; j<=p.corner.y; j++)
366  for(k=p.origin.z; k<=p.corner.z; k++) {
367  dest_arr[patchMap->pid(i,j,k)] = pix;
368  }
369  }
370 }
371 
372 
373 /* ************************************************************************ */
374 /* to be implemented: */
375 /* this functions revisits edge directions to refine the load distribution */
376 /* ************************************************************************ */
377 void RecBisection::refine_edges()
378 {
379 }
380 
381 
382 /* ************************************************************************ */
383 /* to be implemented: */
384 /* this function refines the boundaries of subpartitions to redice the */
385 /* communication across processors */
386 /* ************************************************************************ */
387 void RecBisection::refine_boundaries()
388 {
389 }
390 
391 
392 /* ************************************************************************ */
393 /* to be implemented: */
394 /* refine boundries invokes this function for eeach surface */
395 /* ************************************************************************ */
396 void RecBisection::refine_surface()
397 {
398 }
399 
400 /* ************************************************************************ */
401 /* return true if the difference between previous load (prev1) and desired */
402 /* load (load1) is less than teh difference between current an desired */
403 /* ************************************************************************ */
404 
405 int RecBisection::prev_better(float prev, float current, float load1)
406 {
407  float diff1,diff2;
408 
409  diff1 = load1 - prev;
410  diff2 = current - load1;
411 
412  if (diff1 < 0.0) diff1 = -diff1;
413  if (diff2 < 0.0) diff2 = -diff2;
414 
415  return (diff1 <= diff2);
416 }
417 
418 #if USE_TOPOMAP
419 
420 /* ********************************************************************* */
421 /* Partitions a 3D processor into rectangular prisms of different
422  sizes corresponding to the patches. Then a processor in the prism
423  is assigned to hold the patch.
424 *************************************/
425 
426 int RecBisection::partitionProcGrid(int X, int Y, int Z, int *dest_arr) {
427 
428  int i = 0;
429  top_partition.origin.x = 0;
430  top_partition.origin.y = 0;
431  top_partition.origin.z = 0;
432  top_partition.corner.x = patchMap->gridsize_a()-1;
433  top_partition.corner.y = patchMap->gridsize_b()-1;
434  top_partition.corner.z = patchMap->gridsize_c()-1;
435  top_partition.load = 0.0;
436 
437  // calculate estimated computational load due to each patch
438  compute_patch_load();
439 
440  for(i=0; i<numPatches; i++) top_partition.load += patchload[i].total;
441 
442  int new_X = X, new_Y = Y, new_Z = Z;
443  DimensionMap dm;
444 
445  dm.x = 0; dm.y = 1; dm.z = 2;
446 
447  findOptimalDimensions(X, Y, Z, new_X, new_Y, new_Z, dm);
448 
449  Partition proc_p;
450 
451  proc_p.origin.x = 0;
452  proc_p.corner.x = new_X - 1;
453 
454  proc_p.origin.y = 0;
455  proc_p.corner.y = new_Y - 1;
456 
457  proc_p.origin.z = 0;
458  proc_p.corner.z = new_Z - 1;
459 
460  iout << "\nLDB: Partitioning Proc Grid of size " << "[" << X << "] [" << Y << "] [" << Z << "]\n" << endi;
461 
462  // divide processor grid into rectangular prisms whose sizes
463  // corrspond to the computation load of the patches. Moreover
464  // neoghboring should also be on nearby processors on the grid
465  int rc = topogrid_rec_divide(proc_p, top_partition);
466 
467  if (rc < 0)
468  return rc;
469 
470  assignPatchesToProcGrid(dest_arr, X, Y, Z, dm);
471 
472  return 1;
473 }
474 
475 //Partition both a processor grid and a patch grid. Only useful when
476 //number of processors is 2 times greater than number of patches. It
477 //returns a processor partition for each patch.
478 
479 int RecBisection::topogrid_rec_divide(Partition &proc_p, Partition &patch_p) {
480 
481  Partition proc_p1, proc_p2, patch_p1, patch_p2;
482  int i=0, j=0, k=0;
483  int posi[3],posj[3],posk[3]; // division points along x,y,z direction
484  int mindir; // the best direction
485  int p1_empty[3], p2_empty[3]; // true if a subpartition is empty
486  double loadarray[3]; // actual loads of p1 (for each x,y,z division)
487  double diff; // load diffenrence (actual - desired)
488  double mindiff; // minimum difference (among directions)
489 
490  proc_p1 = proc_p2 = proc_p;
491  patch_p1 = patch_p2 = patch_p;
492 
493  p1_empty[0] = p1_empty[1] = p1_empty[2] = 0;
494  p2_empty[0] = p2_empty[1] = p2_empty[2] = 0;
495  /*
496  CkPrintf("topo rec divide pe grid = (%d,%d,%d) to (%d,%d,%d) and patch grid = (%d,%d,%d) to (%d,%d,%d)\n", proc_p.origin.x, proc_p.origin.y,
497  proc_p.origin.z, proc_p.corner.x, proc_p.corner.y, proc_p.corner.z,
498  patch_p.origin.x, patch_p.origin.y, patch_p.origin.z, patch_p.corner.x,
499  patch_p.corner.y, patch_p.corner.z);
500  */
501 
502  //We have just one patch left in the partition
503  if(patch_p.origin.x == patch_p.corner.x &&
504  patch_p.origin.y == patch_p.corner.y &&
505  patch_p.origin.z == patch_p.corner.z) {
506 
507  int pid = patchMap->pid(patch_p.origin.x,
508  patch_p.origin.y, patch_p.origin.z);
509 
510  //This patch owns all processors in the partition proc_p
511  partitions[pid] = proc_p;
512 
513  //CkPrintf("Assigning patch %d to partition with origin at %d,%d,%d\n", pid,
514  // proc_p.origin.x,
515  // proc_p.origin.y, proc_p.origin.z);
516 
517  return 1;
518  }
519 
520 
521  if(proc_p.origin.x == proc_p.corner.x &&
522  proc_p.origin.y == proc_p.corner.y &&
523  proc_p.origin.z == proc_p.corner.z) {
524 
525  return -1;
526  /*
527  for(k = patch_p.origin.z; k < patch_p.corner.z; k++)
528  for(j = patch_p.origin.y; j < patch_p.corner.y; j++)
529  for(i = patch_p.origin.x; i < patch_p.corner.x; i++) {
530 
531  partitions[patchMap->pid(i,j,k)] = proc_p;
532  }
533  return;
534  */
535  }
536 
537  double load_x, load_y, load_z;
538  load_x = (int) (proc_p.corner.x - proc_p.origin.x + 1)/2;
539  load_x /= proc_p.corner.x - proc_p.origin.x + 1;
540  load_x *= patch_p.load;
541 
542 
543  load_y = (int) (proc_p.corner.y - proc_p.origin.y + 1)/2;
544  load_y /= proc_p.corner.y - proc_p.origin.y + 1;
545  load_y *= patch_p.load;
546 
547  load_z = (int) (proc_p.corner.z - proc_p.origin.z + 1)/2;
548  load_z /= proc_p.corner.z - proc_p.origin.z + 1;
549  load_z *= patch_p.load;
550 
551  loadarray[XDIR] = loadarray[YDIR] = loadarray[ZDIR] = 10.0 * patch_p.load;
552 
553  double currentload = 0;
554  double prevload = 0;
555  if(load_x > 0 && patch_p.origin.x != patch_p.corner.x) {
556  //Split the processors in X dimension
557  //Also split patches in X dimension
558 
559  currentload = 0.0;
560  i = patch_p.origin.x;
561 
562  while(currentload < load_x && i <= patch_p.corner.x) {
563  prevload = currentload;
564  for(j=patch_p.origin.y; j<=patch_p.corner.y; j++)
565  for(k=patch_p.origin.z; k<=patch_p.corner.z; k++)
566  currentload += patchload[patchMap->pid(i,j,k)].total;
567 
568  if (currentload > load_x)
569  if ( prev_better(prevload,currentload,load_x) ) {
570  currentload=prevload;
571  break;
572  }
573  i++;
574  }
575 
576  posi[XDIR] = i; posj[XDIR] = j; posk[XDIR] = k;
577  loadarray[XDIR] = currentload;
578  if (i == patch_p.origin.x) p1_empty[XDIR] = 1;
579  if (i > patch_p.corner.x) p2_empty[XDIR] = 1;
580  }
581 
582  if(load_z > 0 && patch_p.origin.z != patch_p.corner.z) { //Split patches in Z dimension
583  //Also split patches in Z dimension
584 
585  // z axis
586  currentload = 0.0;
587  k = patch_p.origin.z;
588  while(currentload < load_z && k <= patch_p.corner.z){
589  prevload = currentload;
590  for(i=patch_p.origin.x; i<=patch_p.corner.x; i++)
591  for(j=patch_p.origin.y; j<=patch_p.corner.y; j++)
592  currentload += patchload[patchMap->pid(i,j,k)].total;
593 
594  if (currentload > load_z)
595  if ( prev_better(prevload,currentload,load_z) )
596  {currentload=prevload; break;}
597  k++;
598  }
599 
600  posi[ZDIR] = i; posj[ZDIR] = j; posk[ZDIR] = k;
601  loadarray[ZDIR] = currentload;
602  if (k == patch_p.origin.z) p1_empty[ZDIR] = 1;
603  if (k > patch_p.corner.z) p2_empty[ZDIR] = 1;
604  }
605 
606  if(load_y > 0 && patch_p.origin.y != patch_p.corner.y) { //Y dimension
607 
608  // y axis
609  currentload = 0.0;
610  j = patch_p.origin.y;
611  while(currentload < load_y && j <= patch_p.corner.y) {
612  prevload = currentload;
613  for(i=patch_p.origin.x; i<=patch_p.corner.x; i++)
614  for(k=patch_p.origin.z; k<=patch_p.corner.z; k++)
615  currentload += patchload[patchMap->pid(i,j,k)].total;
616 
617  if (currentload > load_y)
618  if ( prev_better(prevload,currentload,load_y) )
619  {currentload=prevload; break;}
620  j++;
621  }
622  posi[YDIR] = i; posj[YDIR] = j; posk[YDIR] = k;
623  loadarray[YDIR] = currentload;
624  if (j == patch_p.origin.y) p1_empty[YDIR] = 1;
625  if (j > patch_p.corner.y) p2_empty[YDIR] = 1;
626  }
627 
628  //Need to choose the best division. Need to be careful because the
629  //processor grid may not be partitionable in all dimensions
630 
631  mindiff = 10.0 * patch_p.load;
632  mindir = -1;
633  for(i=ZDIR; i >= XDIR; i--) {
634 
635  double load1 = 0;
636  if(i == XDIR)
637  load1 = load_x;
638  else if(i == YDIR)
639  load1 = load_y;
640  else if(i == ZDIR)
641  load1= load_z;
642 
643  diff = load1 - loadarray[i];
644  if (diff < 0.0) diff = -diff;
645  if (mindiff > diff) {mindiff = diff; mindir = i;}
646  }
647 
648  int pdir = 0;
649 
650  //Give up on loads and divide it some way. Later change to the
651  //max of the differences along processors and patches
652  if(mindiff >= patch_p.load) {
653  if(patch_p.origin.x != patch_p.corner.x) {
654  mindir = XDIR;
655  posi[XDIR] = (patch_p.origin.x + patch_p.corner.x + 1)/2;
656  }
657  else if(patch_p.origin.y != patch_p.corner.y) {
658  mindir = YDIR;
659  posj[YDIR] = (patch_p.origin.y + patch_p.corner.y + 1)/2;
660  }
661  else {
662  mindir = ZDIR;
663  posk[ZDIR] = (patch_p.origin.z + patch_p.corner.z + 1)/2;
664  }
665 
666  if(load_x > 0)
667  pdir = XDIR;
668  else if(load_y > 0)
669  pdir = YDIR;
670  else
671  pdir = ZDIR;
672 
673  loadarray[mindir] = 0.5 * patch_p.load;
674  loadarray[pdir] = 0.5 * patch_p.load;
675  }
676  else {
677  pdir = mindir;
678  }
679 
680  int n1 = 0, n2 = 0;
681 
682  // divide processor along pdir
683  switch (pdir) {
684  case XDIR:
685  n1 = loadarray[XDIR] * (proc_p.corner.x - proc_p.origin.x) / patch_p.load;
686  n2 = proc_p.corner.x - proc_p.origin.x - n1;
687 
688  proc_p1.corner.x = proc_p.origin.x + n1;
689  proc_p2.origin.x = proc_p.origin.x + n1 + 1;
690 
691  break;
692  case YDIR:
693 
694  n1 = loadarray[YDIR] * (proc_p.corner.y - proc_p.origin.y) / patch_p.load;
695  n2 = proc_p.corner.y - proc_p.origin.y - n1;
696 
697  proc_p1.corner.y = proc_p.origin.y + n1;
698  proc_p2.origin.y = proc_p.origin.y + n1 + 1;
699  break;
700 
701  case ZDIR:
702  n1 = loadarray[ZDIR] * (proc_p.corner.z - proc_p.origin.z) / patch_p.load;
703  n2 = proc_p.corner.z - proc_p.origin.z - n1;
704 
705  proc_p1.corner.z = proc_p.origin.z + n1;
706  proc_p2.origin.z = proc_p.origin.z + n1 + 1;
707 
708  break;
709  default: NAMD_bug("RecBisection failing horribly!");
710  }
711 
712  // divide PATCH along mindir
713  switch (mindir) {
714  case XDIR:
715  patch_p1.corner.x = posi[XDIR] - 1;
716  patch_p2.origin.x = posi[XDIR];
717 
718  break;
719  case YDIR:
720  patch_p1.corner.y = posj[YDIR] - 1;
721  patch_p2.origin.y = posj[YDIR];
722  break;
723 
724  case ZDIR:
725  patch_p1.corner.z = posk[ZDIR] - 1;
726  patch_p2.origin.z = posk[ZDIR];
727  break;
728 
729  default: NAMD_bug("RecBisection failing horribly!");
730  }
731 
732  patch_p1.load = loadarray[mindir];
733  patch_p2.load = patch_p.load - patch_p1.load;
734 
735  /*
736  CkPrintf("Calling Topo Rec Divide along %d direction with pe grid = (%d,%d,%d) to (%d,%d,%d) and patch grid = (%d,%d,%d) to (%d,%d,%d)\n", mindir, proc_p1.origin.x, proc_p1.origin.y,
737  proc_p1.origin.z, proc_p1.corner.x, proc_p1.corner.y, proc_p1.corner.z,
738  patch_p1.origin.x, patch_p1.origin.y, patch_p1.origin.z, patch_p1.corner.x,
739  patch_p1.corner.y, patch_p1.corner.z);
740 
741  CkPrintf("Calling Topo Rec Divide along %d direction with pe grid = (%d,%d,%d) to (%d,%d,%d) and patch grid = (%d,%d,%d) to (%d,%d,%d)\n\n\n", mindir, proc_p2.origin.x, proc_p2.origin.y,
742  proc_p2.origin.z, proc_p2.corner.x, proc_p2.corner.y, proc_p2.corner.z,
743  patch_p2.origin.x, patch_p2.origin.y, patch_p2.origin.z, patch_p2.corner.x,
744  patch_p2.corner.y, patch_p2.corner.z);
745  */
746 
747  int rc = 0;
748 
749  if (!p1_empty[mindir])
750  rc = topogrid_rec_divide(proc_p1, patch_p1);
751  else
752  return -1;
753 
754  if (rc < 0)
755  return rc;
756 
757  if (!p2_empty[mindir])
758  rc = topogrid_rec_divide(proc_p2, patch_p2);
759  else
760  return -1;
761 
762  if (rc < 0)
763  return rc;
764 
765  return 1;
766 }
767 
768 
769 //Assign all patches to their corresponding processors
770 void RecBisection::assignPatchesToProcGrid(int *dest_arr, int X, int Y, int Z,
771  DimensionMap dm)
772 {
773  int pix;
774  Partition p;
775  TopoManager tmgr;
776 
777  srand(CkNumPes() * 1000);
778 
779  int coord[3];
780 
781  //Naive scheme where I just choose the origin of the proc grie
782  for(pix=0; pix<npartition; pix++) {
783  p = partitions[pix];
784  //Get the actual origin of that processor prism and assign it
785  //to this patch
786 
787  int xdiff, ydiff, zdiff;
788 
789  xdiff = p.corner.x - p.origin.x + 1;
790  ydiff = p.corner.y - p.origin.y + 1;
791  zdiff = p.corner.z - p.origin.z + 1;
792 
793  coord[0] = p.origin.x + rand() % xdiff;
794  coord[1] = p.origin.y + rand() % ydiff;
795  coord[2] = p.origin.z + rand() % zdiff;
796 
797  int pe = tmgr.coordinatesToRank(coord[dm.x], coord[dm.y], coord[dm.z], 0);
798  patchMap->assignNode(pix, pe);
799  dest_arr[pix] = pe;
800  }
801 
802  int npatches = patchMap->numPatches();
803  for (int pid=0; pid<npatches; ++pid )
804  patchMap->assignBaseNode(pid);
805 }
806 
807 #endif //USE_TOPOMAP
int gridsize_c(void) const
Definition: PatchMap.h:66
int getNumFixedAtoms()
Definition: Patch.h:112
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
#define X
Definition: msm_defn.h:29
if(ComputeNonbondedUtil::goMethod==2)
#define iout
Definition: InfoStream.h:51
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
void assignBaseNode(PatchID, NodeID)
Definition: PatchMap.C:472
int gridsize_a(void) const
Definition: PatchMap.h:64
void NAMD_bug(const char *err_msg)
Definition: common.C:129
int oneAwayNeighbors(int pid, PatchID *neighbor_ids=0)
Definition: PatchMap.C:532
#define Z
Definition: msm_defn.h:33
int PatchID
Definition: NamdTypes.h:182
void NAMD_die(const char *err_msg)
Definition: common.C:85
int numPatches(void) const
Definition: PatchMap.h:59
int pid(int aIndex, int bIndex, int cIndex)
Definition: PatchMap.inl:27
int getNumAtoms()
Definition: Patch.h:105
void assignNode(PatchID, NodeID)
Definition: PatchMap.C:465
int partition(int *)
Definition: RecBisection.C:293
RecBisection(int, PatchMap *)
Definition: RecBisection.C:18
#define Y
Definition: msm_defn.h:31
int gridsize_b(void) const
Definition: PatchMap.h:65