20 patchMap = thePatchMap;
21 npartition = numpartitions;
23 partitions =
new Partition[npartition];
24 patchload =
new PatchLoad[numPatches];
27 if ( partitions == NULL ||
30 NAMD_die(
"memory allocation failed in RecBisection::RecBisection");
42 c_icompute1 = 0.000035;
85 void RecBisection::rec_divide(
int n,
const Partition &p)
88 int posi[3],posj[3],posk[3];
91 int p1_empty[3], p2_empty[3];
93 float prevload,currentload;
106 partitions[currentp++] = p;
116 load1 = ( (float) n1/(
float) n ) * p.load;
118 for(i=XDIR; i<=ZDIR; i++) {p1_empty[i] = p2_empty[i] = 0;}
128 while(currentload < load1 && i<= p.corner.x)
130 prevload = currentload;
131 for(j=p.origin.y; j<=p.corner.y; j++)
132 for(k=p.origin.z; k<=p.corner.z; k++)
133 currentload += patchload[patchMap->
pid(i,j,k)].total;
134 if (currentload > load1)
135 if ( prev_better(prevload,currentload,load1) )
136 {currentload=prevload;
break;}
139 posi[XDIR] = i; posj[XDIR] = j; posk[XDIR] = k;
140 loadarray[XDIR] = currentload;
141 if (i == p.origin.x) p1_empty[XDIR] = 1;
142 if (i > p.corner.x) p2_empty[XDIR] = 1;
148 while(currentload < load1 && k <= p.corner.z)
150 prevload = currentload;
151 for(i=p.origin.x; i<=p.corner.x; i++)
152 for(j=p.origin.y; j<=p.corner.y; j++)
153 currentload += patchload[patchMap->
pid(i,j,k)].total;
154 if (currentload > load1)
155 if ( prev_better(prevload,currentload,load1) )
156 {currentload=prevload;
break;}
160 posi[ZDIR] = i; posj[ZDIR] = j; posk[ZDIR] = k;
161 loadarray[ZDIR] = currentload;
162 if (k == p.origin.z) p1_empty[ZDIR] = 1;
163 if (k > p.corner.z) p2_empty[ZDIR] = 1;
169 while(currentload < load1 && j <= p.corner.y)
171 prevload = currentload;
172 for(i=p.origin.x; i<=p.corner.x; i++)
173 for(k=p.origin.z; k<=p.corner.z; k++)
174 currentload += patchload[patchMap->
pid(i,j,k)].total;
175 if (currentload > load1)
176 if ( prev_better(prevload,currentload,load1) )
177 {currentload=prevload;
break;}
180 posi[YDIR] = i; posj[YDIR] = j; posk[YDIR] = k;
181 loadarray[YDIR] = currentload;
182 if (j == p.origin.y) p1_empty[YDIR] = 1;
183 if (j > p.corner.y) p2_empty[YDIR] = 1;
188 for(i=XDIR; i<=ZDIR; i++) {
189 diff = load1 - loadarray[i];
190 if (diff < 0.0) diff = -diff;
191 if (mindiff >= diff) {mindiff = diff; mindir = i;}
195 int lx = p.corner.x - p.origin.x + 1;
196 if ( n >= 2*lx || lx >= 2*n || n > 10 || lx > 10 ) {
197 int n2x = n * (p.load - loadarray[XDIR]) / p.load + 0.5;
198 if ( lx > 1 && n2x > 0 && n2x < n ) mindir = XDIR;
202 n2 = n * (p.load - loadarray[mindir]) / p.load + 0.5;
203 if ( n2 < 1 ) n2 = 1;
204 if ( n2 >= n ) n2 = n-1;
209 case XDIR: p1.corner.x = posi[XDIR] - 1;
210 p2.origin.x = posi[XDIR];
212 case YDIR: p1.corner.y = posj[YDIR] - 1;
213 p2.origin.y = posj[YDIR];
215 case ZDIR: p1.corner.z = posk[ZDIR] - 1;
216 p2.origin.z = posk[ZDIR];
218 default:
NAMD_bug(
"RecBisection failing horribly!");
220 p1.load = loadarray[mindir];
221 p2.load = p.load - p1.load;
222 if (!p1_empty[mindir]) rec_divide(n1,p1);
223 if (!p2_empty[mindir]) rec_divide(n2,p2);
237 void RecBisection::compute_patch_load()
240 int numAtoms, numFixed, numNeighAtoms, numNeighFixed;
241 float total_icompute;
243 for(i=0; i<numPatches; i++) {
244 #ifdef MEM_OPT_VERSION
245 numAtoms = patchMap->numAtoms(i);
246 numFixed = patchMap->numFixedAtoms(i);
251 patchload[i].total = 0.0;
252 patchload[i].edge = 0.0;
254 patchload[i].local = c_local0 + c_local1 * numAtoms;
255 patchload[i].local += c_icompute0 +
256 c_icompute1*(numAtoms*numAtoms-numFixed*numFixed);
259 total_icompute = 0.0;
264 for(nix=0; nix<nNeighbors; nix++) {
265 neighbour = neighbors[nix];
266 #ifdef MEM_OPT_VERSION
267 numNeighAtoms = patchMap->numAtoms(neighbour);
268 numNeighFixed = patchMap->numFixedAtoms(neighbour);
273 patchload[i].icompute[nix] =
275 c_icompute1*(numNeighAtoms*numAtoms-numNeighFixed*numFixed);
276 total_icompute += patchload[i].icompute[nix];
277 patchload[i].edge += c_edge0 + c_edge1 * numNeighAtoms;
279 patchload[i].total+=patchload[i].local+total_icompute+patchload[i].edge;
297 top_partition.origin.x = 0;
298 top_partition.origin.y = 0;
299 top_partition.origin.z = 0;
300 top_partition.corner.x = patchMap->
gridsize_a()-1;
301 top_partition.corner.y = patchMap->
gridsize_b()-1;
302 top_partition.corner.z = patchMap->
gridsize_c()-1;
303 top_partition.load = 0.0;
306 compute_patch_load();
308 for(i=0; i<numPatches; i++) top_partition.load += patchload[i].total;
311 rec_divide(npartition,top_partition);
313 if (currentp != npartition)
319 assign_nodes_arr(dest_arr);
331 void RecBisection::assignNodes()
336 for(pix=0; pix<npartition; pix++)
339 for(i=p.origin.x; i<=p.corner.x; i++)
340 for(j=p.origin.y; j<=p.corner.y; j++)
341 for(k=p.origin.z; k<=p.corner.z; k++)
346 for (
int pid=0; pid<npatches; ++pid )
356 void RecBisection::assign_nodes_arr(
int *dest_arr)
361 for(pix=0; pix<npartition; pix++)
364 for(i=p.origin.x; i<=p.corner.x; i++)
365 for(j=p.origin.y; j<=p.corner.y; j++)
366 for(k=p.origin.z; k<=p.corner.z; k++) {
367 dest_arr[patchMap->
pid(i,j,k)] = pix;
377 void RecBisection::refine_edges()
387 void RecBisection::refine_boundaries()
396 void RecBisection::refine_surface()
405 int RecBisection::prev_better(
float prev,
float current,
float load1)
409 diff1 = load1 - prev;
410 diff2 = current - load1;
412 if (diff1 < 0.0) diff1 = -diff1;
413 if (diff2 < 0.0) diff2 = -diff2;
415 return (diff1 <= diff2);
426 int RecBisection::partitionProcGrid(
int X,
int Y,
int Z,
int *dest_arr) {
429 top_partition.origin.x = 0;
430 top_partition.origin.y = 0;
431 top_partition.origin.z = 0;
432 top_partition.corner.x = patchMap->
gridsize_a()-1;
433 top_partition.corner.y = patchMap->
gridsize_b()-1;
434 top_partition.corner.z = patchMap->
gridsize_c()-1;
435 top_partition.load = 0.0;
438 compute_patch_load();
440 for(i=0; i<numPatches; i++) top_partition.load += patchload[i].total;
442 int new_X = X, new_Y = Y, new_Z = Z;
445 dm.x = 0; dm.y = 1; dm.z = 2;
447 findOptimalDimensions(X, Y, Z, new_X, new_Y, new_Z, dm);
452 proc_p.corner.x = new_X - 1;
455 proc_p.corner.y = new_Y - 1;
458 proc_p.corner.z = new_Z - 1;
460 iout <<
"\nLDB: Partitioning Proc Grid of size " <<
"[" << X <<
"] [" << Y <<
"] [" << Z <<
"]\n" <<
endi;
465 int rc = topogrid_rec_divide(proc_p, top_partition);
470 assignPatchesToProcGrid(dest_arr, X, Y, Z, dm);
479 int RecBisection::topogrid_rec_divide(Partition &proc_p, Partition &patch_p) {
481 Partition proc_p1, proc_p2, patch_p1, patch_p2;
483 int posi[3],posj[3],posk[3];
485 int p1_empty[3], p2_empty[3];
490 proc_p1 = proc_p2 = proc_p;
491 patch_p1 = patch_p2 = patch_p;
493 p1_empty[0] = p1_empty[1] = p1_empty[2] = 0;
494 p2_empty[0] = p2_empty[1] = p2_empty[2] = 0;
503 if(patch_p.origin.x == patch_p.corner.x &&
504 patch_p.origin.y == patch_p.corner.y &&
505 patch_p.origin.z == patch_p.corner.z) {
507 int pid = patchMap->
pid(patch_p.origin.x,
508 patch_p.origin.y, patch_p.origin.z);
511 partitions[pid] = proc_p;
521 if(proc_p.origin.x == proc_p.corner.x &&
522 proc_p.origin.y == proc_p.corner.y &&
523 proc_p.origin.z == proc_p.corner.z) {
537 double load_x, load_y, load_z;
538 load_x = (int) (proc_p.corner.x - proc_p.origin.x + 1)/2;
539 load_x /= proc_p.corner.x - proc_p.origin.x + 1;
540 load_x *= patch_p.load;
543 load_y = (int) (proc_p.corner.y - proc_p.origin.y + 1)/2;
544 load_y /= proc_p.corner.y - proc_p.origin.y + 1;
545 load_y *= patch_p.load;
547 load_z = (int) (proc_p.corner.z - proc_p.origin.z + 1)/2;
548 load_z /= proc_p.corner.z - proc_p.origin.z + 1;
549 load_z *= patch_p.load;
551 loadarray[XDIR] = loadarray[YDIR] = loadarray[ZDIR] = 10.0 * patch_p.load;
553 double currentload = 0;
555 if(load_x > 0 && patch_p.origin.x != patch_p.corner.x) {
560 i = patch_p.origin.x;
562 while(currentload < load_x && i <= patch_p.corner.x) {
563 prevload = currentload;
564 for(j=patch_p.origin.y; j<=patch_p.corner.y; j++)
565 for(k=patch_p.origin.z; k<=patch_p.corner.z; k++)
566 currentload += patchload[patchMap->
pid(i,j,k)].total;
568 if (currentload > load_x)
569 if ( prev_better(prevload,currentload,load_x) ) {
570 currentload=prevload;
576 posi[XDIR] = i; posj[XDIR] = j; posk[XDIR] = k;
577 loadarray[XDIR] = currentload;
578 if (i == patch_p.origin.x) p1_empty[XDIR] = 1;
579 if (i > patch_p.corner.x) p2_empty[XDIR] = 1;
582 if(load_z > 0 && patch_p.origin.z != patch_p.corner.z) {
587 k = patch_p.origin.z;
588 while(currentload < load_z && k <= patch_p.corner.z){
589 prevload = currentload;
590 for(i=patch_p.origin.x; i<=patch_p.corner.x; i++)
591 for(j=patch_p.origin.y; j<=patch_p.corner.y; j++)
592 currentload += patchload[patchMap->
pid(i,j,k)].total;
594 if (currentload > load_z)
595 if ( prev_better(prevload,currentload,load_z) )
596 {currentload=prevload;
break;}
600 posi[ZDIR] = i; posj[ZDIR] = j; posk[ZDIR] = k;
601 loadarray[ZDIR] = currentload;
602 if (k == patch_p.origin.z) p1_empty[ZDIR] = 1;
603 if (k > patch_p.corner.z) p2_empty[ZDIR] = 1;
606 if(load_y > 0 && patch_p.origin.y != patch_p.corner.y) {
610 j = patch_p.origin.y;
611 while(currentload < load_y && j <= patch_p.corner.y) {
612 prevload = currentload;
613 for(i=patch_p.origin.x; i<=patch_p.corner.x; i++)
614 for(k=patch_p.origin.z; k<=patch_p.corner.z; k++)
615 currentload += patchload[patchMap->
pid(i,j,k)].total;
617 if (currentload > load_y)
618 if ( prev_better(prevload,currentload,load_y) )
619 {currentload=prevload;
break;}
622 posi[YDIR] = i; posj[YDIR] = j; posk[YDIR] = k;
623 loadarray[YDIR] = currentload;
624 if (j == patch_p.origin.y) p1_empty[YDIR] = 1;
625 if (j > patch_p.corner.y) p2_empty[YDIR] = 1;
631 mindiff = 10.0 * patch_p.load;
633 for(i=ZDIR; i >= XDIR; i--) {
643 diff = load1 - loadarray[i];
644 if (diff < 0.0) diff = -diff;
645 if (mindiff > diff) {mindiff = diff; mindir = i;}
652 if(mindiff >= patch_p.load) {
653 if(patch_p.origin.x != patch_p.corner.x) {
655 posi[XDIR] = (patch_p.origin.x + patch_p.corner.x + 1)/2;
657 else if(patch_p.origin.y != patch_p.corner.y) {
659 posj[YDIR] = (patch_p.origin.y + patch_p.corner.y + 1)/2;
663 posk[ZDIR] = (patch_p.origin.z + patch_p.corner.z + 1)/2;
673 loadarray[mindir] = 0.5 * patch_p.load;
674 loadarray[pdir] = 0.5 * patch_p.load;
685 n1 = loadarray[XDIR] * (proc_p.corner.x - proc_p.origin.x) / patch_p.load;
686 n2 = proc_p.corner.x - proc_p.origin.x - n1;
688 proc_p1.corner.x = proc_p.origin.x + n1;
689 proc_p2.origin.x = proc_p.origin.x + n1 + 1;
694 n1 = loadarray[YDIR] * (proc_p.corner.y - proc_p.origin.y) / patch_p.load;
695 n2 = proc_p.corner.y - proc_p.origin.y - n1;
697 proc_p1.corner.y = proc_p.origin.y + n1;
698 proc_p2.origin.y = proc_p.origin.y + n1 + 1;
702 n1 = loadarray[ZDIR] * (proc_p.corner.z - proc_p.origin.z) / patch_p.load;
703 n2 = proc_p.corner.z - proc_p.origin.z - n1;
705 proc_p1.corner.z = proc_p.origin.z + n1;
706 proc_p2.origin.z = proc_p.origin.z + n1 + 1;
709 default:
NAMD_bug(
"RecBisection failing horribly!");
715 patch_p1.corner.x = posi[XDIR] - 1;
716 patch_p2.origin.x = posi[XDIR];
720 patch_p1.corner.y = posj[YDIR] - 1;
721 patch_p2.origin.y = posj[YDIR];
725 patch_p1.corner.z = posk[ZDIR] - 1;
726 patch_p2.origin.z = posk[ZDIR];
729 default:
NAMD_bug(
"RecBisection failing horribly!");
732 patch_p1.load = loadarray[mindir];
733 patch_p2.load = patch_p.load - patch_p1.load;
749 if (!p1_empty[mindir])
750 rc = topogrid_rec_divide(proc_p1, patch_p1);
757 if (!p2_empty[mindir])
758 rc = topogrid_rec_divide(proc_p2, patch_p2);
770 void RecBisection::assignPatchesToProcGrid(
int *dest_arr,
int X,
int Y,
int Z,
777 srand(CkNumPes() * 1000);
782 for(pix=0; pix<npartition; pix++) {
787 int xdiff, ydiff, zdiff;
789 xdiff = p.corner.x - p.origin.x + 1;
790 ydiff = p.corner.y - p.origin.y + 1;
791 zdiff = p.corner.z - p.origin.z + 1;
793 coord[0] = p.origin.x + rand() % xdiff;
794 coord[1] = p.origin.y + rand() % ydiff;
795 coord[2] = p.origin.z + rand() % zdiff;
797 int pe = tmgr.coordinatesToRank(coord[dm.x], coord[dm.y], coord[dm.z], 0);
803 for (
int pid=0; pid<npatches; ++pid )
int gridsize_c(void) const
std::ostream & endi(std::ostream &s)
if(ComputeNonbondedUtil::goMethod==2)
Patch * patch(PatchID pid)
void assignBaseNode(PatchID, NodeID)
int gridsize_a(void) const
void NAMD_bug(const char *err_msg)
int oneAwayNeighbors(int pid, PatchID *neighbor_ids=0)
void NAMD_die(const char *err_msg)
int numPatches(void) const
int pid(int aIndex, int bIndex, int cIndex)
void assignNode(PatchID, NodeID)
RecBisection(int, PatchMap *)
int gridsize_b(void) const