| version 1.109 | version 1.110 |
|---|---|
| |
| yPencil = CProxy_PmeYPencil::ckNew(); // (xBlocks,1,zBlocks); | yPencil = CProxy_PmeYPencil::ckNew(); // (xBlocks,1,zBlocks); |
| xPencil = CProxy_PmeXPencil::ckNew(); // (1,yBlocks,zBlocks); | xPencil = CProxy_PmeXPencil::ckNew(); // (1,yBlocks,zBlocks); |
| | |
| #if 1 | // decide which pes to use by bit reversal and patch use |
| | int i; |
| PatchMap *pmap = PatchMap::Object(); | int ncpus = CkNumPes(); |
| int npatches = pmap->numHomePatches(); | |
| | |
| int *pmemap = new int [npe]; | // find next highest power of two |
| memset (pmemap, 0, sizeof (int) * npe); | int npow2 = 1; int nbits = 0; |
| | while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; } |
| | |
| //Use max of x*y, y*z, z*x | // build bit reversal sequence |
| int n_pme_pes = xBlocks * yBlocks; | SortableResizeArray<int> patches, nopatches, pmeprocs; |
| if ( n_pme_pes < xBlocks * zBlocks ) n_pme_pes = xBlocks * zBlocks; | PatchMap *pmap = PatchMap::Object(); |
| if ( n_pme_pes < yBlocks * zBlocks ) n_pme_pes = yBlocks * zBlocks; | i = 0; |
| int n_avail_pes = 0; | for ( int icpu=0; icpu<ncpus; ++icpu ) { |
| | int ri; |
| //Grab all processors where we can store pme chares | for ( ri = ncpus; ri >= ncpus; ++i ) { |
| if (npe > npatches + 2 * n_pme_pes) { | ri = 0; |
| //Use non patch processors to assign pme chares as we have | int pow2 = 1; |
| //many processors, check base nodes later | int rpow2 = npow2 / 2; |
| for (int count = 0; count < npe; count++) | for ( int j=0; j<nbits; ++j ) { |
| if(pmap->numPatchesOnNode(basepe + count) == 0) | ri += rpow2 * ( ( i / pow2 ) % 2 ); |
| pmemap[n_avail_pes++] = basepe + count; | pow2 *= 2; rpow2 /= 2; |
| } | } |
| else { //Use all processors to assign pme chares | } |
| for (int count = 0; count < npe; count++) | // seq[icpu] = ri; |
| pmemap [n_avail_pes ++] = basepe + count; | if ( ri ) { // keep 0 for special case |
| | if ( pmap->numPatchesOnNode(ri) ) patches.add(ri); |
| | else nopatches.add(ri); |
| | } |
| } | } |
| | |
| double pe = 0.0; | // only use zero if it eliminates overloading or has patches |
| double stride = 1.0; | int useZero = 0; |
| | int npens = xBlocks*yBlocks; |
| | if ( npens % ncpus == 0 ) useZero = 1; |
| | if ( npens == nopatches.size() + 1 ) useZero = 1; |
| | npens += xBlocks*zBlocks; |
| | if ( npens % ncpus == 0 ) useZero = 1; |
| | if ( npens == nopatches.size() + 1 ) useZero = 1; |
| | npens += yBlocks*zBlocks; |
| | if ( npens % ncpus == 0 ) useZero = 1; |
| | if ( npens == nopatches.size() + 1 ) useZero = 1; |
| | |
| | // add nopatches then patches in reversed order |
| | for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]); |
| | if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0); |
| | for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]); |
| | if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0); |
| | |
| int x,y,z; | int pe = 0; |
| stride = (1.0 * n_avail_pes) / (n_pme_pes); | int npes = pmeprocs.size(); |
| | SortableResizeArray<int> zprocs(xBlocks*yBlocks); |
| | for ( i=0; i<xBlocks*yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes]; |
| | zprocs.sort(); |
| | SortableResizeArray<int> yprocs(xBlocks*zBlocks); |
| | for ( i=0; i<xBlocks*zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes]; |
| | yprocs.sort(); |
| | SortableResizeArray<int> xprocs(yBlocks*zBlocks); |
| | for ( i=0; i<yBlocks*zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes]; |
| | xprocs.sort(); |
| | |
| pencilPMEProcessors = new char [CkNumPes()]; | pencilPMEProcessors = new char [CkNumPes()]; |
| memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes()); | memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes()); |
| | |
| for (pe=0.0, x = 0; x < xBlocks; x ++) | int x,y,z; |
| for (y = 0; y < yBlocks; y ++) { | |
| if (pe >= n_avail_pes) pe = 0.0; | |
| zPencil(x,y,0).insert (pmemap[(int) pe]); | |
| pencilPMEProcessors [pmemap[(int) pe]] = 1; | |
| pe += stride; | |
| } | |
| zPencil.doneInserting(); | |
| | |
| for (pe=1.0, z = 0; z < zBlocks; z ++) | iout << iINFO << "PME Z PENCIL LOCATIONS:"; |
| for (x = 0; x < xBlocks; x ++) { | for ( i=0; i<zprocs.size() && i<10; ++i ) { |
| if (pe >= n_avail_pes) pe = 1.0; | iout << " " << zprocs[i]; |
| yPencil(x,0,z).insert (pmemap[(int) pe]); | |
| pencilPMEProcessors [pmemap[(int) pe]] = 1; | |
| pe += stride; | |
| } | } |
| yPencil.doneInserting(); | if ( i < zprocs.size() ) iout << " ..."; |
| | iout << "\n" << endi; |
| | |
| for (pe=0.0, y = 0; y < yBlocks; y ++) | for (pe=0, x = 0; x < xBlocks; ++x) |
| for (z = 0; z < zBlocks; z ++) { | for (y = 0; y < yBlocks; ++y, ++pe ) { |
| if (pe >= n_avail_pes) pe = 0.0; | zPencil(x,y,0).insert(zprocs[pe]); |
| xPencil(0,y,z).insert (pmemap[(int) pe]); | pencilPMEProcessors[zprocs[pe]] = 1; |
| pencilPMEProcessors [pmemap[(int) pe]] = 1; | |
| pe += stride; | |
| } | } |
| xPencil.doneInserting(); | |
| | |
| delete [] pmemap; | |
| | |
| #else | |
| int pe = 0; | |
| | |
| for ( int i=0; i<xBlocks; ++i ) | |
| for ( int j=0; j<yBlocks; ++j ) | |
| zPencil(i,j,0).insert(basepe + pe++ % npe); | |
| zPencil.doneInserting(); | zPencil.doneInserting(); |
| | |
| for ( int i=0; i<xBlocks; ++i ) | iout << iINFO << "PME Y PENCIL LOCATIONS:"; |
| for ( int k=0; k<zBlocks; ++k ) | for ( i=0; i<yprocs.size() && i<10; ++i ) { |
| yPencil(i,0,k).insert(basepe + pe++ % npe); | iout << " " << yprocs[i]; |
| | } |
| | if ( i < yprocs.size() ) iout << " ..."; |
| | iout << "\n" << endi; |
| | |
| | for (pe=0, z = 0; z < zBlocks; ++z ) |
| | for (x = 0; x < xBlocks; ++x, ++pe ) { |
| | yPencil(x,0,z).insert(yprocs[pe]); |
| | pencilPMEProcessors[yprocs[pe]] = 1; |
| | } |
| yPencil.doneInserting(); | yPencil.doneInserting(); |
| | |
| for ( int j=0; j<yBlocks; ++j ) | iout << iINFO << "PME X PENCIL LOCATIONS:"; |
| for ( int k=0; k<zBlocks; ++k ) | for ( i=0; i<xprocs.size() && i<10; ++i ) { |
| xPencil(0,j,k).insert(basepe + pe++ % npe); | iout << " " << xprocs[i]; |
| xPencil.doneInserting(); | } |
| | if ( i < xprocs.size() ) iout << " ..."; |
| | iout << "\n" << endi; |
| | |
| #endif | for (pe=0, y = 0; y < yBlocks; ++y ) |
| | for (z = 0; z < zBlocks; ++z, ++pe ) { |
| | xPencil(0,y,z).insert(xprocs[pe]); |
| | pencilPMEProcessors[xprocs[pe]] = 1; |
| | } |
| | xPencil.doneInserting(); |
| | |
| pmeProxy.recvArrays(xPencil,yPencil,zPencil); | pmeProxy.recvArrays(xPencil,yPencil,zPencil); |
| PmePencilInitMsgData msgdata; | PmePencilInitMsgData msgdata; |