fftmap.h File Reference

#include <charm++.h>
#include <PatchMap.h>
#include <fftlib.h>

Go to the source code of this file.

Classes

struct  PmeFFTInfo
class  OptPmePencilMapX
class  OptPmePencilMapY
class  OptPmePencilMapZ

Functions

static void initializePmeMap (PmeFFTInfo _info, SortableResizeArray< int > &xprocs, SortableResizeArray< int > &yprocs, SortableResizeArray< int > &zprocs)

Variables

CProxy_OptPmePencilMapZ global_map_z
CProxy_OptPmePencilMapY global_map_y
CProxy_OptPmePencilMapX global_map_x


Function Documentation

static void initializePmeMap ( PmeFFTInfo  _info,
SortableResizeArray< int > &  xprocs,
SortableResizeArray< int > &  yprocs,
SortableResizeArray< int > &  zprocs 
) [inline, static]

Definition at line 19 of file fftmap.h.

References ResizeArray< Elem >::add(), PatchMap::basenode(), j, PatchMap::numPatches(), PatchMap::numPatchesOnNode(), PatchMap::Object(), ResizeArray< Elem >::resize(), ResizeArray< Elem >::size(), SortableResizeArray< Elem >::sort(), PmeFFTInfo::xBlocks, PmeFFTInfo::yBlocks, and PmeFFTInfo::zBlocks.

Referenced by OptPmePencilMapZ::initialize(), OptPmePencilMapY::initialize(), and OptPmePencilMapX::initialize().

00022                                                                            {
00023     
00024   // decide which pes to use by bit reversal and patch use
00025   int i;
00026   int ncpus = CkNumPes();
00027 
00028   int *basenodes = new int [ncpus];  // per-PE flag array: set to 1 for every PE returned by basenode(p) for some patch p
00029   memset (basenodes, 0, sizeof(int) * ncpus);
00030   PatchMap *pmap = PatchMap::Object();
00031   for (int p = 0; p < pmap->numPatches(); p++)
00032     basenodes[pmap->basenode(p)] = 1;
00033   
00034   // find the smallest power of two >= ncpus (npow2) and its exponent (nbits)
00035   int npow2 = 1;  int nbits = 0;
00036   while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }  // on exit npow2 == 2^nbits
00037   
00038   // build bit reversal sequence
00039   SortableResizeArray<int> patches, nopatches, pmeprocs, baseprocs;
00040   i = 0;  // running counter shared across all icpu iterations; never reset inside the loop
00041   for ( int icpu=0; icpu<ncpus; ++icpu ) {
00042     int ri;
00043     for ( ri = ncpus; ri >= ncpus; ++i ) {  // ri starts out of range to force one pass; retries with the next i until the reversed value is < ncpus
00044       ri = 0;
00045       int pow2 = 1;
00046       int rpow2 = npow2 / 2;
00047       for ( int j=0; j<nbits; ++j ) {
00048         ri += rpow2 * ( ( i / pow2 ) % 2 );  // bit j of i (counted from the low end) becomes bit nbits-1-j of ri
00049         pow2 *= 2;  rpow2 /= 2;
00050       }
00051     }
00052     // seq[icpu] = ri;
00053     if ( ri ) { // keep 0 for special case (0 is appended to pmeprocs separately further down)
00054       if ( pmap->numPatchesOnNode(ri) )   // ri has at least one patch
00055         patches.add(ri);
00056       else if (basenodes[ri])   // no patches on ri, but it was flagged in basenodes
00057         baseprocs.add(ri);
00058       else nopatches.add(ri);  // neither of the above
00059     }
00060   }   
00061 
00062   delete [] basenodes;
00063   
00064   // only use zero if it eliminates overloading or has patches
00065   int useZero = 0;
00066   int npens = _info.xBlocks*_info.yBlocks;  // running pencil count: xBlocks*yBlocks, then + xBlocks*zBlocks, then + yBlocks*zBlocks
00067   if ( npens % ncpus == 0 ) useZero = 1;  // running count divides evenly by ncpus
00068   if ( npens == nopatches.size() + 1 ) useZero = 1;  // running count is exactly one more than the entries in nopatches
00069   npens += _info.xBlocks*_info.zBlocks;
00070   if ( npens % ncpus == 0 ) useZero = 1;
00071   if ( npens == nopatches.size() + 1 ) useZero = 1;
00072   npens += _info.yBlocks*_info.zBlocks;
00073   if ( npens % ncpus == 0 ) useZero = 1;
00074   if ( npens == nopatches.size() + 1 ) useZero = 1;
00075   
00076   // build pmeprocs: nopatches, then baseprocs, then patches, each walked in reverse insertion order
00077   for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00078   for ( i=baseprocs.size()-1; i>=0; --i ) pmeprocs.add(baseprocs[i]);
00079 
00080   if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);  // patch-free 0 is inserted ahead of the patches entries, and only when useZero is set
00081   for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00082   if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);  // 0 with patches is always appended, as the last entry
00083 
00084   int pe = 0;
00085   int npes = pmeprocs.size();
00086   int nxzpes = _info.xBlocks * _info.yBlocks;  // nxzpes ends up as max(xBlocks*yBlocks, yBlocks*zBlocks); used below as the start offset for yprocs
00087   if (nxzpes < _info.yBlocks*_info.zBlocks)
00088     nxzpes = _info.yBlocks*_info.zBlocks;
00089   
00090   zprocs.resize (_info.xBlocks * _info.yBlocks);
00091   for ( i=0; i<_info.xBlocks * _info.yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];  // consecutive pmeprocs entries from index 0, wrapping modulo npes
00092   zprocs.sort();
00093 
00094   pe = nxzpes;   // yprocs starts at offset nxzpes instead of where the zprocs loop left pe
00095   yprocs.resize(_info.xBlocks*_info.zBlocks);
00096   for ( i=0; i<_info.xBlocks*_info.zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00097   yprocs.sort();
00098   
00099   xprocs.resize(_info.yBlocks*_info.zBlocks);
00100   //for ( i=0; i<_info.yBlocks*_info.zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00101   for ( i=0, pe=0; i<_info.yBlocks*_info.zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];  // pe is reset to 0, so xprocs restarts from pmeprocs[0]; the disabled line above carried pe over from the yprocs loop
00102   xprocs.sort();
00103 }


Variable Documentation

CProxy_OptPmePencilMapX global_map_x

Definition at line 11 of file fftmap.h.

Referenced by OptPmePencilMapX::OptPmePencilMapX().

CProxy_OptPmePencilMapY global_map_y

Definition at line 10 of file fftmap.h.

Referenced by OptPmeXPencil::initialize_manytomany(), and OptPmePencilMapY::OptPmePencilMapY().

CProxy_OptPmePencilMapZ global_map_z

Definition at line 9 of file fftmap.h.

Referenced by OptPmePencilMapZ::OptPmePencilMapZ().


Generated on Tue Nov 21 01:17:16 2017 for NAMD by  doxygen 1.4.7