fftmap.h

Go to the documentation of this file.
00001 
00002 #ifndef   __PME_FFT_MAP_H__
00003 #define   __PME_FFT_MAP_H__
00004 
00005 #include <charm++.h>
00006 #include <PatchMap.h>
00007 #include <fftlib.h>
00008 
00009 CProxy_OptPmePencilMapZ  global_map_z;
00010 CProxy_OptPmePencilMapY  global_map_y;
00011 CProxy_OptPmePencilMapX  global_map_x;
00012 
/**
 * Block counts describing how the PME FFT grid is split into pencils.
 *
 * The pencil maps in this file size their lookup tables from products of
 * these counts (e.g. xBlocks * yBlocks entries for the z-pencil map).
 */
struct PmeFFTInfo {
  /// Number of blocks along x.
  int  xBlocks;
  /// Number of blocks along y.
  int  yBlocks;
  /// Number of blocks along z.
  int  zBlocks;
};
00018 
00019 static  inline void initializePmeMap(PmeFFTInfo                    _info,
00020                                      SortableResizeArray<int>    & xprocs,
00021                                      SortableResizeArray<int>    & yprocs,
00022                                      SortableResizeArray<int>    & zprocs) {
00023     
00024   // decide which pes to use by bit reversal and patch use
00025   int i;
00026   int ncpus = CkNumPes();
00027 
00028   int *basenodes = new int [ncpus];
00029   memset (basenodes, 0, sizeof(int) * ncpus);
00030   PatchMap *pmap = PatchMap::Object();
00031   for (int p = 0; p < pmap->numPatches(); p++)
00032     basenodes[pmap->basenode(p)] = 1;
00033   
00034   // find next highest power of two
00035   int npow2 = 1;  int nbits = 0;
00036   while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00037   
00038   // build bit reversal sequence
00039   SortableResizeArray<int> patches, nopatches, pmeprocs, baseprocs;
00040   i = 0;
00041   for ( int icpu=0; icpu<ncpus; ++icpu ) {
00042     int ri;
00043     for ( ri = ncpus; ri >= ncpus; ++i ) {
00044       ri = 0;
00045       int pow2 = 1;
00046       int rpow2 = npow2 / 2;
00047       for ( int j=0; j<nbits; ++j ) {
00048         ri += rpow2 * ( ( i / pow2 ) % 2 );
00049         pow2 *= 2;  rpow2 /= 2;
00050       }
00051     }
00052     // seq[icpu] = ri;
00053     if ( ri ) { // keep 0 for special case
00054       if ( pmap->numPatchesOnNode(ri) ) 
00055         patches.add(ri);
00056       else if (basenodes[ri]) 
00057         baseprocs.add(ri);
00058       else nopatches.add(ri);
00059     }
00060   }   
00061 
00062   delete [] basenodes;
00063   
00064   // only use zero if it eliminates overloading or has patches
00065   int useZero = 0;
00066   int npens = _info.xBlocks*_info.yBlocks;
00067   if ( npens % ncpus == 0 ) useZero = 1;
00068   if ( npens == nopatches.size() + 1 ) useZero = 1;
00069   npens += _info.xBlocks*_info.zBlocks;
00070   if ( npens % ncpus == 0 ) useZero = 1;
00071   if ( npens == nopatches.size() + 1 ) useZero = 1;
00072   npens += _info.yBlocks*_info.zBlocks;
00073   if ( npens % ncpus == 0 ) useZero = 1;
00074   if ( npens == nopatches.size() + 1 ) useZero = 1;
00075   
00076   // add nopatches then patches in reversed order
00077   for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00078   for ( i=baseprocs.size()-1; i>=0; --i ) pmeprocs.add(baseprocs[i]);
00079 
00080   if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00081   for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00082   if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00083 
00084   int pe = 0;
00085   int npes = pmeprocs.size();
00086   int nxzpes = _info.xBlocks * _info.yBlocks;
00087   if (nxzpes < _info.yBlocks*_info.zBlocks)
00088     nxzpes = _info.yBlocks*_info.zBlocks;
00089   
00090   zprocs.resize (_info.xBlocks * _info.yBlocks);
00091   for ( i=0; i<_info.xBlocks * _info.yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00092   zprocs.sort();
00093 
00094   pe = nxzpes; 
00095   yprocs.resize(_info.xBlocks*_info.zBlocks);
00096   for ( i=0; i<_info.xBlocks*_info.zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00097   yprocs.sort();
00098   
00099   xprocs.resize(_info.yBlocks*_info.zBlocks);
00100   //for ( i=0; i<_info.yBlocks*_info.zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00101   for ( i=0, pe=0; i<_info.yBlocks*_info.zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00102   xprocs.sort();
00103 }
00104 
00105 
00106 class OptPmePencilMapX : public CBase_OptPmePencilMapX
00107 {
00108   PmeFFTInfo   _info;
00109   int        * _mapcache;
00110   bool         _initialized;
00111 
00112  public:
00113   OptPmePencilMapX(int xblock, int yblock, int zblock) {
00114     _initialized = false;
00115     _info.xBlocks = xblock;
00116     _info.yBlocks = yblock;
00117     _info.zBlocks = zblock;    
00118     global_map_x = thisProxy;   
00119   }
00120     
00121   inline void initialize () {
00122     _initialized = true;
00123     _mapcache = (int *) malloc(_info.yBlocks * _info.zBlocks * sizeof(int));
00124     
00125     SortableResizeArray<int>    xprocs;
00126     SortableResizeArray<int>    yprocs;
00127     SortableResizeArray<int>    zprocs;
00128     
00129     initializePmeMap (_info, xprocs, yprocs, zprocs);
00130     
00131     for (int y = 0; y < _info.yBlocks; y++) {
00132       for (int z = 0; z < _info.zBlocks; z ++) {
00133         int index = z + y * _info.zBlocks;
00134         int pe = xprocs[index];
00135         _mapcache[index] = pe;
00136         
00137         if(CkMyRank() == 0) 
00138           pencilPMEProcessors[pe] = 1;
00139       }
00140     }
00141   }
00142 
00143   OptPmePencilMapX(CkMigrateMessage *m){}
00144 
00145   int procNum(int foo, const CkArrayIndex &idx) {
00146     if (!_initialized) initialize();
00147 
00148     CkArrayIndex3D idx3d = *(CkArrayIndex3D *) &idx;
00149     int index = idx3d.index[2] + idx3d.index[1] * _info.zBlocks;
00150     
00151     return _mapcache[index];
00152   }
00153 };
00154 
00155 
00156 class OptPmePencilMapY : public CBase_OptPmePencilMapY
00157 {
00158   PmeFFTInfo   _info;
00159   int        * _mapcache;
00160   bool         _initialized;
00161 
00162  public:
00163   OptPmePencilMapY(int xblock, int yblock, int zblock) {
00164     _initialized = false;
00165     _info.xBlocks = xblock;
00166     _info.yBlocks = yblock;
00167     _info.zBlocks = zblock;    
00168     global_map_y = thisProxy;
00169   }
00170 
00171   inline void initialize() {
00172     _initialized = true;
00173     _mapcache = (int *) malloc(_info.xBlocks * _info.zBlocks * sizeof(int)); 
00174     
00175     SortableResizeArray<int>    xprocs;
00176     SortableResizeArray<int>    yprocs;
00177     SortableResizeArray<int>    zprocs;
00178     
00179     initializePmeMap (_info, xprocs, yprocs, zprocs);
00180     
00181     for (int x = 0; x < _info.xBlocks; x ++) {
00182       for (int z = 0; z < _info.zBlocks; z++) {
00183         int index = z + x * _info.zBlocks;
00184         int pe = yprocs[index];
00185         _mapcache [index] = pe;
00186 
00187         if (CkMyPe() == 0) 
00188           pencilPMEProcessors[pe] = 1;
00189       }
00190     }
00191   }
00192   
00193   OptPmePencilMapY(CkMigrateMessage *m){}
00194 
00195   int procNum(int foo, const CkArrayIndex &idx) {
00196     if (!_initialized) initialize();
00197 
00198     CkArrayIndex3D idx3d = *(CkArrayIndex3D *) &idx;
00199     int index = idx3d.index[2] + idx3d.index[0] * _info.zBlocks;
00200     return _mapcache [index];
00201   }
00202 };
00203 
00204 class OptPmePencilMapZ : public CBase_OptPmePencilMapZ
00205 {
00206   PmeFFTInfo   _info;
00207   int        * _mapcache;
00208   bool         _initialized;
00209 
00210  public:
00211   OptPmePencilMapZ(int xblock, int yblock, int zblock) {
00212     _initialized = false;
00213     _info.xBlocks = xblock;
00214     _info.yBlocks = yblock;
00215     _info.zBlocks = zblock;    
00216     global_map_z = thisProxy;
00217   }
00218   
00219   inline void initialize() {
00220     _initialized = true;
00221     _mapcache = (int *) malloc(_info.xBlocks * _info.yBlocks * sizeof(int)); 
00222 
00223     SortableResizeArray<int>    xprocs;
00224     SortableResizeArray<int>    yprocs;
00225     SortableResizeArray<int>    zprocs;
00226     
00227     initializePmeMap (_info, xprocs, yprocs, zprocs);
00228 
00229     for (int x = 0; x < _info.xBlocks; x++) {
00230       for (int y = 0; y < _info.yBlocks; y ++) {        
00231         int index = y + x * _info.yBlocks;
00232         int pe = zprocs[index];
00233         _mapcache[index] = pe;
00234 
00235         if (CkMyPe() == 0)
00236           pencilPMEProcessors[pe] = 1;
00237       }
00238     }
00239   }
00240   
00241   OptPmePencilMapZ(CkMigrateMessage *m){}
00242   
00243   int procNum(int foo, const CkArrayIndex &idx) {
00244     if (!_initialized) initialize();
00245     
00246     CkArrayIndex3D idx3d = *(CkArrayIndex3D *) &idx;
00247     int index = idx3d.index[1] + idx3d.index[0] * _info.yBlocks;
00248     return _mapcache[index];
00249   }
00250 };
00251 
00252 
00253 #endif

Generated on Thu Sep 21 01:17:12 2017 for NAMD by  doxygen 1.4.7