00001
00002 #ifndef __PME_FFT_MAP_H__
00003 #define __PME_FFT_MAP_H__
00004
#include <cstdlib>
#include <vector>

#include <charm++.h>
#include <PatchMap.h>
#include <fftlib.h>
00008
00009 CProxy_OptPmePencilMapZ global_map_z;
00010 CProxy_OptPmePencilMapY global_map_y;
00011 CProxy_OptPmePencilMapX global_map_x;
00012
/**
 * Block counts of the pencil decomposition along each axis.
 *
 * The pencil maps in this header size their tables from pairwise products of
 * these counts: yBlocks*zBlocks entries for the X map, xBlocks*zBlocks for
 * the Y map and xBlocks*yBlocks for the Z map.
 */
struct PmeFFTInfo {
  int xBlocks;  ///< number of blocks along x
  int yBlocks;  ///< number of blocks along y
  int zBlocks;  ///< number of blocks along z
};
00018
00019 static inline void initializePmeMap(PmeFFTInfo _info,
00020 SortableResizeArray<int> & xprocs,
00021 SortableResizeArray<int> & yprocs,
00022 SortableResizeArray<int> & zprocs) {
00023
00024
00025 int i;
00026 int ncpus = CkNumPes();
00027
00028 int *basenodes = new int [ncpus];
00029 memset (basenodes, 0, sizeof(int) * ncpus);
00030 PatchMap *pmap = PatchMap::Object();
00031 for (int p = 0; p < pmap->numPatches(); p++)
00032 basenodes[pmap->basenode(p)] = 1;
00033
00034
00035 int npow2 = 1; int nbits = 0;
00036 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00037
00038
00039 SortableResizeArray<int> patches, nopatches, pmeprocs, baseprocs;
00040 i = 0;
00041 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00042 int ri;
00043 for ( ri = ncpus; ri >= ncpus; ++i ) {
00044 ri = 0;
00045 int pow2 = 1;
00046 int rpow2 = npow2 / 2;
00047 for ( int j=0; j<nbits; ++j ) {
00048 ri += rpow2 * ( ( i / pow2 ) % 2 );
00049 pow2 *= 2; rpow2 /= 2;
00050 }
00051 }
00052
00053 if ( ri ) {
00054 if ( pmap->numPatchesOnNode(ri) )
00055 patches.add(ri);
00056 else if (basenodes[ri])
00057 baseprocs.add(ri);
00058 else nopatches.add(ri);
00059 }
00060 }
00061
00062 delete [] basenodes;
00063
00064
00065 int useZero = 0;
00066 int npens = _info.xBlocks*_info.yBlocks;
00067 if ( npens % ncpus == 0 ) useZero = 1;
00068 if ( npens == nopatches.size() + 1 ) useZero = 1;
00069 npens += _info.xBlocks*_info.zBlocks;
00070 if ( npens % ncpus == 0 ) useZero = 1;
00071 if ( npens == nopatches.size() + 1 ) useZero = 1;
00072 npens += _info.yBlocks*_info.zBlocks;
00073 if ( npens % ncpus == 0 ) useZero = 1;
00074 if ( npens == nopatches.size() + 1 ) useZero = 1;
00075
00076
00077 for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00078 for ( i=baseprocs.size()-1; i>=0; --i ) pmeprocs.add(baseprocs[i]);
00079
00080 if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00081 for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00082 if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00083
00084 int pe = 0;
00085 int npes = pmeprocs.size();
00086 int nxzpes = _info.xBlocks * _info.yBlocks;
00087 if (nxzpes < _info.yBlocks*_info.zBlocks)
00088 nxzpes = _info.yBlocks*_info.zBlocks;
00089
00090 zprocs.resize (_info.xBlocks * _info.yBlocks);
00091 for ( i=0; i<_info.xBlocks * _info.yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00092 zprocs.sort();
00093
00094 pe = nxzpes;
00095 yprocs.resize(_info.xBlocks*_info.zBlocks);
00096 for ( i=0; i<_info.xBlocks*_info.zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00097 yprocs.sort();
00098
00099 xprocs.resize(_info.yBlocks*_info.zBlocks);
00100
00101 for ( i=0, pe=0; i<_info.yBlocks*_info.zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00102 xprocs.sort();
00103 }
00104
00105
00106 class OptPmePencilMapX : public CBase_OptPmePencilMapX
00107 {
00108 PmeFFTInfo _info;
00109 int * _mapcache;
00110 bool _initialized;
00111
00112 public:
00113 OptPmePencilMapX(int xblock, int yblock, int zblock) {
00114 _initialized = false;
00115 _info.xBlocks = xblock;
00116 _info.yBlocks = yblock;
00117 _info.zBlocks = zblock;
00118 global_map_x = thisProxy;
00119 }
00120
00121 inline void initialize () {
00122 _initialized = true;
00123 _mapcache = (int *) malloc(_info.yBlocks * _info.zBlocks * sizeof(int));
00124
00125 SortableResizeArray<int> xprocs;
00126 SortableResizeArray<int> yprocs;
00127 SortableResizeArray<int> zprocs;
00128
00129 initializePmeMap (_info, xprocs, yprocs, zprocs);
00130
00131 for (int y = 0; y < _info.yBlocks; y++) {
00132 for (int z = 0; z < _info.zBlocks; z ++) {
00133 int index = z + y * _info.zBlocks;
00134 int pe = xprocs[index];
00135 _mapcache[index] = pe;
00136
00137 if(CkMyRank() == 0)
00138 pencilPMEProcessors[pe] = 1;
00139 }
00140 }
00141 }
00142
00143 OptPmePencilMapX(CkMigrateMessage *m){}
00144
00145 int procNum(int foo, const CkArrayIndex &idx) {
00146 if (!_initialized) initialize();
00147
00148 CkArrayIndex3D idx3d = *(CkArrayIndex3D *) &idx;
00149 int index = idx3d.index[2] + idx3d.index[1] * _info.zBlocks;
00150
00151 return _mapcache[index];
00152 }
00153 };
00154
00155
00156 class OptPmePencilMapY : public CBase_OptPmePencilMapY
00157 {
00158 PmeFFTInfo _info;
00159 int * _mapcache;
00160 bool _initialized;
00161
00162 public:
00163 OptPmePencilMapY(int xblock, int yblock, int zblock) {
00164 _initialized = false;
00165 _info.xBlocks = xblock;
00166 _info.yBlocks = yblock;
00167 _info.zBlocks = zblock;
00168 global_map_y = thisProxy;
00169 }
00170
00171 inline void initialize() {
00172 _initialized = true;
00173 _mapcache = (int *) malloc(_info.xBlocks * _info.zBlocks * sizeof(int));
00174
00175 SortableResizeArray<int> xprocs;
00176 SortableResizeArray<int> yprocs;
00177 SortableResizeArray<int> zprocs;
00178
00179 initializePmeMap (_info, xprocs, yprocs, zprocs);
00180
00181 for (int x = 0; x < _info.xBlocks; x ++) {
00182 for (int z = 0; z < _info.zBlocks; z++) {
00183 int index = z + x * _info.zBlocks;
00184 int pe = yprocs[index];
00185 _mapcache [index] = pe;
00186
00187 if (CkMyPe() == 0)
00188 pencilPMEProcessors[pe] = 1;
00189 }
00190 }
00191 }
00192
00193 OptPmePencilMapY(CkMigrateMessage *m){}
00194
00195 int procNum(int foo, const CkArrayIndex &idx) {
00196 if (!_initialized) initialize();
00197
00198 CkArrayIndex3D idx3d = *(CkArrayIndex3D *) &idx;
00199 int index = idx3d.index[2] + idx3d.index[0] * _info.zBlocks;
00200 return _mapcache [index];
00201 }
00202 };
00203
00204 class OptPmePencilMapZ : public CBase_OptPmePencilMapZ
00205 {
00206 PmeFFTInfo _info;
00207 int * _mapcache;
00208 bool _initialized;
00209
00210 public:
00211 OptPmePencilMapZ(int xblock, int yblock, int zblock) {
00212 _initialized = false;
00213 _info.xBlocks = xblock;
00214 _info.yBlocks = yblock;
00215 _info.zBlocks = zblock;
00216 global_map_z = thisProxy;
00217 }
00218
00219 inline void initialize() {
00220 _initialized = true;
00221 _mapcache = (int *) malloc(_info.xBlocks * _info.yBlocks * sizeof(int));
00222
00223 SortableResizeArray<int> xprocs;
00224 SortableResizeArray<int> yprocs;
00225 SortableResizeArray<int> zprocs;
00226
00227 initializePmeMap (_info, xprocs, yprocs, zprocs);
00228
00229 for (int x = 0; x < _info.xBlocks; x++) {
00230 for (int y = 0; y < _info.yBlocks; y ++) {
00231 int index = y + x * _info.yBlocks;
00232 int pe = zprocs[index];
00233 _mapcache[index] = pe;
00234
00235 if (CkMyPe() == 0)
00236 pencilPMEProcessors[pe] = 1;
00237 }
00238 }
00239 }
00240
00241 OptPmePencilMapZ(CkMigrateMessage *m){}
00242
00243 int procNum(int foo, const CkArrayIndex &idx) {
00244 if (!_initialized) initialize();
00245
00246 CkArrayIndex3D idx3d = *(CkArrayIndex3D *) &idx;
00247 int index = idx3d.index[1] + idx3d.index[0] * _info.yBlocks;
00248 return _mapcache[index];
00249 }
00250 };
00251
00252
00253 #endif