00001
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00009 #include <fftw.h>
00010 #include <rfftw.h>
00011 #else
00012 #include <sfftw.h>
00013 #include <srfftw.h>
00014 #endif
00015 #endif
00016
00017 #include "InfoStream.h"
00018 #include "Node.h"
00019 #include "PatchMap.h"
00020 #include "PatchMap.inl"
00021 #include "AtomMap.h"
00022 #include "ComputePme.h"
00023 #include "ComputePmeMgr.decl.h"
00024 #include "PmeRealSpace.h"
00025 #include "PmeKSpace.h"
00026 #include "ComputeNonbondedUtil.h"
00027 #include "PatchMgr.h"
00028 #include "Molecule.h"
00029 #include "ReductionMgr.h"
00030 #include "ComputeMgr.h"
00031 #include "ComputeMgr.decl.h"
00032
00033 #define MIN_DEBUG_LEVEL 3
00034 #include "Debug.h"
00035 #include "SimParameters.h"
00036 #include "WorkDistrib.h"
00037 #include "varsizemsg.h"
00038 #include "Random.h"
00039 #include "Priorities.h"
00040
00041
00042
00043 #ifdef USE_COMM_LIB
00044 #include "EachToManyMulticastStrategy.h"
00045 #endif
00046
00047 #ifndef SQRT_PI
00048 #define SQRT_PI 1.7724538509055160273
00049 #endif
00050
00051 char *pencilPMEProcessors;
00052
00053
00054 class PmeGridMsg : public CMessage_PmeGridMsg {
00055 public:
00056
00057 int sourceNode;
00058 int sequence;
00059 int hasData;
00060 Lattice lattice;
00061 PmeReduction *evir;
00062 int start;
00063 int len;
00064 int zlistlen;
00065 int *zlist;
00066 char *fgrid;
00067 float *qgrid;
00068 };
00069
00070 class PmeTransMsg : public CMessage_PmeTransMsg {
00071 public:
00072
00073 int sourceNode;
00074 int sequence;
00075 int hasData;
00076 Lattice lattice;
00077 int x_start;
00078 int nx;
00079 float *qgrid;
00080 };
00081
00082
00083 class PmeUntransMsg : public CMessage_PmeUntransMsg {
00084 public:
00085
00086 int sourceNode;
00087 int has_evir;
00088 PmeReduction *evir;
00089 int y_start;
00090 int ny;
00091 float *qgrid;
00092
00093 };
00094
00095
00096
00097 struct PmePencilInitMsgData {
00098 PmeGrid grid;
00099 int xBlocks, yBlocks, zBlocks;
00100 CProxy_PmeXPencil xPencil;
00101 CProxy_PmeYPencil yPencil;
00102 CProxy_PmeZPencil zPencil;
00103 CProxy_ComputePmeMgr pmeProxy;
00104 };
00105
00106 class PmePencilInitMsg : public CMessage_PmePencilInitMsg {
00107 public:
00108 PmePencilInitMsg(PmePencilInitMsgData &d) { data = d; }
00109 PmePencilInitMsgData data;
00110 };
00111
00112
00113 struct LocalPmeInfo {
00114 int nx, x_start;
00115 int ny_after_transpose, y_start_after_transpose;
00116 };
00117
00118
00119
00120 void generatePmePeList(int *peMap, int numPes){
00121
00122 int i;
00123 int ncpus = CkNumPes();
00124
00125
00126 int npow2 = 1; int nbits = 0;
00127 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00128
00129
00130 SortableResizeArray<int> seq(ncpus);
00131 i = 0;
00132 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00133 int ri;
00134 for ( ri = ncpus; ri >= ncpus; ++i ) {
00135 ri = 0;
00136 int pow2 = 1;
00137 int rpow2 = npow2 / 2;
00138 for ( int j=0; j<nbits; ++j ) {
00139 ri += rpow2 * ( ( i / pow2 ) % 2 );
00140 pow2 *= 2; rpow2 /= 2;
00141 }
00142 }
00143 seq[icpu] = ri;
00144 }
00145
00146
00147 for ( i=0; i<numPes; ++i ) {
00148 seq[i] = seq[ncpus - numPes + i];
00149 }
00150 seq.resize(numPes);
00151 seq.sort();
00152
00153 for ( i=0; i<numPes; ++i )
00154 peMap[i] = seq[i];
00155
00156
00157 }
00158
00159
00160 void generatePmePeList2(int *gridPeMap, int numGridPes, int *transPeMap, int numTransPes){
00161
00162 int i;
00163 int ncpus = CkNumPes();
00164
00165
00166 int npow2 = 1; int nbits = 0;
00167 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00168
00169
00170 SortableResizeArray<int> seq(ncpus);
00171 SortableResizeArray<int> seq2(ncpus);
00172 i = 0;
00173 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00174 int ri;
00175 for ( ri = ncpus; ri >= ncpus; ++i ) {
00176 ri = 0;
00177 int pow2 = 1;
00178 int rpow2 = npow2 / 2;
00179 for ( int j=0; j<nbits; ++j ) {
00180 ri += rpow2 * ( ( i / pow2 ) % 2 );
00181 pow2 *= 2; rpow2 /= 2;
00182 }
00183 }
00184 seq[icpu] = ri;
00185 seq2[icpu] = ri;
00186 }
00187
00188
00189 for ( i=0; i<numGridPes; ++i ) {
00190 seq[i] = seq[ncpus - numGridPes + i];
00191 }
00192 seq.resize(numGridPes);
00193 seq.sort();
00194 int firstTransPe = ncpus - numGridPes - numTransPes;
00195 if ( firstTransPe < 0 ) {
00196 firstTransPe = 0;
00197
00198 if ( ncpus > numTransPes ) firstTransPe = 1;
00199 }
00200 for ( i=0; i<numTransPes; ++i ) {
00201 seq2[i] = seq2[firstTransPe + i];
00202 }
00203 seq2.resize(numTransPes);
00204 seq2.sort();
00205
00206 for ( i=0; i<numGridPes; ++i )
00207 gridPeMap[i] = seq[i];
00208
00209 for ( i=0; i<numTransPes; ++i )
00210 transPeMap[i] = seq2[i];
00211 }
00212
00213 #if USE_TOPOMAP
00214
00215 bool generateBGLORBPmePeList(int *pemap, int numPes, int *block_pes=0,
00216 int nbpes=0);
00217 #endif
00218
00219 class ComputePmeMgr : public BOCclass {
00220 public:
00221 friend class ComputePme;
00222 ComputePmeMgr();
00223 ~ComputePmeMgr();
00224
00225 void initialize(CkQdMsg*);
00226 void initialize_pencils(CkQdMsg*);
00227 void activate_pencils(CkQdMsg*);
00228 void recvArrays(CProxy_PmeXPencil, CProxy_PmeYPencil, CProxy_PmeZPencil);
00229
00230 void sendGrid(void);
00231 void recvGrid(PmeGridMsg *);
00232 void gridCalc1(void);
00233 void sendTransBarrier(void);
00234 void sendTrans(void);
00235 void recvTrans(PmeTransMsg *);
00236 void gridCalc2(void);
00237 void sendUntrans(void);
00238 void recvUntrans(PmeUntransMsg *);
00239 void gridCalc3(void);
00240 void sendUngrid(void);
00241 void recvUngrid(PmeGridMsg *);
00242 void ungridCalc(void);
00243
00244 void setCompute(ComputePme *c) { pmeCompute = c; c->setMgr(this); }
00245
00246
00247 int isPmeProcessor(int p);
00248
00249 #ifdef NAMD_FFTW
00250 static CmiNodeLock fftw_plan_lock;
00251 #endif
00252
00253 private:
00254 CProxy_ComputePmeMgr pmeProxy;
00255 CProxy_ComputePmeMgr pmeProxyDir;
00256 ComputePme *pmeCompute;
00257 PmeGrid myGrid;
00258 Lattice lattice;
00259 PmeKSpace *myKSpace;
00260 float *qgrid;
00261 float *kgrid;
00262
00263 #ifdef NAMD_FFTW
00264 fftw_plan forward_plan_x, backward_plan_x;
00265 rfftwnd_plan forward_plan_yz, backward_plan_yz;
00266 fftw_complex *work;
00267 #else
00268 float *work;
00269 #endif
00270
00271 int fepOn, thermInt, lesOn, lesFactor, pairOn, selfOn, numGrids;
00272 int decouple;
00273 BigReal fepElecLambdaStart;
00274 BigReal elecLambdaUp;
00275 BigReal elecLambdaDown;
00276
00277
00278 LocalPmeInfo *localInfo;
00279 int qgrid_size;
00280 int qgrid_start;
00281 int qgrid_len;
00282 int fgrid_start;
00283 int fgrid_len;
00284
00285 int numSources;
00286 int numGridPes;
00287 int numTransPes;
00288 int numDestRecipPes;
00289 int myGridPe;
00290 int myTransPe;
00291 int *gridPeMap;
00292 int *transPeMap;
00293 int *recipPeDest;
00294 int *gridPeOrder;
00295 int *transPeOrder;
00296 char *isPmeFlag;
00297 int grid_count;
00298 int trans_count;
00299 int untrans_count;
00300 int ungrid_count;
00301 PmeGridMsg **gridmsg_reuse;
00302 PmeReduction recip_evir[PME_MAX_EVALS];
00303 PmeReduction recip_evir2[PME_MAX_EVALS];
00304
00305 int sequence;
00306 int useBarrier;
00307 int sendTransBarrier_received;
00308
00309 int usePencils;
00310 int xBlocks, yBlocks, zBlocks;
00311 CProxy_PmeXPencil xPencil;
00312 CProxy_PmeYPencil yPencil;
00313 CProxy_PmeZPencil zPencil;
00314 char *pencilActive;
00315 int numPencilsActive;
00316 };
00317
00318 #ifdef NAMD_FFTW
00319 CmiNodeLock ComputePmeMgr::fftw_plan_lock;
00320 #endif
00321
00322 int isPmeProcessor(int p){
00323 return CProxy_ComputePmeMgr::ckLocalBranch(CkpvAccess(BOCclass_group).computePmeMgr)->isPmeProcessor(p);
00324 }
00325
00326 int ComputePmeMgr::isPmeProcessor(int p){
00327 return ( usePencils ? pencilPMEProcessors[p] : isPmeFlag[p] );
00328 }
00329
00330 ComputePmeMgr::ComputePmeMgr() : pmeProxy(thisgroup),
00331 pmeProxyDir(thisgroup), pmeCompute(0) {
00332
00333 CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00334
00335 #ifdef USE_COMM_LIB
00336 ComlibDelegateProxy(&pmeProxy);
00337 #endif
00338
00339 #ifdef NAMD_FFTW
00340 if ( CmiMyRank() == 0 ) {
00341 fftw_plan_lock = CmiCreateLock();
00342 }
00343 #endif
00344
00345 myKSpace = 0;
00346 localInfo = new LocalPmeInfo[CkNumPes()];
00347 gridPeMap = new int[CkNumPes()];
00348 transPeMap = new int[CkNumPes()];
00349 recipPeDest = new int[CkNumPes()];
00350 gridPeOrder = new int[CkNumPes()];
00351 transPeOrder = new int[CkNumPes()];
00352 isPmeFlag = new char[CkNumPes()];
00353 kgrid = 0;
00354 work = 0;
00355 grid_count = 0;
00356 trans_count = 0;
00357 untrans_count = 0;
00358 ungrid_count = 0;
00359 gridmsg_reuse= new PmeGridMsg*[CkNumPes()];
00360 useBarrier = 0;
00361 sendTransBarrier_received = 0;
00362 usePencils = 0;
00363 }
00364
00365
00366 void ComputePmeMgr::recvArrays(
00367 CProxy_PmeXPencil x, CProxy_PmeYPencil y, CProxy_PmeZPencil z) {
00368 xPencil = x; yPencil = y; zPencil = z;
00369 }
00370
00371
00372 void ComputePmeMgr::initialize(CkQdMsg *msg) {
00373 delete msg;
00374
00375 SimParameters *simParams = Node::Object()->simParameters;
00376 PatchMap *patchMap = PatchMap::Object();
00377
00378 fepOn = simParams->fepOn;
00379 thermInt = simParams->thermInt;
00380 decouple = (fepOn || thermInt) && (simParams->decouple);
00381 fepElecLambdaStart = (fepOn || thermInt) ? simParams->fepElecLambdaStart : 0;
00382 if (fepOn || thermInt) {
00383 numGrids = 2;
00384 if (decouple) numGrids += 2;
00385 if (fepElecLambdaStart || thermInt) numGrids ++;
00386 }
00387 else numGrids = 1;
00388 lesOn = simParams->lesOn;
00389 useBarrier = simParams->PMEBarrier;
00390 if ( lesOn ) {
00391 lesFactor = simParams->lesFactor;
00392 numGrids = lesFactor;
00393 }
00394 selfOn = 0;
00395 pairOn = simParams->pairInteractionOn;
00396 if ( pairOn ) {
00397 selfOn = simParams->pairInteractionSelf;
00398 if ( selfOn ) pairOn = 0;
00399 numGrids = selfOn ? 1 : 3;
00400 }
00401
00402 if ( numGrids != 1 || simParams->PMEPencils == 0 ) usePencils = 0;
00403 else if ( simParams->PMEPencils > 0 ) usePencils = 1;
00404 else {
00405 int dimx = simParams->PMEGridSizeX;
00406 int dimy = simParams->PMEGridSizeY;
00407 int maxslabs = 1 + (dimx - 1) / simParams->PMEMinSlices;
00408 if ( maxslabs > CkNumPes() ) maxslabs = CkNumPes();
00409 int maxpencils = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00410 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00411 if ( maxpencils > CkNumPes() ) maxpencils = CkNumPes();
00412 if ( maxpencils > 3 * maxslabs ) usePencils = 1;
00413 else usePencils = 0;
00414 }
00415
00416 if ( usePencils ) {
00417 if ( simParams->PMEPencils > 1 ) {
00418 xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00419 } else {
00420 int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00421 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00422 if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00423 int nb = (int) sqrt((float)nb2);
00424 xBlocks = zBlocks = nb;
00425 yBlocks = nb2 / nb;
00426 }
00427
00428 int dimx = simParams->PMEGridSizeX;
00429 int bx = 1 + ( dimx - 1 ) / xBlocks;
00430 xBlocks = 1 + ( dimx - 1 ) / bx;
00431
00432 int dimy = simParams->PMEGridSizeY;
00433 int by = 1 + ( dimy - 1 ) / yBlocks;
00434 yBlocks = 1 + ( dimy - 1 ) / by;
00435
00436 int dimz = simParams->PMEGridSizeZ / 2 + 1;
00437 int bz = 1 + ( dimz - 1 ) / zBlocks;
00438 zBlocks = 1 + ( dimz - 1 ) / bz;
00439
00440 if ( ! CkMyPe() ) {
00441 iout << iINFO << "PME using " << xBlocks << " x " <<
00442 yBlocks << " x " << zBlocks <<
00443 " pencil grid for FFT and reciprocal sum.\n" << endi;
00444 }
00445 } else {
00446
00447 {
00448
00449
00450 int minslices = simParams->PMEMinSlices;
00451 int dimx = simParams->PMEGridSizeX;
00452 int nrpx = ( dimx + minslices - 1 ) / minslices;
00453 int dimy = simParams->PMEGridSizeY;
00454 int nrpy = ( dimy + minslices - 1 ) / minslices;
00455
00456
00457 int nrpp = CkNumPes();
00458
00459 if ( nrpp < nrpx ) nrpx = nrpp;
00460 if ( nrpp < nrpy ) nrpy = nrpp;
00461
00462
00463 int nrps = simParams->PMEProcessors;
00464 if ( nrps > CkNumPes() ) nrps = CkNumPes();
00465 if ( nrps > 0 ) nrpx = nrps;
00466 if ( nrps > 0 ) nrpy = nrps;
00467
00468
00469 int bx = ( dimx + nrpx - 1 ) / nrpx;
00470 nrpx = ( dimx + bx - 1 ) / bx;
00471 int by = ( dimy + nrpy - 1 ) / nrpy;
00472 nrpy = ( dimy + by - 1 ) / by;
00473 if ( bx != ( dimx + nrpx - 1 ) / nrpx )
00474 NAMD_bug("Error in selecting number of PME processors.");
00475 if ( by != ( dimy + nrpy - 1 ) / nrpy )
00476 NAMD_bug("Error in selecting number of PME processors.");
00477
00478 numGridPes = nrpx;
00479 numTransPes = nrpy;
00480 }
00481 if ( ! CkMyPe() ) {
00482 iout << iINFO << "PME using " << numGridPes << " and " << numTransPes <<
00483 " processors for FFT and reciprocal sum.\n" << endi;
00484 }
00485 {
00486 int i;
00487 for ( i = 0; i < numGridPes; ++i ) {
00488 gridPeOrder[i] = i;
00489 }
00490 for ( i = 0; i < numTransPes; ++i ) {
00491 transPeOrder[i] = i;
00492 }
00493 Random rand(CkMyPe());
00494 rand.reorder(gridPeOrder,numGridPes);
00495 rand.reorder(transPeOrder,numTransPes);
00496 }
00497
00498 int sum_npes = numTransPes + numGridPes;
00499 int max_npes = (numTransPes > numGridPes)?numTransPes:numGridPes;
00500
00501 #if USE_TOPOMAP
00502 PatchMap * pmap = PatchMap::Object();
00503
00504 int patch_pes = pmap->numNodesWithPatches();
00505 TopoManager tmgr;
00506 if(tmgr.hasMultipleProcsPerNode())
00507 patch_pes *= 2;
00508
00509 bool done = false;
00510 #ifndef USE_COMM_LIB
00511 if(CkNumPes() > 2*sum_npes + patch_pes) {
00512 done = generateBGLORBPmePeList(transPeMap, numTransPes);
00513 done &= generateBGLORBPmePeList(gridPeMap, numGridPes, transPeMap, numTransPes);
00514 }
00515 else
00516 #endif
00517 if(CkNumPes() > 2 *max_npes + patch_pes) {
00518 done = generateBGLORBPmePeList(transPeMap, max_npes);
00519 gridPeMap = transPeMap;
00520 }
00521
00522 if (!done)
00523 #endif
00524 {
00525
00526
00527 generatePmePeList2(gridPeMap, numGridPes, transPeMap, numTransPes);
00528 }
00529
00530 #ifdef USE_COMM_LIB
00531 if(CkMyPe() == 0) {
00532 ComlibInstanceHandle cinst1 = CkCreateComlibInstance();
00533 EachToManyMulticastStrategy *strat = new
00534 EachToManyMulticastStrategy(USE_DIRECT, numGridPes,
00535 gridPeMap, numTransPes, transPeMap);
00536 cinst1.setStrategy(strat);
00537
00538 ComlibInstanceHandle cinst2 = CkCreateComlibInstance();
00539 strat = new EachToManyMulticastStrategy(USE_DIRECT, numTransPes, transPeMap
00540 , numGridPes, gridPeMap);
00541 cinst2.setStrategy(strat);
00542 ComlibDoneCreating();
00543 }
00544 #endif
00545
00546 myGridPe = -1;
00547 int i = 0;
00548 for ( i=0; i<CkNumPes(); ++i )
00549 isPmeFlag[i] = 0;
00550 for ( i=0; i<numGridPes; ++i ) {
00551 if ( gridPeMap[i] == CkMyPe() ) myGridPe = i;
00552 isPmeFlag[gridPeMap[i]] |= 1;
00553 }
00554 myTransPe = -1;
00555 for ( i=0; i<numTransPes; ++i ) {
00556 if ( transPeMap[i] == CkMyPe() ) myTransPe = i;
00557 isPmeFlag[transPeMap[i]] |= 2;
00558 }
00559
00560 if ( ! CkMyPe() ) {
00561 iout << iINFO << "PME GRID LOCATIONS:";
00562 int i;
00563 for ( i=0; i<numGridPes && i<10; ++i ) {
00564 iout << " " << gridPeMap[i];
00565 }
00566 if ( i < numGridPes ) iout << " ...";
00567 iout << "\n" << endi;
00568 iout << iINFO << "PME TRANS LOCATIONS:";
00569 for ( i=0; i<numTransPes && i<10; ++i ) {
00570 iout << " " << transPeMap[i];
00571 }
00572 if ( i < numTransPes ) iout << " ...";
00573 iout << "\n" << endi;
00574 }
00575
00576 }
00577
00578 myGrid.K1 = simParams->PMEGridSizeX;
00579 myGrid.K2 = simParams->PMEGridSizeY;
00580 myGrid.K3 = simParams->PMEGridSizeZ;
00581 myGrid.order = simParams->PMEInterpOrder;
00582 myGrid.dim2 = myGrid.K2;
00583 myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
00584
00585 if ( ! usePencils ) {
00586 myGrid.block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
00587 myGrid.block2 = ( myGrid.K2 + numTransPes - 1 ) / numTransPes;
00588 myGrid.block3 = myGrid.dim3 / 2;
00589 }
00590
00591 if ( usePencils ) {
00592 myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
00593 myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
00594 myGrid.block3 = ( myGrid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;
00595
00596 if ( CkMyPe() == 0 ) {
00597 int basepe = 0; int npe = CkNumPes();
00598 if ( npe > xBlocks*yBlocks &&
00599 npe > xBlocks*zBlocks &&
00600 npe > yBlocks*zBlocks ) {
00601
00602 ++basepe;
00603 --npe;
00604 }
00605
00606 zPencil = CProxy_PmeZPencil::ckNew();
00607 yPencil = CProxy_PmeYPencil::ckNew();
00608 xPencil = CProxy_PmeXPencil::ckNew();
00609
00610
00611 int i;
00612 int ncpus = CkNumPes();
00613
00614
00615 int npow2 = 1; int nbits = 0;
00616 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00617
00618
00619 SortableResizeArray<int> patches, nopatches, pmeprocs;
00620 PatchMap *pmap = PatchMap::Object();
00621 i = 0;
00622 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00623 int ri;
00624 for ( ri = ncpus; ri >= ncpus; ++i ) {
00625 ri = 0;
00626 int pow2 = 1;
00627 int rpow2 = npow2 / 2;
00628 for ( int j=0; j<nbits; ++j ) {
00629 ri += rpow2 * ( ( i / pow2 ) % 2 );
00630 pow2 *= 2; rpow2 /= 2;
00631 }
00632 }
00633
00634 if ( ri ) {
00635 if ( pmap->numPatchesOnNode(ri) ) patches.add(ri);
00636 else nopatches.add(ri);
00637 }
00638 }
00639
00640
00641 int useZero = 0;
00642 int npens = xBlocks*yBlocks;
00643 if ( npens % ncpus == 0 ) useZero = 1;
00644 if ( npens == nopatches.size() + 1 ) useZero = 1;
00645 npens += xBlocks*zBlocks;
00646 if ( npens % ncpus == 0 ) useZero = 1;
00647 if ( npens == nopatches.size() + 1 ) useZero = 1;
00648 npens += yBlocks*zBlocks;
00649 if ( npens % ncpus == 0 ) useZero = 1;
00650 if ( npens == nopatches.size() + 1 ) useZero = 1;
00651
00652
00653 for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00654 if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00655 for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00656 if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00657
00658 int pe = 0;
00659 int npes = pmeprocs.size();
00660 SortableResizeArray<int> zprocs(xBlocks*yBlocks);
00661 for ( i=0; i<xBlocks*yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00662 zprocs.sort();
00663 SortableResizeArray<int> yprocs(xBlocks*zBlocks);
00664 for ( i=0; i<xBlocks*zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00665 yprocs.sort();
00666 SortableResizeArray<int> xprocs(yBlocks*zBlocks);
00667 for ( i=0; i<yBlocks*zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00668 xprocs.sort();
00669
00670 pencilPMEProcessors = new char [CkNumPes()];
00671 memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00672
00673 int x,y,z;
00674
00675 iout << iINFO << "PME Z PENCIL LOCATIONS:";
00676 for ( i=0; i<zprocs.size() && i<10; ++i ) {
00677 iout << " " << zprocs[i];
00678 }
00679 if ( i < zprocs.size() ) iout << " ...";
00680 iout << "\n" << endi;
00681
00682 for (pe=0, x = 0; x < xBlocks; ++x)
00683 for (y = 0; y < yBlocks; ++y, ++pe ) {
00684 zPencil(x,y,0).insert(zprocs[pe]);
00685 pencilPMEProcessors[zprocs[pe]] = 1;
00686 }
00687 zPencil.doneInserting();
00688
00689 iout << iINFO << "PME Y PENCIL LOCATIONS:";
00690 for ( i=0; i<yprocs.size() && i<10; ++i ) {
00691 iout << " " << yprocs[i];
00692 }
00693 if ( i < yprocs.size() ) iout << " ...";
00694 iout << "\n" << endi;
00695
00696 for (pe=0, z = 0; z < zBlocks; ++z )
00697 for (x = 0; x < xBlocks; ++x, ++pe ) {
00698 yPencil(x,0,z).insert(yprocs[pe]);
00699 pencilPMEProcessors[yprocs[pe]] = 1;
00700 }
00701 yPencil.doneInserting();
00702
00703 iout << iINFO << "PME X PENCIL LOCATIONS:";
00704 for ( i=0; i<xprocs.size() && i<10; ++i ) {
00705 iout << " " << xprocs[i];
00706 }
00707 if ( i < xprocs.size() ) iout << " ...";
00708 iout << "\n" << endi;
00709
00710 for (pe=0, y = 0; y < yBlocks; ++y )
00711 for (z = 0; z < zBlocks; ++z, ++pe ) {
00712 xPencil(0,y,z).insert(xprocs[pe]);
00713 pencilPMEProcessors[xprocs[pe]] = 1;
00714 }
00715 xPencil.doneInserting();
00716
00717 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00718 PmePencilInitMsgData msgdata;
00719 msgdata.grid = myGrid;
00720 msgdata.xBlocks = xBlocks;
00721 msgdata.yBlocks = yBlocks;
00722 msgdata.zBlocks = zBlocks;
00723 msgdata.xPencil = xPencil;
00724 msgdata.yPencil = yPencil;
00725 msgdata.zPencil = zPencil;
00726 msgdata.pmeProxy = pmeProxyDir;
00727 xPencil.init(new PmePencilInitMsg(msgdata));
00728 yPencil.init(new PmePencilInitMsg(msgdata));
00729 zPencil.init(new PmePencilInitMsg(msgdata));
00730 }
00731 return;
00732 }
00733
00734
00735 int pe;
00736 int nx = 0;
00737 for ( pe = 0; pe < numGridPes; ++pe ) {
00738 localInfo[pe].x_start = nx;
00739 nx += myGrid.block1;
00740 if ( nx > myGrid.K1 ) nx = myGrid.K1;
00741 localInfo[pe].nx = nx - localInfo[pe].x_start;
00742 }
00743 int ny = 0;
00744 for ( pe = 0; pe < numTransPes; ++pe ) {
00745 localInfo[pe].y_start_after_transpose = ny;
00746 ny += myGrid.block2;
00747 if ( ny > myGrid.K2 ) ny = myGrid.K2;
00748 localInfo[pe].ny_after_transpose =
00749 ny - localInfo[pe].y_start_after_transpose;
00750 }
00751
00752 {
00753
00754 PatchMap *patchMap = PatchMap::Object();
00755 Lattice lattice = simParams->lattice;
00756 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00757 BigReal cutoff = simParams->cutoff;
00758 BigReal patchdim = simParams->patchDimension;
00759 int numPatches = patchMap->numPatches();
00760 int numNodes = CkNumPes();
00761 int *source_flags = new int[numNodes];
00762 int node;
00763 for ( node=0; node<numNodes; ++node ) {
00764 source_flags[node] = 0;
00765 recipPeDest[node] = 0;
00766 }
00767
00768
00769
00770
00771
00772
00773
00774 for ( int pid=0; pid < numPatches; ++pid ) {
00775 int pnode = patchMap->node(pid);
00776 BigReal minx = patchMap->min_a(pid);
00777 BigReal maxx = patchMap->max_a(pid);
00778 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00779
00780 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00781 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00782 for ( int i=min1; i<=max1; ++i ) {
00783 int ix = i;
00784 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00785 while ( ix < 0 ) ix += myGrid.K1;
00786
00787 if ( myGridPe >= 0 && ix >= localInfo[myGridPe].x_start &&
00788 ix < localInfo[myGridPe].x_start + localInfo[myGridPe].nx ) {
00789 source_flags[pnode] = 1;
00790 }
00791
00792 if ( pnode == CkMyPe() ) {
00793 recipPeDest[ix / myGrid.block1] = 1;
00794 }
00795 }
00796 }
00797
00798 numSources = 0;
00799 numDestRecipPes = 0;
00800 for ( node=0; node<numNodes; ++node ) {
00801 if ( source_flags[node] ) ++numSources;
00802 if ( recipPeDest[node] ) ++numDestRecipPes;
00803 }
00804
00805 #if 0
00806 if ( numSources ) {
00807 iout << iINFO << "PME " << CkMyPe() << " sources:";
00808 for ( node=0; node<numNodes; ++node ) {
00809 if ( source_flags[node] ) iout << " " << node;
00810 }
00811 iout << "\n" << endi;
00812 }
00813 #endif
00814
00815 delete [] source_flags;
00816
00817
00818
00819
00820 }
00821
00822 ungrid_count = numDestRecipPes;
00823
00824 sendTransBarrier_received = 0;
00825
00826 if ( myGridPe < 0 && myTransPe < 0 ) return;
00827
00828
00829 if ( myTransPe >= 0 ) {
00830 int k2_start = localInfo[myTransPe].y_start_after_transpose;
00831 int k2_end = k2_start + localInfo[myTransPe].ny_after_transpose;
00832 myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
00833 }
00834
00835 int local_size = myGrid.block1 * myGrid.K2 * myGrid.dim3;
00836 int local_size_2 = myGrid.block2 * myGrid.K1 * myGrid.dim3;
00837 if ( local_size < local_size_2 ) local_size = local_size_2;
00838 qgrid = new float[local_size*numGrids];
00839 if ( numGridPes > 1 || numTransPes > 1 ) {
00840 kgrid = new float[local_size*numGrids];
00841 } else {
00842 kgrid = qgrid;
00843 }
00844 qgrid_size = local_size;
00845
00846 if ( myGridPe >= 0 ) {
00847 qgrid_start = localInfo[myGridPe].x_start * myGrid.K2 * myGrid.dim3;
00848 qgrid_len = localInfo[myGridPe].nx * myGrid.K2 * myGrid.dim3;
00849 fgrid_start = localInfo[myGridPe].x_start * myGrid.K2;
00850 fgrid_len = localInfo[myGridPe].nx * myGrid.K2;
00851 }
00852
00853 int n[3]; n[0] = myGrid.K1; n[1] = myGrid.K2; n[2] = myGrid.K3;
00854
00855 #ifdef NAMD_FFTW
00856 CmiLock(fftw_plan_lock);
00857
00858 work = new fftw_complex[n[0]];
00859
00860 if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps. 1..." << endi;
00861 if ( myGridPe >= 0 ) {
00862 forward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_REAL_TO_COMPLEX,
00863 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00864 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
00865 }
00866 if ( ! CkMyPe() ) iout << " 2..." << endi;
00867 if ( myTransPe >= 0 ) {
00868 forward_plan_x = fftw_create_plan_specific(n[0], FFTW_REAL_TO_COMPLEX,
00869 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00870 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
00871 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
00872 }
00873 if ( ! CkMyPe() ) iout << " 3..." << endi;
00874 if ( myTransPe >= 0 ) {
00875 backward_plan_x = fftw_create_plan_specific(n[0], FFTW_COMPLEX_TO_REAL,
00876 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00877 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
00878 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
00879 }
00880 if ( ! CkMyPe() ) iout << " 4..." << endi;
00881 if ( myGridPe >= 0 ) {
00882 backward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_COMPLEX_TO_REAL,
00883 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00884 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
00885 }
00886 if ( ! CkMyPe() ) iout << " Done.\n" << endi;
00887
00888 CmiUnlock(fftw_plan_lock);
00889 #else
00890 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00891 #endif
00892
00893 if ( myGridPe >= 0 && numSources == 0 )
00894 NAMD_bug("PME grid elements exist without sources.");
00895 grid_count = numSources;
00896 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
00897 trans_count = numGridPes;
00898 }
00899
00900
00901 void ComputePmeMgr::initialize_pencils(CkQdMsg *msg) {
00902 delete msg;
00903 if ( ! usePencils ) return;
00904
00905 SimParameters *simParams = Node::Object()->simParameters;
00906
00907 PatchMap *patchMap = PatchMap::Object();
00908 Lattice lattice = simParams->lattice;
00909 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00910 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00911 BigReal cutoff = simParams->cutoff;
00912 BigReal patchdim = simParams->patchDimension;
00913 int numPatches = patchMap->numPatches();
00914
00915 pencilActive = new char[xBlocks*yBlocks];
00916 for ( int i=0; i<xBlocks; ++i ) {
00917 for ( int j=0; j<yBlocks; ++j ) {
00918 pencilActive[i*yBlocks+j] = 0;
00919 }
00920 }
00921
00922 for ( int pid=0; pid < numPatches; ++pid ) {
00923 int pnode = patchMap->node(pid);
00924 if ( pnode != CkMyPe() ) continue;
00925
00926 BigReal minx = patchMap->min_a(pid);
00927 BigReal maxx = patchMap->max_a(pid);
00928 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00929
00930 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00931 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00932
00933 BigReal miny = patchMap->min_b(pid);
00934 BigReal maxy = patchMap->max_b(pid);
00935 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00936
00937 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00938 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00939
00940 for ( int i=min1; i<=max1; ++i ) {
00941 int ix = i;
00942 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00943 while ( ix < 0 ) ix += myGrid.K1;
00944 for ( int j=min2; j<=max2; ++j ) {
00945 int jy = j;
00946 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00947 while ( jy < 0 ) jy += myGrid.K2;
00948 pencilActive[(ix / myGrid.block1)*yBlocks + (jy / myGrid.block2)] = 1;
00949 }
00950 }
00951 }
00952
00953 numPencilsActive = 0;
00954 for ( int i=0; i<xBlocks; ++i ) {
00955 for ( int j=0; j<yBlocks; ++j ) {
00956 if ( pencilActive[i*yBlocks+j] ) {
00957 ++numPencilsActive;
00958 zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00959 }
00960 }
00961 }
00962
00963
00964
00965
00966 ungrid_count = numPencilsActive;
00967 }
00968
00969
00970 void ComputePmeMgr::activate_pencils(CkQdMsg *msg) {
00971 if ( ! usePencils ) return;
00972 if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00973 }
00974
00975
00976 ComputePmeMgr::~ComputePmeMgr() {
00977
00978 #ifdef NAMD_FFTW
00979 if ( CmiMyRank() == 0 ) {
00980 CmiDestroyLock(fftw_plan_lock);
00981 }
00982 #endif
00983
00984 delete myKSpace;
00985 delete [] localInfo;
00986 delete [] gridPeMap;
00987 delete [] transPeMap;
00988 delete [] recipPeDest;
00989 delete [] gridPeOrder;
00990 delete [] transPeOrder;
00991 delete [] isPmeFlag;
00992 delete [] qgrid;
00993 if ( kgrid != qgrid ) delete [] kgrid;
00994 delete [] work;
00995 delete [] gridmsg_reuse;
00996 }
00997
00998 void ComputePmeMgr::sendGrid(void) {
00999 pmeCompute->sendData(numGridPes,gridPeOrder,recipPeDest,gridPeMap);
01000 }
01001
01002 void ComputePmeMgr::recvGrid(PmeGridMsg *msg) {
01003
01004 if ( grid_count == 0 ) {
01005 NAMD_bug("Message order failure in ComputePmeMgr::recvGrid\n");
01006 }
01007 if ( grid_count == numSources ) {
01008 lattice = msg->lattice;
01009 sequence = msg->sequence;
01010 }
01011
01012 int zdim = myGrid.dim3;
01013 int zlistlen = msg->zlistlen;
01014 int *zlist = msg->zlist;
01015 float *qmsg = msg->qgrid;
01016 for ( int g=0; g<numGrids; ++g ) {
01017 char *f = msg->fgrid + fgrid_len * g;
01018 float *q = qgrid + qgrid_size * g;
01019 for ( int i=0; i<fgrid_len; ++i ) {
01020 if ( f[i] ) {
01021 for ( int k=0; k<zlistlen; ++k ) {
01022 q[zlist[k]] += *(qmsg++);
01023 }
01024 }
01025 q += zdim;
01026 }
01027 }
01028
01029 gridmsg_reuse[numSources-grid_count] = msg;
01030 --grid_count;
01031
01032 if ( grid_count == 0 ) {
01033 #if CHARM_VERSION > 050402
01034 pmeProxyDir[CkMyPe()].gridCalc1();
01035 if ( useBarrier ) pmeProxyDir[0].sendTransBarrier();
01036 #else
01037 pmeProxyDir.gridCalc1(CkMyPe());
01038 if ( useBarrier ) pmeProxyDir.sendTransBarrier(0);
01039 #endif
01040 }
01041 }
01042
01043 void ComputePmeMgr::gridCalc1(void) {
01044
01045
01046 #ifdef NAMD_FFTW
01047 for ( int g=0; g<numGrids; ++g ) {
01048 rfftwnd_real_to_complex(forward_plan_yz, localInfo[myGridPe].nx,
01049 qgrid + qgrid_size * g, 1, myGrid.dim2 * myGrid.dim3, 0, 0, 0);
01050 }
01051 #endif
01052
01053 #if CHARM_VERSION > 050402
01054 if ( ! useBarrier ) pmeProxyDir[CkMyPe()].sendTrans();
01055 #else
01056 if ( ! useBarrier ) pmeProxyDir.sendTrans(CkMyPe());
01057 #endif
01058 }
01059
01060 void ComputePmeMgr::sendTransBarrier(void) {
01061 sendTransBarrier_received += 1;
01062
01063 if ( sendTransBarrier_received < numGridPes ) return;
01064 sendTransBarrier_received = 0;
01065 for ( int i=0; i<numGridPes; ++i ) {
01066 #if CHARM_VERSION > 050402
01067 pmeProxyDir[gridPeMap[i]].sendTrans();
01068 #else
01069 pmeProxyDir.sendTrans(gridPeMap[i]);
01070 #endif
01071 }
01072 }
01073
01074 void ComputePmeMgr::sendTrans(void) {
01075
01076
01077
01078 int zdim = myGrid.dim3;
01079 int nx = localInfo[myGridPe].nx;
01080 int x_start = localInfo[myGridPe].x_start;
01081 int slicelen = myGrid.K2 * zdim;
01082
01083 #ifdef USE_COMM_LIB
01084 ComlibInstanceHandle cinst1 = CkGetComlibInstance(0);
01085 cinst1.beginIteration();
01086 #endif
01087
01088 #if CMK_BLUEGENEL
01089 CmiNetworkProgressAfter (0);
01090 #endif
01091
01092 for (int j=0; j<numTransPes; j++) {
01093 int pe = transPeOrder[j];
01094 LocalPmeInfo &li = localInfo[pe];
01095 int cpylen = li.ny_after_transpose * zdim;
01096 PmeTransMsg *newmsg = new (nx * cpylen * numGrids,
01097 PRIORITY_SIZE) PmeTransMsg;
01098 newmsg->sourceNode = myGridPe;
01099 newmsg->lattice = lattice;
01100 newmsg->x_start = x_start;
01101 newmsg->nx = nx;
01102 for ( int g=0; g<numGrids; ++g ) {
01103 float *q = qgrid + qgrid_size * g + li.y_start_after_transpose * zdim;
01104 float *qmsg = newmsg->qgrid + nx * cpylen * g;
01105 for ( int x = 0; x < nx; ++x ) {
01106 CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01107 q += slicelen;
01108 qmsg += cpylen;
01109 }
01110 }
01111 newmsg->sequence = sequence;
01112 SET_PRIORITY(newmsg,sequence,PME_TRANS_PRIORITY)
01113 #if CHARM_VERSION > 050402
01114 pmeProxy[transPeMap[pe]].recvTrans(newmsg);
01115 #else
01116 pmeProxy.recvTrans(newmsg,transPeMap[pe]);
01117 #endif
01118 }
01119
01120 untrans_count = numTransPes;
01121
01122 #ifdef USE_COMM_LIB
01123 cinst1.endIteration();
01124 #endif
01125 }
01126
01127 void ComputePmeMgr::recvTrans(PmeTransMsg *msg) {
01128
01129 if ( trans_count == numGridPes ) {
01130 lattice = msg->lattice;
01131 sequence = msg->sequence;
01132 }
01133
01134 int zdim = myGrid.dim3;
01135
01136 int ny = localInfo[myTransPe].ny_after_transpose;
01137 int x_start = msg->x_start;
01138 int nx = msg->nx;
01139 for ( int g=0; g<numGrids; ++g ) {
01140 CmiMemcpy((void*)(kgrid + qgrid_size * g + x_start*ny*zdim),
01141 (void*)(msg->qgrid + nx*ny*zdim*g), nx*ny*zdim*sizeof(float));
01142 }
01143
01144 delete msg;
01145 --trans_count;
01146
01147 if ( trans_count == 0 ) {
01148 #if CHARM_VERSION > 050402
01149 pmeProxyDir[CkMyPe()].gridCalc2();
01150 #else
01151 pmeProxyDir.gridCalc2(CkMyPe());
01152 #endif
01153 }
01154 }
01155
01156 void ComputePmeMgr::gridCalc2(void) {
01157
01158
01159 #if CMK_BLUEGENEL
01160 CmiNetworkProgressAfter (0);
01161 #endif
01162
01163 int zdim = myGrid.dim3;
01164
01165 int ny = localInfo[myTransPe].ny_after_transpose;
01166
01167 for ( int g=0; g<numGrids; ++g ) {
01168
01169 #ifdef NAMD_FFTW
01170 fftw(forward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01171 ny * zdim / 2, 1, work, 1, 0);
01172 #endif
01173
01174
01175 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
01176 recip_evir2[g][0] = myKSpace->compute_energy(kgrid+qgrid_size*g,
01177 lattice, ewaldcof, &(recip_evir2[g][1]));
01178
01179
01180
01181 #ifdef NAMD_FFTW
01182 fftw(backward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01183 ny * zdim / 2, 1, work, 1, 0);
01184 #endif
01185 }
01186
01187 #if CHARM_VERSION > 050402
01188 pmeProxyDir[CkMyPe()].sendUntrans();
01189 #else
01190 pmeProxyDir.sendUntrans(CkMyPe());
01191 #endif
01192 }
01193
01194 void ComputePmeMgr::sendUntrans(void) {
01195
01196 int zdim = myGrid.dim3;
01197 int y_start = localInfo[myTransPe].y_start_after_transpose;
01198 int ny = localInfo[myTransPe].ny_after_transpose;
01199
01200 #ifdef USE_COMM_LIB
01201 ComlibInstanceHandle cinst2 = CkGetComlibInstance(1);
01202 cinst2.beginIteration();
01203 #endif
01204
01205 #if CMK_BLUEGENEL
01206 CmiNetworkProgressAfter (0);
01207 #endif
01208
01209
01210 for (int j=0; j<numGridPes; j++) {
01211 int pe = gridPeOrder[j];
01212 LocalPmeInfo &li = localInfo[pe];
01213 int x_start =li.x_start;
01214 int nx = li.nx;
01215 PmeUntransMsg *newmsg = new (nx*ny*zdim*numGrids,numGrids,
01216 PRIORITY_SIZE) PmeUntransMsg;
01217 newmsg->sourceNode = myTransPe;
01218 newmsg->y_start = y_start;
01219 newmsg->ny = ny;
01220 for ( int g=0; g<numGrids; ++g ) {
01221 if ( j == 0 ) {
01222 newmsg->evir[g] = recip_evir2[g];
01223 } else {
01224 newmsg->evir[g] = 0.;
01225 }
01226 CmiMemcpy((void*)(newmsg->qgrid+nx*ny*zdim*g),
01227 (void*)(kgrid + qgrid_size*g + x_start*ny*zdim),
01228 nx*ny*zdim*sizeof(float));
01229 }
01230 SET_PRIORITY(newmsg,sequence,PME_UNTRANS_PRIORITY)
01231 #if CHARM_VERSION > 050402
01232 pmeProxy[gridPeMap[pe]].recvUntrans(newmsg);
01233 #else
01234 pmeProxy.recvUntrans(newmsg,gridPeMap[pe]);
01235 #endif
01236 }
01237
01238 #ifdef USE_COMM_LIB
01239 cinst2.endIteration();
01240 #endif
01241
01242 trans_count = numGridPes;
01243 }
01244
01245 void ComputePmeMgr::recvUntrans(PmeUntransMsg *msg) {
01246
01247 if ( untrans_count == numTransPes ) {
01248 for ( int g=0; g<numGrids; ++g ) {
01249 recip_evir[g] = 0.;
01250 }
01251 }
01252
01253 #if CMK_BLUEGENEL
01254 CmiNetworkProgressAfter (0);
01255 #endif
01256
01257 int g;
01258 for ( g=0; g<numGrids; ++g ) {
01259 recip_evir[g] += msg->evir[g];
01260 }
01261
01262 int zdim = myGrid.dim3;
01263
01264 int nx = localInfo[myGridPe].nx;
01265 int y_start = msg->y_start;
01266 int ny = msg->ny;
01267 int slicelen = myGrid.K2 * zdim;
01268 int cpylen = ny * zdim;
01269 for ( g=0; g<numGrids; ++g ) {
01270 float *q = qgrid + qgrid_size * g + y_start * zdim;
01271 float *qmsg = msg->qgrid + nx * cpylen * g;
01272 for ( int x = 0; x < nx; ++x ) {
01273 CmiMemcpy((void*)q, (void*)qmsg, cpylen*sizeof(float));
01274 q += slicelen;
01275 qmsg += cpylen;
01276 }
01277 }
01278
01279 delete msg;
01280 --untrans_count;
01281
01282 if ( untrans_count == 0 ) {
01283 #if CHARM_VERSION > 050402
01284 pmeProxyDir[CkMyPe()].gridCalc3();
01285 #else
01286 pmeProxyDir.gridCalc3(CkMyPe());
01287 #endif
01288 }
01289 }
01290
01291 void ComputePmeMgr::gridCalc3(void) {
01292
01293
01294
01295 #ifdef NAMD_FFTW
01296 for ( int g=0; g<numGrids; ++g ) {
01297 rfftwnd_complex_to_real(backward_plan_yz, localInfo[myGridPe].nx,
01298 (fftw_complex *) (qgrid + qgrid_size * g),
01299 1, myGrid.dim2 * myGrid.dim3 / 2, 0, 0, 0);
01300 }
01301 #endif
01302
01303 #if CHARM_VERSION > 050402
01304 pmeProxyDir[CkMyPe()].sendUngrid();
01305 #else
01306 pmeProxyDir.sendUngrid(CkMyPe());
01307 #endif
01308 }
01309
01310 void ComputePmeMgr::sendUngrid(void) {
01311
01312 for ( int j=0; j<numSources; ++j ) {
01313
01314 PmeGridMsg *newmsg = gridmsg_reuse[j];
01315 int pe = newmsg->sourceNode;
01316 if ( j == 0 ) {
01317 for ( int g=0; g<numGrids; ++g ) {
01318 newmsg->evir[g] = recip_evir[g];
01319 }
01320 } else {
01321 for ( int g=0; g<numGrids; ++g ) {
01322 newmsg->evir[g] = 0.;
01323 }
01324 }
01325 int zdim = myGrid.dim3;
01326 int flen = newmsg->len;
01327 int fstart = newmsg->start;
01328 int zlistlen = newmsg->zlistlen;
01329 int *zlist = newmsg->zlist;
01330 float *qmsg = newmsg->qgrid;
01331 for ( int g=0; g<numGrids; ++g ) {
01332 char *f = newmsg->fgrid + fgrid_len * g;
01333 float *q = qgrid + qgrid_size * g + (fstart-fgrid_start) * zdim;
01334 for ( int i=0; i<flen; ++i ) {
01335 if ( f[i] ) {
01336 for ( int k=0; k<zlistlen; ++k ) {
01337 *(qmsg++) = q[zlist[k]];
01338 }
01339 }
01340 q += zdim;
01341 }
01342 }
01343 newmsg->sourceNode = myGridPe;
01344
01345 SET_PRIORITY(newmsg,sequence,PME_UNGRID_PRIORITY)
01346 #if CHARM_VERSION > 050402
01347 pmeProxyDir[pe].recvUngrid(newmsg);
01348 #else
01349 pmeProxyDir.recvUngrid(newmsg,pe);
01350 #endif
01351 }
01352 grid_count = numSources;
01353 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
01354 }
01355
01356 void ComputePmeMgr::recvUngrid(PmeGridMsg *msg) {
01357
01358 if ( ungrid_count == 0 ) {
01359 NAMD_bug("Message order failure in ComputePmeMgr::recvUngrid\n");
01360 }
01361
01362 if ( usePencils ) pmeCompute->copyPencils(msg);
01363 else pmeCompute->copyResults(msg);
01364 delete msg;
01365 --ungrid_count;
01366
01367 if ( ungrid_count == 0 ) {
01368 #if CHARM_VERSION > 050402
01369 pmeProxyDir[CkMyPe()].ungridCalc();
01370 #else
01371 pmeProxyDir.ungridCalc(CkMyPe());
01372 #endif
01373 }
01374 }
01375
01376 void ComputePmeMgr::ungridCalc(void) {
01377
01378
01379 pmeCompute->ungridForces();
01380
01381 ungrid_count = (usePencils ? numPencilsActive : numDestRecipPes );
01382 }
01383
01384
01385 static void scale_coordinates(PmeParticle p[], int N, Lattice lattice, PmeGrid grid) {
01386 Vector origin = lattice.origin();
01387 Vector recip1 = lattice.a_r();
01388 Vector recip2 = lattice.b_r();
01389 Vector recip3 = lattice.c_r();
01390 double ox = origin.x;
01391 double oy = origin.y;
01392 double oz = origin.z;
01393 double r1x = recip1.x;
01394 double r1y = recip1.y;
01395 double r1z = recip1.z;
01396 double r2x = recip2.x;
01397 double r2y = recip2.y;
01398 double r2z = recip2.z;
01399 double r3x = recip3.x;
01400 double r3y = recip3.y;
01401 double r3z = recip3.z;
01402 int K1 = grid.K1;
01403 int K2 = grid.K2;
01404 int K3 = grid.K3;
01405
01406 for (int i=0; i<N; i++) {
01407 double px = p[i].x - ox;
01408 double py = p[i].y - oy;
01409 double pz = p[i].z - oz;
01410 double sx = px*r1x + py*r1y + pz*r1z;
01411 double sy = px*r2x + py*r2y + pz*r2z;
01412 double sz = px*r3x + py*r3y + pz*r3z;
01413 p[i].x = K1 * ( sx - floor(sx) );
01414 p[i].y = K2 * ( sy - floor(sy) );
01415 p[i].z = K3 * ( sz - floor(sz) );
01416
01417
01418 if ( p[i].x == K1 ) p[i].x = 0;
01419 if ( p[i].y == K2 ) p[i].y = 0;
01420 if ( p[i].z == K3 ) p[i].z = 0;
01421 }
01422 }
01423
01424 static void scale_forces(Vector f[], int N, Lattice lattice) {
01425 Vector recip1 = lattice.a_r();
01426 Vector recip2 = lattice.b_r();
01427 Vector recip3 = lattice.c_r();
01428 double r1x = recip1.x;
01429 double r1y = recip1.y;
01430 double r1z = recip1.z;
01431 double r2x = recip2.x;
01432 double r2y = recip2.y;
01433 double r2z = recip2.z;
01434 double r3x = recip3.x;
01435 double r3y = recip3.y;
01436 double r3z = recip3.z;
01437
01438 for (int i=0; i<N; i++) {
01439 double f1 = f[i].x;
01440 double f2 = f[i].y;
01441 double f3 = f[i].z;
01442 f[i].x = f1*r1x + f2*r2x + f3*r3x;
01443 f[i].y = f1*r1y + f2*r2y + f3*r3y;
01444 f[i].z = f1*r1z + f2*r2z + f3*r3z;
01445 }
01446 }
01447
01448
01449 ComputePme::ComputePme(ComputeID c) :
01450 ComputeHomePatches(c)
01451 {
01452 DebugM(4,"ComputePme created.\n");
01453
01454 CProxy_ComputePmeMgr::ckLocalBranch(
01455 CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
01456
01457 useAvgPositions = 1;
01458
01459 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
01460
01461 SimParameters *simParams = Node::Object()->simParameters;
01462
01463 fepOn = simParams->fepOn;
01464 thermInt = simParams->thermInt;
01465 decouple = (fepOn || thermInt) && (simParams->decouple);
01466 fepElecLambdaStart = (fepOn || thermInt) ? simParams->fepElecLambdaStart : 0;
01467
01468 if (fepOn || thermInt) {
01469 numGrids = 2;
01470 if (decouple) numGrids += 2;
01471 if (fepElecLambdaStart || thermInt) numGrids ++;
01472 }
01473 else numGrids = 1;
01474 lesOn = simParams->lesOn;
01475 if ( lesOn ) {
01476 lesFactor = simParams->lesFactor;
01477 numGrids = lesFactor;
01478 }
01479 selfOn = 0;
01480 pairOn = simParams->pairInteractionOn;
01481 if ( pairOn ) {
01482 selfOn = simParams->pairInteractionSelf;
01483 if ( selfOn ) pairOn = 0;
01484 numGrids = selfOn ? 1 : 3;
01485 }
01486
01487 myGrid.K1 = simParams->PMEGridSizeX;
01488 myGrid.K2 = simParams->PMEGridSizeY;
01489 myGrid.K3 = simParams->PMEGridSizeZ;
01490 myGrid.order = simParams->PMEInterpOrder;
01491 myGrid.dim2 = myGrid.K2;
01492 myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
01493 qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
01494 fsize = myGrid.K1 * myGrid.dim2;
01495 q_arr = new double*[fsize*numGrids];
01496 memset( (void*) q_arr, 0, fsize*numGrids * sizeof(double*) );
01497 f_arr = new char[fsize*numGrids];
01498 fz_arr = new char[myGrid.K3];
01499 }
01500
01501 ComputePme::~ComputePme()
01502 {
01503 for (int i=0; i<fsize*numGrids; ++i) {
01504 if ( q_arr[i] ) {
01505 delete [] q_arr[i];
01506 }
01507 }
01508 delete [] q_arr;
01509 delete [] f_arr;
01510 delete [] fz_arr;
01511 }
01512
01513 void ComputePme::doWork()
01514 {
01515 DebugM(4,"Entering ComputePme::doWork().\n");
01516
01517 #ifdef TRACE_COMPUTE_OBJECTS
01518 double traceObjStartTime = CmiWallTimer();
01519 #endif
01520
01521 ResizeArrayIter<PatchElem> ap(patchList);
01522
01523
01524 if ( ! patchList[0].p->flags.doFullElectrostatics )
01525 {
01526 for (ap = ap.begin(); ap != ap.end(); ap++) {
01527 CompAtom *x = (*ap).positionBox->open();
01528 Results *r = (*ap).forceBox->open();
01529 (*ap).positionBox->close(&x);
01530 (*ap).forceBox->close(&r);
01531 }
01532 reduction->submit();
01533 return;
01534 }
01535
01536
01537 numLocalAtoms = 0;
01538 for (ap = ap.begin(); ap != ap.end(); ap++) {
01539 numLocalAtoms += (*ap).p->getNumAtoms();
01540 }
01541
01542 Lattice lattice = patchList[0].p->flags.lattice;
01543
01544 localData = new PmeParticle[numLocalAtoms*(numGrids+
01545 ((numGrids>1 || selfOn)?1:0))];
01546 localPartition = new char[numLocalAtoms];
01547
01548 int g;
01549 for ( g=0; g<numGrids; ++g ) {
01550 localGridData[g] = localData + numLocalAtoms*(g+1);
01551 }
01552
01553
01554 PmeParticle * data_ptr = localData;
01555 char * part_ptr = localPartition;
01556 const BigReal coloumb_sqrt = sqrt( COLOUMB * ComputeNonbondedUtil::scaling
01557 * ComputeNonbondedUtil::dielectric_1 );
01558
01559 for (ap = ap.begin(); ap != ap.end(); ap++) {
01560 #ifdef NETWORK_PROGRESS
01561 CmiNetworkProgress();
01562 #endif
01563
01564 CompAtom *x = (*ap).positionBox->open();
01565 if ( patchList[0].p->flags.doMolly ) {
01566 (*ap).positionBox->close(&x);
01567 x = (*ap).avgPositionBox->open();
01568 }
01569 int numAtoms = (*ap).p->getNumAtoms();
01570
01571 for(int i=0; i<numAtoms; ++i)
01572 {
01573 data_ptr->x = x[i].position.x;
01574 data_ptr->y = x[i].position.y;
01575 data_ptr->z = x[i].position.z;
01576 data_ptr->cg = coloumb_sqrt * x[i].charge;
01577 ++data_ptr;
01578 *part_ptr = x[i].partition;
01579 ++part_ptr;
01580 }
01581
01582 if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
01583 else { (*ap).positionBox->close(&x); }
01584 }
01585
01586
01587 if ( ((fepOn || thermInt) && (!decouple)) || lesOn ) {
01588 for ( g=0; g<numGrids; ++g ) {
01589 #ifdef NETWORK_PROGRESS
01590 CmiNetworkProgress();
01591 #endif
01592
01593 PmeParticle *lgd = localGridData[g];
01594 int nga = 0;
01595 for(int i=0; i<numLocalAtoms; ++i) {
01596 if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
01597
01598
01599
01600 lgd[nga++] = localData[i];
01601 }
01602 }
0160