00001
00007 #ifdef NAMD_FFTW
00008 #ifdef NAMD_FFTW_NO_TYPE_PREFIX
00009 #include <fftw.h>
00010 #include <rfftw.h>
00011 #else
00012 #include <sfftw.h>
00013 #include <srfftw.h>
00014 #endif
00015 #endif
00016
00017 #include "InfoStream.h"
00018 #include "Node.h"
00019 #include "PatchMap.h"
00020 #include "PatchMap.inl"
00021 #include "AtomMap.h"
00022 #include "ComputePme.h"
00023 #include "ComputePmeMgr.decl.h"
00024 #include "PmeRealSpace.h"
00025 #include "PmeKSpace.h"
00026 #include "ComputeNonbondedUtil.h"
00027 #include "PatchMgr.h"
00028 #include "Molecule.h"
00029 #include "ReductionMgr.h"
00030 #include "ComputeMgr.h"
00031 #include "ComputeMgr.decl.h"
00032
00033 #define MIN_DEBUG_LEVEL 3
00034 #include "Debug.h"
00035 #include "SimParameters.h"
00036 #include "WorkDistrib.h"
00037 #include "varsizemsg.h"
00038 #include "Random.h"
00039 #include "Priorities.h"
00040
00041
00042
00043 #ifdef USE_COMM_LIB
00044 #include "EachToManyMulticastStrategy.h"
00045 #endif
00046
00047 #ifndef SQRT_PI
00048 #define SQRT_PI 1.7724538509055160273
00049 #endif
00050
00051 char *pencilPMEProcessors;
00052
00053
00054 class PmeGridMsg : public CMessage_PmeGridMsg {
00055 public:
00056
00057 int sourceNode;
00058 int sequence;
00059 int hasData;
00060 Lattice lattice;
00061 PmeReduction *evir;
00062 int start;
00063 int len;
00064 int zlistlen;
00065 int *zlist;
00066 char *fgrid;
00067 float *qgrid;
00068 };
00069
00070 class PmeTransMsg : public CMessage_PmeTransMsg {
00071 public:
00072
00073 int sourceNode;
00074 int sequence;
00075 int hasData;
00076 Lattice lattice;
00077 int x_start;
00078 int nx;
00079 float *qgrid;
00080 };
00081
00082
00083 class PmeUntransMsg : public CMessage_PmeUntransMsg {
00084 public:
00085
00086 int sourceNode;
00087 int has_evir;
00088 PmeReduction *evir;
00089 int y_start;
00090 int ny;
00091 float *qgrid;
00092
00093 };
00094
00095
00096
00097 struct PmePencilInitMsgData {
00098 PmeGrid grid;
00099 int xBlocks, yBlocks, zBlocks;
00100 CProxy_PmeXPencil xPencil;
00101 CProxy_PmeYPencil yPencil;
00102 CProxy_PmeZPencil zPencil;
00103 CProxy_ComputePmeMgr pmeProxy;
00104 };
00105
00106 class PmePencilInitMsg : public CMessage_PmePencilInitMsg {
00107 public:
00108 PmePencilInitMsg(PmePencilInitMsgData &d) { data = d; }
00109 PmePencilInitMsgData data;
00110 };
00111
00112
00113 struct LocalPmeInfo {
00114 int nx, x_start;
00115 int ny_after_transpose, y_start_after_transpose;
00116 };
00117
00118
00119
00120 void generatePmePeList(int *peMap, int numPes){
00121
00122 int i;
00123 int ncpus = CkNumPes();
00124
00125
00126 int npow2 = 1; int nbits = 0;
00127 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00128
00129
00130 SortableResizeArray<int> seq(ncpus);
00131 i = 0;
00132 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00133 int ri;
00134 for ( ri = ncpus; ri >= ncpus; ++i ) {
00135 ri = 0;
00136 int pow2 = 1;
00137 int rpow2 = npow2 / 2;
00138 for ( int j=0; j<nbits; ++j ) {
00139 ri += rpow2 * ( ( i / pow2 ) % 2 );
00140 pow2 *= 2; rpow2 /= 2;
00141 }
00142 }
00143 seq[icpu] = ri;
00144 }
00145
00146
00147 for ( i=0; i<numPes; ++i ) {
00148 seq[i] = seq[ncpus - numPes + i];
00149 }
00150 seq.resize(numPes);
00151 seq.sort();
00152
00153 for ( i=0; i<numPes; ++i )
00154 peMap[i] = seq[i];
00155
00156
00157 }
00158
00159
00160 void generatePmePeList2(int *gridPeMap, int numGridPes, int *transPeMap, int numTransPes){
00161
00162 int i;
00163 int ncpus = CkNumPes();
00164
00165
00166 int npow2 = 1; int nbits = 0;
00167 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00168
00169
00170 SortableResizeArray<int> seq(ncpus);
00171 SortableResizeArray<int> seq2(ncpus);
00172 i = 0;
00173 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00174 int ri;
00175 for ( ri = ncpus; ri >= ncpus; ++i ) {
00176 ri = 0;
00177 int pow2 = 1;
00178 int rpow2 = npow2 / 2;
00179 for ( int j=0; j<nbits; ++j ) {
00180 ri += rpow2 * ( ( i / pow2 ) % 2 );
00181 pow2 *= 2; rpow2 /= 2;
00182 }
00183 }
00184 seq[icpu] = ri;
00185 seq2[icpu] = ri;
00186 }
00187
00188
00189 for ( i=0; i<numGridPes; ++i ) {
00190 seq[i] = seq[ncpus - numGridPes + i];
00191 }
00192 seq.resize(numGridPes);
00193 seq.sort();
00194 int firstTransPe = ncpus - numGridPes - numTransPes;
00195 if ( firstTransPe < 0 ) {
00196 firstTransPe = 0;
00197
00198 if ( ncpus > numTransPes ) firstTransPe = 1;
00199 }
00200 for ( i=0; i<numTransPes; ++i ) {
00201 seq2[i] = seq2[firstTransPe + i];
00202 }
00203 seq2.resize(numTransPes);
00204 seq2.sort();
00205
00206 for ( i=0; i<numGridPes; ++i )
00207 gridPeMap[i] = seq[i];
00208
00209 for ( i=0; i<numTransPes; ++i )
00210 transPeMap[i] = seq2[i];
00211 }
00212
00213 #if USE_TOPOMAP
00214
00215 bool generateBGLORBPmePeList(int *pemap, int numPes, int *block_pes=0,
00216 int nbpes=0);
00217 #endif
00218
00219 class ComputePmeMgr : public BOCclass {
00220 public:
00221 friend class ComputePme;
00222 ComputePmeMgr();
00223 ~ComputePmeMgr();
00224
00225 void initialize(CkQdMsg*);
00226 void initialize_pencils(CkQdMsg*);
00227 void activate_pencils(CkQdMsg*);
00228 void recvArrays(CProxy_PmeXPencil, CProxy_PmeYPencil, CProxy_PmeZPencil);
00229
00230 void sendGrid(void);
00231 void recvGrid(PmeGridMsg *);
00232 void gridCalc1(void);
00233 void sendTransBarrier(void);
00234 void sendTrans(void);
00235 void recvTrans(PmeTransMsg *);
00236 void gridCalc2(void);
00237 void sendUntrans(void);
00238 void recvUntrans(PmeUntransMsg *);
00239 void gridCalc3(void);
00240 void sendUngrid(void);
00241 void recvUngrid(PmeGridMsg *);
00242 void ungridCalc(void);
00243
00244 void setCompute(ComputePme *c) { pmeCompute = c; c->setMgr(this); }
00245
00246
00247 int isPmeProcessor(int p);
00248
00249 #ifdef NAMD_FFTW
00250 static CmiNodeLock fftw_plan_lock;
00251 #endif
00252
00253 private:
00254 CProxy_ComputePmeMgr pmeProxy;
00255 CProxy_ComputePmeMgr pmeProxyDir;
00256 ComputePme *pmeCompute;
00257 PmeGrid myGrid;
00258 Lattice lattice;
00259 PmeKSpace *myKSpace;
00260 float *qgrid;
00261 float *kgrid;
00262
00263 #ifdef NAMD_FFTW
00264 fftw_plan forward_plan_x, backward_plan_x;
00265 rfftwnd_plan forward_plan_yz, backward_plan_yz;
00266 fftw_complex *work;
00267 #else
00268 float *work;
00269 #endif
00270
00271 int alchFepOn, alchThermIntOn, lesOn, lesFactor, pairOn, selfOn, numGrids;
00272 int alchDecouple;
00273 BigReal alchElecLambdaStart;
00274 BigReal elecLambdaUp;
00275 BigReal elecLambdaDown;
00276
00277
00278 LocalPmeInfo *localInfo;
00279 int qgrid_size;
00280 int qgrid_start;
00281 int qgrid_len;
00282 int fgrid_start;
00283 int fgrid_len;
00284
00285 int numSources;
00286 int numGridPes;
00287 int numTransPes;
00288 int numDestRecipPes;
00289 int myGridPe;
00290 int myTransPe;
00291 int *gridPeMap;
00292 int *transPeMap;
00293 int *recipPeDest;
00294 int *gridPeOrder;
00295 int *transPeOrder;
00296 char *isPmeFlag;
00297 int grid_count;
00298 int trans_count;
00299 int untrans_count;
00300 int ungrid_count;
00301 PmeGridMsg **gridmsg_reuse;
00302 PmeReduction recip_evir[PME_MAX_EVALS];
00303 PmeReduction recip_evir2[PME_MAX_EVALS];
00304
00305 int sequence;
00306 int useBarrier;
00307 int sendTransBarrier_received;
00308
00309 int usePencils;
00310 int xBlocks, yBlocks, zBlocks;
00311 CProxy_PmeXPencil xPencil;
00312 CProxy_PmeYPencil yPencil;
00313 CProxy_PmeZPencil zPencil;
00314 char *pencilActive;
00315 int numPencilsActive;
00316 };
00317
00318 #ifdef NAMD_FFTW
00319 CmiNodeLock ComputePmeMgr::fftw_plan_lock;
00320 #endif
00321
00322 int isPmeProcessor(int p){
00323 if (pencilPMEProcessors)
00324 return pencilPMEProcessors[p];
00325 else
00326 return CProxy_ComputePmeMgr::ckLocalBranch(CkpvAccess(BOCclass_group).computePmeMgr)->isPmeProcessor(p);
00327 }
00328
00329 int ComputePmeMgr::isPmeProcessor(int p){
00330 return ( usePencils ? pencilPMEProcessors[p] : isPmeFlag[p] );
00331 }
00332
00333 ComputePmeMgr::ComputePmeMgr() : pmeProxy(thisgroup),
00334 pmeProxyDir(thisgroup), pmeCompute(0) {
00335
00336 CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00337
00338 #ifdef USE_COMM_LIB
00339 ComlibDelegateProxy(&pmeProxy);
00340 #endif
00341
00342 #ifdef NAMD_FFTW
00343 if ( CmiMyRank() == 0 ) {
00344 fftw_plan_lock = CmiCreateLock();
00345 }
00346 #endif
00347
00348 myKSpace = 0;
00349 kgrid = 0;
00350 work = 0;
00351 grid_count = 0;
00352 trans_count = 0;
00353 untrans_count = 0;
00354 ungrid_count = 0;
00355 gridmsg_reuse= new PmeGridMsg*[CkNumPes()];
00356 useBarrier = 0;
00357 sendTransBarrier_received = 0;
00358 usePencils = 0;
00359 }
00360
00361
00362 void ComputePmeMgr::recvArrays(
00363 CProxy_PmeXPencil x, CProxy_PmeYPencil y, CProxy_PmeZPencil z) {
00364 xPencil = x; yPencil = y; zPencil = z;
00365 }
00366
00367
00368 void ComputePmeMgr::initialize(CkQdMsg *msg) {
00369 delete msg;
00370
00371 localInfo = new LocalPmeInfo[CkNumPes()];
00372 gridPeMap = new int[CkNumPes()];
00373 transPeMap = new int[CkNumPes()];
00374 recipPeDest = new int[CkNumPes()];
00375 gridPeOrder = new int[CkNumPes()];
00376 transPeOrder = new int[CkNumPes()];
00377 isPmeFlag = new char[CkNumPes()];
00378
00379 SimParameters *simParams = Node::Object()->simParameters;
00380 PatchMap *patchMap = PatchMap::Object();
00381
00382 alchFepOn = simParams->alchFepOn;
00383 alchThermIntOn = simParams->alchThermIntOn;
00384 alchDecouple = (alchFepOn || alchThermIntOn) && (simParams->alchDecouple);
00385 alchElecLambdaStart = (alchFepOn || alchThermIntOn) ?
00386 simParams->alchElecLambdaStart : 0;
00387 if (alchFepOn || alchThermIntOn) {
00388 numGrids = 2;
00389 if (alchDecouple) numGrids += 2;
00390 if (alchElecLambdaStart || alchThermIntOn) numGrids ++;
00391 }
00392 else numGrids = 1;
00393 lesOn = simParams->lesOn;
00394 useBarrier = simParams->PMEBarrier;
00395 if ( lesOn ) {
00396 lesFactor = simParams->lesFactor;
00397 numGrids = lesFactor;
00398 }
00399 selfOn = 0;
00400 pairOn = simParams->pairInteractionOn;
00401 if ( pairOn ) {
00402 selfOn = simParams->pairInteractionSelf;
00403 if ( selfOn ) pairOn = 0;
00404 numGrids = selfOn ? 1 : 3;
00405 }
00406
00407 if ( numGrids != 1 || simParams->PMEPencils == 0 ) usePencils = 0;
00408 else if ( simParams->PMEPencils > 0 ) usePencils = 1;
00409 else {
00410 int dimx = simParams->PMEGridSizeX;
00411 int dimy = simParams->PMEGridSizeY;
00412 int maxslabs = 1 + (dimx - 1) / simParams->PMEMinSlices;
00413 if ( maxslabs > CkNumPes() ) maxslabs = CkNumPes();
00414 int maxpencils = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00415 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00416 if ( maxpencils > CkNumPes() ) maxpencils = CkNumPes();
00417 if ( maxpencils > 3 * maxslabs ) usePencils = 1;
00418 else usePencils = 0;
00419 }
00420
00421 if ( usePencils ) {
00422 if ( simParams->PMEPencils > 1 ) {
00423 xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00424 } else {
00425 int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00426 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00427 if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00428 int nb = (int) sqrt((float)nb2);
00429 xBlocks = zBlocks = nb;
00430 yBlocks = nb2 / nb;
00431 }
00432
00433 int dimx = simParams->PMEGridSizeX;
00434 int bx = 1 + ( dimx - 1 ) / xBlocks;
00435 xBlocks = 1 + ( dimx - 1 ) / bx;
00436
00437 int dimy = simParams->PMEGridSizeY;
00438 int by = 1 + ( dimy - 1 ) / yBlocks;
00439 yBlocks = 1 + ( dimy - 1 ) / by;
00440
00441 int dimz = simParams->PMEGridSizeZ / 2 + 1;
00442 int bz = 1 + ( dimz - 1 ) / zBlocks;
00443 zBlocks = 1 + ( dimz - 1 ) / bz;
00444
00445 if ( ! CkMyPe() ) {
00446 iout << iINFO << "PME using " << xBlocks << " x " <<
00447 yBlocks << " x " << zBlocks <<
00448 " pencil grid for FFT and reciprocal sum.\n" << endi;
00449 }
00450 } else {
00451
00452 {
00453
00454
00455 int minslices = simParams->PMEMinSlices;
00456 int dimx = simParams->PMEGridSizeX;
00457 int nrpx = ( dimx + minslices - 1 ) / minslices;
00458 int dimy = simParams->PMEGridSizeY;
00459 int nrpy = ( dimy + minslices - 1 ) / minslices;
00460
00461
00462 int nrpp = CkNumPes();
00463
00464 if ( nrpp < nrpx ) nrpx = nrpp;
00465 if ( nrpp < nrpy ) nrpy = nrpp;
00466
00467
00468 int nrps = simParams->PMEProcessors;
00469 if ( nrps > CkNumPes() ) nrps = CkNumPes();
00470 if ( nrps > 0 ) nrpx = nrps;
00471 if ( nrps > 0 ) nrpy = nrps;
00472
00473
00474 int bx = ( dimx + nrpx - 1 ) / nrpx;
00475 nrpx = ( dimx + bx - 1 ) / bx;
00476 int by = ( dimy + nrpy - 1 ) / nrpy;
00477 nrpy = ( dimy + by - 1 ) / by;
00478 if ( bx != ( dimx + nrpx - 1 ) / nrpx )
00479 NAMD_bug("Error in selecting number of PME processors.");
00480 if ( by != ( dimy + nrpy - 1 ) / nrpy )
00481 NAMD_bug("Error in selecting number of PME processors.");
00482
00483 numGridPes = nrpx;
00484 numTransPes = nrpy;
00485 }
00486 if ( ! CkMyPe() ) {
00487 iout << iINFO << "PME using " << numGridPes << " and " << numTransPes <<
00488 " processors for FFT and reciprocal sum.\n" << endi;
00489 }
00490 {
00491 int i;
00492 for ( i = 0; i < numGridPes; ++i ) {
00493 gridPeOrder[i] = i;
00494 }
00495 for ( i = 0; i < numTransPes; ++i ) {
00496 transPeOrder[i] = i;
00497 }
00498 Random rand(CkMyPe());
00499 rand.reorder(gridPeOrder,numGridPes);
00500 rand.reorder(transPeOrder,numTransPes);
00501 }
00502
00503 int sum_npes = numTransPes + numGridPes;
00504 int max_npes = (numTransPes > numGridPes)?numTransPes:numGridPes;
00505
00506 #if USE_TOPOMAP
00507 PatchMap * pmap = PatchMap::Object();
00508
00509 int patch_pes = pmap->numNodesWithPatches();
00510 TopoManager tmgr;
00511 if(tmgr.hasMultipleProcsPerNode())
00512 patch_pes *= 2;
00513
00514 bool done = false;
00515 #ifndef USE_COMM_LIB
00516 if(CkNumPes() > 2*sum_npes + patch_pes) {
00517 done = generateBGLORBPmePeList(transPeMap, numTransPes);
00518 done &= generateBGLORBPmePeList(gridPeMap, numGridPes, transPeMap, numTransPes);
00519 }
00520 else
00521 #endif
00522 if(CkNumPes() > 2 *max_npes + patch_pes) {
00523 done = generateBGLORBPmePeList(transPeMap, max_npes);
00524 gridPeMap = transPeMap;
00525 }
00526
00527 if (!done)
00528 #endif
00529 {
00530
00531
00532 generatePmePeList2(gridPeMap, numGridPes, transPeMap, numTransPes);
00533 }
00534
00535 #ifdef USE_COMM_LIB
00536 if(CkMyPe() == 0) {
00537 ComlibInstanceHandle cinst1 = CkCreateComlibInstance();
00538 EachToManyMulticastStrategy *strat = new
00539 EachToManyMulticastStrategy(USE_DIRECT, numGridPes,
00540 gridPeMap, numTransPes, transPeMap);
00541 cinst1.setStrategy(strat);
00542
00543 ComlibInstanceHandle cinst2 = CkCreateComlibInstance();
00544 strat = new EachToManyMulticastStrategy(USE_DIRECT, numTransPes, transPeMap
00545 , numGridPes, gridPeMap);
00546 cinst2.setStrategy(strat);
00547 ComlibDoneCreating();
00548 }
00549 #endif
00550
00551 myGridPe = -1;
00552 int i = 0;
00553 for ( i=0; i<CkNumPes(); ++i )
00554 isPmeFlag[i] = 0;
00555 for ( i=0; i<numGridPes; ++i ) {
00556 if ( gridPeMap[i] == CkMyPe() ) myGridPe = i;
00557 isPmeFlag[gridPeMap[i]] |= 1;
00558 }
00559 myTransPe = -1;
00560 for ( i=0; i<numTransPes; ++i ) {
00561 if ( transPeMap[i] == CkMyPe() ) myTransPe = i;
00562 isPmeFlag[transPeMap[i]] |= 2;
00563 }
00564
00565 if ( ! CkMyPe() ) {
00566 iout << iINFO << "PME GRID LOCATIONS:";
00567 int i;
00568 for ( i=0; i<numGridPes && i<10; ++i ) {
00569 iout << " " << gridPeMap[i];
00570 }
00571 if ( i < numGridPes ) iout << " ...";
00572 iout << "\n" << endi;
00573 iout << iINFO << "PME TRANS LOCATIONS:";
00574 for ( i=0; i<numTransPes && i<10; ++i ) {
00575 iout << " " << transPeMap[i];
00576 }
00577 if ( i < numTransPes ) iout << " ...";
00578 iout << "\n" << endi;
00579 }
00580
00581 }
00582
00583 myGrid.K1 = simParams->PMEGridSizeX;
00584 myGrid.K2 = simParams->PMEGridSizeY;
00585 myGrid.K3 = simParams->PMEGridSizeZ;
00586 myGrid.order = simParams->PMEInterpOrder;
00587 myGrid.dim2 = myGrid.K2;
00588 myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
00589
00590 if ( ! usePencils ) {
00591 myGrid.block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
00592 myGrid.block2 = ( myGrid.K2 + numTransPes - 1 ) / numTransPes;
00593 myGrid.block3 = myGrid.dim3 / 2;
00594 }
00595
00596 if ( usePencils ) {
00597 myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
00598 myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
00599 myGrid.block3 = ( myGrid.K3/2 + 1 + zBlocks - 1 ) / zBlocks;
00600
00601 if ( CkMyPe() == 0 ) {
00602 int basepe = 0; int npe = CkNumPes();
00603 if ( npe > xBlocks*yBlocks &&
00604 npe > xBlocks*zBlocks &&
00605 npe > yBlocks*zBlocks ) {
00606
00607 ++basepe;
00608 --npe;
00609 }
00610
00611 zPencil = CProxy_PmeZPencil::ckNew();
00612 yPencil = CProxy_PmeYPencil::ckNew();
00613 xPencil = CProxy_PmeXPencil::ckNew();
00614
00615
00616 int i;
00617 int ncpus = CkNumPes();
00618
00619
00620 int npow2 = 1; int nbits = 0;
00621 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00622
00623
00624 SortableResizeArray<int> patches, nopatches, pmeprocs;
00625 PatchMap *pmap = PatchMap::Object();
00626 i = 0;
00627 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00628 int ri;
00629 for ( ri = ncpus; ri >= ncpus; ++i ) {
00630 ri = 0;
00631 int pow2 = 1;
00632 int rpow2 = npow2 / 2;
00633 for ( int j=0; j<nbits; ++j ) {
00634 ri += rpow2 * ( ( i / pow2 ) % 2 );
00635 pow2 *= 2; rpow2 /= 2;
00636 }
00637 }
00638
00639 if ( ri ) {
00640 if ( pmap->numPatchesOnNode(ri) ) patches.add(ri);
00641 else nopatches.add(ri);
00642 }
00643 }
00644
00645
00646 int useZero = 0;
00647 int npens = xBlocks*yBlocks;
00648 if ( npens % ncpus == 0 ) useZero = 1;
00649 if ( npens == nopatches.size() + 1 ) useZero = 1;
00650 npens += xBlocks*zBlocks;
00651 if ( npens % ncpus == 0 ) useZero = 1;
00652 if ( npens == nopatches.size() + 1 ) useZero = 1;
00653 npens += yBlocks*zBlocks;
00654 if ( npens % ncpus == 0 ) useZero = 1;
00655 if ( npens == nopatches.size() + 1 ) useZero = 1;
00656
00657
00658 for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00659 if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00660 for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00661 if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00662
00663 int pe = 0;
00664 int npes = pmeprocs.size();
00665 SortableResizeArray<int> zprocs(xBlocks*yBlocks);
00666 for ( i=0; i<xBlocks*yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00667 zprocs.sort();
00668 SortableResizeArray<int> yprocs(xBlocks*zBlocks);
00669 for ( i=0; i<xBlocks*zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00670 yprocs.sort();
00671 SortableResizeArray<int> xprocs(yBlocks*zBlocks);
00672 for ( i=0; i<yBlocks*zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00673 xprocs.sort();
00674
00675 pencilPMEProcessors = new char [CkNumPes()];
00676 memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00677
00678 int x,y,z;
00679
00680 iout << iINFO << "PME Z PENCIL LOCATIONS:";
00681 for ( i=0; i<zprocs.size() && i<10; ++i ) {
00682 iout << " " << zprocs[i];
00683 }
00684 if ( i < zprocs.size() ) iout << " ...";
00685 iout << "\n" << endi;
00686
00687 for (pe=0, x = 0; x < xBlocks; ++x)
00688 for (y = 0; y < yBlocks; ++y, ++pe ) {
00689 zPencil(x,y,0).insert(zprocs[pe]);
00690 pencilPMEProcessors[zprocs[pe]] = 1;
00691 }
00692 zPencil.doneInserting();
00693
00694 iout << iINFO << "PME Y PENCIL LOCATIONS:";
00695 for ( i=0; i<yprocs.size() && i<10; ++i ) {
00696 iout << " " << yprocs[i];
00697 }
00698 if ( i < yprocs.size() ) iout << " ...";
00699 iout << "\n" << endi;
00700
00701 for (pe=0, z = 0; z < zBlocks; ++z )
00702 for (x = 0; x < xBlocks; ++x, ++pe ) {
00703 yPencil(x,0,z).insert(yprocs[pe]);
00704 pencilPMEProcessors[yprocs[pe]] = 1;
00705 }
00706 yPencil.doneInserting();
00707
00708 iout << iINFO << "PME X PENCIL LOCATIONS:";
00709 for ( i=0; i<xprocs.size() && i<10; ++i ) {
00710 iout << " " << xprocs[i];
00711 }
00712 if ( i < xprocs.size() ) iout << " ...";
00713 iout << "\n" << endi;
00714
00715 for (pe=0, y = 0; y < yBlocks; ++y )
00716 for (z = 0; z < zBlocks; ++z, ++pe ) {
00717 xPencil(0,y,z).insert(xprocs[pe]);
00718 pencilPMEProcessors[xprocs[pe]] = 1;
00719 }
00720 xPencil.doneInserting();
00721
00722 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00723 PmePencilInitMsgData msgdata;
00724 msgdata.grid = myGrid;
00725 msgdata.xBlocks = xBlocks;
00726 msgdata.yBlocks = yBlocks;
00727 msgdata.zBlocks = zBlocks;
00728 msgdata.xPencil = xPencil;
00729 msgdata.yPencil = yPencil;
00730 msgdata.zPencil = zPencil;
00731 msgdata.pmeProxy = pmeProxyDir;
00732 xPencil.init(new PmePencilInitMsg(msgdata));
00733 yPencil.init(new PmePencilInitMsg(msgdata));
00734 zPencil.init(new PmePencilInitMsg(msgdata));
00735 }
00736 return;
00737 }
00738
00739
00740 int pe;
00741 int nx = 0;
00742 for ( pe = 0; pe < numGridPes; ++pe ) {
00743 localInfo[pe].x_start = nx;
00744 nx += myGrid.block1;
00745 if ( nx > myGrid.K1 ) nx = myGrid.K1;
00746 localInfo[pe].nx = nx - localInfo[pe].x_start;
00747 }
00748 int ny = 0;
00749 for ( pe = 0; pe < numTransPes; ++pe ) {
00750 localInfo[pe].y_start_after_transpose = ny;
00751 ny += myGrid.block2;
00752 if ( ny > myGrid.K2 ) ny = myGrid.K2;
00753 localInfo[pe].ny_after_transpose =
00754 ny - localInfo[pe].y_start_after_transpose;
00755 }
00756
00757 {
00758
00759 PatchMap *patchMap = PatchMap::Object();
00760 Lattice lattice = simParams->lattice;
00761 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00762 BigReal cutoff = simParams->cutoff;
00763 BigReal patchdim = simParams->patchDimension;
00764 int numPatches = patchMap->numPatches();
00765 int numNodes = CkNumPes();
00766 int *source_flags = new int[numNodes];
00767 int node;
00768 for ( node=0; node<numNodes; ++node ) {
00769 source_flags[node] = 0;
00770 recipPeDest[node] = 0;
00771 }
00772
00773
00774
00775
00776
00777
00778
00779 for ( int pid=0; pid < numPatches; ++pid ) {
00780 int pnode = patchMap->node(pid);
00781 BigReal minx = patchMap->min_a(pid);
00782 BigReal maxx = patchMap->max_a(pid);
00783 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00784
00785 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00786 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00787 for ( int i=min1; i<=max1; ++i ) {
00788 int ix = i;
00789 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00790 while ( ix < 0 ) ix += myGrid.K1;
00791
00792 if ( myGridPe >= 0 && ix >= localInfo[myGridPe].x_start &&
00793 ix < localInfo[myGridPe].x_start + localInfo[myGridPe].nx ) {
00794 source_flags[pnode] = 1;
00795 }
00796
00797 if ( pnode == CkMyPe() ) {
00798 recipPeDest[ix / myGrid.block1] = 1;
00799 }
00800 }
00801 }
00802
00803 numSources = 0;
00804 numDestRecipPes = 0;
00805 for ( node=0; node<numNodes; ++node ) {
00806 if ( source_flags[node] ) ++numSources;
00807 if ( recipPeDest[node] ) ++numDestRecipPes;
00808 }
00809
00810 #if 0
00811 if ( numSources ) {
00812 iout << iINFO << "PME " << CkMyPe() << " sources:";
00813 for ( node=0; node<numNodes; ++node ) {
00814 if ( source_flags[node] ) iout << " " << node;
00815 }
00816 iout << "\n" << endi;
00817 }
00818 #endif
00819
00820 delete [] source_flags;
00821
00822
00823
00824
00825 }
00826
00827 ungrid_count = numDestRecipPes;
00828
00829 sendTransBarrier_received = 0;
00830
00831 if ( myGridPe < 0 && myTransPe < 0 ) return;
00832
00833
00834 if ( myTransPe >= 0 ) {
00835 int k2_start = localInfo[myTransPe].y_start_after_transpose;
00836 int k2_end = k2_start + localInfo[myTransPe].ny_after_transpose;
00837 myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
00838 }
00839
00840 int local_size = myGrid.block1 * myGrid.K2 * myGrid.dim3;
00841 int local_size_2 = myGrid.block2 * myGrid.K1 * myGrid.dim3;
00842 if ( local_size < local_size_2 ) local_size = local_size_2;
00843 qgrid = new float[local_size*numGrids];
00844 if ( numGridPes > 1 || numTransPes > 1 ) {
00845 kgrid = new float[local_size*numGrids];
00846 } else {
00847 kgrid = qgrid;
00848 }
00849 qgrid_size = local_size;
00850
00851 if ( myGridPe >= 0 ) {
00852 qgrid_start = localInfo[myGridPe].x_start * myGrid.K2 * myGrid.dim3;
00853 qgrid_len = localInfo[myGridPe].nx * myGrid.K2 * myGrid.dim3;
00854 fgrid_start = localInfo[myGridPe].x_start * myGrid.K2;
00855 fgrid_len = localInfo[myGridPe].nx * myGrid.K2;
00856 }
00857
00858 int n[3]; n[0] = myGrid.K1; n[1] = myGrid.K2; n[2] = myGrid.K3;
00859
00860 #ifdef NAMD_FFTW
00861 CmiLock(fftw_plan_lock);
00862
00863 work = new fftw_complex[n[0]];
00864
00865 if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps. 1..." << endi;
00866 if ( myGridPe >= 0 ) {
00867 forward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_REAL_TO_COMPLEX,
00868 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00869 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
00870 }
00871 if ( ! CkMyPe() ) iout << " 2..." << endi;
00872 if ( myTransPe >= 0 ) {
00873 forward_plan_x = fftw_create_plan_specific(n[0], FFTW_REAL_TO_COMPLEX,
00874 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00875 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
00876 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
00877 }
00878 if ( ! CkMyPe() ) iout << " 3..." << endi;
00879 if ( myTransPe >= 0 ) {
00880 backward_plan_x = fftw_create_plan_specific(n[0], FFTW_COMPLEX_TO_REAL,
00881 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00882 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
00883 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
00884 }
00885 if ( ! CkMyPe() ) iout << " 4..." << endi;
00886 if ( myGridPe >= 0 ) {
00887 backward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_COMPLEX_TO_REAL,
00888 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00889 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
00890 }
00891 if ( ! CkMyPe() ) iout << " Done.\n" << endi;
00892
00893 CmiUnlock(fftw_plan_lock);
00894 #else
00895 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00896 #endif
00897
00898 if ( myGridPe >= 0 && numSources == 0 )
00899 NAMD_bug("PME grid elements exist without sources.");
00900 grid_count = numSources;
00901 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
00902 trans_count = numGridPes;
00903 }
00904
00905
00906 void ComputePmeMgr::initialize_pencils(CkQdMsg *msg) {
00907 delete msg;
00908 if ( ! usePencils ) return;
00909
00910 SimParameters *simParams = Node::Object()->simParameters;
00911
00912 PatchMap *patchMap = PatchMap::Object();
00913 Lattice lattice = simParams->lattice;
00914 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00915 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00916 BigReal cutoff = simParams->cutoff;
00917 BigReal patchdim = simParams->patchDimension;
00918 int numPatches = patchMap->numPatches();
00919
00920 pencilActive = new char[xBlocks*yBlocks];
00921 for ( int i=0; i<xBlocks; ++i ) {
00922 for ( int j=0; j<yBlocks; ++j ) {
00923 pencilActive[i*yBlocks+j] = 0;
00924 }
00925 }
00926
00927 for ( int pid=0; pid < numPatches; ++pid ) {
00928 int pnode = patchMap->node(pid);
00929 if ( pnode != CkMyPe() ) continue;
00930
00931 BigReal minx = patchMap->min_a(pid);
00932 BigReal maxx = patchMap->max_a(pid);
00933 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00934
00935 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00936 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00937
00938 BigReal miny = patchMap->min_b(pid);
00939 BigReal maxy = patchMap->max_b(pid);
00940 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00941
00942 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00943 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00944
00945 for ( int i=min1; i<=max1; ++i ) {
00946 int ix = i;
00947 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00948 while ( ix < 0 ) ix += myGrid.K1;
00949 for ( int j=min2; j<=max2; ++j ) {
00950 int jy = j;
00951 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00952 while ( jy < 0 ) jy += myGrid.K2;
00953 pencilActive[(ix / myGrid.block1)*yBlocks + (jy / myGrid.block2)] = 1;
00954 }
00955 }
00956 }
00957
00958 numPencilsActive = 0;
00959 for ( int i=0; i<xBlocks; ++i ) {
00960 for ( int j=0; j<yBlocks; ++j ) {
00961 if ( pencilActive[i*yBlocks+j] ) {
00962 ++numPencilsActive;
00963 zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00964 }
00965 }
00966 }
00967
00968
00969
00970
00971 ungrid_count = numPencilsActive;
00972 }
00973
00974
00975 void ComputePmeMgr::activate_pencils(CkQdMsg *msg) {
00976 if ( ! usePencils ) return;
00977 if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00978 }
00979
00980
00981 ComputePmeMgr::~ComputePmeMgr() {
00982
00983 #ifdef NAMD_FFTW
00984 if ( CmiMyRank() == 0 ) {
00985 CmiDestroyLock(fftw_plan_lock);
00986 }
00987 #endif
00988
00989 delete myKSpace;
00990 delete [] localInfo;
00991 delete [] gridPeMap;
00992 delete [] transPeMap;
00993 delete [] recipPeDest;
00994 delete [] gridPeOrder;
00995 delete [] transPeOrder;
00996 delete [] isPmeFlag;
00997 delete [] qgrid;
00998 if ( kgrid != qgrid ) delete [] kgrid;
00999 delete [] work;
01000 delete [] gridmsg_reuse;
01001 }
01002
01003 void ComputePmeMgr::sendGrid(void) {
01004 pmeCompute->sendData(numGridPes,gridPeOrder,recipPeDest,gridPeMap);
01005 }
01006
01007 void ComputePmeMgr::recvGrid(PmeGridMsg *msg) {
01008
01009 if ( grid_count == 0 ) {
01010 NAMD_bug("Message order failure in ComputePmeMgr::recvGrid\n");
01011 }
01012 if ( grid_count == numSources ) {
01013 lattice = msg->lattice;
01014 sequence = msg->sequence;
01015 }
01016
01017 int zdim = myGrid.dim3;
01018 int zlistlen = msg->zlistlen;
01019 int *zlist = msg->zlist;
01020 float *qmsg = msg->qgrid;
01021 for ( int g=0; g<numGrids; ++g ) {
01022 char *f = msg->fgrid + fgrid_len * g;
01023 float *q = qgrid + qgrid_size * g;
01024 for ( int i=0; i<fgrid_len; ++i ) {
01025 if ( f[i] ) {
01026 for ( int k=0; k<zlistlen; ++k ) {
01027 q[zlist[k]] += *(qmsg++);
01028 }
01029 }
01030 q += zdim;
01031 }
01032 }
01033
01034 gridmsg_reuse[numSources-grid_count] = msg;
01035 --grid_count;
01036
01037 if ( grid_count == 0 ) {
01038 pmeProxyDir[CkMyPe()].gridCalc1();
01039 if ( useBarrier ) pmeProxyDir[0].sendTransBarrier();
01040 }
01041 }
01042
01043 void ComputePmeMgr::gridCalc1(void) {
01044
01045
01046 #ifdef NAMD_FFTW
01047 for ( int g=0; g<numGrids; ++g ) {
01048 rfftwnd_real_to_complex(forward_plan_yz, localInfo[myGridPe].nx,
01049 qgrid + qgrid_size * g, 1, myGrid.dim2 * myGrid.dim3, 0, 0, 0);
01050 }
01051 #endif
01052
01053 if ( ! useBarrier ) pmeProxyDir[CkMyPe()].sendTrans();
01054 }
01055
01056 void ComputePmeMgr::sendTransBarrier(void) {
01057 sendTransBarrier_received += 1;
01058
01059 if ( sendTransBarrier_received < numGridPes ) return;
01060 sendTransBarrier_received = 0;
01061 for ( int i=0; i<numGridPes; ++i ) {
01062 pmeProxyDir[gridPeMap[i]].sendTrans();
01063 }
01064 }
01065
01066 void ComputePmeMgr::sendTrans(void) {
01067
01068
01069
01070 int zdim = myGrid.dim3;
01071 int nx = localInfo[myGridPe].nx;
01072 int x_start = localInfo[myGridPe].x_start;
01073 int slicelen = myGrid.K2 * zdim;
01074
01075 #ifdef USE_COMM_LIB
01076 ComlibInstanceHandle cinst1 = CkGetComlibInstance(0);
01077 cinst1.beginIteration();
01078 #endif
01079
01080 #if CMK_BLUEGENEL
01081 CmiNetworkProgressAfter (0);
01082 #endif
01083
01084 for (int j=0; j<numTransPes; j++) {
01085 int pe = transPeOrder[j];
01086 LocalPmeInfo &li = localInfo[pe];
01087 int cpylen = li.ny_after_transpose * zdim;
01088 PmeTransMsg *newmsg = new (nx * cpylen * numGrids,
01089 PRIORITY_SIZE) PmeTransMsg;
01090 newmsg->sourceNode = myGridPe;
01091 newmsg->lattice = lattice;
01092 newmsg->x_start = x_start;
01093 newmsg->nx = nx;
01094 for ( int g=0; g<numGrids; ++g ) {
01095 float *q = qgrid + qgrid_size * g + li.y_start_after_transpose * zdim;
01096 float *qmsg = newmsg->qgrid + nx * cpylen * g;
01097 for ( int x = 0; x < nx; ++x ) {
01098 CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01099 q += slicelen;
01100 qmsg += cpylen;
01101 }
01102 }
01103 newmsg->sequence = sequence;
01104 SET_PRIORITY(newmsg,sequence,PME_TRANS_PRIORITY)
01105 pmeProxy[transPeMap[pe]].recvTrans(newmsg);
01106 }
01107
01108 untrans_count = numTransPes;
01109
01110 #ifdef USE_COMM_LIB
01111 cinst1.endIteration();
01112 #endif
01113 }
01114
01115 void ComputePmeMgr::recvTrans(PmeTransMsg *msg) {
01116
01117 if ( trans_count == numGridPes ) {
01118 lattice = msg->lattice;
01119 sequence = msg->sequence;
01120 }
01121
01122 int zdim = myGrid.dim3;
01123
01124 int ny = localInfo[myTransPe].ny_after_transpose;
01125 int x_start = msg->x_start;
01126 int nx = msg->nx;
01127 for ( int g=0; g<numGrids; ++g ) {
01128 CmiMemcpy((void*)(kgrid + qgrid_size * g + x_start*ny*zdim),
01129 (void*)(msg->qgrid + nx*ny*zdim*g), nx*ny*zdim*sizeof(float));
01130 }
01131
01132 delete msg;
01133 --trans_count;
01134
01135 if ( trans_count == 0 ) {
01136 pmeProxyDir[CkMyPe()].gridCalc2();
01137 }
01138 }
01139
01140 void ComputePmeMgr::gridCalc2(void) {
01141
01142
01143 #if CMK_BLUEGENEL
01144 CmiNetworkProgressAfter (0);
01145 #endif
01146
01147 int zdim = myGrid.dim3;
01148
01149 int ny = localInfo[myTransPe].ny_after_transpose;
01150
01151 for ( int g=0; g<numGrids; ++g ) {
01152
01153 #ifdef NAMD_FFTW
01154 fftw(forward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01155 ny * zdim / 2, 1, work, 1, 0);
01156 #endif
01157
01158
01159 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
01160 recip_evir2[g][0] = myKSpace->compute_energy(kgrid+qgrid_size*g,
01161 lattice, ewaldcof, &(recip_evir2[g][1]));
01162
01163
01164
01165 #ifdef NAMD_FFTW
01166 fftw(backward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01167 ny * zdim / 2, 1, work, 1, 0);
01168 #endif
01169 }
01170
01171 pmeProxyDir[CkMyPe()].sendUntrans();
01172 }
01173
01174 void ComputePmeMgr::sendUntrans(void) {
01175
01176 int zdim = myGrid.dim3;
01177 int y_start = localInfo[myTransPe].y_start_after_transpose;
01178 int ny = localInfo[myTransPe].ny_after_transpose;
01179
01180 #ifdef USE_COMM_LIB
01181 ComlibInstanceHandle cinst2 = CkGetComlibInstance(1);
01182 cinst2.beginIteration();
01183 #endif
01184
01185 #if CMK_BLUEGENEL
01186 CmiNetworkProgressAfter (0);
01187 #endif
01188
01189
01190 for (int j=0; j<numGridPes; j++) {
01191 int pe = gridPeOrder[j];
01192 LocalPmeInfo &li = localInfo[pe];
01193 int x_start =li.x_start;
01194 int nx = li.nx;
01195 PmeUntransMsg *newmsg = new (nx*ny*zdim*numGrids,numGrids,
01196 PRIORITY_SIZE) PmeUntransMsg;
01197 newmsg->sourceNode = myTransPe;
01198 newmsg->y_start = y_start;
01199 newmsg->ny = ny;
01200 for ( int g=0; g<numGrids; ++g ) {
01201 if ( j == 0 ) {
01202 newmsg->evir[g] = recip_evir2[g];
01203 } else {
01204 newmsg->evir[g] = 0.;
01205 }
01206 CmiMemcpy((void*)(newmsg->qgrid+nx*ny*zdim*g),
01207 (void*)(kgrid + qgrid_size*g + x_start*ny*zdim),
01208 nx*ny*zdim*sizeof(float));
01209 }
01210 SET_PRIORITY(newmsg,sequence,PME_UNTRANS_PRIORITY)
01211 pmeProxy[gridPeMap[pe]].recvUntrans(newmsg);
01212 }
01213
01214 #ifdef USE_COMM_LIB
01215 cinst2.endIteration();
01216 #endif
01217
01218 trans_count = numGridPes;
01219 }
01220
01221 void ComputePmeMgr::recvUntrans(PmeUntransMsg *msg) {
01222
01223 if ( untrans_count == numTransPes ) {
01224 for ( int g=0; g<numGrids; ++g ) {
01225 recip_evir[g] = 0.;
01226 }
01227 }
01228
01229 #if CMK_BLUEGENEL
01230 CmiNetworkProgressAfter (0);
01231 #endif
01232
01233 int g;
01234 for ( g=0; g<numGrids; ++g ) {
01235 recip_evir[g] += msg->evir[g];
01236 }
01237
01238 int zdim = myGrid.dim3;
01239
01240 int nx = localInfo[myGridPe].nx;
01241 int y_start = msg->y_start;
01242 int ny = msg->ny;
01243 int slicelen = myGrid.K2 * zdim;
01244 int cpylen = ny * zdim;
01245 for ( g=0; g<numGrids; ++g ) {
01246 float *q = qgrid + qgrid_size * g + y_start * zdim;
01247 float *qmsg = msg->qgrid + nx * cpylen * g;
01248 for ( int x = 0; x < nx; ++x ) {
01249 CmiMemcpy((void*)q, (void*)qmsg, cpylen*sizeof(float));
01250 q += slicelen;
01251 qmsg += cpylen;
01252 }
01253 }
01254
01255 delete msg;
01256 --untrans_count;
01257
01258 if ( untrans_count == 0 ) {
01259 pmeProxyDir[CkMyPe()].gridCalc3();
01260 }
01261 }
01262
01263 void ComputePmeMgr::gridCalc3(void) {
01264
01265
01266
01267 #ifdef NAMD_FFTW
01268 for ( int g=0; g<numGrids; ++g ) {
01269 rfftwnd_complex_to_real(backward_plan_yz, localInfo[myGridPe].nx,
01270 (fftw_complex *) (qgrid + qgrid_size * g),
01271 1, myGrid.dim2 * myGrid.dim3 / 2, 0, 0, 0);
01272 }
01273 #endif
01274
01275 pmeProxyDir[CkMyPe()].sendUngrid();
01276 }
01277
01278 void ComputePmeMgr::sendUngrid(void) {
01279
01280 for ( int j=0; j<numSources; ++j ) {
01281
01282 PmeGridMsg *newmsg = gridmsg_reuse[j];
01283 int pe = newmsg->sourceNode;
01284 if ( j == 0 ) {
01285 for ( int g=0; g<numGrids; ++g ) {
01286 newmsg->evir[g] = recip_evir[g];
01287 }
01288 } else {
01289 for ( int g=0; g<numGrids; ++g ) {
01290 newmsg->evir[g] = 0.;
01291 }
01292 }
01293 int zdim = myGrid.dim3;
01294 int flen = newmsg->len;
01295 int fstart = newmsg->start;
01296 int zlistlen = newmsg->zlistlen;
01297 int *zlist = newmsg->zlist;
01298 float *qmsg = newmsg->qgrid;
01299 for ( int g=0; g<numGrids; ++g ) {
01300 char *f = newmsg->fgrid + fgrid_len * g;
01301 float *q = qgrid + qgrid_size * g + (fstart-fgrid_start) * zdim;
01302 for ( int i=0; i<flen; ++i ) {
01303 if ( f[i] ) {
01304 for ( int k=0; k<zlistlen; ++k ) {
01305 *(qmsg++) = q[zlist[k]];
01306 }
01307 }
01308 q += zdim;
01309 }
01310 }
01311 newmsg->sourceNode = myGridPe;
01312
01313 SET_PRIORITY(newmsg,sequence,PME_UNGRID_PRIORITY)
01314 pmeProxyDir[pe].recvUngrid(newmsg);
01315 }
01316 grid_count = numSources;
01317 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
01318 }
01319
01320 void ComputePmeMgr::recvUngrid(PmeGridMsg *msg) {
01321
01322 if ( ungrid_count == 0 ) {
01323 NAMD_bug("Message order failure in ComputePmeMgr::recvUngrid\n");
01324 }
01325
01326 if ( usePencils ) pmeCompute->copyPencils(msg);
01327 else pmeCompute->copyResults(msg);
01328 delete msg;
01329 --ungrid_count;
01330
01331 if ( ungrid_count == 0 ) {
01332 pmeProxyDir[CkMyPe()].ungridCalc();
01333 }
01334 }
01335
01336 void ComputePmeMgr::ungridCalc(void) {
01337
01338
01339 pmeCompute->ungridForces();
01340
01341 ungrid_count = (usePencils ? numPencilsActive : numDestRecipPes );
01342 }
01343
01344
01345 ComputePme::ComputePme(ComputeID c) :
01346 ComputeHomePatches(c)
01347 {
01348 DebugM(4,"ComputePme created.\n");
01349
01350 basePriority = PME_PRIORITY;
01351
01352 CProxy_ComputePmeMgr::ckLocalBranch(
01353 CkpvAccess(BOCclass_group).computePmeMgr)->setCompute(this);
01354
01355 useAvgPositions = 1;
01356
01357 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
01358
01359 SimParameters *simParams = Node::Object()->simParameters;
01360
01361 alchFepOn = simParams->alchFepOn;
01362 alchThermIntOn = simParams->alchThermIntOn;
01363 alchDecouple = (alchFepOn || alchThermIntOn) && (simParams->alchDecouple);
01364 alchElecLambdaStart = (alchFepOn || alchThermIntOn) ?
01365 simParams->alchElecLambdaStart : 0;
01366
01367 if (alchFepOn || alchThermIntOn) {
01368 numGrids = 2;
01369 if (alchDecouple) numGrids += 2;
01370 if (alchElecLambdaStart || alchThermIntOn) numGrids ++;
01371 }
01372 else numGrids = 1;
01373 lesOn = simParams->lesOn;
01374 if ( lesOn ) {
01375 lesFactor = simParams->lesFactor;
01376 numGrids = lesFactor;
01377 }
01378 selfOn = 0;
01379 pairOn = simParams->pairInteractionOn;
01380 if ( pairOn ) {
01381 selfOn = simParams->pairInteractionSelf;
01382 if ( selfOn ) pairOn = 0;
01383 numGrids = selfOn ? 1 : 3;
01384 }
01385
01386 myGrid.K1 = simParams->PMEGridSizeX;
01387 myGrid.K2 = simParams->PMEGridSizeY;
01388 myGrid.K3 = simParams->PMEGridSizeZ;
01389 myGrid.order = simParams->PMEInterpOrder;
01390 myGrid.dim2 = myGrid.K2;
01391 myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
01392 qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
01393 fsize = myGrid.K1 * myGrid.dim2;
01394 q_arr = new double*[fsize*numGrids];
01395 memset( (void*) q_arr, 0, fsize*numGrids * sizeof(double*) );
01396 f_arr = new char[fsize*numGrids];
01397 fz_arr = new char[myGrid.K3];
01398 }
01399
01400 ComputePme::~ComputePme()
01401 {
01402 for (int i=0; i<fsize*numGrids; ++i) {
01403 if ( q_arr[i] ) {
01404 delete [] q_arr[i];
01405 }
01406 }
01407 delete [] q_arr;
01408 delete [] f_arr;
01409 delete [] fz_arr;
01410 }
01411
01412 void ComputePme::doWork()
01413 {
01414 DebugM(4,"Entering ComputePme::doWork().\n");
01415
01416 #ifdef TRACE_COMPUTE_OBJECTS
01417 double traceObjStartTime = CmiWallTimer();
01418 #endif
01419
01420 ResizeArrayIter<PatchElem> ap(patchList);
01421
01422
01423 if ( ! patchList[0].p->flags.doFullElectrostatics )
01424 {
01425 for (ap = ap.begin(); ap != ap.end(); ap++) {
01426 CompAtom *x = (*ap).positionBox->open();
01427 Results *r = (*ap).forceBox->open();
01428 (*ap).positionBox->close(&x);
01429 (*ap).forceBox->close(&r);
01430 }
01431 reduction->submit();
01432 return;
01433 }
01434
01435
01436 numLocalAtoms = 0;
01437 for (ap = ap.begin(); ap != ap.end(); ap++) {
01438 numLocalAtoms += (*ap).p->getNumAtoms();
01439 }
01440
01441 Lattice lattice = patchList[0].p->flags.lattice;
01442
01443 localData = new PmeParticle[numLocalAtoms*(numGrids+
01444 ((numGrids>1 || selfOn)?1:0))];
01445 localPartition = new char[numLocalAtoms];
01446
01447 int g;
01448 for ( g=0; g<numGrids; ++g ) {
01449 localGridData[g] = localData + numLocalAtoms*(g+1);
01450 }
01451
01452
01453 PmeParticle * data_ptr = localData;
01454 char * part_ptr = localPartition;
01455 const BigReal coloumb_sqrt = sqrt( COLOUMB * ComputeNonbondedUtil::scaling
01456 * ComputeNonbondedUtil::dielectric_1 );
01457
01458 for (ap = ap.begin(); ap != ap.end(); ap++) {
01459 #ifdef NETWORK_PROGRESS
01460 CmiNetworkProgress();
01461 #endif
01462
01463 CompAtom *x = (*ap).positionBox->open();
01464
01465 if ( patchList[0].p->flags.doMolly ) {
01466 (*ap).positionBox->close(&x);
01467 x = (*ap).avgPositionBox->open();
01468 }
01469 int numAtoms = (*ap).p->getNumAtoms();
01470
01471 for(int i=0; i<numAtoms; ++i)
01472 {
01473 data_ptr->x = x[i].position.x;
01474 data_ptr->y = x[i].position.y;
01475 data_ptr->z = x[i].position.z;
01476 data_ptr->cg = coloumb_sqrt * x[i].charge;
01477 ++data_ptr;
01478 *part_ptr = x[i].partition;
01479 ++part_ptr;
01480 }
01481
01482 if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
01483 else { (*ap).positionBox->close(&x); }
01484 }
01485
01486
01487 if ( ((alchFepOn || alchThermIntOn) && (!alchDecouple)) || lesOn ) {
01488 for ( g=0; g<numGrids; ++g ) {
01489 #ifdef NETWORK_PROGRESS
01490 CmiNetworkProgress();
01491 #endif
01492
01493 PmeParticle *lgd = localGridData[g];
01494 int nga = 0;
01495 for(int i=0; i<numLocalAtoms; ++i) {
01496 if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
01497
01498
01499
01500 lgd[nga++] = localData[i];
01501 }
01502 }
01503 numGridAtoms[g] = nga;
01504 }
01505 } else if ( (alchFepOn || alchThermIntOn) && alchDecouple) {
01506
01507
01508
01509
01510
01511
01512 for ( g=0; g<2; ++g ) {
01513 #ifdef NETWORK_PROGRESS
01514 CmiNetworkProgress();
01515 #endif
01516
01517 PmeParticle *lgd = localGridData[g];
01518 int nga = 0;
01519 for(int i=0; i<numLocalAtoms; ++i) {
01520 if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
01521 lgd[nga++] = localData[i];
01522 }
01523 }
01524 numGridAtoms[g] = nga;
01525 }
01526 for (g=2 ; g<4 ; ++g ) {
01527 #ifdef NETWORK_PROGRESS
01528 CmiNetworkProgress();
01529 #endif
01530
01531 PmeParticle *lgd = localGridData[g];
01532 int nga = 0;
01533 for(int i=0; i<numLocalAtoms; ++i) {
01534 if ( localPartition[i] == (g-1) ) {
01535 lgd[nga++] = localData[i];
01536 }
01537 }
01538 numGridAtoms[g] = nga;
01539 }
01540 for (g=4 ; g<numGrids ; ++g ) {
01541
01542 #ifdef NETWORK_PROGRESS
01543 CmiNetworkProgress();
01544 #endif
01545
01546 PmeParticle *lgd = localGridData[g];
01547 int nga = 0;
01548 for(int i=0; i<numLocalAtoms; ++i) {
01549 if ( localPartition[i] == 0 ) {
01550 lgd[nga++] = localData[i];
01551 }
01552 }
01553 numGridAtoms[g] = nga;
01554 }
01555 } else if ( selfOn ) {
01556 if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 1 failed");
01557 g = 0;
01558 PmeParticle *lgd = localGridData[g];
01559 int nga = 0;
01560 for(int i=0; i<numLocalAtoms; ++i) {
01561 if ( localPartition[i] == 1 ) {
01562 lgd[nga++] = localData[i];
01563 }
01564 }
01565 numGridAtoms[g] = nga;
01566 } else if ( pairOn ) {
01567 if ( numGrids != 3 ) NAMD_bug("ComputePme::doWork assertion 2 failed");
01568 g = 0;
01569 PmeParticle *lgd = localGridData[g];
01570 int nga = 0;
01571 for(int i=0; i<numLocalAtoms; ++i) {
01572 if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
01573 lgd[nga++] = localData[i];
01574 }
01575 }
01576 numGridAtoms[g] = nga;
01577 for ( g=1; g<3; ++g ) {
01578 PmeParticle *lgd = localGridData[g];
01579 int nga = 0;
01580 for(int i=0; i<numLocalAtoms; ++i) {
01581 if ( localPartition[i] == g ) {
01582 lgd[nga++] = localData[i];
01583 }
01584 }
01585 numGridAtoms[g] = nga;
01586 }
01587 } else {
01588 if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 3 failed");
01589 localGridData[0] = localData;
01590 numGridAtoms[0] = numLocalAtoms;
01591 }
01592
01593 memset( (void*) fz_arr, 0, myGrid.K3 * sizeof(char) );
01594
01595
01596 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
01597 for ( g=0; g<numGrids; ++g ) {
01598 #ifdef NETWORK_PROGRESS
01599 CmiNetworkProgress();
01600 #endif
01601
01602 evir[g] = 0;
01603 BigReal selfEnergy = 0;
01604 data_ptr = localGridData[g];
01605 int i;
01606 for(i=0; i<numGridAtoms[g]; ++i)
01607 {
01608 selfEnergy += data_ptr->cg * data_ptr->cg;
01609 ++data_ptr;
01610 }
01611 selfEnergy *= -1. * ewaldcof / SQRT_PI;
01612 evir[g][0] += selfEnergy;
01613
01614 double **q = q_arr + g*fsize;
01615 for (i=0; i<fsize; ++i) {
01616 if ( q[i] ) {
01617 memset( (void*) (q[i]), 0, myGrid.dim3 * sizeof(double) );
01618 }
01619 }
01620
01621 char *f = f_arr + g*fsize;
01622 memset( (void*) f, 0, fsize * sizeof(char) );
01623 myRealSpace[g] = new PmeRealSpace(myGrid,numGridAtoms[g]);
01624 scale_coordinates(localGridData[g], numGridAtoms[g], lattice, myGrid);
01625 myRealSpace[g]->fill_charges(q, f, fz_arr, localGridData[g]);
01626 }
01627
01628 #ifdef TRACE_COMPUTE_OBJECTS
01629 traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
01630 #endif
01631
01632 if ( myMgr->usePencils ) {
01633 sendPencils();
01634 } else {
01635 #if 0
01636 CProxy_ComputePmeMgr pmeProxy(CkpvAccess(BOCclass_group).computePmeMgr);
01637 pmeProxy[CkMyPe()].sendGrid();
01638 #else
01639 sendData(myMgr->numGridPes,myMgr->gridPeOrder,
01640 myMgr->recipPeDest,myMgr->gridPeMap);
01641 #endif
01642 }
01643
01644 if ( myMgr->ungrid_count == 0 ) {
01645 myMgr->pmeProxyDir[CkMyPe()].ungridCalc();
01646 }
01647
01648 }
01649
01650
01651 void ComputePme::sendPencils() {
01652
01653
01654
01655 int xBlocks = myMgr->xBlocks;
01656 int yBlocks = myMgr->yBlocks;
01657 int zBlocks = myMgr->zBlocks;
01658 myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
01659 myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
01660 int K1 = myGrid.K1;
01661 int K2 = myGrid.K2;
01662 int dim2 = myGrid.dim2;
01663 int dim3 = myGrid.dim3;
01664 int block1 = myGrid.block1;
01665 int block2 = myGrid.block2;
01666
01667 Lattice lattice = patchList[0].p->flags.lattice;
01668
01669 resultsRemaining = myMgr->numPencilsActive;
01670 const char *pencilActive = myMgr->pencilActive;
01671
01672 strayChargeErrors = 0;
01673
01674
01675 for (int ib=0; ib<xBlocks; ++ib) {
01676 for (int jb=0; jb<yBlocks; ++jb) {
01677 int ibegin = ib*block1;
01678 int iend = ibegin + block1; if ( iend > K1 ) iend = K1;
01679 int jbegin = jb*block2;
01680 int jend = jbegin + block2; if ( jend > K2 ) jend = K2;
01681 int flen = numGrids * (iend - ibegin) * (jend - jbegin);
01682 int fcount = 0;
01683
01684 for ( int g=0; g<numGrids; ++g ) {
01685 char *f = f_arr + g*fsize;
01686 int fcount_g = 0;
01687 for ( int i=ibegin; i<iend; ++i ) {
01688 for ( int j=jbegin; j<jend; ++j ) {
01689 fcount_g += ( f[i*dim2+j] ? 1 : 0 );
01690 }
01691 }
01692 fcount += fcount_g;
01693 if ( ! pencilActive[ib*yBlocks+jb] ) {
01694 if ( fcount_g ) {
01695 ++strayChargeErrors;
01696 iout << iERROR << "Stray PME grid charges detected: "
01697 << CkMyPe() << " sending to (x,y)";
01698 for ( int i=ibegin; i<iend; ++i ) {
01699 for ( int j=jbegin; j<jend; ++j ) {
01700 if ( f[i*dim2+j] ) { iout << " (" << i << "," << j << ")"; }
01701 }
01702 }
01703 iout << "\n" << endi;
01704 }
01705 }
01706 }
01707
01708 #ifdef NETWORK_PROGRESS
01709 CmiNetworkProgress();
01710 #endif
01711
01712 if ( ! pencilActive[ib*yBlocks+jb] ) continue;
01713
01714 int zlistlen = 0;
01715 for ( int i=0; i<myGrid.K3; ++i ) {
01716 if ( fz_arr[i] ) ++zlistlen;
01717 }
01718
01719 int hd = ( fcount? 1 : 0 );
01720
01721
01722 PmeGridMsg *msg = new (hd*fcount*zlistlen, hd*zlistlen, hd*flen,
01723 hd*numGrids, PRIORITY_SIZE) PmeGridMsg;
01724 msg->sourceNode = CkMyPe();
01725 msg->hasData = hd;
01726 msg->lattice = lattice;
01727 if ( hd ) {
01728 #if 0
01729 msg->start = fstart;
01730 msg->len = flen;
01731 #else
01732 msg->start = -1;
01733 msg->len = -1;
01734 #endif
01735 msg->zlistlen = zlistlen;
01736 int *zlist = msg->zlist;
01737 zlistlen = 0;
01738 for ( int i=0; i<myGrid.K3; ++i ) {
01739 if ( fz_arr[i] ) zlist[zlistlen++] = i;
01740 }
01741 char *fmsg = msg->fgrid;
01742 float *qmsg = msg->qgrid;
01743 for ( int g=0; g<numGrids; ++g ) {
01744 char *f = f_arr + g*fsize;
01745 double **q = q_arr + g*fsize;
01746 for ( int i=ibegin; i<iend; ++i ) {
01747 for ( int j=jbegin; j<jend; ++j ) {
01748 *(fmsg++) = f[i*dim2+j];
01749 if( f[i*dim2+j] ) {
01750 for ( int k=0; k<zlistlen; ++k ) {
01751 *(qmsg++) = q[i*dim2+j][zlist[k]];
01752 }
01753 }
01754 }
01755 }
01756 }
01757 } else {
01758 myMgr->ungrid_count--;
01759 }
01760
01761 msg->sequence = sequence();
01762 SET_PRIORITY(msg,sequence(),PME_GRID_PRIORITY)
01763 myMgr->zPencil(ib,jb,0).recvGrid(msg);
01764 }
01765 }
01766
01767
01768
01769
01770
01771 for (int i=0; i<fsize; ++i) {
01772 if ( q_arr[i] ) {
01773 memset( (void*) (q_arr[i]), -1, myGrid.dim3 * sizeof(double) );
01774 }
01775 }
01776
01777 }
01778
01779
01780 void ComputePme::copyPencils(PmeGridMsg *msg) {
01781
01782 int xBlocks = myMgr->xBlocks;
01783 int yBlocks = myMgr->yBlocks;
01784 int zBlocks = myMgr->zBlocks;
01785 myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
01786 myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
01787 int K1 = myGrid.K1;
01788 int K2 = myGrid.K2;
01789 int dim2 = myGrid.dim2;
01790 int dim3 = myGrid.dim3;
01791 int block1 = myGrid.block1;
01792 int block2 = myGrid.block2;
01793
01794
01795 int ib = msg->sourceNode / yBlocks;
01796 int jb = msg->sourceNode % yBlocks;
01797
01798 int ibegin = ib*block1;
01799 int iend = ibegin + block1; if ( iend > K1 ) iend = K1;
01800 int jbegin = jb*block2;
01801 int jend = jbegin + block2; if ( jend > K2 ) jend = K2;
01802
01803 int zlistlen = msg->zlistlen;
01804 int *zlist = msg->zlist;
01805 float *qmsg = msg->qgrid;
01806 int g;
01807 for ( g=0; g<numGrids; ++g ) {
01808 evir[g] += msg->evir[g];
01809 char *f = f_arr + g*fsize;
01810 double **q = q_arr + g*fsize;
01811 for ( int i=ibegin; i<iend; ++i ) {
01812 for ( int j=jbegin; j<jend; ++j ) {
01813 if( f[i*dim2+j] ) {
01814 for ( int k=0; k<zlistlen; ++k ) {
01815 q[i*dim2+j][zlist[k]] = *(qmsg++);
01816 }
01817 }
01818 }
01819 }
01820 }
01821 }
01822
01823
01824 void ComputePme::sendData(int numRecipPes, int *recipPeOrder,
01825 int *recipPeDest, int *gridPeMap) {
01826
01827
01828
01829 myGrid.block1 = ( myGrid.K1 + numRecipPes - 1 ) / numRecipPes;
01830 myGrid.block2 = ( myGrid.K2 + numRecipPes - 1 ) / numRecipPes;
01831 bsize = myGrid.block1 * myGrid.dim2 * myGrid.dim3;
01832
01833 Lattice lattice = patchList[0].p->flags.lattice;
01834
01835 resultsRemaining = numRecipPes;
01836
01837 strayChargeErrors = 0;
01838
01839 CProxy_ComputePmeMgr pmeProxy(CkpvAccess(BOCclass_group).computePmeMgr);
01840 for (int j=0; j<numRecipPes; j++) {
01841 int pe = recipPeOrder[j];
01842 int start = pe * bsize;
01843 int len = bsize;
01844 if ( start >= qsize ) { start = 0; len = 0; }
01845 if ( start + len > qsize ) { len = qsize - start; }
01846 int zdim = myGrid.dim3;
01847 int fstart = start / zdim;
01848 int flen = len / zdim;
01849 int fcount = 0;
01850 int i;
01851
01852 int g;
01853 for ( g=0; g<numGrids; ++g ) {
01854 char *f = f_arr + fstart + g*fsize;
01855 int fcount_g = 0;
01856 for ( i=0; i<flen; ++i ) {
01857 fcount_g += ( f[i] ? 1 : 0 );
01858 }
01859 fcount += fcount_g;
01860 if ( ! recipPeDest[pe] ) {
01861 if ( fcount_g ) {
01862 ++strayChargeErrors;
01863 iout << iERROR << "Stray PME grid charges detected: "
01864 << CkMyPe() << " sending to " << gridPeMap[pe] << " for planes";
01865 int iz = -1;
01866 for ( i=0; i<flen; ++i ) {
01867 if ( f[i] ) {
01868 int jz = (i+fstart)/myGrid.K2;
01869 if ( iz != jz ) { iout << " " << jz; iz = jz; }
01870 }
01871 }
01872 iout << "\n" << endi;
01873 }
01874 }
01875 }
01876
01877 #ifdef NETWORK_PROGRESS
01878 CmiNetworkProgress();
01879 #endif
01880
01881 if ( ! recipPeDest[pe] ) continue;
01882
01883 int zlistlen = 0;
01884 for ( i=0; i<myGrid.K3; ++i ) {
01885 if ( fz_arr[i] ) ++zlistlen;
01886 }
01887
01888 PmeGridMsg *msg = new (fcount*zlistlen, zlistlen, flen*numGrids,
01889 numGrids, PRIORITY_SIZE) PmeGridMsg;
01890 msg->sourceNode = CkMyPe();
01891 msg->lattice = lattice;
01892 msg->start = fstart;
01893 msg->len = flen;
01894 msg->zlistlen = zlistlen;
01895 int *zlist = msg->zlist;
01896 zlistlen = 0;
01897 for ( i=0; i<myGrid.K3; ++i ) {
01898 if ( fz_arr[i] ) zlist[zlistlen++] = i;
01899 }
01900 float *qmsg = msg->qgrid;
01901 for ( g=0; g<numGrids; ++g ) {
01902 char *f = f_arr + fstart + g*fsize;
01903 CmiMemcpy((void*)(msg->fgrid+g*flen),(void*)f,flen*sizeof(char));
01904 double **q = q_arr + fstart + g*fsize;
01905 for ( i=0; i<flen; ++i ) {
01906 if ( f[i] ) {
01907 for ( int k=0; k<zlistlen; ++k ) {
01908 *(qmsg++) = q[i][zlist[k]];
01909 }
01910 }
01911 }
01912 }
01913
01914 msg->sequence = sequence();
01915 SET_PRIORITY(msg,sequence(),PME_GRID_PRIORITY)
01916 pmeProxy[gridPeMap[pe]].recvGrid(msg);
01917 }
01918
01919 for (int i=0; i<fsize; ++i) {
01920 if ( q_arr[i] ) {
01921 memset( (void*) (q_arr[i]), -1, myGrid.dim3 * sizeof(double) );
01922 }
01923 }
01924
01925 }
01926
01927 void ComputePme::copyResults(PmeGridMsg *msg) {
01928
01929 int zdim = myGrid.dim3;
01930 int flen = msg->len;
01931 int fstart = msg->start;
01932 int zlistlen = msg->zlistlen;
01933 int *zlist = msg->zlist;
01934 float *qmsg = msg->qgrid;
01935 int g;
01936 for ( g=0; g<numGrids; ++g ) {
01937 evir[g] += msg->evir[g];
01938 char *f = msg->fgrid + g*flen;
01939 double **q = q_arr + fstart + g*fsize;
01940 for ( int i=0; i<flen; ++i ) {
01941 if ( f[i] ) {
01942 for ( int k=0; k<zlistlen; ++k ) {
01943 q[i][zlist[k]] = *(qmsg++);
01944 }
01945 }
01946 }
01947 }
01948 }
01949
01950 void ComputePme::ungridForces() {
01951
01952 SimParameters *simParams = Node::Object()->simParameters;
01953
01954 Vector *localResults = new Vector[numLocalAtoms*
01955 ((numGrids>1 || selfOn)?2:1)];
01956 Vector *gridResults;
01957 if ( alchFepOn || alchThermIntOn || lesOn || selfOn || pairOn ) {
01958 for(int i=0; i<numLocalAtoms; ++i) { localResults[i] = 0.; }
01959 gridResults = localResults + numLocalAtoms;
01960 } else {
01961 gridResults = localResults;
01962 }
01963
01964 Vector pairForce = 0.;
01965 Lattice lattice = patchList[0].p->flags.lattice;
01966 int g = 0;
01967 if(!simParams->commOnly) {
01968 for ( g=0; g<numGrids; ++g ) {
01969 #ifdef NETWORK_PROGRESS
01970 CmiNetworkProgress();
01971 #endif
01972
01973 myRealSpace[g]->compute_forces(q_arr+g*fsize, localGridData[g], gridResults);
01974 delete myRealSpace[g];
01975 scale_forces(gridResults, numGridAtoms[g], lattice);
01976
01977 if ( alchFepOn || alchThermIntOn ) {
01978 double scale = 1.;
01979 elecLambdaUp = (simParams->alchLambda <= alchElecLambdaStart)? 0. : \
01980 (simParams->alchLambda - alchElecLambdaStart) / (1. - alchElecLambdaStart);
01981 elecLambdaDown = ((1-simParams->alchLambda) <= alchElecLambdaStart)? 0. : ((1-simParams->alchLambda) - alchElecLambdaStart) / (1. - alchElecLambdaStart);
01982 if ( g == 0 ) scale = elecLambdaUp;
01983 else if ( g == 1 ) scale = elecLambdaDown;
01984 else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
01985 if (alchDecouple) {
01986 if ( g == 2 ) scale = 1 - elecLambdaUp;
01987 else if ( g == 3 ) scale = 1 - elecLambdaDown;
01988 else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
01989 }
01990 int nga = 0;
01991 if (!alchDecouple) {
01992 for(int i=0; i<numLocalAtoms; ++i) {
01993 if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
01994
01995 localResults[i] += gridResults[nga++] * scale;
01996 }
01997 }
01998 }
01999 else {
02000 if ( g < 2 ) {
02001 for(int i=0; i<numLocalAtoms; ++i) {
02002 if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
02003
02004
02005 localResults[i] += gridResults[nga++] * scale;
02006 }
02007 }
02008 }
02009 else {
02010 for(int i=0; i<numLocalAtoms; ++i) {
02011 if ( localPartition[i] == (g-1) || localPartition[i] == (g-4)) {
02012
02013
02014
02015 localResults[i] += gridResults[nga++] * scale;
02016 }
02017 }
02018 }
02019 }
02020 } else if ( lesOn ) {
02021 double scale = 1.;
02022 if ( alchFepOn ) {
02023 if ( g == 0 ) scale = simParams->alchLambda;
02024 else if ( g == 1 ) scale = 1. - simParams->alchLambda;
02025 } else if ( lesOn ) {
02026 scale = 1.0 / (double)lesFactor;
02027 }
02028 int nga = 0;
02029 for(int i=0; i<numLocalAtoms; ++i) {
02030 if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
02031 localResults[i] += gridResults[nga++] * scale;
02032 }
02033 }
02034 } else if ( selfOn ) {
02035 PmeParticle *lgd = localGridData[g];
02036 int nga = 0;
02037 for(int i=0; i<numLocalAtoms; ++i) {
02038 if ( localPartition[i] == 1 ) {
02039 pairForce += gridResults[nga];
02040 localResults[i] += gridResults[nga++];
02041 }
02042 }
02043 } else if ( pairOn ) {
02044 if ( g == 0 ) {
02045 int nga = 0;
02046 for(int i=0; i<numLocalAtoms; ++i) {
02047 if ( localPartition[i] == 1 ) {
02048 pairForce += gridResults[nga];
02049 }
02050 if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
02051 localResults[i] += gridResults[nga++];
02052 }
02053 }
02054 } else if ( g == 1 ) {
02055 int nga = 0;
02056 for(int i=0; i<numLocalAtoms; ++i) {
02057 if ( localPartition[i] == g ) {
02058 pairForce -= gridResults[nga];
02059 localResults[i] -= gridResults[nga++];
02060 }
02061 }
02062 } else {
02063 int nga = 0;
02064 for(int i=0; i<numLocalAtoms; ++i) {
02065 if ( localPartition[i] == g ) {
02066 localResults[i] -= gridResults[nga++];
02067 }
02068 }
02069 }
02070 }
02071 }
02072 }
02073 else
02074 {
02075 for ( g=0; g<numGrids; ++g ) {delete myRealSpace[g];}
02076 }
02077 delete [] localData;
02078 delete [] localPartition;
02079
02080 Vector *results_ptr = localResults;
02081 ResizeArrayIter<PatchElem> ap(patchList);
02082
02083
02084 for (ap = ap.begin(); ap != ap.end(); ap++) {
02085 Results *r = (*ap).forceBox->open();
02086 Force *f = r->f[Results::slow];
02087 int numAtoms = (*ap).p->getNumAtoms();
02088
02089 if ( ! strayChargeErrors && ! simParams->commOnly ) {
02090 for(int i=0; i<numAtoms; ++i) {
02091 f[i].x += results_ptr->x;
02092 f[i].y += results_ptr->y;
02093 f[i].z += results_ptr->z;
02094 ++results_ptr;
02095 }
02096 }
02097
02098 (*ap).forceBox->close(&r);
02099 }
02100
02101 delete [] localResults;
02102
02103 if ( pairOn || selfOn ) {
02104 ADD_VECTOR_OBJECT(reduction,REDUCTION_PAIR_ELECT_FORCE,pairForce);
02105 }
02106
02107 for ( g=0; g<numGrids; ++g ) {
02108 double scale = 1.;
02109 if ( alchFepOn || alchThermIntOn ) {
02110 if ( g == 0 ) scale = elecLambdaUp;
02111 else if ( g == 1 ) scale = elecLambdaDown;
02112 else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
02113 if (alchDecouple) {
02114 if ( g == 2 ) scale = 1-elecLambdaUp;
02115 else if ( g == 3 ) scale = 1-elecLambdaDown;
02116 else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
02117 }
02118 } else if ( lesOn ) {
02119 scale = 1.0 / (double)lesFactor;
02120 } else if ( pairOn ) {
02121 scale = ( g == 0 ? 1. : -1. );
02122 }
02123 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[g][0] * scale;
02124 reduction->item(REDUCTION_VIRIAL_SLOW_XX) += evir[g][1] * scale;
02125 reduction->item(REDUCTION_VIRIAL_SLOW_XY) += evir[g][2] * scale;
02126 reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += evir[g][3] * scale;
02127 reduction->item(REDUCTION_VIRIAL_SLOW_YX) += evir[g][2] * scale;
02128 reduction->item(REDUCTION_VIRIAL_SLOW_YY) += evir[g][4] * scale;
02129 reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += evir[g][5] * scale;
02130 reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += evir[g][3] * scale;
02131 reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += evir[g][5] * scale;
02132 reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += evir[g][6] * scale;
02133
02134 double scale2 = 0.;
02135
02136
02137
02138 if (alchFepOn) {
02139 BigReal elecLambda2Up = (simParams->alchLambda2 <= alchElecLambdaStart)? 0. : \
02140 (simParams->alchLambda2 - alchElecLambdaStart) / (1. - alchElecLambdaStart);
02141 BigReal elecLambda2Down = ((1-simParams->alchLambda2) <= alchElecLambdaStart)? 0. : \
02142 ((1-simParams->alchLambda2) - alchElecLambdaStart) / (1. - alchElecLambdaStart);
02143
02144
02145 if ( g == 0 ) scale2 = elecLambda2Up;
02146 else if ( g == 1 ) scale2 = elecLambda2Down;
02147 else if ( g == 2 ) scale2 = (elecLambda2Up + elecLambda2Down - 1)*(-1);
02148 if (alchDecouple && g == 2 ) scale2 = 1 - elecLambda2Up;
02149 else if (alchDecouple && g == 3 ) scale2 = 1 - elecLambda2Down;
02150 else if (alchDecouple && g == 4 ) scale2 = (elecLambda2Up + elecLambda2Down - 1)*(-1);
02151 }
02152 reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += evir[g][0] * scale2;
02153
02154 if (alchThermIntOn) {
02155
02156
02157
02158
02159
02160
02161
02162
02163
02164
02165
02166
02167
02168
02169
02170
02171
02172
02173
02174
02175
02176
02177
02178
02179
02180
02181
02182
02183 if ( g == 0 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) += evir[g][0];
02184 if ( g == 1 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) += evir[g][0];
02185 if (!alchDecouple) {
02186 if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
02187 if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
02188 }
02189 else {
02190 if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
02191 if ( g == 3 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
02192 if ( g == 4 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
02193 if ( g == 4 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
02194 }
02195 }
02196 }
02197 reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
02198 reduction->submit();
02199
02200 }
02201
02202 #if USE_TOPOMAP
02203
02204 #define NPRIMES 8
02205 const static unsigned int NAMDPrimes[] = {
02206 3,
02207 5,
02208 7,
02209 11,
02210 13,
02211 17,
02212 19,
02213 23,
02214 29,
02215 31,
02216 37,
02217 59,
02218 73,
02219 93,
02220 113,
02221 157,
02222 307,
02223 617,
02224 1217
02225 };
02226
02227 #include "RecBisection.h"
02228
02229
02230
02231
02232
02233
02234
02235 bool generateBGLORBPmePeList(int *pemap, int numPes,
02236 int *block_pes, int nbpes) {
02237
02238 PatchMap *pmap = PatchMap::Object();
02239 int *pmemap = new int [CkNumPes()];
02240
02241 if (pemap == NULL)
02242 return false;
02243
02244 TopoManager tmgr;
02245
02246 memset(pmemap, 0, sizeof(int) * CkNumPes());
02247
02248 for(int count = 0; count < CkNumPes(); count++) {
02249 if(count < nbpes)
02250 pmemap[block_pes[count]] = 1;
02251
02252 if(pmap->numPatchesOnNode(count)) {
02253 pmemap[count] = 1;
02254
02255
02256 if(tmgr.hasMultipleProcsPerNode()) {
02257 pmemap[(count + CkNumPes()/2)% CkNumPes()] = 1;
02258 }
02259 }
02260 }
02261
02262 if(numPes + nbpes + pmap->numNodesWithPatches() > CkNumPes())
02263
02264 return false;
02265
02266 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02267 Node *node = nd.ckLocalBranch();
02268 SimParameters *simParams = node->simParameters;
02269
02270
02271
02272 int xsize = 0, ysize = 0, zsize = 0;
02273
02274 xsize = tmgr.getDimX();
02275 ysize = tmgr.getDimY();
02276 zsize = tmgr.getDimZ();
02277
02278 int nx = xsize, ny = ysize, nz = zsize;
02279 DimensionMap dm;
02280
02281 dm.x = 0;
02282 dm.y = 1;
02283 dm.z = 2;
02284
02285 findOptimalDimensions(xsize, ysize, zsize, nx, ny, nz, dm);
02286
02287
02288 int group_size = numPes/nx;
02289 if(numPes % nx)
02290 group_size ++;
02291
02292 int my_prime = NAMDPrimes[0];
02293 int density = (ny * nz)/group_size + 1;
02294 int count = 0;
02295
02296
02297 for(count = 0; count < NPRIMES; count ++) {
02298
02299 if(density < NAMDPrimes[count]) {
02300 my_prime = NAMDPrimes[count];
02301 break;
02302 }
02303 }
02304
02305 if(count == NPRIMES)
02306 my_prime = NAMDPrimes[NPRIMES-1];
02307
02308
02309 int gcount = 0;
02310 int npme_pes = 0;
02311
02312 int coord[3];
02313
02314 for(int x = 0; x < nx; x++) {
02315 coord[0] = (x + nx/2)%nx;
02316
02317 for(count=0; count < group_size && npme_pes < numPes; count++) {
02318 int dest = (count + 1) * my_prime;
02319 dest = dest % (ny * nz);
02320
02321 coord[2] = dest / ny;
02322 coord[1] = dest - coord[2] * ny;
02323
02324
02325 int destPe = coord[dm.x] + coord[dm.y] * xsize +
02326 coord[dm.z] * xsize* ysize;
02327
02328 if(pmemap[destPe] == 0) {
02329 pemap[gcount++] = destPe;
02330 pmemap[destPe] = 1;
02331
02332 if(tmgr.hasMultipleProcsPerNode())
02333 pmemap[(destPe + CkNumPes()/2) % CkNumPes()] = 1;
02334
02335 npme_pes ++;
02336 }
02337 else {
02338 for(int pos = 1; pos < ny * nz; pos++) {
02339
02340 coord[2] += pos / ny;
02341 coord[1] += pos % ny;
02342
02343 coord[2] = coord[2] % nz;
02344 coord[1] = coord[1] % ny;
02345
02346 int newdest = coord[dm.x] + coord[dm.y] * xsize +
02347 coord[dm.z] * xsize * ysize;
02348
02349 if(pmemap[newdest] == 0) {
02350 pemap[gcount++] = newdest;
02351 pmemap[newdest] = 1;
02352
02353 if(tmgr.hasMultipleProcsPerNode())
02354 pmemap[(newdest + CkNumPes()/2) % CkNumPes()] = 1;
02355
02356 npme_pes ++;
02357 break;
02358 }
02359 }
02360 }
02361 }
02362
02363 if(gcount == numPes)
02364 gcount = 0;
02365
02366 if(npme_pes >= numPes)
02367 break;
02368 }
02369
02370 delete [] pmemap;
02371
02372 if(npme_pes != numPes)
02373
02374 return false;
02375
02376 return true;
02377 }
02378
02379 #endif
02380
02381 template <class T> class PmePencil : public T {
02382 public:
02383 PmePencil() {
02384 data = 0;
02385 work = 0;
02386 send_order = 0;
02387 needs_reply = 0;
02388 }
02389 ~PmePencil() {
02390 delete [] data;
02391 delete [] work;
02392 delete [] send_order;
02393 delete [] needs_reply;
02394 }
02395 void base_init(PmePencilInitMsg *msg) {
02396 initdata = msg->data;
02397 }
02398 void order_init(int nBlocks) {
02399 send_order = new int[nBlocks];
02400 for ( int i=0; i<nBlocks; ++i ) send_order[i] = i;
02401 Random rand(CkMyPe());
02402 rand.reorder(send_order,nBlocks);
02403 needs_reply = new int[nBlocks];
02404 }
02405 PmePencilInitMsgData initdata;
02406 Lattice lattice;
02407 PmeReduction evir;
02408 int sequence;
02409 int imsg;
02410 int hasData;
02411 float *data;
02412 float *work;
02413 int *send_order;
02414 int *needs_reply;
02415 };
02416
02417 class PmeZPencil : public PmePencil<CBase_PmeZPencil> {
02418 public:
02419 PmeZPencil_SDAG_CODE
02420 PmeZPencil() { __sdag_init(); setMigratable(false); }
02421 PmeZPencil(CkMigrateMessage *) { __sdag_init(); setMigratable (false); }
02422 void fft_init();
02423 void recv_grid(const PmeGridMsg *);
02424 void forward_fft();
02425 void send_trans();
02426 void recv_untrans(const PmeUntransMsg *);
02427 void backward_fft();
02428 void send_ungrid(PmeGridMsg *);
02429 private:
02430 ResizeArray<PmeGridMsg *> grid_msgs;
02431 #ifdef NAMD_FFTW
02432 rfftwnd_plan forward_plan, backward_plan;
02433 #endif
02434 int nx, ny;
02435 };
02436
02437 class PmeYPencil : public PmePencil<CBase_PmeYPencil> {
02438 public:
02439 PmeYPencil_SDAG_CODE
02440 PmeYPencil() { __sdag_init(); setMigratable(false); }
02441 PmeYPencil(CkMigrateMessage *) { __sdag_init(); }
02442 void fft_init();
02443 void recv_trans(const PmeTransMsg *);
02444 void forward_fft();
02445 void send_trans();
02446 void recv_untrans(const PmeUntransMsg *);
02447 void backward_fft();
02448 void send_untrans();
02449 private:
02450 #ifdef NAMD_FFTW
02451 fftw_plan forward_plan, backward_plan;
02452 #endif
02453 int nx, nz;
02454 };
02455
02456 class PmeXPencil : public PmePencil<CBase_PmeXPencil> {
02457 public:
02458 PmeXPencil_SDAG_CODE
02459 PmeXPencil() { __sdag_init(); myKSpace = 0; setMigratable(false); }
02460 PmeXPencil(CkMigrateMessage *) { __sdag_init(); }
02461 void fft_init();
02462 void recv_trans(const PmeTransMsg *);
02463 void forward_fft();
02464 void pme_kspace();
02465 void backward_fft();
02466 void send_untrans();
02467 #ifdef NAMD_FFTW
02468 fftw_plan forward_plan, backward_plan;
02469 #endif
02470 int ny, nz;
02471 PmeKSpace *myKSpace;
02472 };
02473
02474 void PmeZPencil::fft_init() {
02475 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02476 Node *node = nd.ckLocalBranch();
02477 SimParameters *simParams = node->simParameters;
02478
02479 int K1 = initdata.grid.K1;
02480 int K2 = initdata.grid.K2;
02481 int K3 = initdata.grid.K3;
02482 int dim3 = initdata.grid.dim3;
02483 int block1 = initdata.grid.block1;
02484 int block2 = initdata.grid.block2;
02485
02486 nx = block1;
02487 if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
02488 ny = block2;
02489 if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
02490
02491 data = new float[nx*ny*dim3];
02492 work = new float[dim3];
02493
02494 order_init(initdata.zBlocks);
02495
02496 #ifdef NAMD_FFTW
02497 CmiLock(ComputePmeMgr::fftw_plan_lock);
02498
02499 forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
02500 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
02501 | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
02502 backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
02503 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
02504 | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
02505
02506 CmiUnlock(ComputePmeMgr::fftw_plan_lock);
02507 #else
02508 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
02509 #endif
02510 }
02511
02512 void PmeYPencil::fft_init() {
02513 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02514 Node *node = nd.ckLocalBranch();
02515 SimParameters *simParams = node->simParameters;
02516
02517 int K1 = initdata.grid.K1;
02518 int K2 = initdata.grid.K2;
02519 int dim2 = initdata.grid.dim2;
02520 int dim3 = initdata.grid.dim3;
02521 int block1 = initdata.grid.block1;
02522 int block3 = initdata.grid.block3;
02523
02524 nx = block1;
02525 if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
02526 nz = block3;
02527 if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
02528
02529 data = new float[nx*dim2*nz*2];
02530 work = new float[2*K2];
02531
02532 order_init(initdata.yBlocks);
02533
02534 #ifdef NAMD_FFTW
02535 CmiLock(ComputePmeMgr::fftw_plan_lock);
02536
02537 forward_plan = fftw_create_plan_specific(K2, FFTW_FORWARD,
02538 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
02539 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
02540 nz, (fftw_complex *) work, 1);
02541 backward_plan = fftw_create_plan_specific(K2, FFTW_BACKWARD,
02542 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
02543 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
02544 nz, (fftw_complex *) work, 1);
02545
02546 CmiUnlock(ComputePmeMgr::fftw_plan_lock);
02547 #else
02548 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
02549 #endif
02550 }
02551
02552 void PmeXPencil::fft_init() {
02553 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
02554 Node *node = nd.ckLocalBranch();
02555 SimParameters *simParams = node->simParameters;
02556
02557 int K1 = initdata.grid.K1;
02558 int K2 = initdata.grid.K2;
02559 int dim3 = initdata.grid.dim3;
02560 int block2 = initdata.grid.block2;
02561 int block3 = initdata.grid.block3;
02562
02563 ny = block2;
02564 if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
02565 nz = block3;
02566 if ( (thisIndex.z+1)*block3 > dim3/2 ) nz = dim3/2 - thisIndex.z*block3;
02567
02568 data = new float[K1*block2*block3*2];
02569 work = new float[2*K1];
02570
02571 order_init(initdata.xBlocks);
02572
02573 #ifdef NAMD_FFTW
02574 CmiLock(ComputePmeMgr::fftw_plan_lock);
02575
02576 forward_plan = fftw_create_plan_specific(K1, FFTW_FORWARD,
02577 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
02578 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
02579 ny*nz, (fftw_complex *) work, 1);
02580 backward_plan = fftw_create_plan_specific(K1, FFTW_BACKWARD,
02581 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
02582 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) data,
02583 ny*nz, (fftw_complex *) work, 1);
02584
02585 CmiUnlock(ComputePmeMgr::fftw_plan_lock);
02586 #else
02587 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
02588 #endif
02589
02590 myKSpace = new PmeKSpace(initdata.grid,
02591 thisIndex.y*block2, thisIndex.y*block2 + ny,
02592 thisIndex.z*block3, thisIndex.z*block3 + nz);
02593 }
02594
02595
02596
02597
02598 void PmeZPencil::recv_grid(const PmeGridMsg *msg) {
02599
02600 int dim3 = initdata.grid.dim3;
02601 if ( imsg == 0 ) {
02602 lattice = msg->lattice;
02603 sequence = msg->sequence;
02604 memset(data, 0, sizeof(float) * nx*ny*dim3);
02605 }
02606
02607 if ( ! msg->hasData ) return;
02608
02609 int zlistlen = msg->zlistlen;
02610 int *zlist = msg->zlist;
02611 char *fmsg = msg->fgrid;
02612 float *qmsg = msg->qgrid;
02613 float *d = data;
02614 int numGrids = 1;
02615 for ( int g=0; g<numGrids; ++g ) {
02616 for ( int i=0; i<nx; ++i ) {
02617 for ( int j=0; j<ny; ++j, d += dim3 ) {
02618 if( *(fmsg++) ) {
02619 for ( int k=0; k<zlistlen; ++k ) {
02620 d[zlist[k]] += *(qmsg++);
02621 }
02622 }
02623 }
02624 }
02625 }
02626 }
02627
02628 void PmeZPencil::forward_fft() {
02629 #ifdef FFTCHECK
02630 int dim3 = initdata.grid.dim3;
02631 int K3 = initdata.grid.K3;
02632 float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
02633 float *d = data;
02634 for ( int i=0; i<nx; ++i ) {
02635 for ( int j=0; j<ny; ++j, d += dim3 ) {
02636 for ( int k=0; k<dim3; ++k ) {
02637 d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
02638 }
02639 }
02640 }
02641 #endif
02642 #ifdef NAMD_FFTW
02643 rfftwnd_real_to_complex(forward_plan, nx*ny,
02644 data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
02645 #endif
02646 #ifdef ZEROCHECK
02647 int dim3 = initdata.grid.dim3;
02648 int K3 = initdata.grid.K3;
02649 float *d = data;
02650 for ( int i=0; i<nx; ++i ) {
02651 for ( int j=0; j<ny; ++j, d += dim3 ) {
02652 for ( int k=0; k<dim3; ++k ) {
02653 if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
02654 thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
02655 }
02656 }
02657 }
02658 #endif
02659 }
02660
02661 void PmeZPencil::send_trans() {
02662 int zBlocks = initdata.zBlocks;
02663 int block3 = initdata.grid.block3;
02664 int dim3 = initdata.grid.dim3;
02665 for ( int isend=0; isend<zBlocks; ++isend ) {
02666 int kb = send_order[isend];
02667 int nz = block3;
02668 if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
02669 int hd = ( hasData ? 1 : 0 );
02670 PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
02671 msg->lattice = lattice;
02672 msg->sourceNode = thisIndex.y;
02673 msg->hasData = hasData;
02674 msg->nx = ny;
02675 if ( hasData ) {
02676 float *md = msg->qgrid;
02677 const float *d = data;
02678 for ( int i=0; i<nx; ++i ) {
02679 for ( int j=0; j<ny; ++j, d += dim3 ) {
02680 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
02681 *(md++) = d[2*k];
02682 *(md++) = d[2*k+1];
02683 }
02684 }
02685 }
02686 }
02687 msg->sequence = sequence;
02688 SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
02689 initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
02690 }
02691 }
02692
02693 void PmeYPencil::recv_trans(const PmeTransMsg *msg) {
02694 if ( imsg == 0 ) {
02695 lattice = msg->lattice;
02696 sequence = msg->sequence;
02697 }
02698 int block2 = initdata.grid.block2;
02699 int K2 = initdata.grid.K2;
02700 int jb = msg->sourceNode;
02701 int ny = msg->nx;
02702 if ( msg->hasData ) {
02703 const float *md = msg->qgrid;
02704 float *d = data;
02705 for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
02706 for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
02707 for ( int k=0; k<nz; ++k ) {
02708 #ifdef ZEROCHECK
02709 if ( (*md) == 0. ) CkPrintf("0 in ZY at %d %d %d %d %d %d %d %d %d\n",
02710 thisIndex.x, jb, thisIndex.z, i, j, k, nx, ny, nz);
02711 #endif
02712 d[2*(j*nz+k)] = *(md++);
02713 d[2*(j*nz+k)+1] = *(md++);
02714 }
02715 }
02716 }
02717 } else {
02718 float *d = data;
02719 for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
02720 for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
02721 for ( int k=0; k<nz; ++k ) {
02722 d[2*(j*nz+k)] = 0;
02723 d[2*(j*nz+k)+1] = 0;
02724 }
02725 }
02726 }
02727 }
02728 }
02729
02730 void PmeYPencil::forward_fft() {
02731 #ifdef NAMD_FFTW
02732 for ( int i=0; i<nx; ++i ) {
02733 fftw(forward_plan, nz,
02734 ((fftw_complex *) data) + i * nz * initdata.grid.K2,
02735 nz, 1, (fftw_complex *) work, 1, 0);
02736 }
02737 #endif
02738 }
02739
02740 void PmeYPencil::send_trans() {
02741 int yBlocks = initdata.yBlocks;
02742 int block2 = initdata.grid.block2;
02743 int K2 = initdata.grid.K2;
02744 for ( int isend=0; isend<yBlocks; ++isend ) {
02745 int jb = send_order[isend];
02746 int ny = block2;
02747 if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
02748 int hd = ( hasData ? 1 : 0 );
02749 PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
02750 msg->lattice = lattice;
02751 msg->sourceNode = thisIndex.x;
02752 msg->hasData = hasData;
02753 msg->nx = nx;
02754 if ( hasData ) {
02755 float *md = msg->qgrid;
02756 const float *d = data;
02757 for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
02758 for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
02759 for ( int k=0; k<nz; ++k ) {
02760 *(md++) = d[2*(j*nz+k)];
02761 *(md++) = d[2*(j*nz+k)+1];
02762 #ifdef ZEROCHECK
02763 if ( *(md-2) == 0. ) CkPrintf("send 0 in YX at %d %d %d %d %d %d %d %d %d\n",
02764 thisIndex.x, jb, thisIndex.z, i, j, k, nx, ny, nz);
02765 #endif
02766 }
02767 }
02768 }
02769 if ( md != msg->qgrid + nx*ny*nz*2 ) CkPrintf("error in YX at %d %d %d\n",
02770 thisIndex.x, jb, thisIndex.z);
02771 }
02772 msg->sequence = sequence;
02773 SET_PRIORITY(msg,sequence,PME_TRANS2_PRIORITY)
02774 initdata.xPencil(0,jb,thisIndex.z).recvTrans(msg);
02775 }
02776 }
02777
02778 void PmeXPencil::recv_trans(const PmeTransMsg *msg) {
02779 if ( imsg == 0 ) {
02780 lattice = msg->lattice;
02781 sequence = msg->sequence;
02782 }
02783 int block1 = initdata.grid.block1;
02784 int K1 = initdata.grid.K1;
02785 int ib = msg->sourceNode;
02786 int nx = msg->nx;
02787 if ( msg->hasData ) {
02788 const float *md = msg->qgrid;
02789 for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
02790 float *d = data + i*ny*nz*2;
02791 for ( int j=0; j<ny; ++j, d += nz*2 ) {
02792 for ( int k=0; k<nz; ++k ) {
02793 #ifdef ZEROCHECK
02794 if ( (*md) == 0. ) CkPrintf("0 in YX at %d %d %d %d %d %d %d %d %d\n",
02795 ib, thisIndex.y, thisIndex.z, i, j, k, nx, ny, nz);
02796 #endif
02797 d[2*k] = *(md++);
02798 d[2*k+1] = *(md++);
02799 }
02800 }
02801 }
02802 } else {
02803 for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
02804 float *d = data + i*ny*nz*2;
02805 for ( int j=0; j<ny; ++j, d += nz*2 ) {
02806 for ( int k=0; k<nz; ++k ) {
02807 d[2*k] = 0;
02808 d[2*k+1] = 0;
02809 }
02810 }
02811 }
02812 }
02813 }
02814
02815 void PmeXPencil::forward_fft() {
02816 #ifdef NAMD_FFTW
02817 fftw(forward_plan, ny*nz,
02818 ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
02819 #endif
02820 }
02821
02822 void PmeXPencil::pme_kspace() {
02823
02824 evir = 0.;
02825
02826 #ifdef FFTCHECK
02827 return;
02828 #endif
02829
02830 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
02831
02832 int numGrids = 1;
02833 for ( int g=0; g<numGrids; ++g ) {
02834 evir[0] = myKSpace->compute_energy(data+0*g,
02835 lattice, ewaldcof, &(evir[1]));
02836 }
02837
02838 }
02839
02840 void PmeXPencil::backward_fft() {
02841 #ifdef NAMD_FFTW
02842 fftw(backward_plan, ny*nz,
02843 ((fftw_complex *) data), ny*nz, 1, (fftw_complex *) work, 1, 0);
02844 #endif
02845 }
02846
02847 void PmeXPencil::send_untrans() {
02848 int xBlocks = initdata.xBlocks;
02849 int block1 = initdata.grid.block1;
02850 int K1 = initdata.grid.K1;
02851 int send_evir = 1;
02852 for ( int isend=0; isend<xBlocks; ++isend ) {
02853 int ib = send_order[isend];
02854 if ( ! needs_reply[ib] ) continue;
02855 int nx = block1;
02856 if ( (ib+1)*block1 > K1 ) nx = K1 - ib*block1;
02857 PmeUntransMsg *msg = new (nx*ny*nz*2,send_evir,PRIORITY_SIZE) PmeUntransMsg;
02858 if ( send_evir ) {
02859 msg->evir[0] = evir;
02860 msg->has_evir = 1;
02861 send_evir = 0;
02862 } else {
02863 msg->has_evir = 0;
02864 }
02865 msg->sourceNode = thisIndex.y;
02866 msg->ny = ny;
02867 float *md = msg->qgrid;
02868 for ( int i=ib*block1; i<(ib*block1+nx); ++i ) {
02869 float *d = data + i*ny*nz*2;
02870 for ( int j=0; j<ny; ++j, d += nz*2 ) {
02871 for ( int k=0; k<nz; ++k ) {
02872 *(md++) = d[2*k];
02873 *(md++) = d[2*k+1];
02874 }
02875 }
02876 }
02877 SET_PRIORITY(msg,sequence,PME_UNTRANS_PRIORITY)
02878 initdata.yPencil(ib,0,thisIndex.z).recvUntrans(msg);
02879 }
02880 }
02881
02882 void PmeYPencil::recv_untrans(const PmeUntransMsg *msg) {
02883 if ( imsg == 0 ) evir = 0.;
02884 if ( msg->has_evir ) evir += msg->evir[0];
02885 int block2 = initdata.grid.block2;
02886 int K2 = initdata.grid.K2;
02887 int jb = msg->sourceNode;
02888 int ny = msg->ny;
02889 const float *md = msg->qgrid;
02890 float *d = data;
02891 for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
02892 #if CMK_BLUEGENEL
02893 CmiNetworkProgress();
02894 #endif
02895 for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
02896 for ( int k=0; k<nz; ++k ) {
02897 #ifdef ZEROCHECK
02898 if ( (*md) == 0. ) CkPrintf("0 in XY at %d %d %d %d %d %d %d %d %d\n",
02899 thisIndex.x, jb, thisIndex.z, i, j, k, nx, ny, nz);
02900 #endif
02901 d[2*(j*nz+k)] = *(md++);
02902 d[2*(j*nz+k)+1] = *(md++);
02903 }
02904 }
02905 }
02906 }
02907
02908 void PmeYPencil::backward_fft() {
02909 #ifdef NAMD_FFTW
02910 for ( int i=0; i<nx; ++i ) {
02911 #if CMK_BLUEGENEL
02912 CmiNetworkProgress();
02913 #endif
02914
02915 fftw(backward_plan, nz,
02916 ((fftw_complex *) data) + i * nz * initdata.grid.K2,
02917 nz, 1, (fftw_complex *) work, 1, 0);
02918 }
02919 #endif
02920 }
02921
02922 void PmeYPencil::send_untrans() {
02923 int yBlocks = initdata.yBlocks;
02924 int block2 = initdata.grid.block2;
02925 int K2 = initdata.grid.K2;
02926 int send_evir = 1;
02927 for ( int isend=0; isend<yBlocks; ++isend ) {
02928 int jb = send_order[isend];
02929 if ( ! needs_reply[jb] ) continue;
02930 int ny = block2;
02931 if ( (jb+1)*block2 > K2 ) ny = K2 - jb*block2;
02932 PmeUntransMsg *msg = new (nx*ny*nz*2,send_evir,PRIORITY_SIZE) PmeUntransMsg;
02933 if ( send_evir ) {
02934 msg->evir[0] = evir;
02935 msg->has_evir = 1;
02936 send_evir = 0;
02937 } else {
02938 msg->has_evir = 0;
02939 }
02940 msg->sourceNode = thisIndex.z;
02941 msg->ny = nz;
02942 float *md = msg->qgrid;
02943 const float *d = data;
02944 for ( int i=0; i<nx; ++i, d += K2*nz*2 ) {
02945 for ( int j=jb*block2; j<(jb*block2+ny); ++j ) {
02946 for ( int k=0; k<nz; ++k ) {
02947 *(md++) = d[2*(j*nz+k)];
02948 *(md++) = d[2*(j*nz+k)+1];
02949 }
02950 }
02951 }
02952 SET_PRIORITY(msg,sequence,PME_UNTRANS2_PRIORITY)
02953 initdata.zPencil(thisIndex.x,jb,0).recvUntrans(msg);
02954 }
02955 }
02956
02957 void PmeZPencil::recv_untrans(const PmeUntransMsg *msg) {
02958 if ( imsg == 0 ) evir = 0.;
02959 if ( msg->has_evir ) evir += msg->evir[0];
02960 int block3 = initdata.grid.block3;
02961 int dim3 = initdata.grid.dim3;
02962 int kb = msg->sourceNode;
02963 int nz = msg->ny;
02964 const float *md = msg->qgrid;
02965 float *d = data;
02966 for ( int i=0; i<nx; ++i ) {
02967 #if CMK_BLUEGENEL
02968 CmiNetworkProgress();
02969 #endif
02970 for ( int j=0; j<ny; ++j, d += dim3 ) {
02971 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
02972 #ifdef ZEROCHECK
02973 if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
02974 thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
02975 #endif
02976 d[2*k] = *(md++);
02977 d[2*k+1] = *(md++);
02978 }
02979 }
02980 }
02981 }
02982
02983 void PmeZPencil::backward_fft() {
02984 #ifdef NAMD_FFTW
02985 rfftwnd_complex_to_real(backward_plan, nx*ny,
02986 (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
02987 #endif
02988
02989 #if CMK_BLUEGENEL
02990 CmiNetworkProgress();
02991 #endif
02992
02993 #ifdef FFTCHECK
02994 int dim3 = initdata.grid.dim3;
02995 int K1 = initdata.grid.K1;
02996 int K2 = initdata.grid.K2;
02997 int K3 = initdata.grid.K3;
02998 float scale = 1. / (1. * K1 * K2 * K3);
02999 float maxerr = 0.;
03000 float maxstd = 0.;
03001 int mi, mj, mk; mi = mj = mk = -1;
03002 float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
03003 const float *d = data;
03004 for ( int i=0; i<nx; ++i ) {
03005 for ( int j=0; j<ny; ++j, d += dim3 ) {
03006 for ( int k=0; k<K3; ++k ) {
03007 float std = 10. * (10. * (10. * std_base + i) + j) + k;
03008 float err = scale * d[k] - std;
03009 if ( fabsf(err) > fabsf(maxerr) ) {
03010 maxerr = err;
03011 maxstd = std;
03012 mi = i; mj = j; mk = k;
03013 }
03014 }
03015 }
03016 }
03017 CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
03018 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
03019 #endif
03020 }
03021
03022 void PmeZPencil::send_ungrid(PmeGridMsg *msg) {
03023 int pe = msg->sourceNode;
03024 msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
03025 int dim3 = initdata.grid.dim3;
03026 int zlistlen = msg->zlistlen;
03027 int *zlist = msg->zlist;
03028 char *fmsg = msg->fgrid;
03029 float *qmsg = msg->qgrid;
03030 float *d = data;
03031 int numGrids = 1;
03032 for ( int g=0; g<numGrids; ++g ) {
03033 #if CMK_BLUEGENEL
03034 CmiNetworkProgress();
03035 #endif
03036 for ( int i=0; i<nx; ++i ) {
03037 for ( int j=0; j<ny; ++j, d += dim3 ) {
03038 if( *(fmsg++) ) {
03039 for ( int k=0; k<zlistlen; ++k ) {
03040 *(qmsg++) = d[zlist[k]];
03041 }
03042 }
03043 }
03044 }
03045 }
03046
03047 SET_PRIORITY(msg,sequence,PME_UNGRID_PRIORITY)
03048 initdata.pmeProxy[pe].recvUngrid(msg);
03049 }
03050
03051
03052 #include "ComputePmeMgr.def.h"
03053