Inheritance diagram for ComputePmeMgr:

Public Member Functions
| | ComputePmeMgr () |
| | ~ComputePmeMgr () |
| void | initialize (CkQdMsg *) |
| void | initialize_pencils (CkQdMsg *) |
| void | activate_pencils (CkQdMsg *) |
| void | recvArrays (CProxy_PmeXPencil, CProxy_PmeYPencil, CProxy_PmeZPencil) |
| void | sendGrid (void) |
| void | recvGrid (PmeGridMsg *) |
| void | gridCalc1 (void) |
| void | sendTransBarrier (void) |
| void | sendTrans (void) |
| void | recvTrans (PmeTransMsg *) |
| void | gridCalc2 (void) |
| void | sendUntrans (void) |
| void | recvUntrans (PmeUntransMsg *) |
| void | gridCalc3 (void) |
| void | sendUngrid (void) |
| void | recvUngrid (PmeGridMsg *) |
| void | ungridCalc (void) |
| void | setCompute (ComputePme *c) |
| int | isPmeProcessor (int p) |
Friends
| class | ComputePme |
ComputePmeMgr::ComputePmeMgr ()

Definition at line 330 of file ComputePme.C.

00330 : pmeProxy(thisgroup),
00331 pmeProxyDir(thisgroup), pmeCompute(0) {
00332
00333 CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00334
00335 #ifdef USE_COMM_LIB
00336 ComlibDelegateProxy(&pmeProxy);
00337 #endif
00338
00339 #ifdef NAMD_FFTW
00340 if ( CmiMyRank() == 0 ) {
00341 fftw_plan_lock = CmiCreateLock();
00342 }
00343 #endif
00344
00345 myKSpace = 0;
00346 localInfo = new LocalPmeInfo[CkNumPes()];
00347 gridPeMap = new int[CkNumPes()];
00348 transPeMap = new int[CkNumPes()];
00349 recipPeDest = new int[CkNumPes()];
00350 gridPeOrder = new int[CkNumPes()];
00351 transPeOrder = new int[CkNumPes()];
00352 isPmeFlag = new char[CkNumPes()];
00353 kgrid = 0;
00354 work = 0;
00355 grid_count = 0;
00356 trans_count = 0;
00357 untrans_count = 0;
00358 ungrid_count = 0;
00359 gridmsg_reuse= new PmeGridMsg*[CkNumPes()];
00360 useBarrier = 0;
00361 sendTransBarrier_received = 0;
00362 usePencils = 0;
00363 }
ComputePmeMgr::~ComputePmeMgr ()

Definition at line 976 of file ComputePme.C.

00976 {
00977
00978 #ifdef NAMD_FFTW
00979 if ( CmiMyRank() == 0 ) {
00980 CmiDestroyLock(fftw_plan_lock);
00981 }
00982 #endif
00983
00984 delete myKSpace;
00985 delete [] localInfo;
00986 delete [] gridPeMap;
00987 delete [] transPeMap;
00988 delete [] recipPeDest;
00989 delete [] gridPeOrder;
00990 delete [] transPeOrder;
00991 delete [] isPmeFlag;
00992 delete [] qgrid;
00993 if ( kgrid != qgrid ) delete [] kgrid;
00994 delete [] work;
00995 delete [] gridmsg_reuse;
00996 }
void ComputePmeMgr::activate_pencils (CkQdMsg *)

Definition at line 970 of file ComputePme.C.

00970 {
00971 if ( ! usePencils ) return;
00972 if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
00973 }
void ComputePmeMgr::gridCalc1 (void)

Definition at line 1043 of file ComputePme.C. References PmeGrid::dim2, and PmeGrid::dim3.

01043 {
01044 // CkPrintf("gridCalc1 on Pe(%d)\n",CkMyPe());
01045
01046 #ifdef NAMD_FFTW
01047 for ( int g=0; g<numGrids; ++g ) {
01048 rfftwnd_real_to_complex(forward_plan_yz, localInfo[myGridPe].nx,
01049 qgrid + qgrid_size * g, 1, myGrid.dim2 * myGrid.dim3, 0, 0, 0);
01050 }
01051 #endif
01052
01053 #if CHARM_VERSION > 050402
01054 if ( ! useBarrier ) pmeProxyDir[CkMyPe()].sendTrans();
01055 #else
01056 if ( ! useBarrier ) pmeProxyDir.sendTrans(CkMyPe());
01057 #endif
01058 }
void ComputePmeMgr::gridCalc2 (void)

Definition at line 1156 of file ComputePme.C. References BigReal, PmeKSpace::compute_energy(), PmeGrid::dim3, and LocalPmeInfo::ny_after_transpose.

01156 {
01157 // CkPrintf("gridCalc2 on Pe(%d)\n",CkMyPe());
01158
01159 #if CMK_VERSION_BLUEGENE
01160 CmiNetworkProgressAfter (0);
01161 #endif
01162
01163 int zdim = myGrid.dim3;
01164 // int y_start = localInfo[myTransPe].y_start_after_transpose;
01165 int ny = localInfo[myTransPe].ny_after_transpose;
01166
01167 for ( int g=0; g<numGrids; ++g ) {
01168 // finish forward FFT (x dimension)
01169 #ifdef NAMD_FFTW
01170 fftw(forward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01171 ny * zdim / 2, 1, work, 1, 0);
01172 #endif
01173
01174 // reciprocal space portion of PME
01175 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
01176 recip_evir2[g][0] = myKSpace->compute_energy(kgrid+qgrid_size*g,
01177 lattice, ewaldcof, &(recip_evir2[g][1]));
01178 // CkPrintf("Ewald reciprocal energy = %f\n", recip_evir2[g][0]);
01179
01180 // start backward FFT (x dimension)
01181 #ifdef NAMD_FFTW
01182 fftw(backward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01183 ny * zdim / 2, 1, work, 1, 0);
01184 #endif
01185 }
01186
01187 #if CHARM_VERSION > 050402
01188 pmeProxyDir[CkMyPe()].sendUntrans();
01189 #else
01190 pmeProxyDir.sendUntrans(CkMyPe());
01191 #endif
01192 }
void ComputePmeMgr::gridCalc3 (void)

Definition at line 1291 of file ComputePme.C. References PmeGrid::dim2, and PmeGrid::dim3.

01291 {
01292 // CkPrintf("gridCalc3 on Pe(%d)\n",CkMyPe());
01293
01294 // finish backward FFT
01295 #ifdef NAMD_FFTW
01296 for ( int g=0; g<numGrids; ++g ) {
01297 rfftwnd_complex_to_real(backward_plan_yz, localInfo[myGridPe].nx,
01298 (fftw_complex *) (qgrid + qgrid_size * g),
01299 1, myGrid.dim2 * myGrid.dim3 / 2, 0, 0, 0);
01300 }
01301 #endif
01302
01303 #if CHARM_VERSION > 050402
01304 pmeProxyDir[CkMyPe()].sendUngrid();
01305 #else
01306 pmeProxyDir.sendUngrid(CkMyPe());
01307 #endif
01308 }
void ComputePmeMgr::initialize (CkQdMsg * msg)

Definition at line 372 of file ComputePme.C. References Lattice::a(), Lattice::a_r(), ResizeArray< Elem >::add(), BigReal, PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, SimParameters::cutoff, SimParameters::decouple, PmeGrid::dim2, PmeGrid::dim3, SimParameters::fepElecLambdaStart, SimParameters::fepOn, SimParameters::FFTWEstimate, generatePmePeList2(), PmePencilInitMsgData::grid, iINFO(), iout, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, SimParameters::lattice, SimParameters::lesFactor, SimParameters::lesOn, PatchMap::max_a(), PatchMap::min_a(), NAMD_bug(), NAMD_die(), PatchMap::node(), PatchMap::numNodesWithPatches(), PatchMap::numPatches(), PatchMap::numPatchesOnNode(), LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::pairInteractionOn, SimParameters::pairInteractionSelf, SimParameters::patchDimension, pencilPMEProcessors, SimParameters::PMEBarrier, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEMinPoints, SimParameters::PMEMinSlices, SimParameters::PMEPencils, SimParameters::PMEProcessors, PmePencilInitMsgData::pmeProxy, Random::reorder(), Node::simParameters, simParams, ResizeArray< Elem >::size(), SortableResizeArray< Elem >::sort(), SimParameters::thermInt, Vector::unit(), LocalPmeInfo::x_start, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::xPencil, LocalPmeInfo::y_start_after_transpose, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::yPencil, PmePencilInitMsgData::zBlocks, and PmePencilInitMsgData::zPencil.

00372 {
00373 delete msg;
00374
00375 SimParameters *simParams = Node::Object()->simParameters;
00376 PatchMap *patchMap = PatchMap::Object();
00377
00378 fepOn = simParams->fepOn;
00379 thermInt = simParams->thermInt;
00380 decouple = (fepOn || thermInt) && (simParams->decouple);
00381 fepElecLambdaStart = (fepOn || thermInt) ? simParams->fepElecLambdaStart : 0;
00382 if (fepOn || thermInt) {
00383 numGrids = 2;
00384 if (decouple) numGrids += 2;
00385 if (fepElecLambdaStart || thermInt) numGrids ++;
00386 }
00387 else numGrids = 1;
00388 lesOn = simParams->lesOn;
00389 useBarrier = simParams->PMEBarrier;
00390 if ( lesOn ) {
00391 lesFactor = simParams->lesFactor;
00392 numGrids = lesFactor;
00393 }
00394 selfOn = 0;
00395 pairOn = simParams->pairInteractionOn;
00396 if ( pairOn ) {
00397 selfOn = simParams->pairInteractionSelf;
00398 if ( selfOn ) pairOn = 0; // make pairOn and selfOn exclusive
00399 numGrids = selfOn ? 1 : 3;
00400 }
00401
00402 if ( numGrids != 1 || simParams->PMEPencils == 0 ) usePencils = 0;
00403 else if ( simParams->PMEPencils > 0 ) usePencils = 1;
00404 else {
00405 int dimx = simParams->PMEGridSizeX;
00406 int dimy = simParams->PMEGridSizeY;
00407 int maxslabs = 1 + (dimx - 1) / simParams->PMEMinSlices;
00408 if ( maxslabs > CkNumPes() ) maxslabs = CkNumPes();
00409 int maxpencils = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00410 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00411 if ( maxpencils > CkNumPes() ) maxpencils = CkNumPes();
00412 if ( maxpencils > 3 * maxslabs ) usePencils = 1;
00413 else usePencils = 0;
00414 }
00415
00416 if ( usePencils ) {
00417 if ( simParams->PMEPencils > 1 ) {
00418 xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00419 } else {
00420 int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00421 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00422 if ( nb2 > CkNumPes() ) nb2 = CkNumPes();
00423 int nb = (int) sqrt((float)nb2);
00424 xBlocks = zBlocks = nb;
00425 yBlocks = nb2 / nb;
00426 }
00427
00428 int dimx = simParams->PMEGridSizeX;
00429 int bx = 1 + ( dimx - 1 ) / xBlocks;
00430 xBlocks = 1 + ( dimx - 1 ) / bx;
00431
00432 int dimy = simParams->PMEGridSizeY;
00433 int by = 1 + ( dimy - 1 ) / yBlocks;
00434 yBlocks = 1 + ( dimy - 1 ) / by;
00435
00436 int dimz = simParams->PMEGridSizeZ / 2 + 1; // complex
00437 int bz = 1 + ( dimz - 1 ) / zBlocks;
00438 zBlocks = 1 + ( dimz - 1 ) / bz;
00439
00440 if ( ! CkMyPe() ) {
00441 iout << iINFO << "PME using " << xBlocks << " x " <<
00442 yBlocks << " x " << zBlocks <<
00443 " pencil grid for FFT and reciprocal sum.\n" << endi;
00444 }
00445 } else { // usePencils
00446
00447 { // decide how many pes to use for reciprocal sum
00448
00449 // rules based on work available
00450 int minslices = simParams->PMEMinSlices;
00451 int dimx = simParams->PMEGridSizeX;
00452 int nrpx = ( dimx + minslices - 1 ) / minslices;
00453 int dimy = simParams->PMEGridSizeY;
00454 int nrpy = ( dimy + minslices - 1 ) / minslices;
00455
00456 // rules based on processors available
00457 int nrpp = CkNumPes();
00458 // if ( nrpp > 32 ) nrpp = 32; // cap to limit messages
00459 if ( nrpp < nrpx ) nrpx = nrpp;
00460 if ( nrpp < nrpy ) nrpy = nrpp;
00461
00462 // user override
00463 int nrps = simParams->PMEProcessors;
00464 if ( nrps > CkNumPes() ) nrps = CkNumPes();
00465 if ( nrps > 0 ) nrpx = nrps;
00466 if ( nrps > 0 ) nrpy = nrps;
00467
00468 // make sure there aren't any totally empty processors
00469 int bx = ( dimx + nrpx - 1 ) / nrpx;
00470 nrpx = ( dimx + bx - 1 ) / bx;
00471 int by = ( dimy + nrpy - 1 ) / nrpy;
00472 nrpy = ( dimy + by - 1 ) / by;
00473 if ( bx != ( dimx + nrpx - 1 ) / nrpx )
00474 NAMD_bug("Error in selecting number of PME processors.");
00475 if ( by != ( dimy + nrpy - 1 ) / nrpy )
00476 NAMD_bug("Error in selecting number of PME processors.");
00477
00478 numGridPes = nrpx;
00479 numTransPes = nrpy;
00480 }
00481 if ( ! CkMyPe() ) {
00482 iout << iINFO << "PME using " << numGridPes << " and " << numTransPes <<
00483 " processors for FFT and reciprocal sum.\n" << endi;
00484 }
00485 { // generate random orderings for grid and trans messages
00486 int i;
00487 for ( i = 0; i < numGridPes; ++i ) {
00488 gridPeOrder[i] = i;
00489 }
00490 for ( i = 0; i < numTransPes; ++i ) {
00491 transPeOrder[i] = i;
00492 }
00493 Random rand(CkMyPe());
00494 rand.reorder(gridPeOrder,numGridPes);
00495 rand.reorder(transPeOrder,numTransPes);
00496 }
00497
00498 int sum_npes = numTransPes + numGridPes;
00499 int max_npes = (numTransPes > numGridPes)?numTransPes:numGridPes;
00500
00501 #if USE_TOPOMAP
00502 PatchMap * pmap = PatchMap::Object();
00503
00504 int patch_pes = pmap->numNodesWithPatches();
00505 TopoManager tmgr;
00506 if(tmgr.hasMultipleProcsPerNode())
00507 patch_pes *= 2;
00508
00509 bool done = false;
00510 #ifndef USE_COMM_LIB
00511 if(CkNumPes() > 2*sum_npes + patch_pes) {
00512 done = generateBGLORBPmePeList(transPeMap, numTransPes);
00513 done &= generateBGLORBPmePeList(gridPeMap, numGridPes, transPeMap, numTransPes);
00514 }
00515 else
00516 #endif
00517 if(CkNumPes() > 2 *max_npes + patch_pes) {
00518 done = generateBGLORBPmePeList(transPeMap, max_npes);
00519 gridPeMap = transPeMap;
00520 }
00521
00522 if (!done)
00523 #endif
00524 {
00525 //generatePmePeList(transPeMap, max_npes);
00526 //gridPeMap = transPeMap;
00527 generatePmePeList2(gridPeMap, numGridPes, transPeMap, numTransPes);
00528 }
00529
00530 #ifdef USE_COMM_LIB
00531 if(CkMyPe() == 0) {
00532 ComlibInstanceHandle cinst1 = CkCreateComlibInstance();
00533 EachToManyMulticastStrategy *strat = new
00534 EachToManyMulticastStrategy(USE_DIRECT, numGridPes,
00535 gridPeMap, numTransPes, transPeMap);
00536 cinst1.setStrategy(strat);
00537
00538 ComlibInstanceHandle cinst2 = CkCreateComlibInstance();
00539 strat = new EachToManyMulticastStrategy(USE_DIRECT, numTransPes, transPeMap
00540 , numGridPes, gridPeMap);
00541 cinst2.setStrategy(strat);
00542 ComlibDoneCreating();
00543 }
00544 #endif
00545
00546 myGridPe = -1;
00547 int i = 0;
00548 for ( i=0; i<CkNumPes(); ++i )
00549 isPmeFlag[i] = 0;
00550 for ( i=0; i<numGridPes; ++i ) {
00551 if ( gridPeMap[i] == CkMyPe() ) myGridPe = i;
00552 isPmeFlag[gridPeMap[i]] |= 1;
00553 }
00554 myTransPe = -1;
00555 for ( i=0; i<numTransPes; ++i ) {
00556 if ( transPeMap[i] == CkMyPe() ) myTransPe = i;
00557 isPmeFlag[transPeMap[i]] |= 2;
00558 }
00559
00560 if ( ! CkMyPe() ) {
00561 iout << iINFO << "PME GRID LOCATIONS:";
00562 int i;
00563 for ( i=0; i<numGridPes && i<10; ++i ) {
00564 iout << " " << gridPeMap[i];
00565 }
00566 if ( i < numGridPes ) iout << " ...";
00567 iout << "\n" << endi;
00568 iout << iINFO << "PME TRANS LOCATIONS:";
00569 for ( i=0; i<numTransPes && i<10; ++i ) {
00570 iout << " " << transPeMap[i];
00571 }
00572 if ( i < numTransPes ) iout << " ...";
00573 iout << "\n" << endi;
00574 }
00575
00576 } // ! usePencils
00577
00578 myGrid.K1 = simParams->PMEGridSizeX;
00579 myGrid.K2 = simParams->PMEGridSizeY;
00580 myGrid.K3 = simParams->PMEGridSizeZ;
00581 myGrid.order = simParams->PMEInterpOrder;
00582 myGrid.dim2 = myGrid.K2;
00583 myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
00584
00585 if ( ! usePencils ) {
00586 myGrid.block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
00587 myGrid.block2 = ( myGrid.K2 + numTransPes - 1 ) / numTransPes;
00588 myGrid.block3 = myGrid.dim3 / 2; // complex
00589 }
00590
00591 if ( usePencils ) {
00592 myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
00593 myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
00594 myGrid.block3 = ( myGrid.K3/2 + 1 + zBlocks - 1 ) / zBlocks; // complex
00595
00596 if ( CkMyPe() == 0 ) {
00597 int basepe = 0; int npe = CkNumPes();
00598 if ( npe > xBlocks*yBlocks &&
00599 npe > xBlocks*zBlocks &&
00600 npe > yBlocks*zBlocks ) {
00601 // avoid node 0
00602 ++basepe;
00603 --npe;
00604 }
00605
00606 zPencil = CProxy_PmeZPencil::ckNew(); // (xBlocks,yBlocks,1);
00607 yPencil = CProxy_PmeYPencil::ckNew(); // (xBlocks,1,zBlocks);
00608 xPencil = CProxy_PmeXPencil::ckNew(); // (1,yBlocks,zBlocks);
00609
00610 // decide which pes to use by bit reversal and patch use
00611 int i;
00612 int ncpus = CkNumPes();
00613
00614 // find next highest power of two
00615 int npow2 = 1; int nbits = 0;
00616 while ( npow2 < ncpus ) { npow2 *= 2; nbits += 1; }
00617
00618 // build bit reversal sequence
00619 SortableResizeArray<int> patches, nopatches, pmeprocs;
00620 PatchMap *pmap = PatchMap::Object();
00621 i = 0;
00622 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00623 int ri;
00624 for ( ri = ncpus; ri >= ncpus; ++i ) {
00625 ri = 0;
00626 int pow2 = 1;
00627 int rpow2 = npow2 / 2;
00628 for ( int j=0; j<nbits; ++j ) {
00629 ri += rpow2 * ( ( i / pow2 ) % 2 );
00630 pow2 *= 2; rpow2 /= 2;
00631 }
00632 }
00633 // seq[icpu] = ri;
00634 if ( ri ) { // keep 0 for special case
00635 if ( pmap->numPatchesOnNode(ri) ) patches.add(ri);
00636 else nopatches.add(ri);
00637 }
00638 }
00639
00640 // only use zero if it eliminates overloading or has patches
00641 int useZero = 0;
00642 int npens = xBlocks*yBlocks;
00643 if ( npens % ncpus == 0 ) useZero = 1;
00644 if ( npens == nopatches.size() + 1 ) useZero = 1;
00645 npens += xBlocks*zBlocks;
00646 if ( npens % ncpus == 0 ) useZero = 1;
00647 if ( npens == nopatches.size() + 1 ) useZero = 1;
00648 npens += yBlocks*zBlocks;
00649 if ( npens % ncpus == 0 ) useZero = 1;
00650 if ( npens == nopatches.size() + 1 ) useZero = 1;
00651
00652 // add nopatches then patches in reversed order
00653 for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00654 if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00655 for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00656 if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00657
00658 int pe = 0;
00659 int npes = pmeprocs.size();
00660 SortableResizeArray<int> zprocs(xBlocks*yBlocks);
00661 for ( i=0; i<xBlocks*yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00662 zprocs.sort();
00663 SortableResizeArray<int> yprocs(xBlocks*zBlocks);
00664 for ( i=0; i<xBlocks*zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00665 yprocs.sort();
00666 SortableResizeArray<int> xprocs(yBlocks*zBlocks);
00667 for ( i=0; i<yBlocks*zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00668 xprocs.sort();
00669
00670 pencilPMEProcessors = new char [CkNumPes()];
00671 memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
00672
00673 int x,y,z;
00674
00675 iout << iINFO << "PME Z PENCIL LOCATIONS:";
00676 for ( i=0; i<zprocs.size() && i<10; ++i ) {
00677 iout << " " << zprocs[i];
00678 }
00679 if ( i < zprocs.size() ) iout << " ...";
00680 iout << "\n" << endi;
00681
00682 for (pe=0, x = 0; x < xBlocks; ++x)
00683 for (y = 0; y < yBlocks; ++y, ++pe ) {
00684 zPencil(x,y,0).insert(zprocs[pe]);
00685 pencilPMEProcessors[zprocs[pe]] = 1;
00686 }
00687 zPencil.doneInserting();
00688
00689 iout << iINFO << "PME Y PENCIL LOCATIONS:";
00690 for ( i=0; i<yprocs.size() && i<10; ++i ) {
00691 iout << " " << yprocs[i];
00692 }
00693 if ( i < yprocs.size() ) iout << " ...";
00694 iout << "\n" << endi;
00695
00696 for (pe=0, z = 0; z < zBlocks; ++z )
00697 for (x = 0; x < xBlocks; ++x, ++pe ) {
00698 yPencil(x,0,z).insert(yprocs[pe]);
00699 pencilPMEProcessors[yprocs[pe]] = 1;
00700 }
00701 yPencil.doneInserting();
00702
00703 iout << iINFO << "PME X PENCIL LOCATIONS:";
00704 for ( i=0; i<xprocs.size() && i<10; ++i ) {
00705 iout << " " << xprocs[i];
00706 }
00707 if ( i < xprocs.size() ) iout << " ...";
00708 iout << "\n" << endi;
00709
00710 for (pe=0, y = 0; y < yBlocks; ++y )
00711 for (z = 0; z < zBlocks; ++z, ++pe ) {
00712 xPencil(0,y,z).insert(xprocs[pe]);
00713 pencilPMEProcessors[xprocs[pe]] = 1;
00714 }
00715 xPencil.doneInserting();
00716
00717 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
00718 PmePencilInitMsgData msgdata;
00719 msgdata.grid = myGrid;
00720 msgdata.xBlocks = xBlocks;
00721 msgdata.yBlocks = yBlocks;
00722 msgdata.zBlocks = zBlocks;
00723 msgdata.xPencil = xPencil;
00724 msgdata.yPencil = yPencil;
00725 msgdata.zPencil = zPencil;
00726 msgdata.pmeProxy = pmeProxyDir;
00727 xPencil.init(new PmePencilInitMsg(msgdata));
00728 yPencil.init(new PmePencilInitMsg(msgdata));
00729 zPencil.init(new PmePencilInitMsg(msgdata));
00730 }
00731 return; // continue in initialize_pencils() at next startup stage
00732 }
00733
00734
00735 int pe;
00736 int nx = 0;
00737 for ( pe = 0; pe < numGridPes; ++pe ) {
00738 localInfo[pe].x_start = nx;
00739 nx += myGrid.block1;
00740 if ( nx > myGrid.K1 ) nx = myGrid.K1;
00741 localInfo[pe].nx = nx - localInfo[pe].x_start;
00742 }
00743 int ny = 0;
00744 for ( pe = 0; pe < numTransPes; ++pe ) {
00745 localInfo[pe].y_start_after_transpose = ny;
00746 ny += myGrid.block2;
00747 if ( ny > myGrid.K2 ) ny = myGrid.K2;
00748 localInfo[pe].ny_after_transpose =
00749 ny - localInfo[pe].y_start_after_transpose;
00750 }
00751
00752 { // decide how many pes this node exchanges charges with
00753
00754 PatchMap *patchMap = PatchMap::Object();
00755 Lattice lattice = simParams->lattice;
00756 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00757 BigReal cutoff = simParams->cutoff;
00758 BigReal patchdim = simParams->patchDimension;
00759 int numPatches = patchMap->numPatches();
00760 int numNodes = CkNumPes();
00761 int *source_flags = new int[numNodes];
00762 int node;
00763 for ( node=0; node<numNodes; ++node ) {
00764 source_flags[node] = 0;
00765 recipPeDest[node] = 0;
00766 }
00767
00768 // // make sure that we don't get ahead of ourselves on this node
00769 // if ( CkMyPe() < numPatches && myRecipPe >= 0 ) {
00770 // source_flags[CkMyPe()] = 1;
00771 // recipPeDest[myRecipPe] = 1;
00772 // }
00773
00774 for ( int pid=0; pid < numPatches; ++pid ) {
00775 int pnode = patchMap->node(pid);
00776 BigReal minx = patchMap->min_a(pid);
00777 BigReal maxx = patchMap->max_a(pid);
00778 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00779 // min1 (max1) is smallest (largest) grid line for this patch
00780 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00781 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00782 for ( int i=min1; i<=max1; ++i ) {
00783 int ix = i;
00784 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00785 while ( ix < 0 ) ix += myGrid.K1;
00786 // set source_flags[pnode] if this patch sends to our node
00787 if ( myGridPe >= 0 && ix >= localInfo[myGridPe].x_start &&
00788 ix < localInfo[myGridPe].x_start + localInfo[myGridPe].nx ) {
00789 source_flags[pnode] = 1;
00790 }
00791 // set dest_flags[] for node that our patch sends to
00792 if ( pnode == CkMyPe() ) {
00793 recipPeDest[ix / myGrid.block1] = 1;
00794 }
00795 }
00796 }
00797
00798 numSources = 0;
00799 numDestRecipPes = 0;
00800 for ( node=0; node<numNodes; ++node ) {
00801 if ( source_flags[node] ) ++numSources;
00802 if ( recipPeDest[node] ) ++numDestRecipPes;
00803 }
00804
00805 #if 0
00806 if ( numSources ) {
00807 iout << iINFO << "PME " << CkMyPe() << " sources:";
00808 for ( node=0; node<numNodes; ++node ) {
00809 if ( source_flags[node] ) iout << " " << node;
00810 }
00811 iout << "\n" << endi;
00812 }
00813 #endif
00814
00815 delete [] source_flags;
00816
00817 // CkPrintf("PME on node %d has %d sources and %d destinations\n",
00818 // CkMyPe(), numSources, numDestRecipPes);
00819
00820 } // decide how many pes this node exchanges charges with (end)
00821
00822 ungrid_count = numDestRecipPes;
00823
00824 sendTransBarrier_received = 0;
00825
00826 if ( myGridPe < 0 && myTransPe < 0 ) return;
00827 // the following only for nodes doing reciprocal sum
00828
00829 if ( myTransPe >= 0 ) {
00830 int k2_start = localInfo[myTransPe].y_start_after_transpose;
00831 int k2_end = k2_start + localInfo[myTransPe].ny_after_transpose;
00832 myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
00833 }
00834
00835 int local_size = myGrid.block1 * myGrid.K2 * myGrid.dim3;
00836 int local_size_2 = myGrid.block2 * myGrid.K1 * myGrid.dim3;
00837 if ( local_size < local_size_2 ) local_size = local_size_2;
00838 qgrid = new float[local_size*numGrids];
00839 if ( numGridPes > 1 || numTransPes > 1 ) {
00840 kgrid = new float[local_size*numGrids];
00841 } else {
00842 kgrid = qgrid;
00843 }
00844 qgrid_size = local_size;
00845
00846 if ( myGridPe >= 0 ) {
00847 qgrid_start = localInfo[myGridPe].x_start * myGrid.K2 * myGrid.dim3;
00848 qgrid_len = localInfo[myGridPe].nx * myGrid.K2 * myGrid.dim3;
00849 fgrid_start = localInfo[myGridPe].x_start * myGrid.K2;
00850 fgrid_len = localInfo[myGridPe].nx * myGrid.K2;
00851 }
00852
00853 int n[3]; n[0] = myGrid.K1; n[1] = myGrid.K2; n[2] = myGrid.K3;
00854
00855 #ifdef NAMD_FFTW
00856 CmiLock(fftw_plan_lock);
00857
00858 work = new fftw_complex[n[0]];
00859
00860 if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps. 1..." << endi;
00861 if ( myGridPe >= 0 ) {
00862 forward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_REAL_TO_COMPLEX,
00863 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00864 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
00865 }
00866 if ( ! CkMyPe() ) iout << " 2..." << endi;
00867 if ( myTransPe >= 0 ) {
00868 forward_plan_x = fftw_create_plan_specific(n[0], FFTW_REAL_TO_COMPLEX,
00869 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00870 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
00871 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
00872 }
00873 if ( ! CkMyPe() ) iout << " 3..." << endi;
00874 if ( myTransPe >= 0 ) {
00875 backward_plan_x = fftw_create_plan_specific(n[0], FFTW_COMPLEX_TO_REAL,
00876 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00877 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
00878 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
00879 }
00880 if ( ! CkMyPe() ) iout << " 4..." << endi;
00881 if ( myGridPe >= 0 ) {
00882 backward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_COMPLEX_TO_REAL,
00883 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
00884 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
00885 }
00886 if ( ! CkMyPe() ) iout << " Done.\n" << endi;
00887
00888 CmiUnlock(fftw_plan_lock);
00889 #else
00890 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
00891 #endif
00892
00893 if ( myGridPe >= 0 && numSources == 0 )
00894 NAMD_bug("PME grid elements exist without sources.");
00895 grid_count = numSources;
00896 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
00897 trans_count = numGridPes;
00898 }
void ComputePmeMgr::initialize_pencils (CkQdMsg * msg)

Definition at line 901 of file ComputePme.C. References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), BigReal, PmeGrid::block1, PmeGrid::block2, SimParameters::cutoff, PmeGrid::K1, PmeGrid::K2, SimParameters::lattice, PatchMap::max_a(), PatchMap::max_b(), PatchMap::min_a(), PatchMap::min_b(), PatchMap::node(), PatchMap::numPatches(), PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::patchDimension, Node::simParameters, simParams, and Vector::unit().

00901 {
00902 delete msg;
00903 if ( ! usePencils ) return;
00904
00905 SimParameters *simParams = Node::Object()->simParameters;
00906
00907 PatchMap *patchMap = PatchMap::Object();
00908 Lattice lattice = simParams->lattice;
00909 BigReal sysdima = lattice.a_r().unit() * lattice.a();
00910 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
00911 BigReal cutoff = simParams->cutoff;
00912 BigReal patchdim = simParams->patchDimension;
00913 int numPatches = patchMap->numPatches();
00914
00915 pencilActive = new char[xBlocks*yBlocks];
00916 for ( int i=0; i<xBlocks; ++i ) {
00917 for ( int j=0; j<yBlocks; ++j ) {
00918 pencilActive[i*yBlocks+j] = 0;
00919 }
00920 }
00921
00922 for ( int pid=0; pid < numPatches; ++pid ) {
00923 int pnode = patchMap->node(pid);
00924 if ( pnode != CkMyPe() ) continue;
00925
00926 BigReal minx = patchMap->min_a(pid);
00927 BigReal maxx = patchMap->max_a(pid);
00928 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
00929 // min1 (max1) is smallest (largest) grid line for this patch
00930 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) - myGrid.order + 1;
00931 int max1 = ((int) floor(myGrid.K1 * (maxx + margina)));
00932
00933 BigReal miny = patchMap->min_b(pid);
00934 BigReal maxy = patchMap->max_b(pid);
00935 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
00936 // min2 (max2) is smallest (largest) grid line for this patch
00937 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) - myGrid.order + 1;
00938 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb)));
00939
00940 for ( int i=min1; i<=max1; ++i ) {
00941 int ix = i;
00942 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
00943 while ( ix < 0 ) ix += myGrid.K1;
00944 for ( int j=min2; j<=max2; ++j ) {
00945 int jy = j;
00946 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
00947 while ( jy < 0 ) jy += myGrid.K2;
00948 pencilActive[(ix / myGrid.block1)*yBlocks + (jy / myGrid.block2)] = 1;
00949 }
00950 }
00951 }
00952
00953 numPencilsActive = 0;
00954 for ( int i=0; i<xBlocks; ++i ) {
00955 for ( int j=0; j<yBlocks; ++j ) {
00956 if ( pencilActive[i*yBlocks+j] ) {
00957 ++numPencilsActive;
00958 zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
00959 }
00960 }
00961 }
00962 //if ( numPencilsActive ) {
00963 // CkPrintf("node %d sending to %d pencils\n", CkMyPe(), numPencilsActive);
00964 //}
00965
00966 ungrid_count = numPencilsActive;
00967 }
int ComputePmeMgr::isPmeProcessor (int p)

Definition at line 326 of file ComputePme.C. References pencilPMEProcessors.

00326 {
00327 return ( usePencils ? pencilPMEProcessors[p] : isPmeFlag[p] );
00328 }
void ComputePmeMgr::recvArrays (CProxy_PmeXPencil x, CProxy_PmeYPencil y, CProxy_PmeZPencil z)

Definition at line 366 of file ComputePme.C.

00367 {
00368 xPencil = x; yPencil = y; zPencil = z;
00369 }
void ComputePmeMgr::recvGrid (PmeGridMsg * msg)

Definition at line 1002 of file ComputePme.C. References PmeGrid::dim3, PmeGridMsg::fgrid, PmeGridMsg::lattice, NAMD_bug(), PmeGridMsg::qgrid, PmeGridMsg::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. Referenced by ComputePme::sendPencils().

01002 {
01003 // CkPrintf("recvGrid from %d on Pe(%d)\n",msg->sourceNode,CkMyPe());
01004 if ( grid_count == 0 ) {
01005 NAMD_bug("Message order failure in ComputePmeMgr::recvGrid\n");
01006 }
01007 if ( grid_count == numSources ) {
01008 lattice = msg->lattice;
01009 sequence = msg->sequence;
01010 }
01011
01012 int zdim = myGrid.dim3;
01013 int zlistlen = msg->zlistlen;
01014 int *zlist = msg->zlist;
01015 float *qmsg = msg->qgrid;
01016 for ( int g=0; g<numGrids; ++g ) {
01017 char *f = msg->fgrid + fgrid_len * g;
01018 float *q = qgrid + qgrid_size * g;
01019 for ( int i=0; i<fgrid_len; ++i ) {
01020 if ( f[i] ) {
01021 for ( int k=0; k<zlistlen; ++k ) {
01022 q[zlist[k]] += *(qmsg++);
01023 }
01024 }
01025 q += zdim;
01026 }
01027 }
01028
01029 gridmsg_reuse[numSources-grid_count] = msg;
01030 --grid_count;
01031
01032 if ( grid_count == 0 ) {
01033 #if CHARM_VERSION > 050402
01034 pmeProxyDir[CkMyPe()].gridCalc1();
01035 if ( useBarrier ) pmeProxyDir[0].sendTransBarrier();
01036 #else
01037 pmeProxyDir.gridCalc1(CkMyPe());
01038 if ( useBarrier ) pmeProxyDir.sendTransBarrier(0);
01039 #endif
01040 }
01041 }
void ComputePmeMgr::recvTrans (PmeTransMsg * msg)

Definition at line 1127 of file ComputePme.C. References CmiMemcpy, PmeGrid::dim3, PmeTransMsg::lattice, PmeTransMsg::nx, LocalPmeInfo::ny_after_transpose, PmeTransMsg::qgrid, PmeTransMsg::sequence, and PmeTransMsg::x_start.

01127 {
01128 // CkPrintf("recvTrans on Pe(%d)\n",CkMyPe());
01129 if ( trans_count == numGridPes ) {
01130 lattice = msg->lattice;
01131 sequence = msg->sequence;
01132 }
01133
01134 int zdim = myGrid.dim3;
01135 // int y_start = localInfo[myTransPe].y_start_after_transpose;
01136 int ny = localInfo[myTransPe].ny_after_transpose;
01137 int x_start = msg->x_start;
01138 int nx = msg->nx;
01139 for ( int g=0; g<numGrids; ++g ) {
01140 CmiMemcpy((void*)(kgrid + qgrid_size * g + x_start*ny*zdim),
01141 (void*)(msg->qgrid + nx*ny*zdim*g), nx*ny*zdim*sizeof(float));
01142 }
01143
01144 delete msg;
01145 --trans_count;
01146
01147 if ( trans_count == 0 ) {
01148 #if CHARM_VERSION > 050402
01149 pmeProxyDir[CkMyPe()].gridCalc2();
01150 #else
01151 pmeProxyDir.gridCalc2(CkMyPe());
01152 #endif
01153 }
01154 }
void ComputePmeMgr::recvUngrid (PmeGridMsg * msg)

Definition at line 1356 of file ComputePme.C. References ComputePme::copyPencils(), ComputePme::copyResults(), and NAMD_bug().

01356 {
01357 // CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
01358 if ( ungrid_count == 0 ) {
01359 NAMD_bug("Message order failure in ComputePmeMgr::recvUngrid\n");
01360 }
01361
01362 if ( usePencils ) pmeCompute->copyPencils(msg);
01363 else pmeCompute->copyResults(msg);
01364 delete msg;
01365 --ungrid_count;
01366
01367 if ( ungrid_count == 0 ) {
01368 #if CHARM_VERSION > 050402
01369 pmeProxyDir[CkMyPe()].ungridCalc();
01370 #else
01371 pmeProxyDir.ungridCalc(CkMyPe());
01372 #endif
01373 }
01374 }
void ComputePmeMgr::recvUntrans (PmeUntransMsg * msg)

Definition at line 1245 of file ComputePme.C. References CmiMemcpy, PmeGrid::dim3, PmeUntransMsg::evir, PmeGrid::K2, LocalPmeInfo::nx, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::y_start.

01245 {
01246 // CkPrintf("recvUntrans on Pe(%d)\n",CkMyPe());
01247 if ( untrans_count == numTransPes ) {
01248 for ( int g=0; g<numGrids; ++g ) {
01249 recip_evir[g] = 0.;
01250 }
01251 }
01252
01253 #if CMK_VERSION_BLUEGENE
01254 CmiNetworkProgressAfter (0);
01255 #endif
01256
01257 int g;
01258 for ( g=0; g<numGrids; ++g ) {
01259 recip_evir[g] += msg->evir[g];
01260 }
01261
01262 int zdim = myGrid.dim3;
01263 // int x_start = localInfo[myGridPe].x_start;
01264 int nx = localInfo[myGridPe].nx;
01265 int y_start = msg->y_start;
01266 int ny = msg->ny;
01267 int slicelen = myGrid.K2 * zdim;
01268 int cpylen = ny * zdim;
01269 for ( g=0; g<numGrids; ++g ) {
01270 float *q = qgrid + qgrid_size * g + y_start * zdim;
01271 float *qmsg = msg->qgrid + nx * cpylen * g;
01272 for ( int x = 0; x < nx; ++x ) {
01273 CmiMemcpy((void*)q, (void*)qmsg, cpylen*sizeof(float));
01274 q += slicelen;
01275 qmsg += cpylen;
01276 }
01277 }
01278
01279 delete msg;
01280 --untrans_count;
01281
01282 if ( untrans_count == 0 ) {
01283 #if CHARM_VERSION > 050402
01284 pmeProxyDir[CkMyPe()].gridCalc3();
01285 #else
01286 pmeProxyDir.gridCalc3(CkMyPe());
01287 #endif
01288 }
01289 }
void ComputePmeMgr::sendGrid (void)
Definition at line 998 of file ComputePme.C. References ComputePme::sendData(). 00998 {
00999 pmeCompute->sendData(numGridPes,gridPeOrder,recipPeDest,gridPeMap);
01000 }
void ComputePmeMgr::sendTrans (void)
Definition at line 1074 of file ComputePme.C. References CmiMemcpy, PmeGrid::dim3, PmeGrid::K2, PmeTransMsg::lattice, PmeTransMsg::nx, LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, PME_TRANS_PRIORITY, PmeTransMsg::qgrid, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmeTransMsg::x_start, LocalPmeInfo::x_start, and LocalPmeInfo::y_start_after_transpose. 01074 {
01075 // CkPrintf("sendTrans on %d\n",myTransPe);
01076
01077 // send data for transpose
01078 int zdim = myGrid.dim3;
01079 int nx = localInfo[myGridPe].nx;
01080 int x_start = localInfo[myGridPe].x_start;
01081 int slicelen = myGrid.K2 * zdim;
01082
01083 #ifdef USE_COMM_LIB
01084 ComlibInstanceHandle cinst1 = CkGetComlibInstance(0);
01085 cinst1.beginIteration();
01086 #endif
01087
01088 #if CMK_VERSION_BLUEGENE
01089 CmiNetworkProgressAfter (0);
01090 #endif
01091
01092 for (int j=0; j<numTransPes; j++) {
01093 int pe = transPeOrder[j]; // different order on each node
01094 LocalPmeInfo &li = localInfo[pe];
01095 int cpylen = li.ny_after_transpose * zdim;
01096 PmeTransMsg *newmsg = new (nx * cpylen * numGrids,
01097 PRIORITY_SIZE) PmeTransMsg;
01098 newmsg->sourceNode = myGridPe;
01099 newmsg->lattice = lattice;
01100 newmsg->x_start = x_start;
01101 newmsg->nx = nx;
01102 for ( int g=0; g<numGrids; ++g ) {
01103 float *q = qgrid + qgrid_size * g + li.y_start_after_transpose * zdim;
01104 float *qmsg = newmsg->qgrid + nx * cpylen * g;
01105 for ( int x = 0; x < nx; ++x ) {
01106 CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01107 q += slicelen;
01108 qmsg += cpylen;
01109 }
01110 }
01111 newmsg->sequence = sequence;
01112 SET_PRIORITY(newmsg,sequence,PME_TRANS_PRIORITY)
01113 #if CHARM_VERSION > 050402
01114 pmeProxy[transPeMap[pe]].recvTrans(newmsg);
01115 #else
01116 pmeProxy.recvTrans(newmsg,transPeMap[pe]);
01117 #endif
01118 }
01119
01120 untrans_count = numTransPes;
01121
01122 #ifdef USE_COMM_LIB
01123 cinst1.endIteration();
01124 #endif
01125 }
void ComputePmeMgr::sendTransBarrier (void)
Definition at line 1060 of file ComputePme.C. 01060 {
01061 sendTransBarrier_received += 1;
01062 // CkPrintf("sendTransBarrier on %d %d\n",myGridPe,numGridPes-sendTransBarrier_received);
01063 if ( sendTransBarrier_received < numGridPes ) return;
01064 sendTransBarrier_received = 0;
01065 for ( int i=0; i<numGridPes; ++i ) {
01066 #if CHARM_VERSION > 050402
01067 pmeProxyDir[gridPeMap[i]].sendTrans();
01068 #else
01069 pmeProxyDir.sendTrans(gridPeMap[i]);
01070 #endif
01071 }
01072 }
void ComputePmeMgr::sendUngrid (void)
Definition at line 1310 of file ComputePme.C. References PmeGrid::dim3, PmeGridMsg::evir, PmeGridMsg::fgrid, PmeGridMsg::len, PME_UNGRID_PRIORITY, PmeGridMsg::qgrid, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. 01310 {
01311
01312 for ( int j=0; j<numSources; ++j ) {
01313 // int msglen = qgrid_len;
01314 PmeGridMsg *newmsg = gridmsg_reuse[j];
01315 int pe = newmsg->sourceNode;
01316 if ( j == 0 ) { // only need these once
01317 for ( int g=0; g<numGrids; ++g ) {
01318 newmsg->evir[g] = recip_evir[g];
01319 }
01320 } else {
01321 for ( int g=0; g<numGrids; ++g ) {
01322 newmsg->evir[g] = 0.;
01323 }
01324 }
01325 int zdim = myGrid.dim3;
01326 int flen = newmsg->len;
01327 int fstart = newmsg->start;
01328 int zlistlen = newmsg->zlistlen;
01329 int *zlist = newmsg->zlist;
01330 float *qmsg = newmsg->qgrid;
01331 for ( int g=0; g<numGrids; ++g ) {
01332 char *f = newmsg->fgrid + fgrid_len * g;
01333 float *q = qgrid + qgrid_size * g + (fstart-fgrid_start) * zdim;
01334 for ( int i=0; i<flen; ++i ) {
01335 if ( f[i] ) {
01336 for ( int k=0; k<zlistlen; ++k ) {
01337 *(qmsg++) = q[zlist[k]];
01338 }
01339 }
01340 q += zdim;
01341 }
01342 }
01343 newmsg->sourceNode = myGridPe;
01344
01345 SET_PRIORITY(newmsg,sequence,PME_UNGRID_PRIORITY)
01346 #if CHARM_VERSION > 050402
01347 pmeProxyDir[pe].recvUngrid(newmsg);
01348 #else
01349 pmeProxyDir.recvUngrid(newmsg,pe);
01350 #endif
01351 }
01352 grid_count = numSources;
01353 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
01354 }
void ComputePmeMgr::sendUntrans (void)
Definition at line 1194 of file ComputePme.C. References CmiMemcpy, PmeGrid::dim3, PmeUntransMsg::evir, LocalPmeInfo::nx, PmeUntransMsg::ny, LocalPmeInfo::ny_after_transpose, PME_UNTRANS_PRIORITY, PmeUntransMsg::qgrid, SET_PRIORITY, PmeUntransMsg::sourceNode, LocalPmeInfo::x_start, PmeUntransMsg::y_start, and LocalPmeInfo::y_start_after_transpose. 01194 {
01195
01196 int zdim = myGrid.dim3;
01197 int y_start = localInfo[myTransPe].y_start_after_transpose;
01198 int ny = localInfo[myTransPe].ny_after_transpose;
01199
01200 #ifdef USE_COMM_LIB
01201 ComlibInstanceHandle cinst2 = CkGetComlibInstance(1);
01202 cinst2.beginIteration();
01203 #endif
01204
01205 #if CMK_VERSION_BLUEGENE
01206 CmiNetworkProgressAfter (0);
01207 #endif
01208
01209 // send data for reverse transpose
01210 for (int j=0; j<numGridPes; j++) {
01211 int pe = gridPeOrder[j]; // different order on each node
01212 LocalPmeInfo &li = localInfo[pe];
01213 int x_start =li.x_start;
01214 int nx = li.nx;
01215 PmeUntransMsg *newmsg = new (nx*ny*zdim*numGrids,numGrids,
01216 PRIORITY_SIZE) PmeUntransMsg;
01217 newmsg->sourceNode = myTransPe;
01218 newmsg->y_start = y_start;
01219 newmsg->ny = ny;
01220 for ( int g=0; g<numGrids; ++g ) {
01221 if ( j == 0 ) { // only need these once
01222 newmsg->evir[g] = recip_evir2[g];
01223 } else {
01224 newmsg->evir[g] = 0.;
01225 }
01226 CmiMemcpy((void*)(newmsg->qgrid+nx*ny*zdim*g),
01227 (void*)(kgrid + qgrid_size*g + x_start*ny*zdim),
01228 nx*ny*zdim*sizeof(float));
01229 }
01230 SET_PRIORITY(newmsg,sequence,PME_UNTRANS_PRIORITY)
01231 #if CHARM_VERSION > 050402
01232 pmeProxy[gridPeMap[pe]].recvUntrans(newmsg);
01233 #else
01234 pmeProxy.recvUntrans(newmsg,gridPeMap[pe]);
01235 #endif
01236 }
01237
01238 #ifdef USE_COMM_LIB
01239 cinst2.endIteration();
01240 #endif
01241
01242 trans_count = numGridPes;
01243 }
void ComputePmeMgr::setCompute (ComputePme *c)
Definition at line 244 of file ComputePme.C. References ComputePme::setMgr(). 00244 { pmeCompute = c; c->setMgr(this); }
void ComputePmeMgr::ungridCalc (void)
Definition at line 1376 of file ComputePme.C. References ComputePme::ungridForces(). Referenced by ComputePme::doWork(). 01376 {
01377 // CkPrintf("ungridCalc on Pe(%d)\n",CkMyPe());
01378
01379 pmeCompute->ungridForces();
01380
01381 ungrid_count = (usePencils ? numPencilsActive : numDestRecipPes );
01382 }
friend class ComputePme [friend]
Definition at line 221 of file ComputePme.C. |
1.3.9.1