Inheritance diagram for ComputePmeMgr:

|
|
Definition at line 524 of file ComputePme.C. References fftw_plan_lock. 00524 : pmeProxy(thisgroup),
00525 pmeProxyDir(thisgroup) {
00526
00527 CkpvAccess(BOCclass_group).computePmeMgr = thisgroup;
00528 pmeNodeProxy = CkpvAccess(BOCclass_group).nodePmeMgr;
00529
00530 pmeNodeProxy.ckLocalBranch()->initialize();
00531
00532 if ( CmiMyRank() == 0 ) {
00533 fftw_plan_lock = CmiCreateLock();
00534 }
00535
00536 myKSpace = 0;
00537 kgrid = 0;
00538 work = 0;
00539 grid_count = 0;
00540 trans_count = 0;
00541 untrans_count = 0;
00542 ungrid_count = 0;
00543 gridmsg_reuse= new PmeGridMsg*[CkNumPes()];
00544 useBarrier = 0;
00545 sendTransBarrier_received = 0;
00546 usePencils = 0;
00547 }
|
|
|
Definition at line 1532 of file ComputePme.C. References fftw_plan_lock. 01532 {
01533
01534 if ( CmiMyRank() == 0 ) {
01535 CmiDestroyLock(fftw_plan_lock);
01536 }
01537
01538 delete myKSpace;
01539 delete [] localInfo;
01540 delete [] gridNodeInfo;
01541 delete [] transNodeInfo;
01542 delete [] gridPeMap;
01543 delete [] transPeMap;
01544 delete [] recipPeDest;
01545 delete [] gridPeOrder;
01546 delete [] gridNodeOrder;
01547 delete [] transNodeOrder;
01548 delete [] isPmeFlag;
01549 delete [] qgrid;
01550 if ( kgrid != qgrid ) delete [] kgrid;
01551 delete [] work;
01552 delete [] gridmsg_reuse;
01553
01554 for (int i=0; i<fsize*numGrids; ++i) {
01555 if ( q_arr[i] ) {
01556 delete [] q_arr[i];
01557 }
01558 }
01559 delete [] q_arr;
01560 delete [] q_list;
01561 delete [] f_arr;
01562 delete [] fz_arr;
01563 }
|
|
|
Definition at line 1526 of file ComputePme.C. 01526 {
01527 if ( ! usePencils ) return;
01528 if ( CkMyPe() == 0 ) zPencil.dummyRecvGrid(CkMyPe(),1);
01529 }
|
|
|
Definition at line 2724 of file ComputePme.C. References PmeGrid::block1, PmeGrid::block2, PmeGrid::dim2, PmeGrid::dim3, PmeGridMsg::evir, j, PmeGrid::K1, PmeGrid::K2, PmeGridMsg::qgrid, PmeGridMsg::sourceNode, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. Referenced by recvUngrid(). 02724 {
02725
02726 int K1 = myGrid.K1;
02727 int K2 = myGrid.K2;
02728 int dim2 = myGrid.dim2;
02729 int dim3 = myGrid.dim3;
02730 int block1 = myGrid.block1;
02731 int block2 = myGrid.block2;
02732
02733 // msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
02734 int ib = msg->sourceNode / yBlocks;
02735 int jb = msg->sourceNode % yBlocks;
02736
02737 int ibegin = ib*block1;
02738 int iend = ibegin + block1; if ( iend > K1 ) iend = K1;
02739 int jbegin = jb*block2;
02740 int jend = jbegin + block2; if ( jend > K2 ) jend = K2;
02741
02742 int zlistlen = msg->zlistlen;
02743 int *zlist = msg->zlist;
02744 float *qmsg = msg->qgrid;
02745 int g;
02746 for ( g=0; g<numGrids; ++g ) {
02747 evir[g] += msg->evir[g];
02748 char *f = f_arr + g*fsize;
02749 float **q = q_arr + g*fsize;
02750 for ( int i=ibegin; i<iend; ++i ) {
02751 for ( int j=jbegin; j<jend; ++j ) {
02752 if( f[i*dim2+j] ) {
02753 f[i*dim2+j] = 0;
02754 for ( int k=0; k<zlistlen; ++k ) {
02755 q[i*dim2+j][zlist[k]] = *(qmsg++);
02756 }
02757 }
02758 }
02759 }
02760 }
02761 }
|
|
|
Definition at line 2861 of file ComputePme.C. References PmeGrid::dim3, PmeGridMsg::evir, PmeGridMsg::fgrid, PmeGridMsg::len, PmeGridMsg::qgrid, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. Referenced by recvUngrid(). 02861 {
02862
02863 int zdim = myGrid.dim3;
02864 int flen = msg->len;
02865 int fstart = msg->start;
02866 int zlistlen = msg->zlistlen;
02867 int *zlist = msg->zlist;
02868 float *qmsg = msg->qgrid;
02869 int g;
02870 for ( g=0; g<numGrids; ++g ) {
02871 evir[g] += msg->evir[g];
02872 char *f = msg->fgrid + g*flen;
02873 float **q = q_arr + fstart + g*fsize;
02874 for ( int i=0; i<flen; ++i ) {
02875 if ( f[i] ) {
02876 f[i] = 0;
02877 for ( int k=0; k<zlistlen; ++k ) {
02878 q[i][zlist[k]] = *(qmsg++);
02879 }
02880 }
02881 }
02882 }
02883 }
|
|
|
Definition at line 1734 of file ComputePme.C. References PmeSharedTransMsg::count, PmeSharedTransMsg::lock, PmeSharedTransMsg::msg, NodePmeInfo::npe, NodePmeInfo::pe_start, PME_TRANS_PRIORITY, PmeTransMsg::sequence, and SET_PRIORITY. Referenced by NodePmeMgr::recvTrans(), and sendTrans(). 01734 {
01735 // CkPrintf("fwdSharedTrans on Pe(%d)\n",CkMyPe());
01736 int pe = transNodeInfo[myTransNode].pe_start;
01737 int npe = transNodeInfo[myTransNode].npe;
01738 CmiNodeLock lock = CmiCreateLock();
01739 int *count = new int; *count = npe;
01740 for (int i=0; i<npe; ++i, ++pe) {
01741 PmeSharedTransMsg *shmsg = new (PRIORITY_SIZE) PmeSharedTransMsg;
01742 SET_PRIORITY(shmsg,msg->sequence,PME_TRANS_PRIORITY)
01743 shmsg->msg = msg;
01744 shmsg->count = count;
01745 shmsg->lock = lock;
01746 pmeProxy[transPeMap[pe]].recvSharedTrans(shmsg);
01747 }
01748 }
|
|
|
Definition at line 1957 of file ComputePme.C. References PmeSharedUntransMsg::count, PmeSharedUntransMsg::lock, PmeSharedUntransMsg::msg, NodePmeInfo::npe, and NodePmeInfo::pe_start. Referenced by NodePmeMgr::recvUntrans(), and sendUntrans(). 01957 {
01958 int pe = gridNodeInfo[myGridNode].pe_start;
01959 int npe = gridNodeInfo[myGridNode].npe;
01960 CmiNodeLock lock = CmiCreateLock();
01961 int *count = new int; *count = npe;
01962 for (int i=0; i<npe; ++i, ++pe) {
01963 PmeSharedUntransMsg *shmsg = new PmeSharedUntransMsg;
01964 shmsg->msg = msg;
01965 shmsg->count = count;
01966 shmsg->lock = lock;
01967 pmeProxy[gridPeMap[pe]].recvSharedUntrans(shmsg);
01968 }
01969 }
|
|
|
Definition at line 1644 of file ComputePme.C. References PmeGrid::dim2, and PmeGrid::dim3. 01644 {
01645 // CkPrintf("gridCalc1 on Pe(%d)\n",CkMyPe());
01646
01647 #ifdef NAMD_FFTW
01648 for ( int g=0; g<numGrids; ++g ) {
01649 #ifdef NAMD_FFTW_3
01650 fftwf_execute(forward_plan_yz[g]);
01651 #else
01652 rfftwnd_real_to_complex(forward_plan_yz, localInfo[myGridPe].nx,
01653 qgrid + qgrid_size * g, 1, myGrid.dim2 * myGrid.dim3, 0, 0, 0);
01654 #endif
01655
01656 }
01657 #endif
01658
01659 if ( ! useBarrier ) pmeProxyDir[CkMyPe()].sendTrans();
01660 }
|
|
|
Definition at line 1802 of file ComputePme.C. References PmeGrid::dim3, gridCalc2R(), LocalPmeInfo::ny_after_transpose, and simParams. 01802 {
01803 // CkPrintf("gridCalc2 on Pe(%d)\n",CkMyPe());
01804
01805 #if CMK_BLUEGENEL
01806 CmiNetworkProgressAfter (0);
01807 #endif
01808
01809 int zdim = myGrid.dim3;
01810 // int y_start = localInfo[myTransPe].y_start_after_transpose;
01811 int ny = localInfo[myTransPe].ny_after_transpose;
01812
01813 for ( int g=0; g<numGrids; ++g ) {
01814 // finish forward FFT (x dimension)
01815 #ifdef NAMD_FFTW
01816 #ifdef NAMD_FFTW_3
01817 fftwf_execute(forward_plan_x[g]);
01818 #else
01819 fftw(forward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01820 ny * zdim / 2, 1, work, 1, 0);
01821 #endif
01822 #endif
01823 }
01824
01825 #ifdef OPENATOM_VERSION
01826 if ( ! simParams -> openatomOn ) {
01827 #endif // OPENATOM_VERSION
01828 gridCalc2R();
01829 #ifdef OPENATOM_VERSION
01830 } else {
01831 gridCalc2Moa();
01832 }
01833 #endif // OPENATOM_VERSION
01834 }
|
|
|
Definition at line 1862 of file ComputePme.C. References BigReal, PmeKSpace::compute_energy(), PmeGrid::dim3, and LocalPmeInfo::ny_after_transpose. Referenced by gridCalc2(). 01862 {
01863
01864 int zdim = myGrid.dim3;
01865 // int y_start = localInfo[myTransPe].y_start_after_transpose;
01866 int ny = localInfo[myTransPe].ny_after_transpose;
01867
01868 for ( int g=0; g<numGrids; ++g ) {
01869 // reciprocal space portion of PME
01870 BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
01871 recip_evir2[g][0] = myKSpace->compute_energy(kgrid+qgrid_size*g,
01872 lattice, ewaldcof, &(recip_evir2[g][1]));
01873 // CkPrintf("Ewald reciprocal energy = %f\n", recip_evir2[g][0]);
01874
01875 // start backward FFT (x dimension)
01876
01877 #ifdef NAMD_FFTW
01878 #ifdef NAMD_FFTW_3
01879 fftwf_execute(backward_plan_x[g]);
01880 #else
01881 fftw(backward_plan_x, ny * zdim / 2, (fftw_complex *)(kgrid+qgrid_size*g),
01882 ny * zdim / 2, 1, work, 1, 0);
01883 #endif
01884 #endif
01885 }
01886
01887 pmeProxyDir[CkMyPe()].sendUntrans();
01888 }
|
|
|
Definition at line 2039 of file ComputePme.C. References PmeGrid::dim2, and PmeGrid::dim3. 02039 {
02040 // CkPrintf("gridCalc3 on Pe(%d)\n",CkMyPe());
02041
02042 // finish backward FFT
02043 #ifdef NAMD_FFTW
02044
02045 for ( int g=0; g<numGrids; ++g ) {
02046 #ifdef NAMD_FFTW_3
02047 fftwf_execute(backward_plan_yz[g]);
02048 #else
02049 rfftwnd_complex_to_real(backward_plan_yz, localInfo[myGridPe].nx,
02050 (fftw_complex *) (qgrid + qgrid_size * g),
02051 1, myGrid.dim2 * myGrid.dim3 / 2, 0, 0, 0);
02052 #endif
02053 }
02054
02055 #endif
02056
02057 pmeProxyDir[CkMyPe()].sendUngrid();
02058 }
|
|
|
Definition at line 612 of file ComputePme.C. References Lattice::a(), Lattice::a_r(), ResizeArray< Elem >::add(), SimParameters::alchDecouple, SimParameters::alchElecLambdaStart, SimParameters::alchFepOn, SimParameters::alchThermIntOn, ResizeArray< Elem >::begin(), BigReal, PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, CmiPeOnSamePhysicalNode, SimParameters::cutoff, PmeGrid::dim2, PmeGrid::dim3, ResizeArray< Elem >::end(), fftw_plan_lock, SimParameters::FFTWEstimate, SimParameters::FFTWPatient, generatePmePeList2(), PmePencilInitMsgData::grid, iINFO(), ResizeArray< Elem >::insert(), iout, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, SimParameters::lattice, SimParameters::lesFactor, SimParameters::lesOn, PatchMap::max_a(), PatchMap::min_a(), NAMD_bug(), NAMD_die(), PatchMap::node(), NodePmeInfo::npe, PatchMap::numNodesWithPatches(), PatchMap::numPatches(), PatchMap::numPatchesOnNode(), LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::pairInteractionOn, SimParameters::pairInteractionSelf, SimParameters::patchDimension, NodePmeInfo::pe_start, pencilPMEProcessors, SimParameters::PMEBarrier, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEMinPoints, SimParameters::PMEMinSlices, PmePencilInitMsgData::pmeNodeProxy, SimParameters::PMEPencils, SimParameters::PMEPencilsX, SimParameters::PMEPencilsXLayout, SimParameters::PMEPencilsY, SimParameters::PMEPencilsYLayout, SimParameters::PMEPencilsZ, SimParameters::PMEProcessors, PmePencilInitMsgData::pmeProxy, NodePmeInfo::real_node, Random::reorder(), ResizeArray< Elem >::resize(), Node::simParameters, simParams, ResizeArray< Elem >::size(), SortableResizeArray< Elem >::sort(), WorkDistrib::sortPmePes(), Vector::unit(), LocalPmeInfo::x_start, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::xm, PmePencilInitMsgData::xPencil, LocalPmeInfo::y_start_after_transpose, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, PmePencilInitMsgData::zBlocks, PmePencilInitMsgData::zm, and PmePencilInitMsgData::zPencil. 00612 {
00613 delete msg;
00614
00615 localInfo = new LocalPmeInfo[CkNumPes()];
00616 gridNodeInfo = new NodePmeInfo[CkNumNodes()];
00617 transNodeInfo = new NodePmeInfo[CkNumNodes()];
00618 gridPeMap = new int[CkNumPes()];
00619 transPeMap = new int[CkNumPes()];
00620 recipPeDest = new int[CkNumPes()];
00621 gridPeOrder = new int[CkNumPes()];
00622 gridNodeOrder = new int[CkNumNodes()];
00623 transNodeOrder = new int[CkNumNodes()];
00624 isPmeFlag = new char[CkNumPes()];
00625
00626 SimParameters *simParams = Node::Object()->simParameters;
00627 PatchMap *patchMap = PatchMap::Object();
00628
00629 alchFepOn = simParams->alchFepOn;
00630 alchThermIntOn = simParams->alchThermIntOn;
00631 alchDecouple = (alchFepOn || alchThermIntOn) && (simParams->alchDecouple);
00632 alchElecLambdaStart = (alchFepOn || alchThermIntOn) ?
00633 simParams->alchElecLambdaStart : 0;
00634 if (alchFepOn || alchThermIntOn) {
00635 numGrids = 2;
00636 if (alchDecouple) numGrids += 2;
00637 if (alchElecLambdaStart || alchThermIntOn) numGrids ++;
00638 }
00639 else numGrids = 1;
00640 lesOn = simParams->lesOn;
00641 useBarrier = simParams->PMEBarrier;
00642 if ( lesOn ) {
00643 lesFactor = simParams->lesFactor;
00644 numGrids = lesFactor;
00645 }
00646 selfOn = 0;
00647 pairOn = simParams->pairInteractionOn;
00648 if ( pairOn ) {
00649 selfOn = simParams->pairInteractionSelf;
00650 if ( selfOn ) pairOn = 0; // make pairOn and selfOn exclusive
00651 numGrids = selfOn ? 1 : 3;
00652 }
00653
00654 if ( numGrids != 1 || simParams->PMEPencils == 0 ) usePencils = 0;
00655 else if ( simParams->PMEPencils > 0 ) usePencils = 1;
00656 else {
00657 int nrps = simParams->PMEProcessors;
00658 if ( nrps <= 0 ) nrps = CkNumPes();
00659 if ( nrps > CkNumPes() ) nrps = CkNumPes();
00660 int dimx = simParams->PMEGridSizeX;
00661 int dimy = simParams->PMEGridSizeY;
00662 int maxslabs = 1 + (dimx - 1) / simParams->PMEMinSlices;
00663 if ( maxslabs > nrps ) maxslabs = nrps;
00664 int maxpencils = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00665 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00666 if ( maxpencils > nrps ) maxpencils = nrps;
00667 if ( maxpencils > 3 * maxslabs ) usePencils = 1;
00668 else usePencils = 0;
00669 }
00670
00671 if ( usePencils ) {
00672 int nrps = simParams->PMEProcessors;
00673 if ( nrps <= 0 ) nrps = CkNumPes();
00674 if ( nrps > CkNumPes() ) nrps = CkNumPes();
00675 if ( simParams->PMEPencils > 1 &&
00676 simParams->PMEPencils * simParams->PMEPencils <= nrps ) {
00677 xBlocks = yBlocks = zBlocks = simParams->PMEPencils;
00678 } else {
00679 int nb2 = ( simParams->PMEGridSizeX * simParams->PMEGridSizeY
00680 * simParams->PMEGridSizeZ ) / simParams->PMEMinPoints;
00681 if ( nb2 > nrps ) nb2 = nrps;
00682 if ( nb2 < 1 ) nb2 = 1;
00683 int nb = (int) sqrt((float)nb2);
00684 if ( nb < 1 ) nb = 1;
00685 xBlocks = zBlocks = nb;
00686 yBlocks = nb2 / nb;
00687 }
00688
00689 if ( simParams->PMEPencilsX > 0 ) xBlocks = simParams->PMEPencilsX;
00690 if ( simParams->PMEPencilsY > 0 ) yBlocks = simParams->PMEPencilsY;
00691 if ( simParams->PMEPencilsZ > 0 ) zBlocks = simParams->PMEPencilsZ;
00692
00693 int dimx = simParams->PMEGridSizeX;
00694 int bx = 1 + ( dimx - 1 ) / xBlocks;
00695 xBlocks = 1 + ( dimx - 1 ) / bx;
00696
00697 int dimy = simParams->PMEGridSizeY;
00698 int by = 1 + ( dimy - 1 ) / yBlocks;
00699 yBlocks = 1 + ( dimy - 1 ) / by;
00700
00701 int dimz = simParams->PMEGridSizeZ / 2 + 1; // complex
00702 int bz = 1 + ( dimz - 1 ) / zBlocks;
00703 zBlocks = 1 + ( dimz - 1 ) / bz;
00704
00705 if ( xBlocks * yBlocks > CkNumPes() ) {
00706 NAMD_die("PME pencils xBlocks * yBlocks > numPes");
00707 }
00708 if ( xBlocks * zBlocks > CkNumPes() ) {
00709 NAMD_die("PME pencils xBlocks * zBlocks > numPes");
00710 }
00711 if ( yBlocks * zBlocks > CkNumPes() ) {
00712 NAMD_die("PME pencils yBlocks * zBlocks > numPes");
00713 }
00714
00715 if ( ! CkMyPe() ) {
00716 iout << iINFO << "PME using " << xBlocks << " x " <<
00717 yBlocks << " x " << zBlocks <<
00718 " pencil grid for FFT and reciprocal sum.\n" << endi;
00719 }
00720 } else { // usePencils
00721
00722 { // decide how many pes to use for reciprocal sum
00723
00724 // rules based on work available
00725 int minslices = simParams->PMEMinSlices;
00726 int dimx = simParams->PMEGridSizeX;
00727 int nrpx = ( dimx + minslices - 1 ) / minslices;
00728 int dimy = simParams->PMEGridSizeY;
00729 int nrpy = ( dimy + minslices - 1 ) / minslices;
00730
00731 // rules based on processors available
00732 int nrpp = CkNumPes();
00733 // if ( nrpp > 32 ) nrpp = 32; // cap to limit messages
00734 if ( nrpp < nrpx ) nrpx = nrpp;
00735 if ( nrpp < nrpy ) nrpy = nrpp;
00736
00737 // user override
00738 int nrps = simParams->PMEProcessors;
00739 if ( nrps > CkNumPes() ) nrps = CkNumPes();
00740 if ( nrps > 0 ) nrpx = nrps;
00741 if ( nrps > 0 ) nrpy = nrps;
00742
00743 // make sure there aren't any totally empty processors
00744 int bx = ( dimx + nrpx - 1 ) / nrpx;
00745 nrpx = ( dimx + bx - 1 ) / bx;
00746 int by = ( dimy + nrpy - 1 ) / nrpy;
00747 nrpy = ( dimy + by - 1 ) / by;
00748 if ( bx != ( dimx + nrpx - 1 ) / nrpx )
00749 NAMD_bug("Error in selecting number of PME processors.");
00750 if ( by != ( dimy + nrpy - 1 ) / nrpy )
00751 NAMD_bug("Error in selecting number of PME processors.");
00752
00753 numGridPes = nrpx;
00754 numTransPes = nrpy;
00755 }
00756 if ( ! CkMyPe() ) {
00757 iout << iINFO << "PME using " << numGridPes << " and " << numTransPes <<
00758 " processors for FFT and reciprocal sum.\n" << endi;
00759 }
00760
00761 int sum_npes = numTransPes + numGridPes;
00762 int max_npes = (numTransPes > numGridPes)?numTransPes:numGridPes;
00763
00764 #if 0 // USE_TOPOMAP
00765 /* This code is being disabled permanently for slab PME on Blue Gene machines */
00766 PatchMap * pmap = PatchMap::Object();
00767
00768 int patch_pes = pmap->numNodesWithPatches();
00769 TopoManager tmgr;
00770 if(tmgr.hasMultipleProcsPerNode())
00771 patch_pes *= 2;
00772
00773 bool done = false;
00774 if(CkNumPes() > 2*sum_npes + patch_pes) {
00775 done = generateBGLORBPmePeList(transPeMap, numTransPes);
00776 done &= generateBGLORBPmePeList(gridPeMap, numGridPes, transPeMap, numTransPes);
00777 }
00778 else
00779 if(CkNumPes() > 2 *max_npes + patch_pes) {
00780 done = generateBGLORBPmePeList(transPeMap, max_npes);
00781 gridPeMap = transPeMap;
00782 }
00783
00784 if (!done)
00785 #endif
00786 {
00787 //generatePmePeList(transPeMap, max_npes);
00788 //gridPeMap = transPeMap;
00789 generatePmePeList2(gridPeMap, numGridPes, transPeMap, numTransPes);
00790 }
00791
00792 if ( ! CkMyPe() ) {
00793 iout << iINFO << "PME GRID LOCATIONS:";
00794 int i;
00795 for ( i=0; i<numGridPes && i<10; ++i ) {
00796 iout << " " << gridPeMap[i];
00797 }
00798 if ( i < numGridPes ) iout << " ...";
00799 iout << "\n" << endi;
00800 iout << iINFO << "PME TRANS LOCATIONS:";
00801 for ( i=0; i<numTransPes && i<10; ++i ) {
00802 iout << " " << transPeMap[i];
00803 }
00804 if ( i < numTransPes ) iout << " ...";
00805 iout << "\n" << endi;
00806 }
00807
00808 // sort based on nodes and physical nodes
00809 std::sort(gridPeMap,gridPeMap+numGridPes,WorkDistrib::pe_sortop_compact());
00810
00811 myGridPe = -1;
00812 myGridNode = -1;
00813 int i = 0;
00814 for ( i=0; i<CkNumPes(); ++i )
00815 isPmeFlag[i] = 0;
00816 int node = -1;
00817 int real_node = -1;
00818 for ( i=0; i<numGridPes; ++i ) {
00819 if ( gridPeMap[i] == CkMyPe() ) myGridPe = i;
00820 isPmeFlag[gridPeMap[i]] |= 1;
00821 int real_node_i = CkNodeOf(gridPeMap[i]);
00822 if ( real_node_i == real_node ) {
00823 gridNodeInfo[node].npe += 1;
00824 } else {
00825 real_node = real_node_i;
00826 ++node;
00827 gridNodeInfo[node].real_node = real_node;
00828 gridNodeInfo[node].pe_start = i;
00829 gridNodeInfo[node].npe = 1;
00830 }
00831 if ( CkMyNode() == real_node_i ) myGridNode = node;
00832 }
00833 numGridNodes = node + 1;
00834 myTransPe = -1;
00835 myTransNode = -1;
00836 node = -1;
00837 real_node = -1;
00838 for ( i=0; i<numTransPes; ++i ) {
00839 if ( transPeMap[i] == CkMyPe() ) myTransPe = i;
00840 isPmeFlag[transPeMap[i]] |= 2;
00841 int real_node_i = CkNodeOf(transPeMap[i]);
00842 if ( real_node_i == real_node ) {
00843 transNodeInfo[node].npe += 1;
00844 } else {
00845 real_node = real_node_i;
00846 ++node;
00847 transNodeInfo[node].real_node = real_node;
00848 transNodeInfo[node].pe_start = i;
00849 transNodeInfo[node].npe = 1;
00850 }
00851 if ( CkMyNode() == real_node_i ) myTransNode = node;
00852 }
00853 numTransNodes = node + 1;
00854
00855 if ( ! CkMyPe() ) {
00856 iout << iINFO << "PME USING " << numGridNodes << " GRID NODES AND "
00857 << numTransNodes << " TRANS NODES\n" << endi;
00858 }
00859
00860 { // generate random orderings for grid and trans messages
00861 int i;
00862 for ( i = 0; i < numGridPes; ++i ) {
00863 gridPeOrder[i] = i;
00864 }
00865 Random rand(CkMyPe());
00866 if ( myGridPe < 0 ) {
00867 rand.reorder(gridPeOrder,numGridPes);
00868 } else { // self last
00869 gridPeOrder[myGridPe] = numGridPes-1;
00870 gridPeOrder[numGridPes-1] = myGridPe;
00871 rand.reorder(gridPeOrder,numGridPes-1);
00872 }
00873 for ( i = 0; i < numGridNodes; ++i ) {
00874 gridNodeOrder[i] = i;
00875 }
00876 if ( myGridNode < 0 ) {
00877 rand.reorder(gridNodeOrder,numGridNodes);
00878 } else { // self last
00879 gridNodeOrder[myGridNode] = numGridNodes-1;
00880 gridNodeOrder[numGridNodes-1] = myGridNode;
00881 rand.reorder(gridNodeOrder,numGridNodes-1);
00882 }
00883 for ( i = 0; i < numTransNodes; ++i ) {
00884 transNodeOrder[i] = i;
00885 }
00886 if ( myTransNode < 0 ) {
00887 rand.reorder(transNodeOrder,numTransNodes);
00888 } else { // self last
00889 transNodeOrder[myTransNode] = numTransNodes-1;
00890 transNodeOrder[numTransNodes-1] = myTransNode;
00891 rand.reorder(transNodeOrder,numTransNodes-1);
00892 }
00893 }
00894
00895 } // ! usePencils
00896
00897 myGrid.K1 = simParams->PMEGridSizeX;
00898 myGrid.K2 = simParams->PMEGridSizeY;
00899 myGrid.K3 = simParams->PMEGridSizeZ;
00900 myGrid.order = simParams->PMEInterpOrder;
00901 myGrid.dim2 = myGrid.K2;
00902 myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
00903
00904 if ( ! usePencils ) {
00905 myGrid.block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
00906 myGrid.block2 = ( myGrid.K2 + numTransPes - 1 ) / numTransPes;
00907 myGrid.block3 = myGrid.dim3 / 2; // complex
00908 }
00909
00910 if ( usePencils ) {
00911 myGrid.block1 = ( myGrid.K1 + xBlocks - 1 ) / xBlocks;
00912 myGrid.block2 = ( myGrid.K2 + yBlocks - 1 ) / yBlocks;
00913 myGrid.block3 = ( myGrid.K3/2 + 1 + zBlocks - 1 ) / zBlocks; // complex
00914
00915
00916 int pe = 0;
00917 int x,y,z;
00918
00919 SortableResizeArray<int> zprocs(xBlocks*yBlocks);
00920 SortableResizeArray<int> yprocs(xBlocks*zBlocks);
00921 SortableResizeArray<int> xprocs(yBlocks*zBlocks);
00922 {
00923 int basepe = 0; int npe = CkNumPes();
00924 if ( npe > xBlocks*yBlocks &&
00925 npe > xBlocks*zBlocks &&
00926 npe > yBlocks*zBlocks ) {
00927 // avoid node 0
00928 ++basepe;
00929 --npe;
00930 }
00931
00932
00933 // decide which pes to use by bit reversal and patch use
00934 int i;
00935 int ncpus = CkNumPes();
00936 SortableResizeArray<int> patches, nopatches, pmeprocs;
00937 PatchMap *pmap = PatchMap::Object();
00938 for ( int icpu=0; icpu<ncpus; ++icpu ) {
00939 int ri = WorkDistrib::peDiffuseOrdering[icpu];
00940 if ( ri ) { // keep 0 for special case
00941 if ( pmap->numPatchesOnNode(ri) ) patches.add(ri);
00942 else nopatches.add(ri);
00943 }
00944 }
00945
00946 #if USE_RANDOM_TOPO
00947 Random rand(CkMyPe());
00948 int *tmp = new int[patches.size()];
00949 int nn = patches.size();
00950 for (i=0;i<nn;i++) tmp[i] = patches[i];
00951 rand.reorder(tmp, nn);
00952 patches.resize(0);
00953 for (i=0;i<nn;i++) patches.add(tmp[i]);
00954 delete [] tmp;
00955 tmp = new int[nopatches.size()];
00956 nn = nopatches.size();
00957 for (i=0;i<nn;i++) tmp[i] = nopatches[i];
00958 rand.reorder(tmp, nn);
00959 nopatches.resize(0);
00960 for (i=0;i<nn;i++) nopatches.add(tmp[i]);
00961 delete [] tmp;
00962 #endif
00963
00964 // only use zero if it eliminates overloading or has patches
00965 int useZero = 0;
00966 int npens = xBlocks*yBlocks;
00967 if ( npens % ncpus == 0 ) useZero = 1;
00968 if ( npens == nopatches.size() + 1 ) useZero = 1;
00969 npens += xBlocks*zBlocks;
00970 if ( npens % ncpus == 0 ) useZero = 1;
00971 if ( npens == nopatches.size() + 1 ) useZero = 1;
00972 npens += yBlocks*zBlocks;
00973 if ( npens % ncpus == 0 ) useZero = 1;
00974 if ( npens == nopatches.size() + 1 ) useZero = 1;
00975
00976 // add nopatches then patches in reversed order
00977 for ( i=nopatches.size()-1; i>=0; --i ) pmeprocs.add(nopatches[i]);
00978 if ( useZero && ! pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00979 for ( i=patches.size()-1; i>=0; --i ) pmeprocs.add(patches[i]);
00980 if ( pmap->numPatchesOnNode(0) ) pmeprocs.add(0);
00981
00982 int npes = pmeprocs.size();
00983 for ( i=0; i<xBlocks*yBlocks; ++i, ++pe ) zprocs[i] = pmeprocs[pe%npes];
00984 if ( i>1 && zprocs[0] == zprocs[i-1] ) zprocs[0] = 0;
00985 #if !USE_RANDOM_TOPO
00986 zprocs.sort();
00987 #endif
00988 for ( i=0; i<xBlocks*zBlocks; ++i, ++pe ) yprocs[i] = pmeprocs[pe%npes];
00989 if ( i>1 && yprocs[0] == yprocs[i-1] ) yprocs[0] = 0;
00990 #if !USE_RANDOM_TOPO
00991 yprocs.sort();
00992 #endif
00993 for ( i=0; i<yBlocks*zBlocks; ++i, ++pe ) xprocs[i] = pmeprocs[pe%npes];
00994 if ( i>1 && xprocs[0] == xprocs[i-1] ) xprocs[0] = 0;
00995 #if !USE_RANDOM_TOPO
00996 xprocs.sort();
00997 #endif
00998
00999 #if USE_TOPO_SFC
01000 CmiLock(tmgr_lock);
01001 //{
01002 TopoManager tmgr;
01003 int xdim = tmgr.getDimNX();
01004 int ydim = tmgr.getDimNY();
01005 int zdim = tmgr.getDimNZ();
01006 int xdim1 = find_level_grid(xdim);
01007 int ydim1 = find_level_grid(ydim);
01008 int zdim1 = find_level_grid(zdim);
01009 if(CkMyPe() == 0)
01010 printf("xdim: %d %d %d, %d %d %d\n", xdim, ydim, zdim, xdim1, ydim1, zdim1);
01011
01012 vector<Coord> result;
01013 SFC_grid(xdim, ydim, zdim, xdim1, ydim1, zdim1, result);
01014 sort_sfc(xprocs, tmgr, result);
01015 sort_sfc(yprocs, tmgr, result);
01016 sort_sfc(zprocs, tmgr, result);
01017 //}
01018 CmiUnlock(tmgr_lock);
01019 #endif
01020
01021 pencilPMEProcessors = new char [CkNumPes()];
01022 memset (pencilPMEProcessors, 0, sizeof(char) * CkNumPes());
01023
01024
01025 if(CkMyPe() == 0){
01026 iout << iINFO << "PME Z PENCIL LOCATIONS:";
01027 for ( i=0; i<zprocs.size() && i<10; ++i ) {
01028 #if USE_TOPO_SFC
01029 int x,y,z,t;
01030 tmgr.rankToCoordinates(zprocs[i], x,y, z, t);
01031 iout << " " << zprocs[i] << "(" << x << " " << y << " " << z << ")";
01032 #else
01033 iout << " " << zprocs[i];
01034 #endif
01035 }
01036 if ( i < zprocs.size() ) iout << " ...";
01037 iout << "\n" << endi;
01038 }
01039
01040 for (pe=0, x = 0; x < xBlocks; ++x)
01041 for (y = 0; y < yBlocks; ++y, ++pe ) {
01042 pencilPMEProcessors[zprocs[pe]] = 1;
01043 }
01044
01045 if(CkMyPe() == 0){
01046 iout << iINFO << "PME Y PENCIL LOCATIONS:";
01047 for ( i=0; i<yprocs.size() && i<10; ++i ) {
01048 #if USE_TOPO_SFC
01049 int x,y,z,t;
01050 tmgr.rankToCoordinates(yprocs[i], x,y, z, t);
01051 iout << " " << yprocs[i] << "(" << x << " " << y << " " << z << ")";
01052 #else
01053 iout << " " << yprocs[i];
01054 #endif
01055 }
01056 if ( i < yprocs.size() ) iout << " ...";
01057 iout << "\n" << endi;
01058 }
01059
01060 for (pe=0, z = 0; z < zBlocks; ++z )
01061 for (x = 0; x < xBlocks; ++x, ++pe ) {
01062 pencilPMEProcessors[yprocs[pe]] = 1;
01063 }
01064
01065 if(CkMyPe() == 0){
01066 iout << iINFO << "PME X PENCIL LOCATIONS:";
01067 for ( i=0; i<xprocs.size() && i<10; ++i ) {
01068 #if USE_TOPO_SFC
01069 int x,y,z,t;
01070 tmgr.rankToCoordinates(xprocs[i], x,y, z, t);
01071 iout << " " << xprocs[i] << "(" << x << " " << y << " " << z << ")";
01072 #else
01073 iout << " " << xprocs[i];
01074 #endif
01075 }
01076 if ( i < xprocs.size() ) iout << " ...";
01077 iout << "\n" << endi;
01078 }
01079
01080 for (pe=0, y = 0; y < yBlocks; ++y )
01081 for (z = 0; z < zBlocks; ++z, ++pe ) {
01082 pencilPMEProcessors[xprocs[pe]] = 1;
01083 }
01084
01085 }
01086
01087
01088 // creating the pencil arrays
01089 if ( CkMyPe() == 0 ){
01090 #if !USE_RANDOM_TOPO
01091 // std::sort(zprocs.begin(),zprocs.end(),WorkDistrib::pe_sortop_compact());
01092 WorkDistrib::sortPmePes(zprocs.begin(),xBlocks,yBlocks);
01093 std::sort(yprocs.begin(),yprocs.end(),WorkDistrib::pe_sortop_compact());
01094 std::sort(xprocs.begin(),xprocs.end(),WorkDistrib::pe_sortop_compact());
01095 #endif
01096 #if 1
01097 CProxy_PmePencilMap zm = CProxy_PmePencilMap::ckNew(0,1,yBlocks,xBlocks*yBlocks,zprocs.begin());
01098 CProxy_PmePencilMap ym;
01099 if ( simParams->PMEPencilsYLayout )
01100 ym = CProxy_PmePencilMap::ckNew(0,2,zBlocks,zBlocks*xBlocks,yprocs.begin()); // new
01101 else
01102 ym = CProxy_PmePencilMap::ckNew(2,0,xBlocks,zBlocks*xBlocks,yprocs.begin()); // old
01103 CProxy_PmePencilMap xm;
01104 if ( simParams->PMEPencilsXLayout )
01105 xm = CProxy_PmePencilMap::ckNew(2,1,yBlocks,yBlocks*zBlocks,xprocs.begin()); // new
01106 else
01107 xm = CProxy_PmePencilMap::ckNew(1,2,zBlocks,yBlocks*zBlocks,xprocs.begin()); // old
01108 pmeNodeProxy.recvPencilMapProxies(xm,ym,zm);
01109 CkArrayOptions zo(xBlocks,yBlocks,1); zo.setMap(zm);
01110 CkArrayOptions yo(xBlocks,1,zBlocks); yo.setMap(ym);
01111 CkArrayOptions xo(1,yBlocks,zBlocks); xo.setMap(xm);
01112 #if CHARM_VERSION > 60301
01113 zo.setAnytimeMigration(false); zo.setStaticInsertion(true);
01114 yo.setAnytimeMigration(false); yo.setStaticInsertion(true);
01115 xo.setAnytimeMigration(false); xo.setStaticInsertion(true);
01116 #endif
01117 zPencil = CProxy_PmeZPencil::ckNew(zo); // (xBlocks,yBlocks,1);
01118 yPencil = CProxy_PmeYPencil::ckNew(yo); // (xBlocks,1,zBlocks);
01119 xPencil = CProxy_PmeXPencil::ckNew(xo); // (1,yBlocks,zBlocks);
01120 #else
01121 zPencil = CProxy_PmeZPencil::ckNew(); // (xBlocks,yBlocks,1);
01122 yPencil = CProxy_PmeYPencil::ckNew(); // (xBlocks,1,zBlocks);
01123 xPencil = CProxy_PmeXPencil::ckNew(); // (1,yBlocks,zBlocks);
01124
01125 for (pe=0, x = 0; x < xBlocks; ++x)
01126 for (y = 0; y < yBlocks; ++y, ++pe ) {
01127 zPencil(x,y,0).insert(zprocs[pe]);
01128 }
01129 zPencil.doneInserting();
01130
01131 for (pe=0, x = 0; x < xBlocks; ++x)
01132 for (z = 0; z < zBlocks; ++z, ++pe ) {
01133 yPencil(x,0,z).insert(yprocs[pe]);
01134 }
01135 yPencil.doneInserting();
01136
01137
01138 for (pe=0, y = 0; y < yBlocks; ++y )
01139 for (z = 0; z < zBlocks; ++z, ++pe ) {
01140 xPencil(0,y,z).insert(xprocs[pe]);
01141 }
01142 xPencil.doneInserting();
01143 #endif
01144
01145 pmeProxy.recvArrays(xPencil,yPencil,zPencil);
01146 PmePencilInitMsgData msgdata;
01147 msgdata.grid = myGrid;
01148 msgdata.xBlocks = xBlocks;
01149 msgdata.yBlocks = yBlocks;
01150 msgdata.zBlocks = zBlocks;
01151 msgdata.xPencil = xPencil;
01152 msgdata.yPencil = yPencil;
01153 msgdata.zPencil = zPencil;
01154 msgdata.pmeProxy = pmeProxyDir;
01155 msgdata.pmeNodeProxy = pmeNodeProxy;
01156 msgdata.xm = xm;
01157 msgdata.ym = ym;
01158 msgdata.zm = zm;
01159 xPencil.init(new PmePencilInitMsg(msgdata));
01160 yPencil.init(new PmePencilInitMsg(msgdata));
01161 zPencil.init(new PmePencilInitMsg(msgdata));
01162 }
01163
01164 return; // continue in initialize_pencils() at next startup stage
01165 }
01166
01167
01168 int pe;
01169 int nx = 0;
01170 for ( pe = 0; pe < numGridPes; ++pe ) {
01171 localInfo[pe].x_start = nx;
01172 nx += myGrid.block1;
01173 if ( nx > myGrid.K1 ) nx = myGrid.K1;
01174 localInfo[pe].nx = nx - localInfo[pe].x_start;
01175 }
01176 int ny = 0;
01177 for ( pe = 0; pe < numTransPes; ++pe ) {
01178 localInfo[pe].y_start_after_transpose = ny;
01179 ny += myGrid.block2;
01180 if ( ny > myGrid.K2 ) ny = myGrid.K2;
01181 localInfo[pe].ny_after_transpose =
01182 ny - localInfo[pe].y_start_after_transpose;
01183 }
01184
01185 { // decide how many pes this node exchanges charges with
01186
01187 PatchMap *patchMap = PatchMap::Object();
01188 Lattice lattice = simParams->lattice;
01189 BigReal sysdima = lattice.a_r().unit() * lattice.a();
01190 BigReal cutoff = simParams->cutoff;
01191 BigReal patchdim = simParams->patchDimension;
01192 int numPatches = patchMap->numPatches();
01193 int numNodes = CkNumPes();
01194 int *source_flags = new int[numNodes];
01195 int node;
01196 for ( node=0; node<numNodes; ++node ) {
01197 source_flags[node] = 0;
01198 recipPeDest[node] = 0;
01199 }
01200
01201 // // make sure that we don't get ahead of ourselves on this node
01202 // if ( CkMyPe() < numPatches && myRecipPe >= 0 ) {
01203 // source_flags[CkMyPe()] = 1;
01204 // recipPeDest[myRecipPe] = 1;
01205 // }
01206
01207 for ( int pid=0; pid < numPatches; ++pid ) {
01208 int pnode = patchMap->node(pid);
01209 int shift1 = (myGrid.K1 + myGrid.order - 1)/2;
01210 BigReal minx = patchMap->min_a(pid);
01211 BigReal maxx = patchMap->max_a(pid);
01212 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
01213 // min1 (max1) is smallest (largest) grid line for this patch
01214 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) + shift1 - myGrid.order + 1;
01215 int max1 = ((int) floor(myGrid.K1 * (maxx + margina))) + shift1;
01216 for ( int i=min1; i<=max1; ++i ) {
01217 int ix = i;
01218 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
01219 while ( ix < 0 ) ix += myGrid.K1;
01220 // set source_flags[pnode] if this patch sends to our node
01221 if ( myGridPe >= 0 && ix >= localInfo[myGridPe].x_start &&
01222 ix < localInfo[myGridPe].x_start + localInfo[myGridPe].nx ) {
01223 source_flags[pnode] = 1;
01224 }
01225 // set dest_flags[] for node that our patch sends to
01226 if ( pnode == CkMyPe() ) {
01227 recipPeDest[ix / myGrid.block1] = 1;
01228 }
01229 }
01230 }
01231
01232 int numSourcesSamePhysicalNode = 0;
01233 numSources = 0;
01234 numDestRecipPes = 0;
01235 for ( node=0; node<numNodes; ++node ) {
01236 if ( source_flags[node] ) ++numSources;
01237 if ( recipPeDest[node] ) ++numDestRecipPes;
01238 if ( source_flags[node] && CmiPeOnSamePhysicalNode(node,CkMyPe()) ) ++numSourcesSamePhysicalNode;
01239 }
01240
01241 #if 0
01242 if ( numSources ) {
01243 CkPrintf("pe %5d pme %5d of %5d on same physical node\n",
01244 CkMyPe(), numSourcesSamePhysicalNode, numSources);
01245 iout << iINFO << "PME " << CkMyPe() << " sources:";
01246 for ( node=0; node<numNodes; ++node ) {
01247 if ( source_flags[node] ) iout << " " << node;
01248 }
01249 iout << "\n" << endi;
01250 }
01251 #endif
01252
01253 delete [] source_flags;
01254
01255 // CkPrintf("PME on node %d has %d sources and %d destinations\n",
01256 // CkMyPe(), numSources, numDestRecipPes);
01257
01258 } // decide how many pes this node exchanges charges with (end)
01259
01260 ungrid_count = numDestRecipPes;
01261
01262 sendTransBarrier_received = 0;
01263
01264 if ( myGridPe < 0 && myTransPe < 0 ) return;
01265 // the following only for nodes doing reciprocal sum
01266
01267 if ( myTransPe >= 0 ) {
01268 int k2_start = localInfo[myTransPe].y_start_after_transpose;
01269 int k2_end = k2_start + localInfo[myTransPe].ny_after_transpose;
01270 #ifdef OPENATOM_VERSION
01271 if ( simParams->openatomOn ) {
01272 CProxy_ComputeMoaMgr moaProxy(CkpvAccess(BOCclass_group).computeMoaMgr);
01273 myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2, moaProxy);
01274 } else {
01275 myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
01276 }
01277 #else // OPENATOM_VERSION
01278 myKSpace = new PmeKSpace(myGrid, k2_start, k2_end, 0, myGrid.dim3/2);
01279 #endif // OPENATOM_VERSION
01280 }
01281
01282 int local_size = myGrid.block1 * myGrid.K2 * myGrid.dim3;
01283 int local_size_2 = myGrid.block2 * myGrid.K1 * myGrid.dim3;
01284 if ( local_size < local_size_2 ) local_size = local_size_2;
01285 qgrid = new float[local_size*numGrids];
01286 if ( numGridPes > 1 || numTransPes > 1 ) {
01287 kgrid = new float[local_size*numGrids];
01288 } else {
01289 kgrid = qgrid;
01290 }
01291 qgrid_size = local_size;
01292
01293 if ( myGridPe >= 0 ) {
01294 qgrid_start = localInfo[myGridPe].x_start * myGrid.K2 * myGrid.dim3;
01295 qgrid_len = localInfo[myGridPe].nx * myGrid.K2 * myGrid.dim3;
01296 fgrid_start = localInfo[myGridPe].x_start * myGrid.K2;
01297 fgrid_len = localInfo[myGridPe].nx * myGrid.K2;
01298 }
01299
01300 int n[3]; n[0] = myGrid.K1; n[1] = myGrid.K2; n[2] = myGrid.K3;
01301 #ifdef NAMD_FFTW
01302 CmiLock(fftw_plan_lock);
01303 #ifdef NAMD_FFTW_3
01304 work = new fftwf_complex[n[0]];
01305 int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
01306 if ( myGridPe >= 0 ) {
01307 forward_plan_yz=new fftwf_plan[numGrids];
01308 backward_plan_yz=new fftwf_plan[numGrids];
01309 }
01310 if ( myTransPe >= 0 ) {
01311 forward_plan_x=new fftwf_plan[numGrids];
01312 backward_plan_x=new fftwf_plan[numGrids];
01313 }
01314 /* need one plan per grid */
01315 if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps. 1..." << endi;
01316 if ( myGridPe >= 0 ) {
01317 for( int g=0; g<numGrids; g++)
01318 {
01319 forward_plan_yz[g] = fftwf_plan_many_dft_r2c(2, n+1,
01320 localInfo[myGridPe].nx,
01321 qgrid + qgrid_size * g,
01322 NULL,
01323 1,
01324 myGrid.dim2 * myGrid.dim3,
01325 (fftwf_complex *)
01326 qgrid + qgrid_size * g,
01327 NULL,
01328 1,
01329 myGrid.dim2 * (myGrid.dim3/2),
01330 fftwFlags);
01331 }
01332 }
01333 int zdim = myGrid.dim3;
01334 int xStride=localInfo[myTransPe].ny_after_transpose *( myGrid.dim3 / 2);
01335 if ( ! CkMyPe() ) iout << " 2..." << endi;
01336 if ( myTransPe >= 0 ) {
01337 for( int g=0; g<numGrids; g++)
01338 {
01339
01340 forward_plan_x[g] = fftwf_plan_many_dft(1, n, xStride,
01341 (fftwf_complex *)
01342 (kgrid+qgrid_size*g),
01343 NULL,
01344 xStride,
01345 1,
01346 (fftwf_complex *)
01347 (kgrid+qgrid_size*g),
01348 NULL,
01349 xStride,
01350 1,
01351 FFTW_FORWARD,fftwFlags);
01352
01353 }
01354 }
01355 if ( ! CkMyPe() ) iout << " 3..." << endi;
01356 if ( myTransPe >= 0 ) {
01357 for( int g=0; g<numGrids; g++)
01358 {
01359 backward_plan_x[g] = fftwf_plan_many_dft(1, n, xStride,
01360 (fftwf_complex *)
01361 (kgrid+qgrid_size*g),
01362 NULL,
01363 xStride,
01364 1,
01365 (fftwf_complex *)
01366 (kgrid+qgrid_size*g),
01367 NULL,
01368 xStride,
01369 1,
01370 FFTW_BACKWARD, fftwFlags);
01371
01372 }
01373 }
01374 if ( ! CkMyPe() ) iout << " 4..." << endi;
01375 if ( myGridPe >= 0 ) {
01376 for( int g=0; g<numGrids; g++)
01377 {
01378 backward_plan_yz[g] = fftwf_plan_many_dft_c2r(2, n+1,
01379 localInfo[myGridPe].nx,
01380 (fftwf_complex *)
01381 qgrid + qgrid_size * g,
01382 NULL,
01383 1,
01384 myGrid.dim2*(myGrid.dim3/2),
01385 qgrid + qgrid_size * g,
01386 NULL,
01387 1,
01388 myGrid.dim2 * myGrid.dim3,
01389 fftwFlags);
01390 }
01391 }
01392 if ( ! CkMyPe() ) iout << " Done.\n" << endi;
01393
01394 #else
01395 work = new fftw_complex[n[0]];
01396
01397 if ( ! CkMyPe() ) iout << iINFO << "Optimizing 4 FFT steps. 1..." << endi;
01398 if ( myGridPe >= 0 ) {
01399 forward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_REAL_TO_COMPLEX,
01400 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01401 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
01402 }
01403 if ( ! CkMyPe() ) iout << " 2..." << endi;
01404 if ( myTransPe >= 0 ) {
01405 forward_plan_x = fftw_create_plan_specific(n[0], FFTW_FORWARD,
01406 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01407 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
01408 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
01409 }
01410 if ( ! CkMyPe() ) iout << " 3..." << endi;
01411 if ( myTransPe >= 0 ) {
01412 backward_plan_x = fftw_create_plan_specific(n[0], FFTW_BACKWARD,
01413 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01414 | FFTW_IN_PLACE | FFTW_USE_WISDOM, (fftw_complex *) kgrid,
01415 localInfo[myTransPe].ny_after_transpose * myGrid.dim3 / 2, work, 1);
01416 }
01417 if ( ! CkMyPe() ) iout << " 4..." << endi;
01418 if ( myGridPe >= 0 ) {
01419 backward_plan_yz = rfftwnd_create_plan_specific(2, n+1, FFTW_COMPLEX_TO_REAL,
01420 ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
01421 | FFTW_IN_PLACE | FFTW_USE_WISDOM, qgrid, 1, 0, 0);
01422 }
01423 if ( ! CkMyPe() ) iout << " Done.\n" << endi;
01424 #endif
01425 CmiUnlock(fftw_plan_lock);
01426 #else
01427 NAMD_die("Sorry, FFTW must be compiled in to use PME.");
01428 #endif
01429
01430 if ( myGridPe >= 0 && numSources == 0 )
01431 NAMD_bug("PME grid elements exist without sources.");
01432 grid_count = numSources;
01433 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
01434 trans_count = numGridPes;
01435 }
|
|
|
Definition at line 2204 of file ComputePme.C. References SimParameters::accelMDOn, PmeGrid::dim2, PmeGrid::dim3, ijpair::i, j, ijpair::j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), ReductionMgr::Object(), PmeGrid::order, REDUCTIONS_AMD, REDUCTIONS_BASIC, Node::simParameters, simParams, and ReductionMgr::willSubmit(). 02204 {
02205
02206 noWorkCount = 0;
02207 doWorkCount = 0;
02208 ungridForcesCount = 0;
02209
02210 reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_BASIC);
02211
02212 SimParameters *simParams = Node::Object()->simParameters;
02213
02214 strayChargeErrors = 0;
02215
02216 if (simParams->accelMDOn) {
02217 amd_reduction = ReductionMgr::Object()->willSubmit(REDUCTIONS_AMD);
02218 } else {
02219 amd_reduction = NULL;
02220 }
02221
02222 qsize = myGrid.K1 * myGrid.dim2 * myGrid.dim3;
02223 fsize = myGrid.K1 * myGrid.dim2;
02224 q_arr = new float*[fsize*numGrids];
02225 memset( (void*) q_arr, 0, fsize*numGrids * sizeof(float*) );
02226 q_list = new float*[fsize*numGrids];
02227 memset( (void*) q_list, 0, fsize*numGrids * sizeof(float*) );
02228 q_count = 0;
02229 f_arr = new char[fsize*numGrids];
02230 // memset to non-zero value has race condition on BlueGene/Q
02231 // memset( (void*) f_arr, 2, fsize*numGrids * sizeof(char) );
02232 for ( int n=fsize*numGrids, i=0; i<n; ++i ) f_arr[i] = 2;
02233 fz_arr = new char[myGrid.K3+myGrid.order-1];
02234
02235 for ( int g=0; g<numGrids; ++g ) {
02236 char *f = f_arr + g*fsize;
02237 if ( usePencils ) {
02238 int K1 = myGrid.K1;
02239 int K2 = myGrid.K2;
02240 int block1 = ( K1 + xBlocks - 1 ) / xBlocks;
02241 int block2 = ( K2 + yBlocks - 1 ) / yBlocks;
02242 int dim2 = myGrid.dim2;
02243 for (int ap=0; ap<numPencilsActive; ++ap) {
02244 int ib = activePencils[ap].i;
02245 int jb = activePencils[ap].j;
02246 int ibegin = ib*block1;
02247 int iend = ibegin + block1; if ( iend > K1 ) iend = K1;
02248 int jbegin = jb*block2;
02249 int jend = jbegin + block2; if ( jend > K2 ) jend = K2;
02250 int flen = numGrids * (iend - ibegin) * (jend - jbegin);
02251 for ( int i=ibegin; i<iend; ++i ) {
02252 for ( int j=jbegin; j<jend; ++j ) {
02253 f[i*dim2+j] = 0;
02254 }
02255 }
02256 }
02257 } else {
02258 int block1 = ( myGrid.K1 + numGridPes - 1 ) / numGridPes;
02259 bsize = block1 * myGrid.dim2 * myGrid.dim3;
02260 for (int pe=0; pe<numGridPes; pe++) {
02261 if ( ! recipPeDest[pe] ) continue;
02262 int start = pe * bsize;
02263 int len = bsize;
02264 if ( start >= qsize ) { start = 0; len = 0; }
02265 if ( start + len > qsize ) { len = qsize - start; }
02266 int zdim = myGrid.dim3;
02267 int fstart = start / zdim;
02268 int flen = len / zdim;
02269 memset(f + fstart, 0, flen*sizeof(char));
02270 }
02271 }
02272 }
02273
02274 #if USE_PERSISTENT
02275 recvGrid_handle = NULL;
02276 #endif
02277 }
|
|
|
Definition at line 1439 of file ComputePme.C. References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), BigReal, PmeGrid::block1, PmeGrid::block2, SimParameters::cutoff, j, PmeGrid::K1, PmeGrid::K2, SimParameters::lattice, PatchMap::max_a(), PatchMap::max_b(), PatchMap::min_a(), PatchMap::min_b(), PatchMap::node(), PatchMap::numPatches(), PatchMap::Object(), Node::Object(), PmeGrid::order, SimParameters::patchDimension, SimParameters::PMESendOrder, Random::reorder(), Node::simParameters, simParams, and Vector::unit(). 01439 {
01440 delete msg;
01441 if ( ! usePencils ) return;
01442
01443 SimParameters *simParams = Node::Object()->simParameters;
01444
01445 PatchMap *patchMap = PatchMap::Object();
01446 Lattice lattice = simParams->lattice;
01447 BigReal sysdima = lattice.a_r().unit() * lattice.a();
01448 BigReal sysdimb = lattice.b_r().unit() * lattice.b();
01449 BigReal cutoff = simParams->cutoff;
01450 BigReal patchdim = simParams->patchDimension;
01451 int numPatches = patchMap->numPatches();
01452
01453 pencilActive = new char[xBlocks*yBlocks];
01454 for ( int i=0; i<xBlocks; ++i ) {
01455 for ( int j=0; j<yBlocks; ++j ) {
01456 pencilActive[i*yBlocks+j] = 0;
01457 }
01458 }
01459
01460 for ( int pid=0; pid < numPatches; ++pid ) {
01461 int pnode = patchMap->node(pid);
01462 if ( pnode != CkMyPe() ) continue;
01463
01464 int shift1 = (myGrid.K1 + myGrid.order - 1)/2;
01465 int shift2 = (myGrid.K2 + myGrid.order - 1)/2;
01466
01467 BigReal minx = patchMap->min_a(pid);
01468 BigReal maxx = patchMap->max_a(pid);
01469 BigReal margina = 0.5 * ( patchdim - cutoff ) / sysdima;
01470 // min1 (max1) is smallest (largest) grid line for this patch
01471 int min1 = ((int) floor(myGrid.K1 * (minx - margina))) + shift1 - myGrid.order + 1;
01472 int max1 = ((int) floor(myGrid.K1 * (maxx + margina))) + shift1;
01473
01474 BigReal miny = patchMap->min_b(pid);
01475 BigReal maxy = patchMap->max_b(pid);
01476 BigReal marginb = 0.5 * ( patchdim - cutoff ) / sysdimb;
01477 // min2 (max2) is smallest (largest) grid line for this patch
01478 int min2 = ((int) floor(myGrid.K2 * (miny - marginb))) + shift2 - myGrid.order + 1;
01479 int max2 = ((int) floor(myGrid.K2 * (maxy + marginb))) + shift2;
01480
01481 for ( int i=min1; i<=max1; ++i ) {
01482 int ix = i;
01483 while ( ix >= myGrid.K1 ) ix -= myGrid.K1;
01484 while ( ix < 0 ) ix += myGrid.K1;
01485 for ( int j=min2; j<=max2; ++j ) {
01486 int jy = j;
01487 while ( jy >= myGrid.K2 ) jy -= myGrid.K2;
01488 while ( jy < 0 ) jy += myGrid.K2;
01489 pencilActive[(ix / myGrid.block1)*yBlocks + (jy / myGrid.block2)] = 1;
01490 }
01491 }
01492 }
01493
01494 numPencilsActive = 0;
01495 for ( int i=0; i<xBlocks; ++i ) {
01496 for ( int j=0; j<yBlocks; ++j ) {
01497 if ( pencilActive[i*yBlocks+j] ) {
01498 ++numPencilsActive;
01499 zPencil(i,j,0).dummyRecvGrid(CkMyPe(),0);
01500 }
01501 }
01502 }
01503 activePencils = new ijpair[numPencilsActive];
01504 numPencilsActive = 0;
01505 for ( int i=0; i<xBlocks; ++i ) {
01506 for ( int j=0; j<yBlocks; ++j ) {
01507 if ( pencilActive[i*yBlocks+j] ) {
01508 activePencils[numPencilsActive++] = ijpair(i,j);
01509 }
01510 }
01511 }
01512 if ( simParams->PMESendOrder ) {
01513 std::sort(activePencils,activePencils+numPencilsActive,ijpair_sortop_bit_reversed());
01514 } else {
01515 Random rand(CkMyPe());
01516 rand.reorder(activePencils,numPencilsActive);
01517 }
01518 //if ( numPencilsActive ) {
01519 // CkPrintf("node %d sending to %d pencils\n", CkMyPe(), numPencilsActive);
01520 //}
01521
01522 ungrid_count = numPencilsActive;
01523 }
|
|
|
Definition at line 439 of file ComputePme.C. References Node::Object(), pencilPMEProcessors, SimParameters::PMEPencils, Node::simParameters, and simParams. 00439 {
00440 SimParameters *simParams = Node::Object()->simParameters;
00441 return ( usePencils || (simParams->PMEPencils>0) ? pencilPMEProcessors[p] : isPmeFlag[p] );
00442 }
|
|
|
Definition at line 1768 of file ComputePme.C. References PmeGrid::dim3, PmeTransMsg::lattice, NodePmeInfo::npe, PmeTransMsg::nx, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PmeTransMsg::qgrid, PmeTransMsg::sequence, PmeTransMsg::x_start, and LocalPmeInfo::y_start_after_transpose. Referenced by recvSharedTrans(), and recvTrans(). 01768 {
01769 // CkPrintf("procTrans on Pe(%d)\n",CkMyPe());
01770 if ( trans_count == numGridPes ) {
01771 lattice = msg->lattice;
01772 grid_sequence = msg->sequence;
01773 }
01774
01775 if ( msg->nx ) {
01776 int zdim = myGrid.dim3;
01777 NodePmeInfo &nodeInfo(transNodeInfo[myTransNode]);
01778 int first_pe = nodeInfo.pe_start;
01779 int last_pe = first_pe+nodeInfo.npe-1;
01780 int y_skip = localInfo[myTransPe].y_start_after_transpose
01781 - localInfo[first_pe].y_start_after_transpose;
01782 int ny_msg = localInfo[last_pe].y_start_after_transpose
01783 + localInfo[last_pe].ny_after_transpose
01784 - localInfo[first_pe].y_start_after_transpose;
01785 int ny = localInfo[myTransPe].ny_after_transpose;
01786 int x_start = msg->x_start;
01787 int nx = msg->nx;
01788 for ( int g=0; g<numGrids; ++g ) {
01789 CmiMemcpy((void*)(kgrid + qgrid_size * g + x_start*ny*zdim),
01790 (void*)(msg->qgrid + nx*(ny_msg*g+y_skip)*zdim),
01791 nx*ny*zdim*sizeof(float));
01792 }
01793 }
01794
01795 --trans_count;
01796
01797 if ( trans_count == 0 ) {
01798 pmeProxyDir[CkMyPe()].gridCalc2();
01799 }
01800 }
|
|
|
Definition at line 1989 of file ComputePme.C. References PmeGrid::dim3, PmeUntransMsg::evir, PmeGrid::K2, NodePmeInfo::npe, LocalPmeInfo::nx, PmeUntransMsg::ny, NodePmeInfo::pe_start, PmeUntransMsg::qgrid, LocalPmeInfo::x_start, and PmeUntransMsg::y_start. Referenced by recvSharedUntrans(), and recvUntrans(). 01989 {
01990 // CkPrintf("recvUntrans on Pe(%d)\n",CkMyPe());
01991 if ( untrans_count == numTransPes ) {
01992 for ( int g=0; g<numGrids; ++g ) {
01993 recip_evir[g] = 0.;
01994 }
01995 }
01996
01997 #if CMK_BLUEGENEL
01998 CmiNetworkProgressAfter (0);
01999 #endif
02000
02001 NodePmeInfo &nodeInfo(gridNodeInfo[myGridNode]);
02002 int first_pe = nodeInfo.pe_start;
02003 int g;
02004 if ( myGridPe == first_pe ) for ( g=0; g<numGrids; ++g ) {
02005 recip_evir[g] += msg->evir[g];
02006 }
02007
02008 if ( msg->ny ) {
02009 int zdim = myGrid.dim3;
02010 int last_pe = first_pe+nodeInfo.npe-1;
02011 int x_skip = localInfo[myGridPe].x_start
02012 - localInfo[first_pe].x_start;
02013 int nx_msg = localInfo[last_pe].x_start
02014 + localInfo[last_pe].nx
02015 - localInfo[first_pe].x_start;
02016 int nx = localInfo[myGridPe].nx;
02017 int y_start = msg->y_start;
02018 int ny = msg->ny;
02019 int slicelen = myGrid.K2 * zdim;
02020 int cpylen = ny * zdim;
02021 for ( g=0; g<numGrids; ++g ) {
02022 float *q = qgrid + qgrid_size * g + y_start * zdim;
02023 float *qmsg = msg->qgrid + (nx_msg*g+x_skip) * cpylen;
02024 for ( int x = 0; x < nx; ++x ) {
02025 CmiMemcpy((void*)q, (void*)qmsg, cpylen*sizeof(float));
02026 q += slicelen;
02027 qmsg += cpylen;
02028 }
02029 }
02030 }
02031
02032 --untrans_count;
02033
02034 if ( untrans_count == 0 ) {
02035 pmeProxyDir[CkMyPe()].gridCalc3();
02036 }
02037 }
|
|
|
Definition at line 2116 of file ComputePme.C. Referenced by recvUngrid(). 02116 {
02117 if ( msg ) delete msg;
02118 --ungrid_count;
02119
02120 if ( ungrid_count == 0 ) {
02121 pmeProxyDir[CkMyPe()].ungridCalc();
02122 }
02123 }
|
|
||||||||||||||||
|
Definition at line 550 of file ComputePme.C. 00551 {
00552 xPencil = x; yPencil = y; zPencil = z;
00553
00554 if(CmiMyRank()==0)
00555 {
00556 pmeNodeProxy.ckLocalBranch()->xPencil=x;
00557 pmeNodeProxy.ckLocalBranch()->yPencil=y;
00558 pmeNodeProxy.ckLocalBranch()->zPencil=z;
00559 }
00560 }
|
|
|
Definition at line 1565 of file ComputePme.C. References PmeGrid::dim3, PmeGridMsg::fgrid, PmeGridMsg::lattice, NAMD_bug(), PmeGridMsg::qgrid, PmeGridMsg::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. 01565 {
01566 // CkPrintf("recvGrid from %d on Pe(%d)\n",msg->sourceNode,CkMyPe());
01567 if ( grid_count == 0 ) {
01568 NAMD_bug("Message order failure in ComputePmeMgr::recvGrid\n");
01569 }
01570 if ( grid_count == numSources ) {
01571 lattice = msg->lattice;
01572 grid_sequence = msg->sequence;
01573 }
01574
01575 int zdim = myGrid.dim3;
01576 int zlistlen = msg->zlistlen;
01577 int *zlist = msg->zlist;
01578 float *qmsg = msg->qgrid;
01579 for ( int g=0; g<numGrids; ++g ) {
01580 char *f = msg->fgrid + fgrid_len * g;
01581 float *q = qgrid + qgrid_size * g;
01582 for ( int i=0; i<fgrid_len; ++i ) {
01583 if ( f[i] ) {
01584 for ( int k=0; k<zlistlen; ++k ) {
01585 q[zlist[k]] += *(qmsg++);
01586 }
01587 }
01588 q += zdim;
01589 }
01590 }
01591
01592 gridmsg_reuse[numSources-grid_count] = msg;
01593 --grid_count;
01594
01595 if ( grid_count == 0 ) {
01596 pmeProxyDir[CkMyPe()].gridCalc1();
01597 if ( useBarrier ) pmeProxyDir[0].sendTransBarrier();
01598 }
01599 }
|
|
|
Definition at line 1750 of file ComputePme.C. References PmeSharedTransMsg::count, PmeSharedTransMsg::lock, PmeSharedTransMsg::msg, and procTrans(). 01750 {
01751 procTrans(msg->msg);
01752 CmiLock(msg->lock);
01753 int count = --(*msg->count);
01754 CmiUnlock(msg->lock);
01755 if ( count == 0 ) {
01756 CmiDestroyLock(msg->lock);
01757 delete msg->count;
01758 delete msg->msg;
01759 }
01760 delete msg;
01761 }
|
|
|
Definition at line 1971 of file ComputePme.C. References PmeSharedUntransMsg::count, PmeSharedUntransMsg::lock, PmeSharedUntransMsg::msg, and procUntrans(). 01971 {
01972 procUntrans(msg->msg);
01973 CmiLock(msg->lock);
01974 int count = --(*msg->count);
01975 CmiUnlock(msg->lock);
01976 if ( count == 0 ) {
01977 CmiDestroyLock(msg->lock);
01978 delete msg->count;
01979 delete msg->msg;
01980 }
01981 delete msg;
01982 }
|
|
|
Definition at line 1763 of file ComputePme.C. References procTrans(). 01763 {
01764 procTrans(msg);
01765 delete msg;
01766 }
|
|
|
Definition at line 2104 of file ComputePme.C. References copyPencils(), copyResults(), NAMD_bug(), and recvAck(). 02104 {
02105 // CkPrintf("recvUngrid on Pe(%d)\n",CkMyPe());
02106 if ( ungrid_count == 0 ) {
02107 NAMD_bug("Message order failure in ComputePmeMgr::recvUngrid\n");
02108 }
02109
02110 if ( usePencils ) copyPencils(msg);
02111 else copyResults(msg);
02112 delete msg;
02113 recvAck(0);
02114 }
|
|
|
Definition at line 1984 of file ComputePme.C. References procUntrans(). 01984 {
01985 procUntrans(msg);
01986 delete msg;
01987 }
|
|
||||||||||||
|
Definition at line 2764 of file ComputePme.C. References PmeGrid::block1, PmeGrid::dim2, PmeGrid::dim3, PmeGridMsg::fgrid, iERROR(), iout, j, PmeGrid::K2, PmeGrid::K3, PmeGridMsg::lattice, PmeGridMsg::len, PME_GRID_PRIORITY, PmeGridMsg::qgrid, PmeGridMsg::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. Referenced by ComputePme::doWork(). 02764 {
02765
02766 // iout << "Sending charge grid for " << numLocalAtoms << " atoms to FFT on " << iPE << ".\n" << endi;
02767
02768 bsize = myGrid.block1 * myGrid.dim2 * myGrid.dim3;
02769
02770 CProxy_ComputePmeMgr pmeProxy(CkpvAccess(BOCclass_group).computePmeMgr);
02771 for (int j=0; j<numGridPes; j++) {
02772 int pe = gridPeOrder[j]; // different order
02773 if ( ! recipPeDest[pe] && ! strayChargeErrors ) continue;
02774 int start = pe * bsize;
02775 int len = bsize;
02776 if ( start >= qsize ) { start = 0; len = 0; }
02777 if ( start + len > qsize ) { len = qsize - start; }
02778 int zdim = myGrid.dim3;
02779 int fstart = start / zdim;
02780 int flen = len / zdim;
02781 int fcount = 0;
02782 int i;
02783
02784 int g;
02785 for ( g=0; g<numGrids; ++g ) {
02786 char *f = f_arr + fstart + g*fsize;
02787 for ( i=0; i<flen; ++i ) {
02788 fcount += f[i];
02789 }
02790 if ( ! recipPeDest[pe] ) {
02791 int errfound = 0;
02792 for ( i=0; i<flen; ++i ) {
02793 if ( f[i] == 3 ) {
02794 errfound = 1;
02795 break;
02796 }
02797 }
02798 if ( errfound ) {
02799 iout << iERROR << "Stray PME grid charges detected: "
02800 << CkMyPe() << " sending to " << gridPeMap[pe] << " for planes";
02801 int iz = -1;
02802 for ( i=0; i<flen; ++i ) {
02803 if ( f[i] == 3 ) {
02804 f[i] = 2;
02805 int jz = (i+fstart)/myGrid.K2;
02806 if ( iz != jz ) { iout << " " << jz; iz = jz; }
02807 }
02808 }
02809 iout << "\n" << endi;
02810 }
02811 }
02812 }
02813
02814 #ifdef NETWORK_PROGRESS
02815 CmiNetworkProgress();
02816 #endif
02817
02818 if ( ! recipPeDest[pe] ) continue;
02819
02820 int zlistlen = 0;
02821 for ( i=0; i<myGrid.K3; ++i ) {
02822 if ( fz_arr[i] ) ++zlistlen;
02823 }
02824
02825 PmeGridMsg *msg = new (numGrids, zlistlen, flen*numGrids,
02826 fcount*zlistlen, PRIORITY_SIZE) PmeGridMsg;
02827
02828 msg->sourceNode = CkMyPe();
02829 msg->lattice = lattice;
02830 msg->start = fstart;
02831 msg->len = flen;
02832 msg->zlistlen = zlistlen;
02833 int *zlist = msg->zlist;
02834 zlistlen = 0;
02835 for ( i=0; i<myGrid.K3; ++i ) {
02836 if ( fz_arr[i] ) zlist[zlistlen++] = i;
02837 }
02838 float *qmsg = msg->qgrid;
02839 for ( g=0; g<numGrids; ++g ) {
02840 char *f = f_arr + fstart + g*fsize;
02841 CmiMemcpy((void*)(msg->fgrid+g*flen),(void*)f,flen*sizeof(char));
02842 float **q = q_arr + fstart + g*fsize;
02843 for ( i=0; i<flen; ++i ) {
02844 if ( f[i] ) {
02845 for ( int k=0; k<zlistlen; ++k ) {
02846 *(qmsg++) = q[i][zlist[k]];
02847 }
02848 }
02849 }
02850 }
02851
02852 msg->sequence = compute_sequence;
02853 SET_PRIORITY(msg,compute_sequence,PME_GRID_PRIORITY)
02854 pmeProxy[gridPeMap[pe]].recvGrid(msg);
02855 }
02856
02857 strayChargeErrors = 0;
02858
02859 }
|
|
||||||||||||
|
Definition at line 2569 of file ComputePme.C. References PmeGrid::block1, PmeGrid::block2, PmeGridMsg::destElem, PmeGrid::dim2, PmeGrid::dim3, PmeGridMsg::fgrid, PmeGridMsg::hasData, ijpair::i, iERROR(), iout, j, ijpair::j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PmeGridMsg::lattice, PmeGridMsg::len, NAMD_bug(), PME_GRID_PRIORITY, PmeGridMsg::qgrid, NodePmeMgr::recvZGrid(), PmeGridMsg::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, PmeGridMsg::zlistlen, and NodePmeMgr::zm. Referenced by ComputePme::doWork(). 02569 {
02570
02571 // iout << "Sending charge grid for " << numLocalAtoms << " atoms to FFT on " << iPE << ".\n" << endi;
02572
02573 #if USE_PERSISTENT
02574 if (recvGrid_handle== NULL) setup_recvgrid_persistent();
02575 #endif
02576 int K1 = myGrid.K1;
02577 int K2 = myGrid.K2;
02578 int dim2 = myGrid.dim2;
02579 int dim3 = myGrid.dim3;
02580 int block1 = myGrid.block1;
02581 int block2 = myGrid.block2;
02582
02583 // int savedMessages = 0;
02584 NodePmeMgr *npMgr = pmeNodeProxy[CkMyNode()].ckLocalBranch();
02585
02586 for (int ap=0; ap<numPencilsActive; ++ap) {
02587 int ib = activePencils[ap].i;
02588 int jb = activePencils[ap].j;
02589 int ibegin = ib*block1;
02590 int iend = ibegin + block1; if ( iend > K1 ) iend = K1;
02591 int jbegin = jb*block2;
02592 int jend = jbegin + block2; if ( jend > K2 ) jend = K2;
02593 int flen = numGrids * (iend - ibegin) * (jend - jbegin);
02594
02595 int fcount = 0;
02596 for ( int g=0; g<numGrids; ++g ) {
02597 char *f = f_arr + g*fsize;
02598 for ( int i=ibegin; i<iend; ++i ) {
02599 for ( int j=jbegin; j<jend; ++j ) {
02600 fcount += f[i*dim2+j];
02601 }
02602 }
02603 }
02604
02605 #ifdef NETWORK_PROGRESS
02606 CmiNetworkProgress();
02607 #endif
02608
02609 if ( ! pencilActive[ib*yBlocks+jb] )
02610 NAMD_bug("PME activePencils list inconsistent");
02611
02612 int zlistlen = 0;
02613 for ( int i=0; i<myGrid.K3; ++i ) {
02614 if ( fz_arr[i] ) ++zlistlen;
02615 }
02616
02617 int hd = ( fcount? 1 : 0 ); // has data?
02618 // if ( ! hd ) ++savedMessages;
02619
02620
02621 PmeGridMsg *msg = new ( hd*numGrids, hd*zlistlen, hd*flen,
02622 hd*fcount*zlistlen, PRIORITY_SIZE) PmeGridMsg;
02623 msg->sourceNode = CkMyPe();
02624 msg->hasData = hd;
02625 msg->lattice = lattice;
02626 if ( hd ) {
02627 #if 0
02628 msg->start = fstart;
02629 msg->len = flen;
02630 #else
02631 msg->start = -1; // obsolete?
02632 msg->len = -1; // obsolete?
02633 #endif
02634 msg->zlistlen = zlistlen;
02635 int *zlist = msg->zlist;
02636 zlistlen = 0;
02637 for ( int i=0; i<myGrid.K3; ++i ) {
02638 if ( fz_arr[i] ) zlist[zlistlen++] = i;
02639 }
02640 char *fmsg = msg->fgrid;
02641 float *qmsg = msg->qgrid;
02642 for ( int g=0; g<numGrids; ++g ) {
02643 char *f = f_arr + g*fsize;
02644 float **q = q_arr + g*fsize;
02645 for ( int i=ibegin; i<iend; ++i ) {
02646 for ( int j=jbegin; j<jend; ++j ) {
02647 *(fmsg++) = f[i*dim2+j];
02648 if( f[i*dim2+j] ) {
02649 for ( int k=0; k<zlistlen; ++k ) {
02650 *(qmsg++) = q[i*dim2+j][zlist[k]];
02651 }
02652 }
02653 }
02654 }
02655 }
02656 }
02657
02658 msg->sequence = compute_sequence;
02659 SET_PRIORITY(msg,compute_sequence,PME_GRID_PRIORITY)
02660 CmiEnableUrgentSend(1);
02661 #if USE_NODE_PAR_RECEIVE
02662 msg->destElem=CkArrayIndex3D(ib,jb,0);
02663 CProxy_PmePencilMap lzm = npMgr->zm;
02664 int destproc = lzm.ckLocalBranch()->procNum(0, msg->destElem);
02665 int destnode = CmiNodeOf(destproc);
02666
02667 #if 0
02668 CmiUsePersistentHandle(&recvGrid_handle[ap], 1);
02669 #endif
02670 pmeNodeProxy[destnode].recvZGrid(msg);
02671 #if 0
02672 CmiUsePersistentHandle(NULL, 0);
02673 #endif
02674 #else
02675 #if 0
02676 CmiUsePersistentHandle(&recvGrid_handle[ap], 1);
02677 #endif
02678 zPencil(ib,jb,0).recvGrid(msg);
02679 #if 0
02680 CmiUsePersistentHandle(NULL, 0);
02681 #endif
02682 #endif
02683 CmiEnableUrgentSend(0);
02684 }
02685
02686
02687 if ( strayChargeErrors ) {
02688 strayChargeErrors = 0;
02689 iout << iERROR << "Stray PME grid charges detected: "
02690 << CkMyPe() << " sending to (x,y)";
02691 for (int ib=0; ib<xBlocks; ++ib) {
02692 for (int jb=0; jb<yBlocks; ++jb) {
02693 int ibegin = ib*block1;
02694 int iend = ibegin + block1; if ( iend > K1 ) iend = K1;
02695 int jbegin = jb*block2;
02696 int jend = jbegin + block2; if ( jend > K2 ) jend = K2;
02697 int flen = numGrids * (iend - ibegin) * (jend - jbegin);
02698
02699 for ( int g=0; g<numGrids; ++g ) {
02700 char *f = f_arr + g*fsize;
02701 if ( ! pencilActive[ib*yBlocks+jb] ) {
02702 for ( int i=ibegin; i<iend; ++i ) {
02703 for ( int j=jbegin; j<jend; ++j ) {
02704 if ( f[i*dim2+j] == 3 ) {
02705 f[i*dim2+j] = 2;
02706 iout << " (" << i << "," << j << ")";
02707 }
02708 }
02709 }
02710 }
02711 }
02712 }
02713 }
02714 iout << "\n" << endi;
02715 }
02716
02717 // if ( savedMessages ) {
02718 // CkPrintf("Pe %d eliminated %d PME messages\n",CkMyPe(),savedMessages);
02719 // }
02720
02721 }
|
|
|
Definition at line 1672 of file ComputePme.C. References PmeGrid::dim3, fwdSharedTrans(), j, PmeGrid::K2, kgrid, PmeTransMsg::lattice, NodePmeInfo::npe, PmeTransMsg::nx, LocalPmeInfo::nx, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PME_TRANS_PRIORITY, PmeTransMsg::qgrid, qgrid_size, NodePmeInfo::real_node, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmeTransMsg::x_start, LocalPmeInfo::x_start, and LocalPmeInfo::y_start_after_transpose. 01672 {
01673 // CkPrintf("sendTrans on Pe(%d)\n",CkMyPe());
01674
01675 // send data for transpose
01676 int zdim = myGrid.dim3;
01677 int nx = localInfo[myGridPe].nx;
01678 int x_start = localInfo[myGridPe].x_start;
01679 int slicelen = myGrid.K2 * zdim;
01680
01681 ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects;
01682
01683 #if CMK_BLUEGENEL
01684 CmiNetworkProgressAfter (0);
01685 #endif
01686
01687 for (int j=0; j<numTransNodes; j++) {
01688 int node = transNodeOrder[j]; // different order on each node
01689 int pe = transNodeInfo[node].pe_start;
01690 int npe = transNodeInfo[node].npe;
01691 int totlen = 0;
01692 if ( node != myTransNode ) for (int i=0; i<npe; ++i, ++pe) {
01693 LocalPmeInfo &li = localInfo[pe];
01694 int cpylen = li.ny_after_transpose * zdim;
01695 totlen += cpylen;
01696 }
01697 PmeTransMsg *newmsg = new (nx * totlen * numGrids,
01698 PRIORITY_SIZE) PmeTransMsg;
01699 newmsg->sourceNode = myGridPe;
01700 newmsg->lattice = lattice;
01701 newmsg->x_start = x_start;
01702 newmsg->nx = nx;
01703 for ( int g=0; g<numGrids; ++g ) {
01704 float *qmsg = newmsg->qgrid + nx * totlen * g;
01705 pe = transNodeInfo[node].pe_start;
01706 for (int i=0; i<npe; ++i, ++pe) {
01707 LocalPmeInfo &li = localInfo[pe];
01708 int cpylen = li.ny_after_transpose * zdim;
01709 if ( node == myTransNode ) {
01710 ComputePmeMgr *m = mgrObjects[CkRankOf(transPeMap[pe])];
01711 qmsg = m->kgrid + m->qgrid_size * g + x_start*cpylen;
01712 }
01713 float *q = qgrid + qgrid_size * g + li.y_start_after_transpose * zdim;
01714 for ( int x = 0; x < nx; ++x ) {
01715 CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01716 q += slicelen;
01717 qmsg += cpylen;
01718 }
01719 }
01720 }
01721 newmsg->sequence = grid_sequence;
01722 SET_PRIORITY(newmsg,grid_sequence,PME_TRANS_PRIORITY)
01723 if ( node == myTransNode ) newmsg->nx = 0;
01724 if ( npe > 1 ) {
01725 if ( node == myTransNode ) fwdSharedTrans(newmsg);
01726 else pmeNodeProxy[transNodeInfo[node].real_node].recvTrans(newmsg);
01727 } else pmeProxy[transPeMap[transNodeInfo[node].pe_start]].recvTrans(newmsg);
01728 }
01729
01730 untrans_count = numTransPes;
01731
01732 }
|
|
|
Definition at line 1662 of file ComputePme.C. 01662 {
01663 sendTransBarrier_received += 1;
01664 // CkPrintf("sendTransBarrier on %d %d\n",myGridPe,numGridPes-sendTransBarrier_received);
01665 if ( sendTransBarrier_received < numGridPes ) return;
01666 sendTransBarrier_received = 0;
01667 for ( int i=0; i<numGridPes; ++i ) {
01668 pmeProxyDir[gridPeMap[i]].sendTrans();
01669 }
01670 }
|
|
|
Definition at line 2060 of file ComputePme.C. References PmeGrid::dim3, PmeGridMsg::evir, PmeGridMsg::fgrid, j, PmeGridMsg::len, PME_UNGRID_PRIORITY, PmeGridMsg::qgrid, SET_PRIORITY, PmeGridMsg::sourceNode, PmeGridMsg::start, PmeGridMsg::zlist, and PmeGridMsg::zlistlen. 02060 {
02061
02062 for ( int j=0; j<numSources; ++j ) {
02063 // int msglen = qgrid_len;
02064 PmeGridMsg *newmsg = gridmsg_reuse[j];
02065 int pe = newmsg->sourceNode;
02066 if ( j == 0 ) { // only need these once
02067 for ( int g=0; g<numGrids; ++g ) {
02068 newmsg->evir[g] = recip_evir[g];
02069 }
02070 } else {
02071 for ( int g=0; g<numGrids; ++g ) {
02072 newmsg->evir[g] = 0.;
02073 }
02074 }
02075 int zdim = myGrid.dim3;
02076 int flen = newmsg->len;
02077 int fstart = newmsg->start;
02078 int zlistlen = newmsg->zlistlen;
02079 int *zlist = newmsg->zlist;
02080 float *qmsg = newmsg->qgrid;
02081 for ( int g=0; g<numGrids; ++g ) {
02082 char *f = newmsg->fgrid + fgrid_len * g;
02083 float *q = qgrid + qgrid_size * g + (fstart-fgrid_start) * zdim;
02084 for ( int i=0; i<flen; ++i ) {
02085 if ( f[i] ) {
02086 for ( int k=0; k<zlistlen; ++k ) {
02087 *(qmsg++) = q[zlist[k]];
02088 }
02089 }
02090 q += zdim;
02091 }
02092 }
02093 newmsg->sourceNode = myGridPe;
02094
02095 SET_PRIORITY(newmsg,grid_sequence,PME_UNGRID_PRIORITY)
02096 CmiEnableUrgentSend(1);
02097 pmeProxyDir[pe].recvUngrid(newmsg);
02098 CmiEnableUrgentSend(0);
02099 }
02100 grid_count = numSources;
02101 memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) );
02102 }
|
|
|
Definition at line 1890 of file ComputePme.C. References PmeGrid::dim3, PmeUntransMsg::evir, fwdSharedUntrans(), j, PmeGrid::K2, NodePmeInfo::npe, LocalPmeInfo::nx, PmeUntransMsg::ny, LocalPmeInfo::ny_after_transpose, NodePmeInfo::pe_start, PME_UNTRANS_PRIORITY, qgrid, PmeUntransMsg::qgrid, qgrid_size, NodePmeInfo::real_node, SET_PRIORITY, PmeUntransMsg::sourceNode, LocalPmeInfo::x_start, PmeUntransMsg::y_start, and LocalPmeInfo::y_start_after_transpose. 01890 {
01891
01892 int zdim = myGrid.dim3;
01893 int y_start = localInfo[myTransPe].y_start_after_transpose;
01894 int ny = localInfo[myTransPe].ny_after_transpose;
01895 int slicelen = myGrid.K2 * zdim;
01896
01897 ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects;
01898
01899 #if CMK_BLUEGENEL
01900 CmiNetworkProgressAfter (0);
01901 #endif
01902
01903 // send data for reverse transpose
01904 for (int j=0; j<numGridNodes; j++) {
01905 int node = gridNodeOrder[j]; // different order on each node
01906 int pe = gridNodeInfo[node].pe_start;
01907 int npe = gridNodeInfo[node].npe;
01908 int totlen = 0;
01909 if ( node != myGridNode ) for (int i=0; i<npe; ++i, ++pe) {
01910 LocalPmeInfo &li = localInfo[pe];
01911 int cpylen = li.nx * zdim;
01912 totlen += cpylen;
01913 }
01914 PmeUntransMsg *newmsg = new (numGrids, ny * totlen * numGrids, PRIORITY_SIZE) PmeUntransMsg;
01915 newmsg->sourceNode = myTransPe;
01916 newmsg->y_start = y_start;
01917 newmsg->ny = ny;
01918 for ( int g=0; g<numGrids; ++g ) {
01919 if ( j == 0 ) { // only need these once
01920 newmsg->evir[g] = recip_evir2[g];
01921 } else {
01922 newmsg->evir[g] = 0.;
01923 }
01924 float *qmsg = newmsg->qgrid + ny * totlen * g;
01925 pe = gridNodeInfo[node].pe_start;
01926 for (int i=0; i<npe; ++i, ++pe) {
01927 LocalPmeInfo &li = localInfo[pe];
01928 if ( node == myGridNode ) {
01929 ComputePmeMgr *m = mgrObjects[CkRankOf(gridPeMap[pe])];
01930 qmsg = m->qgrid + m->qgrid_size * g + y_start * zdim;
01931 float *q = kgrid + qgrid_size*g + li.x_start*ny*zdim;
01932 int cpylen = ny * zdim;
01933 for ( int x = 0; x < li.nx; ++x ) {
01934 CmiMemcpy((void*)qmsg, (void*)q, cpylen*sizeof(float));
01935 q += cpylen;
01936 qmsg += slicelen;
01937 }
01938 } else {
01939 CmiMemcpy((void*)qmsg,
01940 (void*)(kgrid + qgrid_size*g + li.x_start*ny*zdim),
01941 li.nx*ny*zdim*sizeof(float));
01942 qmsg += li.nx*ny*zdim;
01943 }
01944 }
01945 }
01946 SET_PRIORITY(newmsg,grid_sequence,PME_UNTRANS_PRIORITY)
01947 if ( node == myGridNode ) newmsg->ny = 0;
01948 if ( npe > 1 ) {
01949 if ( node == myGridNode ) fwdSharedUntrans(newmsg);
01950 else pmeNodeProxy[gridNodeInfo[node].real_node].recvUntrans(newmsg);
01951 } else pmeProxy[gridPeMap[gridNodeInfo[node].pe_start]].recvUntrans(newmsg);
01952 }
01953
01954 trans_count = numGridPes;
01955 }
|
|
|
Definition at line 3060 of file ComputePme.C. References SimParameters::alchElecLambda, SimParameters::alchFepElecOn, SimParameters::alchFepWhamOn, SimParameters::alchLambda2, BigReal, SubmitReduction::item(), WorkDistrib::messageEnqueueWork(), Node::Object(), REDUCTION_ELECT_ENERGY_PME_TI_1, REDUCTION_ELECT_ENERGY_PME_TI_2, REDUCTION_ELECT_ENERGY_SLOW, REDUCTION_ELECT_ENERGY_SLOW_F, REDUCTION_STRAY_CHARGE_ERRORS, ResizeArray< Elem >::resize(), Node::simParameters, simParams, ResizeArray< Elem >::size(), and SubmitReduction::submit(). Referenced by ComputePme::doWork(). 03060 {
03061
03062 for ( int g=0; g<numGrids; ++g ) {
03063 float scale = 1.;
03064 if ( alchFepOn || alchThermIntOn ) {
03065 if ( g == 0 ) scale = elecLambdaUp;
03066 else if ( g == 1 ) scale = elecLambdaDown;
03067 else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
03068 if (alchDecouple) {
03069 if ( g == 2 ) scale = 1-elecLambdaUp;
03070 else if ( g == 3 ) scale = 1-elecLambdaDown;
03071 else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
03072 }
03073 } else if ( lesOn ) {
03074 scale = 1.0 / lesFactor;
03075 } else if ( pairOn ) {
03076 scale = ( g == 0 ? 1. : -1. );
03077 }
03078 reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[g][0] * scale;
03079 reduction->item(REDUCTION_VIRIAL_SLOW_XX) += evir[g][1] * scale;
03080 reduction->item(REDUCTION_VIRIAL_SLOW_XY) += evir[g][2] * scale;
03081 reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += evir[g][3] * scale;
03082 reduction->item(REDUCTION_VIRIAL_SLOW_YX) += evir[g][2] * scale;
03083 reduction->item(REDUCTION_VIRIAL_SLOW_YY) += evir[g][4] * scale;
03084 reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += evir[g][5] * scale;
03085 reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += evir[g][3] * scale;
03086 reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += evir[g][5] * scale;
03087 reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += evir[g][6] * scale;
03088
03089 if (amd_reduction) {
03090 amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += evir[g][0] * scale;
03091 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XX) += evir[g][1] * scale;
03092 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XY) += evir[g][2] * scale;
03093 amd_reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += evir[g][3] * scale;
03094 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YX) += evir[g][2] * scale;
03095 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YY) += evir[g][4] * scale;
03096 amd_reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += evir[g][5] * scale;
03097 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += evir[g][3] * scale;
03098 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += evir[g][5] * scale;
03099 amd_reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += evir[g][6] * scale;
03100 }
03101
03102 float scale2 = 0.;
03103
03104 //alchElecLambdaStart = (alchFepOn || alchThermIntOn) ? simParams->alchElecLambdaStart : 0;
03105 SimParameters *simParams = Node::Object()->simParameters;
03106
03107 if (alchFepOn) {
03108 BigReal elecLambda2Up=0.0, elecLambda2Down=0.0;
03109 if(simParams->alchFepWhamOn) {
03110 if(simParams->alchFepElecOn) {
03111 elecLambda2Up = simParams->alchElecLambda;
03112 elecLambda2Down = 1.0 - simParams->alchElecLambda;
03113 }
03114 else {
03115 elecLambda2Up = 0.0;
03116 elecLambda2Down = 1.0;
03117 }
03118 }
03119 else {
03120 elecLambda2Up = (simParams->alchLambda2 <= alchElecLambdaStart)? 0. : \
03121 (simParams->alchLambda2 - alchElecLambdaStart) / (1. - alchElecLambdaStart);
03122 elecLambda2Down = ((1-simParams->alchLambda2) <= alchElecLambdaStart)? 0. : \
03123 ((1-simParams->alchLambda2) - alchElecLambdaStart) / (1. - alchElecLambdaStart);
03124 }
03125
03126 if ( g == 0 ) scale2 = elecLambda2Up;
03127 else if ( g == 1 ) scale2 = elecLambda2Down;
03128 else if ( g == 2 ) scale2 = (elecLambda2Up + elecLambda2Down - 1)*(-1);
03129 if (alchDecouple && g == 2 ) scale2 = 1 - elecLambda2Up;
03130 else if (alchDecouple && g == 3 ) scale2 = 1 - elecLambda2Down;
03131 else if (alchDecouple && g == 4 ) scale2 = (elecLambda2Up + elecLambda2Down - 1)*(-1);
03132 }
03133 if(simParams->alchFepWhamOn && simParams->alchFepElecOn) { // FEP with wham post-process
03134 if( g==0 ) scale2 = scale + 1.0;
03135 else if( g==1 ) scale2 = scale - 1.0;
03136 else if( g==2 ) scale2 = scale - 1.0;
03137 else if( g==3 ) scale2 = scale + 1.0;
03138 }
03139 reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += evir[g][0] * scale2;
03140 if (amd_reduction) amd_reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += evir[g][0] * scale2;
03141
03142 if (alchThermIntOn) {
03143
03144 // no decoupling:
03145 // part. 1 <-> all of system except partition 2: g[0] - g[2]
03146 // (interactions between all atoms [partition 0 OR partition 1],
03147 // minus all [within partition 0])
03148 // U = elecLambdaUp * (U[0] - U[2])
03149 // dU/dl = U[0] - U[2];
03150
03151 // part. 2 <-> all of system except partition 1: g[1] - g[2]
03152 // (interactions between all atoms [partition 0 OR partition 2],
03153 // minus all [within partition 0])
03154 // U = elecLambdaDown * (U[1] - U[2])
03155 // dU/dl = U[1] - U[2];
03156
03157 // alchDecouple:
03158 // part. 1 <-> part. 0: g[0] - g[2] - g[4]
03159 // (interactions between all atoms [partition 0 OR partition 1]
03160 // minus all [within partition 1] minus all [within partition 0]
03161 // U = elecLambdaUp * (U[0] - U[4]) + (1-elecLambdaUp)* U[2]
03162 // dU/dl = U[0] - U[2] - U[4];
03163
03164 // part. 2 <-> part. 0: g[1] - g[3] - g[4]
03165 // (interactions between all atoms [partition 0 OR partition 2]
03166 // minus all [within partition 2] minus all [within partition 0]
03167 // U = elecLambdaDown * (U[1] - U[4]) + (1-elecLambdaDown)* U[3]
03168 // dU/dl = U[1] - U[3] - U[4];
03169
03170
03171 if ( g == 0 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) += evir[g][0];
03172 if ( g == 1 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) += evir[g][0];
03173 if (!alchDecouple) {
03174 if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
03175 if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
03176 }
03177 else { // alchDecouple
03178 if ( g == 2 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
03179 if ( g == 3 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
03180 if ( g == 4 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_1) -= evir[g][0];
03181 if ( g == 4 ) reduction->item(REDUCTION_ELECT_ENERGY_PME_TI_2) -= evir[g][0];
03182 }
03183 }
03184 }
03185 reduction->item(REDUCTION_STRAY_CHARGE_ERRORS) += strayChargeErrors;
03186 reduction->submit();
03187 if (amd_reduction) amd_reduction->submit();
03188
03189
03190 for ( int i=0; i<heldComputes.size(); ++i ) {
03191 WorkDistrib::messageEnqueueWork(heldComputes[i]);
03192 }
03193 heldComputes.resize(0);
03194 }
|
|
|
Definition at line 2125 of file ComputePme.C. References j, PmeGrid::K3, WorkDistrib::messageEnqueueWork(), PmeGrid::order, and ResizeArray< Elem >::size(). 02125 {
02126 // CkPrintf("ungridCalc on Pe(%d)\n",CkMyPe());
02127
02128 // for (int j=0; j<myGrid.order-1; ++j) {
02129 // fz_arr[myGrid.K3+j] = fz_arr[j];
02130 // }
02131 for (int i=0; i<q_count; ++i) {
02132 for (int j=0; j<myGrid.order-1; ++j) {
02133 q_list[i][myGrid.K3+j] = q_list[i][j];
02134 }
02135 }
02136
02137 ungridForcesCount = pmeComputes.size();
02138
02139 for ( int i=0; i<pmeComputes.size(); ++i ) {
02140 WorkDistrib::messageEnqueueWork(pmeComputes[i]);
02141 // pmeComputes[i]->ungridForces();
02142 }
02143 // submitReductions(); // must follow all ungridForces()
02144
02145 ungrid_count = (usePencils ? numPencilsActive : numDestRecipPes );
02146 }
|
|
|
Definition at line 278 of file ComputePme.C. |
|
|
Definition at line 279 of file ComputePme.C. |
|
|
Definition at line 433 of file ComputePme.C. Referenced by ComputePmeMgr(), initialize(), and ~ComputePmeMgr(). |
1.3.9.1