| version 1.228 | version 1.229 |
|---|---|
| … | … |
| void recvGrid(PmeGridMsg *); | void recvGrid(PmeGridMsg *); |
| void gridCalc1(void); | void gridCalc1(void); |
| void sendTransBarrier(void); | void sendTransBarrier(void); |
| | void sendTransSubset(int first, int last); |
| void sendTrans(void); | void sendTrans(void); |
| void fwdSharedTrans(PmeTransMsg *); | void fwdSharedTrans(PmeTransMsg *); |
| void recvSharedTrans(PmeSharedTransMsg *); | void recvSharedTrans(PmeSharedTransMsg *); |
| … | … |
| void fwdSharedUntrans(PmeUntransMsg *); | void fwdSharedUntrans(PmeUntransMsg *); |
| void recvSharedUntrans(PmeSharedUntransMsg *); | void recvSharedUntrans(PmeSharedUntransMsg *); |
| void sendUntrans(void); | void sendUntrans(void); |
| | void sendUntransSubset(int first, int last); |
| void recvUntrans(PmeUntransMsg *); | void recvUntrans(PmeUntransMsg *); |
| void procUntrans(PmeUntransMsg *); | void procUntrans(PmeUntransMsg *); |
| void gridCalc3(void); | void gridCalc3(void); |
| void sendUngrid(void); | void sendUngrid(void); |
| | void sendUngridSubset(int first, int last); |
| void recvUngrid(PmeGridMsg *); | void recvUngrid(PmeGridMsg *); |
| void recvAck(PmeAckMsg *); | void recvAck(PmeAckMsg *); |
| void copyResults(PmeGridMsg *); | void copyResults(PmeGridMsg *); |
| … | … |
| } | } |
| } | } |
| | |
| | static inline void PmeSlabSendTrans(int first, int last, void *result, int paraNum, void *param) { |
| | ComputePmeMgr *mgr = (ComputePmeMgr *)param; |
| | mgr->sendTransSubset(first, last); |
| | } |
| | |
| void ComputePmeMgr::sendTrans(void) { | void ComputePmeMgr::sendTrans(void) { |
| | |
| | untrans_count = numTransPes; |
| | |
| | #if CMK_SMP && USE_CKLOOP |
| | int useCkLoop = Node::Object()->simParameters->useCkLoop; |
| | if ( useCkLoop >= CKLOOP_CTRL_PME_SENDTRANS && CkNumPes() >= 2 * numGridPes) { |
| | CkLoop_Parallelize(PmeSlabSendTrans, 1, (void *)this, CkMyNodeSize(), 0, numTransNodes-1, 0); // no sync |
| | } else |
| | #endif |
| | { |
| | sendTransSubset(0, numTransNodes-1); |
| | } |
| | |
| | } |
| | |
| | void ComputePmeMgr::sendTransSubset(int first, int last) { |
| // CkPrintf("sendTrans on Pe(%d)\n",CkMyPe()); | // CkPrintf("sendTrans on Pe(%d)\n",CkMyPe()); |
| | |
| // send data for transpose | // send data for transpose |
| … | … |
| CmiNetworkProgressAfter (0); | CmiNetworkProgressAfter (0); |
| #endif | #endif |
| | |
| for (int j=0; j<numTransNodes; j++) { | for (int j=first; j<=last; j++) { |
| int node = transNodeOrder[j]; // different order on each node | int node = transNodeOrder[j]; // different order on each node |
| int pe = transNodeInfo[node].pe_start; | int pe = transNodeInfo[node].pe_start; |
| int npe = transNodeInfo[node].npe; | int npe = transNodeInfo[node].npe; |
| … | … |
| else pmeNodeProxy[transNodeInfo[node].real_node].recvTrans(newmsg); | else pmeNodeProxy[transNodeInfo[node].real_node].recvTrans(newmsg); |
| } else pmeProxy[transPeMap[transNodeInfo[node].pe_start]].recvTrans(newmsg); | } else pmeProxy[transPeMap[transNodeInfo[node].pe_start]].recvTrans(newmsg); |
| } | } |
| | |
| untrans_count = numTransPes; | |
| | |
| } | } |
| | |
| void ComputePmeMgr::fwdSharedTrans(PmeTransMsg *msg) { | void ComputePmeMgr::fwdSharedTrans(PmeTransMsg *msg) { |
| … | … |
| pmeProxyDir[CkMyPe()].sendUntrans(); | pmeProxyDir[CkMyPe()].sendUntrans(); |
| } | } |
| | |
| void ComputePmeMgr::sendUntrans(void) { | static inline void PmeSlabSendUntrans(int first, int last, void *result, int paraNum, void *param) { |
| | ComputePmeMgr *mgr = (ComputePmeMgr *)param; |
| | mgr->sendUntransSubset(first, last); |
| | } |
| | |
| int zdim = myGrid.dim3; | void ComputePmeMgr::sendUntrans(void) { |
| int y_start = localInfo[myTransPe].y_start_after_transpose; | |
| int ny = localInfo[myTransPe].ny_after_transpose; | |
| int slicelen = myGrid.K2 * zdim; | |
| | |
| ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects; | trans_count = numGridPes; |
| | |
| { // send energy and virial | { // send energy and virial |
| PmeEvirMsg *newmsg = new (numGrids, PRIORITY_SIZE) PmeEvirMsg; | PmeEvirMsg *newmsg = new (numGrids, PRIORITY_SIZE) PmeEvirMsg; |
| … | … |
| CmiEnableUrgentSend(0); | CmiEnableUrgentSend(0); |
| } | } |
| | |
| | #if CMK_SMP && USE_CKLOOP |
| | int useCkLoop = Node::Object()->simParameters->useCkLoop; |
| | if ( useCkLoop >= CKLOOP_CTRL_PME_SENDUNTRANS && CkNumPes() >= 2 * numTransPes) { |
| | CkLoop_Parallelize(PmeSlabSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, numGridNodes-1, 0); // no sync |
| | } else |
| | #endif |
| | { |
| | sendUntransSubset(0, numGridNodes-1); |
| | } |
| | |
| | } |
| | |
| | void ComputePmeMgr::sendUntransSubset(int first, int last) { |
| | |
| | int zdim = myGrid.dim3; |
| | int y_start = localInfo[myTransPe].y_start_after_transpose; |
| | int ny = localInfo[myTransPe].ny_after_transpose; |
| | int slicelen = myGrid.K2 * zdim; |
| | |
| | ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects; |
| | |
| #if CMK_BLUEGENEL | #if CMK_BLUEGENEL |
| CmiNetworkProgressAfter (0); | CmiNetworkProgressAfter (0); |
| #endif | #endif |
| | |
| // send data for reverse transpose | // send data for reverse transpose |
| for (int j=0; j<numGridNodes; j++) { | for (int j=first; j<=last; j++) { |
| int node = gridNodeOrder[j]; // different order on each node | int node = gridNodeOrder[j]; // different order on each node |
| int pe = gridNodeInfo[node].pe_start; | int pe = gridNodeInfo[node].pe_start; |
| int npe = gridNodeInfo[node].npe; | int npe = gridNodeInfo[node].npe; |
| … | … |
| else pmeNodeProxy[gridNodeInfo[node].real_node].recvUntrans(newmsg); | else pmeNodeProxy[gridNodeInfo[node].real_node].recvUntrans(newmsg); |
| } else pmeProxy[gridPeMap[gridNodeInfo[node].pe_start]].recvUntrans(newmsg); | } else pmeProxy[gridPeMap[gridNodeInfo[node].pe_start]].recvUntrans(newmsg); |
| } | } |
| | |
| trans_count = numGridPes; | |
| } | } |
| | |
| void ComputePmeMgr::fwdSharedUntrans(PmeUntransMsg *msg) { | void ComputePmeMgr::fwdSharedUntrans(PmeUntransMsg *msg) { |
| … | … |
| pmeProxyDir[CkMyPe()].sendUngrid(); | pmeProxyDir[CkMyPe()].sendUngrid(); |
| } | } |
| | |
| | static inline void PmeSlabSendUngrid(int first, int last, void *result, int paraNum, void *param) { |
| | ComputePmeMgr *mgr = (ComputePmeMgr *)param; |
| | mgr->sendUngridSubset(first, last); |
| | } |
| | |
| void ComputePmeMgr::sendUngrid(void) { | void ComputePmeMgr::sendUngrid(void) { |
| | |
| | #if CMK_SMP && USE_CKLOOP |
| | int useCkLoop = Node::Object()->simParameters->useCkLoop; |
| | if ( useCkLoop >= CKLOOP_CTRL_PME_SENDUNTRANS && CkNumPes() >= 2 * numGridPes) { |
| | CkLoop_Parallelize(PmeSlabSendUngrid, 1, (void *)this, CkMyNodeSize(), 0, numSources-1, 1); // sync |
| | } else |
| | #endif |
| | { |
| | sendUngridSubset(0, numSources-1); |
| | } |
| | |
| | grid_count = numSources; |
| | memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) ); |
| | } |
| | |
| | void ComputePmeMgr::sendUngridSubset(int first, int last) { |
| | |
| #ifdef NAMD_CUDA | #ifdef NAMD_CUDA |
| const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY ); | const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY ); |
| #else | #else |
| const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ; | const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ; |
| #endif | #endif |
| | |
| for ( int j=0; j<numSources; ++j ) { | for ( int j=first; j<=last; ++j ) { |
| // int msglen = qgrid_len; | // int msglen = qgrid_len; |
| PmeGridMsg *newmsg = gridmsg_reuse[j]; | PmeGridMsg *newmsg = gridmsg_reuse[j]; |
| int pe = newmsg->sourceNode; | int pe = newmsg->sourceNode; |
| … | … |
| pmeProxyDir[pe].recvUngrid(newmsg); | pmeProxyDir[pe].recvUngrid(newmsg); |
| CmiEnableUrgentSend(0); | CmiEnableUrgentSend(0); |
| } | } |
| grid_count = numSources; | |
| memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) ); | |
| } | } |
| | |
| void ComputePmeMgr::recvUngrid(PmeGridMsg *msg) { | void ComputePmeMgr::recvUngrid(PmeGridMsg *msg) { |
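
The change from 1.228 to 1.229 applies one pattern three times: each monolithic send loop (`sendTrans`, `sendUntrans`, `sendUngrid`) is split into a `*Subset(first, last)` method that covers an inclusive node range, and a small static wrapper (`PmeSlabSendTrans`, `PmeSlabSendUntrans`, `PmeSlabSendUngrid`) lets `CkLoop_Parallelize` fan those ranges out across the PEs of an SMP node, with the original single call kept as the sequential fallback. The sketch below is a minimal, Charm++-free illustration of that refactoring pattern, not NAMD code: `SlabSender`, `sendAll`, `sendSubset`, `doSend`, and the `std::thread` dispatch are hypothetical stand-ins for the `ComputePmeMgr` methods and for `CkLoop_Parallelize`.

```cpp
// Minimal sketch of the 1.229 refactoring: split a monolithic send loop into
// a Subset(first, last) method over an inclusive range, then either dispatch
// contiguous chunks of that range in parallel (standing in for
// CkLoop_Parallelize) or fall back to one sequential call over the full range.
#include <algorithm>
#include <cstdio>
#include <thread>
#include <vector>

class SlabSender {
public:
  explicit SlabSender(int numNodes) : numNodes_(numNodes) {}

  // Analogue of sendTrans()/sendUntrans()/sendUngrid(): choose between a
  // chunked parallel dispatch and the sequential fallback kept in the diff.
  void sendAll(bool useLoopParallelism, int numWorkers) {
    if (useLoopParallelism && numWorkers > 1) {
      // Stand-in for CkLoop_Parallelize(PmeSlabSend*, 1, this, CkMyNodeSize(),
      // 0, numNodes_-1, ...): split the inclusive range [0, numNodes_-1]
      // into one contiguous chunk per worker thread.
      std::vector<std::thread> workers;
      const int chunk = (numNodes_ + numWorkers - 1) / numWorkers;
      for (int first = 0; first < numNodes_; first += chunk) {
        const int last = std::min(first + chunk, numNodes_) - 1;
        workers.emplace_back([this, first, last] { sendSubset(first, last); });
      }
      for (std::thread &w : workers) w.join();
    } else {
      sendSubset(0, numNodes_ - 1);  // sequential path, as in the else branch
    }
  }

  // Analogue of sendTransSubset(first, last): the old loop body, now limited
  // to an inclusive index range so independent chunks can run concurrently.
  void sendSubset(int first, int last) {
    for (int j = first; j <= last; ++j) doSend(j);
  }

private:
  // Placeholder for building a Pme*Msg and handing it to the proxy.
  void doSend(int node) { std::printf("sending slab data for node %d\n", node); }

  int numNodes_;
};

int main() {
  SlabSender sender(8);
  sender.sendAll(/*useLoopParallelism=*/true, /*numWorkers=*/4);
  return 0;
}
```

One detail the sketch does not reproduce: in 1.229 the shared bookkeeping is kept out of the parallelized region. `untrans_count` is set before the `sendTrans` dispatch, `trans_count` before the `sendUntrans` dispatch, and `grid_count` plus the `qgrid` memset run after the synchronized (`sync` flag set to 1) `sendUngrid` dispatch, so the concurrent chunks only build and send their own per-node messages.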