version 1.228 | version 1.229 |
---|---|
| |
void recvGrid(PmeGridMsg *); | void recvGrid(PmeGridMsg *); |
void gridCalc1(void); | void gridCalc1(void); |
void sendTransBarrier(void); | void sendTransBarrier(void); |
| void sendTransSubset(int first, int last); |
void sendTrans(void); | void sendTrans(void); |
void fwdSharedTrans(PmeTransMsg *); | void fwdSharedTrans(PmeTransMsg *); |
void recvSharedTrans(PmeSharedTransMsg *); | void recvSharedTrans(PmeSharedTransMsg *); |
| |
void fwdSharedUntrans(PmeUntransMsg *); | void fwdSharedUntrans(PmeUntransMsg *); |
void recvSharedUntrans(PmeSharedUntransMsg *); | void recvSharedUntrans(PmeSharedUntransMsg *); |
void sendUntrans(void); | void sendUntrans(void); |
| void sendUntransSubset(int first, int last); |
void recvUntrans(PmeUntransMsg *); | void recvUntrans(PmeUntransMsg *); |
void procUntrans(PmeUntransMsg *); | void procUntrans(PmeUntransMsg *); |
void gridCalc3(void); | void gridCalc3(void); |
void sendUngrid(void); | void sendUngrid(void); |
| void sendUngridSubset(int first, int last); |
void recvUngrid(PmeGridMsg *); | void recvUngrid(PmeGridMsg *); |
void recvAck(PmeAckMsg *); | void recvAck(PmeAckMsg *); |
void copyResults(PmeGridMsg *); | void copyResults(PmeGridMsg *); |
| |
} | } |
} | } |
| |
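 | // CkLoop work function: CkLoop_Parallelize() calls this on each helper |
 | // PE with a [first,last] slice of the loop range; result and paraNum |
 | // are unused here. |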
| static inline void PmeSlabSendTrans(int first, int last, void *result, int paraNum, void *param) { |
| ComputePmeMgr *mgr = (ComputePmeMgr *)param; |
| mgr->sendTransSubset(first, last); |
| } |
| |
void ComputePmeMgr::sendTrans(void) { | void ComputePmeMgr::sendTrans(void) { |
| |
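 | // Reset the reply counter before any transpose message leaves this PE; |
 | // with chunked sends a reply could otherwise race the update. |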
| untrans_count = numTransPes; |
| |
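 | // If CkLoop is enabled at this level and there are spare PEs, split the |
 | // per-node send loop across the SMP node; otherwise send serially below. |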
| #if CMK_SMP && USE_CKLOOP |
| int useCkLoop = Node::Object()->simParameters->useCkLoop; |
| if ( useCkLoop >= CKLOOP_CTRL_PME_SENDTRANS && CkNumPes() >= 2 * numGridPes) { |
| CkLoop_Parallelize(PmeSlabSendTrans, 1, (void *)this, CkMyNodeSize(), 0, numTransNodes-1, 0); // no sync |
| } else |
| #endif |
| { |
| sendTransSubset(0, numTransNodes-1); |
| } |
| |
| } |
| |
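 | // Sends transpose slabs for trans nodes [first..last]: the full range in |
 | // the serial path, or one chunk when called from PmeSlabSendTrans. |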
| void ComputePmeMgr::sendTransSubset(int first, int last) { |
// CkPrintf("sendTrans on Pe(%d)\n",CkMyPe()); | // CkPrintf("sendTrans on Pe(%d)\n",CkMyPe()); |
| |
// send data for transpose | // send data for transpose |
| |
#if CMK_BLUEGENEL | #if CMK_BLUEGENEL |
CmiNetworkProgressAfter (0); | CmiNetworkProgressAfter (0); |
#endif | #endif |
| |
for (int j=0; j<numTransNodes; j++) { | for (int j=first; j<=last; j++) { |
int node = transNodeOrder[j]; // different order on each node | int node = transNodeOrder[j]; // different order on each node |
int pe = transNodeInfo[node].pe_start; | int pe = transNodeInfo[node].pe_start; |
int npe = transNodeInfo[node].npe; | int npe = transNodeInfo[node].npe; |
| |
else pmeNodeProxy[transNodeInfo[node].real_node].recvTrans(newmsg); | else pmeNodeProxy[transNodeInfo[node].real_node].recvTrans(newmsg); |
} else pmeProxy[transPeMap[transNodeInfo[node].pe_start]].recvTrans(newmsg); | } else pmeProxy[transPeMap[transNodeInfo[node].pe_start]].recvTrans(newmsg); |
} | } |
| |
untrans_count = numTransPes; | |
| |
} | } |
| |
void ComputePmeMgr::fwdSharedTrans(PmeTransMsg *msg) { | void ComputePmeMgr::fwdSharedTrans(PmeTransMsg *msg) { |
| |
pmeProxyDir[CkMyPe()].sendUntrans(); | pmeProxyDir[CkMyPe()].sendUntrans(); |
} | } |
| |
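 | // CkLoop work function for the reverse transpose; mirrors PmeSlabSendTrans. |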
void ComputePmeMgr::sendUntrans(void) { | static inline void PmeSlabSendUntrans(int first, int last, void *result, int paraNum, void *param) { |
| ComputePmeMgr *mgr = (ComputePmeMgr *)param; |
| mgr->sendUntransSubset(first, last); |
| } |
| |
int zdim = myGrid.dim3; | void ComputePmeMgr::sendUntrans(void) { |
int y_start = localInfo[myTransPe].y_start_after_transpose; | |
int ny = localInfo[myTransPe].ny_after_transpose; | |
int slicelen = myGrid.K2 * zdim; | |
| |
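 | // As in sendTrans(), reset the counter before the sends rather than after. |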
ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects; | trans_count = numGridPes; |
| |
{ // send energy and virial | { // send energy and virial |
PmeEvirMsg *newmsg = new (numGrids, PRIORITY_SIZE) PmeEvirMsg; | PmeEvirMsg *newmsg = new (numGrids, PRIORITY_SIZE) PmeEvirMsg; |
| |
CmiEnableUrgentSend(0); | CmiEnableUrgentSend(0); |
} | } |
| |
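 | // Same CkLoop split as in sendTrans(), this time over the grid nodes. |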
| #if CMK_SMP && USE_CKLOOP |
| int useCkLoop = Node::Object()->simParameters->useCkLoop; |
| if ( useCkLoop >= CKLOOP_CTRL_PME_SENDUNTRANS && CkNumPes() >= 2 * numTransPes) { |
| CkLoop_Parallelize(PmeSlabSendUntrans, 1, (void *)this, CkMyNodeSize(), 0, numGridNodes-1, 0); // no sync |
| } else |
| #endif |
| { |
| sendUntransSubset(0, numGridNodes-1); |
| } |
| |
| } |
| |
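 | // Sends untranspose slabs for grid nodes [first..last]. |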
| void ComputePmeMgr::sendUntransSubset(int first, int last) { |
| |
| int zdim = myGrid.dim3; |
| int y_start = localInfo[myTransPe].y_start_after_transpose; |
| int ny = localInfo[myTransPe].ny_after_transpose; |
| int slicelen = myGrid.K2 * zdim; |
| |
| ComputePmeMgr **mgrObjects = pmeNodeProxy.ckLocalBranch()->mgrObjects; |
| |
#if CMK_BLUEGENEL | #if CMK_BLUEGENEL |
CmiNetworkProgressAfter (0); | CmiNetworkProgressAfter (0); |
#endif | #endif |
| |
// send data for reverse transpose | // send data for reverse transpose |
for (int j=0; j<numGridNodes; j++) { | for (int j=first; j<=last; j++) { |
int node = gridNodeOrder[j]; // different order on each node | int node = gridNodeOrder[j]; // different order on each node |
int pe = gridNodeInfo[node].pe_start; | int pe = gridNodeInfo[node].pe_start; |
int npe = gridNodeInfo[node].npe; | int npe = gridNodeInfo[node].npe; |
| |
else pmeNodeProxy[gridNodeInfo[node].real_node].recvUntrans(newmsg); | else pmeNodeProxy[gridNodeInfo[node].real_node].recvUntrans(newmsg); |
} else pmeProxy[gridPeMap[gridNodeInfo[node].pe_start]].recvUntrans(newmsg); | } else pmeProxy[gridPeMap[gridNodeInfo[node].pe_start]].recvUntrans(newmsg); |
} | } |
| |
trans_count = numGridPes; | |
} | } |
| |
void ComputePmeMgr::fwdSharedUntrans(PmeUntransMsg *msg) { | void ComputePmeMgr::fwdSharedUntrans(PmeUntransMsg *msg) { |
| |
pmeProxyDir[CkMyPe()].sendUngrid(); | pmeProxyDir[CkMyPe()].sendUngrid(); |
} | } |
| |
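 | // CkLoop work function for returning charge-grid results to their sources. |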
| static inline void PmeSlabSendUngrid(int first, int last, void *result, int paraNum, void *param) { |
| ComputePmeMgr *mgr = (ComputePmeMgr *)param; |
| mgr->sendUngridSubset(first, last); |
| } |
| |
void ComputePmeMgr::sendUngrid(void) { | void ComputePmeMgr::sendUngrid(void) { |
| |
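 | // Gated by the SENDUNTRANS control level (no separate knob for ungrid). |
 | // Note the sync flag (1) on the CkLoop call: qgrid is memset below, so |
 | // every chunk must finish sending before the buffer is cleared. |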
| #if CMK_SMP && USE_CKLOOP |
| int useCkLoop = Node::Object()->simParameters->useCkLoop; |
| if ( useCkLoop >= CKLOOP_CTRL_PME_SENDUNTRANS && CkNumPes() >= 2 * numGridPes) { |
| CkLoop_Parallelize(PmeSlabSendUngrid, 1, (void *)this, CkMyNodeSize(), 0, numSources-1, 1); // sync |
| } else |
| #endif |
| { |
| sendUngridSubset(0, numSources-1); |
| } |
| |
| grid_count = numSources; |
| memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) ); |
| } |
| |
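 | // Returns ungrid messages for sources [first..last] to their PEs, |
 | // reusing the buffers saved in gridmsg_reuse. |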
| void ComputePmeMgr::sendUngridSubset(int first, int last) { |
| |
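 | // Choose the message priority: the CUDA offload path uses a distinct |
 | // ungrid priority. |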
#ifdef NAMD_CUDA | #ifdef NAMD_CUDA |
const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY ); | const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY ); |
#else | #else |
const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY; | const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY; |
#endif | #endif |
| |
for ( int j=0; j<numSources; ++j ) { | for ( int j=first; j<=last; ++j ) { |
// int msglen = qgrid_len; | // int msglen = qgrid_len; |
PmeGridMsg *newmsg = gridmsg_reuse[j]; | PmeGridMsg *newmsg = gridmsg_reuse[j]; |
int pe = newmsg->sourceNode; | int pe = newmsg->sourceNode; |
| |
pmeProxyDir[pe].recvUngrid(newmsg); | pmeProxyDir[pe].recvUngrid(newmsg); |
CmiEnableUrgentSend(0); | CmiEnableUrgentSend(0); |
} | } |
grid_count = numSources; | |
memset( (void*) qgrid, 0, qgrid_size * numGrids * sizeof(float) ); | |
} | } |
| |
void ComputePmeMgr::recvUngrid(PmeGridMsg *msg) { | void ComputePmeMgr::recvUngrid(PmeGridMsg *msg) { |