1 #ifndef SYNCHRONOUS_COLLECTIVES_H 2 #define SYNCHRONOUS_COLLECTIVES_H 12 #include "SynchronousCollectives.decl.h" 18 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 30 public CkMcastBaseMsg,
public CMessage_SynchronousCollectivesMulticastMsg
64 return CkpvAccess(SynchronousCollectives_instance);
67 return CkpvAccessOther(SynchronousCollectives_instance, CmiRankOf(pe));
98 const int numDevices,
const int deviceIndex,
const std::vector<int>&
masterPeList);
117 std::vector<T>
allReduce(std::vector<T>& data, CkReduction::reducerType type,
231 const unsigned int key);
291 void setThread(CthThread thread) { self_awaken_thread_ = thread; }
297 T retrieveTemp(
const unsigned int key);
311 return currentBarrierAll_;
313 return currentBarrierMasterPe_;
315 return currentBarrierSingle_;
319 std::vector<int> masterPeList_;
320 CProxySection_SynchronousCollectives masterPes_;
321 CProxySection_SynchronousCollectives masterPesMulticast_;
322 CkSectionInfo reductionCookie_;
323 CProxy_SynchronousCollectives allPes_;
326 unsigned int tempDataMasterKey_ = 0;
327 unsigned int tempDataAllKey_ = 0;
328 std::map<unsigned int, std::any> tempData_;
329 std::any reductionTemp_;
333 std::vector<int> currentBarrierAll_;
334 std::vector<int> currentBarrierMasterPe_;
335 std::vector<int> currentBarrierSingle_;
339 CthThread self_awaken_thread_;
344 int deviceIndex_ = -1;
347 #if !(defined(__NVCC__) || defined(__HIPCC__)) 354 #define CK_TEMPLATES_ONLY 355 #include "SynchronousCollectives.def.h" 356 #undef CK_TEMPLATES_ONLY
void barrier(const SynchronousCollectiveScope scope)
std::vector< T > allGather(const T &data, const SynchronousCollectiveScope scope)
void handleReductionMaster(CkReductionMsg *msg)
std::vector< T > allReduce(std::vector< T > &data, CkReduction::reducerType type, const SynchronousCollectiveScope scope)
void initMasterScope(const int isMasterPe, const int isMasterDevice, const int numDevices, const int deviceIndex, const std::vector< int > &masterPeList)
int masterPeList[MAX_NUM_DEVICES]
T broadcast(const T &data, const bool isRoot, const SynchronousCollectiveScope scope)
static SynchronousCollectives * ObjectOnPe(const int pe)
void recvBarrierAll(const int PE)
~SynchronousCollectives()
void recvBarrierMasterPe(const int deviceIndex)
void broadcastReductionResult(int n, char *data)
SynchronousCollectiveScope
void recvIndexData(const int index, const T &data, const SynchronousCollectiveScope scope, const unsigned int key)
SynchronousCollectivesMulticastMsg()
void setupMulticastSection(SynchronousCollectivesMulticastMsg *msg)
void handleReductionAll(CkReductionMsg *msg)
std::vector< T > alltoallv(const std::vector< T > &data, const SynchronousCollectiveScope scope)
PUPbytes(cudaIpcMemHandle_t)
static SynchronousCollectives * Object()
void recvBroadcast(const T &data, const unsigned int key)