1 #ifndef SYNCHRONOUS_COLLECTIVES_H     2 #define SYNCHRONOUS_COLLECTIVES_H    12 #include "SynchronousCollectives.decl.h"    18 #if defined(NAMD_CUDA) || defined(NAMD_HIP)    30   public CkMcastBaseMsg, 
public CMessage_SynchronousCollectivesMulticastMsg
    64     return CkpvAccess(SynchronousCollectives_instance); 
    67     return CkpvAccessOther(SynchronousCollectives_instance, CmiRankOf(pe));
    98     const int numDevices, 
const int deviceIndex, 
const std::vector<int>& 
masterPeList);
   117   std::vector<T> 
allReduce(std::vector<T>& data, CkReduction::reducerType type, 
   231     const unsigned int key);
   291   void setThread(CthThread thread) { self_awaken_thread_ = thread; }
   297   T retrieveTemp(
const unsigned int key);
   311       return currentBarrierAll_;
   313       return currentBarrierMasterPe_;
   315       return currentBarrierSingle_;
   319   std::vector<int> masterPeList_;
   320   CProxySection_SynchronousCollectives masterPes_;
   321   CProxySection_SynchronousCollectives masterPesMulticast_;
   322   CkSectionInfo reductionCookie_;
   323   CProxy_SynchronousCollectives allPes_;
   326   unsigned int tempDataMasterKey_ = 0;
   327   unsigned int tempDataAllKey_ = 0;
   328   std::map<unsigned int, std::any> tempData_;
   329   std::any reductionTemp_;
   333   std::vector<int> currentBarrierAll_;
   334   std::vector<int> currentBarrierMasterPe_;
   335   std::vector<int> currentBarrierSingle_;
   339   CthThread self_awaken_thread_;
   344   int deviceIndex_ = -1;
   347 #if !(defined(__NVCC__) || defined(__HIPCC__))   354 #define CK_TEMPLATES_ONLY   355 #include "SynchronousCollectives.def.h"   356 #undef CK_TEMPLATES_ONLY 
void barrier(const SynchronousCollectiveScope scope)
std::vector< T > allGather(const T &data, const SynchronousCollectiveScope scope)
void handleReductionMaster(CkReductionMsg *msg)
std::vector< T > allReduce(std::vector< T > &data, CkReduction::reducerType type, const SynchronousCollectiveScope scope)
void initMasterScope(const int isMasterPe, const int isMasterDevice, const int numDevices, const int deviceIndex, const std::vector< int > &masterPeList)
int masterPeList[MAX_NUM_DEVICES]
T broadcast(const T &data, const bool isRoot, const SynchronousCollectiveScope scope)
static SynchronousCollectives * ObjectOnPe(const int pe)
void recvBarrierAll(const int PE)
~SynchronousCollectives()
void recvBarrierMasterPe(const int deviceIndex)
void broadcastReductionResult(int n, char *data)
SynchronousCollectiveScope
void recvIndexData(const int index, const T &data, const SynchronousCollectiveScope scope, const unsigned int key)
SynchronousCollectivesMulticastMsg()
void setupMulticastSection(SynchronousCollectivesMulticastMsg *msg)
void handleReductionAll(CkReductionMsg *msg)
std::vector< T > alltoallv(const std::vector< T > &data, const SynchronousCollectiveScope scope)
PUPbytes(cudaIpcMemHandle_t)
static SynchronousCollectives * Object()
void recvBroadcast(const T &data, const unsigned int key)