3 #include "GlobalGPUMgr.decl.h" 17 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 21 validCpu = valid && (tempCpu >= 0) && (tempCpu < CmiNumCores());
24 current = CPU_ALLOC(CmiNumCores());
25 temp = CPU_ALLOC(CmiNumCores());
26 size = CPU_ALLOC_SIZE(CmiNumCores());
28 CPU_ZERO_S(size, current);
29 CPU_ZERO_S(size, temp);
31 pthread_getaffinity_np(pthread_self(), size, current);
33 CPU_SET_S(tempCpu, size, temp);
34 pthread_setaffinity_np(pthread_self(), size, temp);
40 pthread_setaffinity_np(pthread_self(), size, current);
49 if (CkpvAccess(GlobalGPUMgr_instance) == NULL) {
50 CkpvAccess(GlobalGPUMgr_instance) =
this;
52 NAMD_bug(
"GlobalGPUMgr instanced twice on same processor!");
64 peToDeviceIDMap.resize(CkNumPes(), -1);
65 peToDeviceIndexMap.resize(CkNumPes(), -1);
73 for (
int i = 0; i < CkNumPes(); i++) {
74 if (peToDeviceIDMap[i] != -1) {
79 deviceIndexToPeMap.resize(numDevices, 0);
82 int current_device = 0;
83 for (
int i = 0; i < CkNumPes(); i++) {
84 if (peToDeviceIDMap[i] != -1) {
85 deviceIndexToPeMap[current_device] = i;
86 peToDeviceIndexMap[i] = current_device;
91 deviceIndex = peToDeviceIndexMap[CkMyPe()];
106 #include "GlobalGPUMgr.def.h"
std::vector< T > allGather(const T &data, const SynchronousCollectiveScope scope)
__thread DeviceCUDA * deviceCUDA
void NAMD_bug(const char *err_msg)
void initializeBackends()
RunWithAffinity(const bool valid, const int tempCpu)
static SynchronousCollectives * Object()