NAMD
Public Member Functions | List of all members
CudaPmePencilXY Class Reference

#include <CudaPmeSolver.h>

Inheritance diagram for CudaPmePencilXY:

Public Member Functions

CudaPmePencilXY_SDAG_CODE CudaPmePencilXY ()
 
 CudaPmePencilXY (CkMigrateMessage *m)
 
 ~CudaPmePencilXY ()
 
void initialize (CudaPmeXYInitMsg *msg)
 
void initializeDevice (InitDeviceMsg *msg)
 

Detailed Description

Definition at line 99 of file CudaPmeSolver.h.

Constructor & Destructor Documentation

◆ CudaPmePencilXY() [1/2]

CudaPmePencilXY_SDAG_CODE CudaPmePencilXY::CudaPmePencilXY ( )
inline

Definition at line 102 of file CudaPmeSolver.h.

102 : numGetDeviceBuffer(0), eventCreated(false) {}

◆ CudaPmePencilXY() [2/2]

CudaPmePencilXY::CudaPmePencilXY ( CkMigrateMessage *  m)
inline

Definition at line 103 of file CudaPmeSolver.h.

103 : numGetDeviceBuffer(0), eventCreated(false) {}

◆ ~CudaPmePencilXY()

CudaPmePencilXY::~CudaPmePencilXY ( )

Definition at line 80 of file CudaPmeSolver.C.

References cudaCheck.

80  {
81  if (eventCreated) cudaCheck(cudaEventDestroy(event));
82 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

Member Function Documentation

◆ initialize()

void CudaPmePencilXY::initialize ( CudaPmeXYInitMsg *  msg)

Definition at line 70 of file CudaPmeSolver.C.

References CudaPmeXYInitMsg::pmeGrid, CudaPmeXYInitMsg::pmePencilZ, and CudaPmeXYInitMsg::zMap.

70  {
71  pmeGrid = msg->pmeGrid;
72  pmePencilZ = msg->pmePencilZ;
73  zMap = msg->zMap;
74 
75  delete msg;
76 
77  initBlockSizes();
78 }
CProxy_PmePencilXMap zMap
Definition: CudaPmeSolver.h:21
CProxy_CudaPmePencilZ pmePencilZ
Definition: CudaPmeSolver.h:20

◆ initializeDevice()

void CudaPmePencilXY::initializeDevice ( InitDeviceMsg *  msg)

Definition at line 87 of file CudaPmeSolver.C.

References cudaCheck, InitDeviceMsg::deviceProxy, NUM_GRID_MAX, and Perm_cX_Y_Z.

87  {
88  // Store device proxy
89  deviceProxy = msg->deviceProxy;
90  delete msg;
91  deviceID = deviceProxy.ckLocalBranch()->getDeviceID();
92  stream = deviceProxy.ckLocalBranch()->getStream();
93  CProxy_ComputePmeCUDAMgr mgrProxy = deviceProxy.ckLocalBranch()->getMgrProxy();
94  // Setup fftCompute and pmeKSpaceCompute
95  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
96  if (deviceProxy.ckLocalBranch()->isGridEnabled(iGrid) == true) {
97  fftComputes[iGrid] = new CudaFFTCompute(deviceID, stream);
98  pmeTransposes[iGrid] = new CudaPmeTranspose(pmeGrid, Perm_cX_Y_Z, 0, thisIndex.z, deviceID, stream);
99  } else {
100  fftComputes[iGrid] = NULL;
101  pmeTransposes[iGrid] = NULL;
102  }
103  }
104 
105  deviceBuffers.resize(pmeGrid.xBlocks, DeviceBuffer(-1, false));
106  numDeviceBuffers = 0;
107 
108  // Create event. NOTE: Events are tied to devices, hence the cudaSetDevice() here
109  cudaCheck(cudaSetDevice(deviceID));
110  cudaCheck(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
111  eventCreated = true;
112 
113 /*
114  bool useMultiGPUfft = true;
115  bool allDeviceOnSameNode = true;
116  for (int x=0;x < pmeGrid.xBlocks;x++) {
117  int pe = zMap.ckLocalBranch()->procNum(0, CkArrayIndex3D(x,0,0));
118  allDeviceOnSameNode &= (CkNodeOf(pe) == CkMyNode());
119  }
120 
121  if (useMultiGPUfft && allDeviceOnSameNode && pmeGrid.xBlocks > 1) {
122  // WARNING: code may be incomplete here!
123  // CHC: Assuming there are two GPUs on the same node and we use:
124  // PMEGridSpacing 2.0
125  // PMEPencilsX 2
126  // PMEPencilsY 1
127  // PMEPencilsZ 1
128  // and running NAMD with all GPUs and two CPU threads,
129  // this "if" statement is satisfied
130 
131 
132  } else {
133 */
134 
135  for (int x=0;x < pmeGrid.xBlocks;x++) {
136  int pe = zMap.ckLocalBranch()->procNum(0, CkArrayIndex3D(x,0,0));
137  if (CkNodeOf(pe) == CkMyNode()) {
138  // Get device ID on a device on this node
139  int deviceID0 = mgrProxy.ckLocalBranch()->getDeviceIDPencilZ(x, 0);
140  // Check for Peer-to-Peer access
141  int canAccessPeer = 0;
142  if (deviceID != deviceID0) {
143  cudaCheck(cudaSetDevice(deviceID));
144  cudaCheck(cudaDeviceCanAccessPeer(&canAccessPeer, deviceID, deviceID0));
145 #ifdef DISABLE_P2P
146  canAccessPeer = 0;
147 #endif
148  if (canAccessPeer) {
149  unsigned int flags = 0;
150  cudaCheck(cudaDeviceEnablePeerAccess(deviceID0, flags));
151  // fprintf(stderr, "device %d can access device %d\n", deviceID, deviceID0);
152  }
153  }
154  numDeviceBuffers++;
155  // CHC: I have tried to use deviceID instead of deviceID0, but NAMD still crashes.
156  deviceBuffers[x] = DeviceBuffer(deviceID0, canAccessPeer);
157  pmePencilZ(x,0,0).getDeviceBuffer(thisIndex.z, (deviceID0 == deviceID) || canAccessPeer, thisProxy);
158  }
159  }
160 
161  // }
162 
163 }
const unsigned int NUM_GRID_MAX
Definition: PmeSolverUtil.h:9
CProxy_ComputePmeCUDADevice deviceProxy
Definition: CudaPmeSolver.h:44
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

The documentation for this class was generated from the following files: