NAMD
Public Member Functions | List of all members
CudaPmePencilXY Class Reference

#include <CudaPmeSolver.h>

Inheritance diagram for CudaPmePencilXY:

Public Member Functions

CudaPmePencilXY_SDAG_CODE CudaPmePencilXY ()
 
 CudaPmePencilXY (CkMigrateMessage *m)
 
 ~CudaPmePencilXY ()
 
void initialize (CudaPmeXYInitMsg *msg)
 
void initializeDevice (InitDeviceMsg *msg)
 

Detailed Description

Definition at line 84 of file CudaPmeSolver.h.

Constructor & Destructor Documentation

CudaPmePencilXY_SDAG_CODE CudaPmePencilXY::CudaPmePencilXY ( )
inline

Definition at line 87 of file CudaPmeSolver.h.

87 : numGetDeviceBuffer(0), eventCreated(false) {}
CudaPmePencilXY::CudaPmePencilXY ( CkMigrateMessage *  m)
inline

Definition at line 88 of file CudaPmeSolver.h.

88 : numGetDeviceBuffer(0), eventCreated(false) {}
CudaPmePencilXY::~CudaPmePencilXY ( )

Definition at line 66 of file CudaPmeSolver.C.

References cudaCheck.

66  {
67  if (eventCreated) cudaCheck(cudaEventDestroy(event));
68 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:95

Member Function Documentation

void CudaPmePencilXY::initialize ( CudaPmeXYInitMsg *  msg)

Definition at line 56 of file CudaPmeSolver.C.

References CudaPmeXYInitMsg::pmeGrid, CudaPmeXYInitMsg::pmePencilZ, and CudaPmeXYInitMsg::zMap.

56  {
57  pmeGrid = msg->pmeGrid;
58  pmePencilZ = msg->pmePencilZ;
59  zMap = msg->zMap;
60 
61  delete msg;
62 
63  initBlockSizes();
64 }
CProxy_PmePencilXMap zMap
Definition: CudaPmeSolver.h:21
CProxy_CudaPmePencilZ pmePencilZ
Definition: CudaPmeSolver.h:20
void CudaPmePencilXY::initializeDevice ( InitDeviceMsg *  msg)

Definition at line 73 of file CudaPmeSolver.C.

References cudaCheck, InitDeviceMsg::deviceProxy, Perm_cX_Y_Z, and x.

73  {
74  // Store device proxy
75  deviceProxy = msg->deviceProxy;
76  delete msg;
77  deviceID = deviceProxy.ckLocalBranch()->getDeviceID();
78  stream = deviceProxy.ckLocalBranch()->getStream();
79  CProxy_ComputePmeCUDAMgr mgrProxy = deviceProxy.ckLocalBranch()->getMgrProxy();
80  // Setup fftCompute and pmeKSpaceCompute
81  fftCompute = new CudaFFTCompute(deviceID, stream);
82  pmeTranspose = new CudaPmeTranspose(pmeGrid, Perm_cX_Y_Z, 0, thisIndex.z, deviceID, stream);
83 
84  deviceBuffers.resize(pmeGrid.xBlocks, DeviceBuffer(-1, false, NULL));
85  numDeviceBuffers = 0;
86 
87  // Create event. NOTE: Events are tied to devices, hence the cudaSetDevice() here
88  cudaCheck(cudaSetDevice(deviceID));
89  cudaCheck(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
90  eventCreated = true;
91 
92 /*
93  bool useMultiGPUfft = true;
94  bool allDeviceOnSameNode = true;
95  for (int x=0;x < pmeGrid.xBlocks;x++) {
96  int pe = zMap.ckLocalBranch()->procNum(0, CkArrayIndex3D(x,0,0));
97  allDeviceOnSameNode &= (CkNodeOf(pe) == CkMyNode());
98  }
99 
100  if (useMultiGPUfft && allDeviceOnSameNode && pmeGrid.xBlocks > 1) {
101 
102 
103 
104  } else {
105 */
106 
107  for (int x=0;x < pmeGrid.xBlocks;x++) {
108  int pe = zMap.ckLocalBranch()->procNum(0, CkArrayIndex3D(x,0,0));
109  if (CkNodeOf(pe) == CkMyNode()) {
110  // Get device ID on a device on this node
111  int deviceID0 = mgrProxy.ckLocalBranch()->getDeviceIDPencilZ(x, 0);
112  // Check for Peer-to-Peer access
113  int canAccessPeer = 0;
114  if (deviceID != deviceID0) {
115  cudaCheck(cudaSetDevice(deviceID));
116  cudaCheck(cudaDeviceCanAccessPeer(&canAccessPeer, deviceID, deviceID0));
117 #ifdef DISABLE_P2P
118  canAccessPeer = 0;
119 #endif
120  if (canAccessPeer) {
121  unsigned int flags = 0;
122  cudaCheck(cudaDeviceEnablePeerAccess(deviceID0, flags));
123  // fprintf(stderr, "device %d can access device %d\n", deviceID, deviceID0);
124  }
125  }
126  numDeviceBuffers++;
127  deviceBuffers[x] = DeviceBuffer(deviceID0, canAccessPeer, NULL);
128  pmePencilZ(x,0,0).getDeviceBuffer(thisIndex.z, (deviceID0 == deviceID) || canAccessPeer, thisProxy);
129  }
130  }
131 
132  // }
133 
134 }
CProxy_ComputePmeCUDADevice deviceProxy
Definition: CudaPmeSolver.h:44
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
gridSize x

The documentation for this class was generated from the following files: