NAMD
Public Member Functions | List of all members
CudaPmePencilXY Class Reference

#include <CudaPmeSolver.h>

Inheritance diagram for CudaPmePencilXY:

Public Member Functions

CudaPmePencilXY_SDAG_CODE CudaPmePencilXY ()
 
 CudaPmePencilXY (CkMigrateMessage *m)
 
 ~CudaPmePencilXY ()
 
void initialize (CudaPmeXYInitMsg *msg)
 
void initializeDevice (InitDeviceMsg *msg)
 

Detailed Description

Definition at line 99 of file CudaPmeSolver.h.

Constructor & Destructor Documentation

◆ CudaPmePencilXY() [1/2]

CudaPmePencilXY_SDAG_CODE CudaPmePencilXY::CudaPmePencilXY ( )
inline

Definition at line 102 of file CudaPmeSolver.h.

102 : numGetDeviceBuffer(0), eventCreated(false) {}

◆ CudaPmePencilXY() [2/2]

CudaPmePencilXY::CudaPmePencilXY ( CkMigrateMessage *  m)
inline

Definition at line 103 of file CudaPmeSolver.h.

103 : numGetDeviceBuffer(0), eventCreated(false) {}

◆ ~CudaPmePencilXY()

CudaPmePencilXY::~CudaPmePencilXY ( )

Definition at line 80 of file CudaPmeSolver.C.

References cudaCheck.

80  {
81  if (eventCreated) cudaCheck(cudaEventDestroy(event));
82 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

Member Function Documentation

◆ initialize()

void CudaPmePencilXY::initialize ( CudaPmeXYInitMsg *  msg)

Definition at line 70 of file CudaPmeSolver.C.

References CudaPmeXYInitMsg::pmeGrid, CudaPmeXYInitMsg::pmePencilZ, and CudaPmeXYInitMsg::zMap.

70  {
71  pmeGrid = msg->pmeGrid;
72  pmePencilZ = msg->pmePencilZ;
73  zMap = msg->zMap;
74 
75  delete msg;
76 
77  initBlockSizes();
78 }
CProxy_PmePencilXMap zMap
Definition: CudaPmeSolver.h:21
CProxy_CudaPmePencilZ pmePencilZ
Definition: CudaPmeSolver.h:20

◆ initializeDevice()

void CudaPmePencilXY::initializeDevice ( InitDeviceMsg *  msg)

Definition at line 87 of file CudaPmeSolver.C.

References cudaCheck, InitDeviceMsg::deviceProxy, NUM_GRID_MAX, and Perm_cX_Y_Z.

87  {
88  // Store device proxy
89  deviceProxy = msg->deviceProxy;
90  delete msg;
91  deviceID = deviceProxy.ckLocalBranch()->getDeviceID();
92  stream = deviceProxy.ckLocalBranch()->getStream();
93  CProxy_ComputePmeCUDAMgr mgrProxy = deviceProxy.ckLocalBranch()->getMgrProxy();
94  // Setup fftCompute and pmeKSpaceCompute
95  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
96  if (deviceProxy.ckLocalBranch()->isGridEnabled(iGrid) == true) {
97  fftComputes[iGrid] = new CudaFFTCompute(deviceID, stream);
98  pmeTransposes[iGrid] = new CudaPmeTranspose(pmeGrid, Perm_cX_Y_Z, 0, thisIndex.z, deviceID, stream);
99  } else {
100  fftComputes[iGrid] = NULL;
101  pmeTransposes[iGrid] = NULL;
102  }
103  }
104 
105  deviceBuffers.resize(pmeGrid.xBlocks, DeviceBuffer(-1, false));
106  numDeviceBuffers = 0;
107 
108  // Create event. NOTE: Events are tied to devices, hence the cudaSetDevice() here
109  cudaCheck(cudaSetDevice(deviceID));
110  cudaCheck(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
111  eventCreated = true;
112 
113 /*
114  bool useMultiGPUfft = true;
115  bool allDeviceOnSameNode = true;
116  for (int x=0;x < pmeGrid.xBlocks;x++) {
117  int pe = zMap.ckLocalBranch()->procNum(0, CkArrayIndex3D(x,0,0));
118  allDeviceOnSameNode &= (CkNodeOf(pe) == CkMyNode());
119  }
120 
121  if (useMultiGPUfft && allDeviceOnSameNode && pmeGrid.xBlocks > 1) {
122  // WARNING: code may be incomplete here!
123  // CHC: Assuming there are two GPUs on the same node and we use:
124  // PMEGridSpacing 2.0
125  // PMEPencilsX 2
126  // PMEPencilsY 1
127  // PMEPencilsZ 1
128  // and running NAMD with all GPUs and two CPU threads,
129  // this "if" statement is satisfied
130 
131 
132  } else {
133 */
134 
135  for (int x=0;x < pmeGrid.xBlocks;x++) {
136  int pe = zMap.ckLocalBranch()->procNum(0, CkArrayIndex3D(x,0,0));
137  if (CkNodeOf(pe) == CkMyNode()) {
138  // Get device ID on a device on this node
139  int deviceID0 = mgrProxy.ckLocalBranch()->getDeviceIDPencilZ(x, 0);
140  // Check for Peer-to-Peer access
141  int canAccessPeer = 0;
142  if (deviceID != deviceID0) {
143  cudaCheck(cudaSetDevice(deviceID));
144  cudaCheck(cudaDeviceCanAccessPeer(&canAccessPeer, deviceID, deviceID0));
145 #ifdef DISABLE_P2P
146  canAccessPeer = 0;
147 #endif
148  if (canAccessPeer) {
149  unsigned int flags = 0;
150  cudaCheck(cudaDeviceEnablePeerAccess(deviceID0, flags));
151  // fprintf(stderr, "device %d can access device %d\n", deviceID, deviceID0);
152  }
153  }
154  numDeviceBuffers++;
155  // CHC: I have tried to use deviceID instead of deviceID0, but NAMD still crashes.
156  deviceBuffers[x] = DeviceBuffer(deviceID0, canAccessPeer);
157  pmePencilZ(x,0,0).getDeviceBuffer(thisIndex.z, (deviceID0 == deviceID) || canAccessPeer, thisProxy);
158  }
159  }
160 
161  // }
162 
163 }
const unsigned int NUM_GRID_MAX
Definition: PmeSolverUtil.h:9
CProxy_ComputePmeCUDADevice deviceProxy
Definition: CudaPmeSolver.h:44
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

The documentation for this class was generated from the following files: