NAMD
Classes | Public Member Functions | Static Public Member Functions | Public Attributes | List of all members
ComputePmeCUDAMgr Class Reference

#include <ComputePmeCUDAMgr.h>

Inheritance diagram for ComputePmeCUDAMgr:

Public Member Functions

 ComputePmeCUDAMgr ()
 
 ComputePmeCUDAMgr (CkMigrateMessage *)
 
 ~ComputePmeCUDAMgr ()
 
void setupPencils ()
 
void initialize (CkQdMsg *msg)
 
void initialize_pencils (CkQdMsg *msg)
 
void activate_pencils (CkQdMsg *msg)
 
PmeGrid getPmeGrid ()
 
int getNode (int i, int j)
 
int getDevice (int i, int j)
 
int getDevicePencilY (int i, int j)
 
int getDevicePencilZ (int i, int j)
 
int getDeviceIDPencilX (int i, int j)
 
int getDeviceIDPencilY (int i, int j)
 
int getDeviceIDPencilZ (int i, int j)
 
void recvPencils (CProxy_CudaPmePencilXYZ xyz)
 
void recvPencils (CProxy_CudaPmePencilXY xy, CProxy_CudaPmePencilZ z)
 
void recvPencils (CProxy_CudaPmePencilX x, CProxy_CudaPmePencilY y, CProxy_CudaPmePencilZ z)
 
void createDevicesAndAtomFiler ()
 
void recvDevices (RecvDeviceMsg *msg)
 
void recvAtomFiler (CProxy_PmeAtomFiler filer)
 
void skip ()
 
void recvAtoms (PmeAtomMsg *msg)
 
void getHomePencil (PatchID patchID, int &homey, int &homez)
 
int getHomeNode (PatchID patchID)
 
bool isPmePe (int pe)
 
bool isPmeNode (int node)
 
bool isPmeDevice (int deviceID)
 

Static Public Member Functions

static ComputePmeCUDAMgrObject ()
 

Public Attributes

 ComputePmeCUDAMgr_SDAG_CODE
 

Detailed Description

Definition at line 579 of file ComputePmeCUDAMgr.h.

Constructor & Destructor Documentation

◆ ComputePmeCUDAMgr() [1/2]

ComputePmeCUDAMgr::ComputePmeCUDAMgr ( )

Definition at line 221 of file ComputePmeCUDAMgr.C.

221  {
222  __sdag_init();
223  numDevices = 0;
224  numTotalPatches = 0;
225  numNodesContributed = 0;
226  numDevicesMax = 0;
227 }

◆ ComputePmeCUDAMgr() [2/2]

ComputePmeCUDAMgr::ComputePmeCUDAMgr ( CkMigrateMessage *  )

Definition at line 232 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

232  {
233  __sdag_init();
234  NAMD_bug("ComputePmeCUDAMgr cannot be migrated");
235  numDevices = 0;
236  numTotalPatches = 0;
237  numNodesContributed = 0;
238  numDevicesMax = 0;
239 }
void NAMD_bug(const char *err_msg)
Definition: common.C:195

◆ ~ComputePmeCUDAMgr()

ComputePmeCUDAMgr::~ComputePmeCUDAMgr ( )

Definition at line 244 of file ComputePmeCUDAMgr.C.

References cudaCheck.

244  {
245  for (int i=0;i < extraDevices.size();i++) {
246  cudaCheck(cudaSetDevice(extraDevices[i].deviceID));
247  cudaCheck(cudaStreamDestroy(extraDevices[i].stream));
248  }
249 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

Member Function Documentation

◆ activate_pencils()

void ComputePmeCUDAMgr::activate_pencils ( CkQdMsg *  msg)

Definition at line 837 of file ComputePmeCUDAMgr.C.

References PmeStartMsg::dataGrid, PmeStartMsg::dataSizes, PmeStartMsg::enabledGrid, and NUM_GRID_MAX.

837  {
838  if (msg != NULL) delete msg;
839 
840  for (int device=0;device < numDevices;device++) {
841  deviceProxy[device].ckLocalBranch()->activate_pencils();
842  }
843 
844  for (int i=0;i < ijPencilY.size();i++) {
845  PmeStartMsg* pmeStartYMsg = new PmeStartMsg();
846  for (unsigned iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
847  pmeStartYMsg->dataGrid[iGrid] = NULL;
848  pmeStartYMsg->dataSizes[iGrid] = 0;
849  pmeStartYMsg->enabledGrid[iGrid] = deviceProxy[0].ckLocalBranch()->isGridEnabled(iGrid);
850  }
851  pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).start(pmeStartYMsg);
852  }
853 
854  for (int i=0;i < ijPencilZ.size();i++) {
855  PmeStartMsg* pmeStartZMsg = new PmeStartMsg();
856  for (unsigned iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
857  pmeStartZMsg->dataGrid[iGrid] = NULL;
858  pmeStartZMsg->dataSizes[iGrid] = 0;
859  pmeStartZMsg->enabledGrid[iGrid] = deviceProxy[0].ckLocalBranch()->isGridEnabled(iGrid);
860  }
861  pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).start(pmeStartZMsg);
862  }
863 
864 }
const unsigned int NUM_GRID_MAX
Definition: PmeSolverUtil.h:9
std::array< int, NUM_GRID_MAX > dataSizes
Definition: PmeSolver.h:106
std::array< float *, NUM_GRID_MAX > dataGrid
Definition: PmeSolver.h:105
std::array< bool, NUM_GRID_MAX > enabledGrid
Definition: PmeSolver.h:107

◆ createDevicesAndAtomFiler()

void ComputePmeCUDAMgr::createDevicesAndAtomFiler ( )

Definition at line 705 of file ComputePmeCUDAMgr.C.

References RecvDeviceMsg::dev, NAMD_bug(), RecvDeviceMsg::numDevicesMax, and PRIORITY_SIZE.

705  {
706  if (CkMyNode() != 0)
707  NAMD_bug("ComputePmeCUDAMgr::createDevicesAndAtomFiler can only be called on root node");
708 
709  // Root node creates all device proxies
710  // NOTE: Only root node has numDevicesMax
711  RecvDeviceMsg* msg = new (numDevicesMax, PRIORITY_SIZE) RecvDeviceMsg();
712  msg->numDevicesMax = numDevicesMax;
713  for (int i=0;i < numDevicesMax;i++) {
714  CProxy_ComputePmeCUDADevice dev = CProxy_ComputePmeCUDADevice::ckNew();
715  memcpy(&msg->dev[i], &dev, sizeof(CProxy_ComputePmeCUDADevice));
716  }
717  thisProxy.recvDevices(msg);
718 
719  CProxy_PmeAtomFiler filer = CProxy_PmeAtomFiler::ckNew();
720  thisProxy.recvAtomFiler(filer);
721 
722 }
#define PRIORITY_SIZE
Definition: Priorities.h:13
void NAMD_bug(const char *err_msg)
Definition: common.C:195
CProxy_ComputePmeCUDADevice * dev

◆ getDevice()

int ComputePmeCUDAMgr::getDevice ( int  i,
int  j 
)

Definition at line 888 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getDeviceIDPencilX(), and recvAtoms().

888  {
889  if (i < 0 || i >= pmeGrid.yBlocks || j < 0 || j >= pmeGrid.zBlocks)
890  NAMD_bug("ComputePmeCUDAMgr::getDevice, pencil index out of bounds");
891  int ind = i + j*pmeGrid.yBlocks;
892  int device = nodeDeviceList[ind].device;
893  if (device == -1)
894  NAMD_bug("ComputePmeCUDAMgr::getDevice, no device found");
895  return device;
896 }
int zBlocks
Definition: PmeBase.h:25
int yBlocks
Definition: PmeBase.h:25
void NAMD_bug(const char *err_msg)
Definition: common.C:195

◆ getDeviceIDPencilX()

int ComputePmeCUDAMgr::getDeviceIDPencilX ( int  i,
int  j 
)

Definition at line 929 of file ComputePmeCUDAMgr.C.

References getDevice().

929  {
930  int device = getDevice(i, j);
931  return deviceProxy[device].ckLocalBranch()->getDeviceID();
932 }
int getDevice(int i, int j)

◆ getDeviceIDPencilY()

int ComputePmeCUDAMgr::getDeviceIDPencilY ( int  i,
int  j 
)

Definition at line 937 of file ComputePmeCUDAMgr.C.

References getDevicePencilY().

937  {
938  int device = getDevicePencilY(i, j);
939  return deviceProxy[device].ckLocalBranch()->getDeviceID();
940 }
int getDevicePencilY(int i, int j)

◆ getDeviceIDPencilZ()

int ComputePmeCUDAMgr::getDeviceIDPencilZ ( int  i,
int  j 
)

Definition at line 945 of file ComputePmeCUDAMgr.C.

References getDevicePencilZ().

945  {
946  int device = getDevicePencilZ(i, j);
947  return deviceProxy[device].ckLocalBranch()->getDeviceID();
948 }
int getDevicePencilZ(int i, int j)

◆ getDevicePencilY()

int ComputePmeCUDAMgr::getDevicePencilY ( int  i,
int  j 
)

Definition at line 901 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::zBlocks.

Referenced by getDeviceIDPencilY().

901  {
902  if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.zBlocks)
903  NAMD_bug("ComputePmeCUDAMgr::getDevicePencilY, pencil index out of bounds");
904  for (int device=0;device < ijPencilY.size();device++) {
905  if (ijPencilY[device].i == i && ijPencilY[device].j == j) return device;
906  }
907  char str[256];
908  sprintf(str, "ComputePmeCUDAMgr::getDevicePencilY, no device found at i %d j %d",i,j);
909  NAMD_bug(str);
910  return -1;
911 }
int zBlocks
Definition: PmeBase.h:25
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int xBlocks
Definition: PmeBase.h:25

◆ getDevicePencilZ()

int ComputePmeCUDAMgr::getDevicePencilZ ( int  i,
int  j 
)

Definition at line 916 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::yBlocks.

Referenced by getDeviceIDPencilZ().

916  {
917  if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.yBlocks)
918  NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, pencil index out of bounds");
919  for (int device=0;device < ijPencilZ.size();device++) {
920  if (ijPencilZ[device].i == i && ijPencilZ[device].j == j) return device;
921  }
922  NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, no device found");
923  return -1;
924 }
int yBlocks
Definition: PmeBase.h:25
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int xBlocks
Definition: PmeBase.h:25

◆ getHomeNode()

int ComputePmeCUDAMgr::getHomeNode ( PatchID  patchID)

Definition at line 879 of file ComputePmeCUDAMgr.C.

References getHomePencil(), and getNode().

879  {
880  int homey, homez;
881  getHomePencil(patchID, homey, homez);
882  return getNode(homey, homez);
883 }
void getHomePencil(PatchID patchID, int &homey, int &homez)
int getNode(int i, int j)

◆ getHomePencil()

void ComputePmeCUDAMgr::getHomePencil ( PatchID  patchID,
int &  homey,
int &  homez 
)

Definition at line 255 of file ComputePmeCUDAMgr.C.

References getPencilDim(), PmeGrid::K2, PmeGrid::K3, PatchMap::max_b(), PatchMap::max_c(), PatchMap::min_b(), PatchMap::min_c(), NAMD_bug(), PatchMap::Object(), PmeGrid::order, Perm_X_Y_Z, PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getHomeNode().

255  {
256  PatchMap *patchMap = PatchMap::Object();
257 
258  BigReal miny = patchMap->min_b(patchID);
259  BigReal maxy = patchMap->max_b(patchID);
260 
261  BigReal minz = patchMap->min_c(patchID);
262  BigReal maxz = patchMap->max_c(patchID);
263 
264  // Determine home pencil = pencil with most overlap
265 
266  // Calculate patch grid coordinates
267  int patch_y0 = floor((miny+0.5)*pmeGrid.K2);
268  int patch_y1 = floor((maxy+0.5)*pmeGrid.K2)-1;
269  int patch_z0 = floor((minz+0.5)*pmeGrid.K3);
270  int patch_z1 = floor((maxz+0.5)*pmeGrid.K3)-1;
271 
272  if (patch_y0 < 0 || patch_y1 >= pmeGrid.K2 || patch_z0 < 0 || patch_z1 >= pmeGrid.K3) {
273  NAMD_bug("ComputePmeCUDAMgr::getHomePencil, patch bounds are outside grid bounds");
274  }
275 
276  int maxOverlap = 0;
277  homey = -1;
278  homez = -1;
279  for (int iz=0;iz < pmeGrid.zBlocks;iz++) {
280  for (int iy=0;iy < pmeGrid.yBlocks;iy++) {
281  int pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1;
282  getPencilDim(pmeGrid, Perm_X_Y_Z, iy, iz,
283  pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1);
284 
285  if (pencil_y1 - pencil_y0 < pmeGrid.order || pencil_z1 - pencil_z0 < pmeGrid.order)
286  NAMD_bug("ComputePmeCUDAMgr::getHomePencil, pencil size must be >= PMEInterpOrder");
287 
288  int y0 = std::max(patch_y0, pencil_y0);
289  int y1 = std::min(patch_y1, pencil_y1);
290  int z0 = std::max(patch_z0, pencil_z0);
291  int z1 = std::min(patch_z1, pencil_z1);
292 
293  int overlap = (y1-y0 > 0 && z1-z0 > 0) ? (y1-y0)*(z1-z0) : -1;
294 
295  if (overlap > maxOverlap) {
296  maxOverlap = overlap;
297  homey = iy;
298  homez = iz;
299  }
300  }
301  }
302 
303  if (homey == -1 || homez == -1)
304  NAMD_bug("ComputePmeCUDAMgr::getHomePencil, home pencil not found");
305 }
int zBlocks
Definition: PmeBase.h:25
static PatchMap * Object()
Definition: PatchMap.h:27
int K2
Definition: PmeBase.h:21
static void getPencilDim(const PmeGrid &pmeGrid, const int permutation, const int jblock, const int kblock, int &i0, int &i1, int &j0, int &j1, int &k0, int &k1)
Definition: PmeSolverUtil.h:32
int yBlocks
Definition: PmeBase.h:25
int order
Definition: PmeBase.h:23
void NAMD_bug(const char *err_msg)
Definition: common.C:195
BigReal min_c(int pid) const
Definition: PatchMap.h:95
int K3
Definition: PmeBase.h:21
BigReal max_b(int pid) const
Definition: PatchMap.h:94
BigReal max_c(int pid) const
Definition: PatchMap.h:96
BigReal min_b(int pid) const
Definition: PatchMap.h:93
double BigReal
Definition: common.h:123

◆ getNode()

int ComputePmeCUDAMgr::getNode ( int  i,
int  j 
)

Definition at line 869 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getHomeNode().

869  {
870  if (i < 0 || i >= pmeGrid.yBlocks || j < 0 || j >= pmeGrid.zBlocks)
871  NAMD_bug("ComputePmeCUDAMgr::getNode, pencil index out of bounds");
872  int ind = i + j*pmeGrid.yBlocks;
873  return nodeDeviceList[ind].node;
874 }
int zBlocks
Definition: PmeBase.h:25
int yBlocks
Definition: PmeBase.h:25
void NAMD_bug(const char *err_msg)
Definition: common.C:195

◆ getPmeGrid()

PmeGrid ComputePmeCUDAMgr::getPmeGrid ( )
inline

Definition at line 589 of file ComputePmeCUDAMgr.h.

Referenced by ComputePmeCUDA::initialize().

589 {return pmeGrid;}

◆ initialize()

void ComputePmeCUDAMgr::initialize ( CkQdMsg *  msg)

Definition at line 624 of file ComputePmeCUDAMgr.C.

References DebugM, endi(), iINFO(), iout, setupPencils(), PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.

624  {
625  if (msg != NULL) delete msg;
626  DebugM(4, "ComputePmeCUDAMgr::initialize(CkQdMsg)\n" << endi);
627  setupPencils();
628 
629  if ( ! CkMyNode() ) {
630  iout << iINFO << "PME using " << pmeGrid.xBlocks << " x " <<
631  pmeGrid.yBlocks << " x " << pmeGrid.zBlocks <<
632  " pencil grid for FFT and reciprocal sum.\n" << endi;
633  }
634 
635  // Initialize list that contains the number of home patches for each device on this manager
636  numHomePatchesList.resize(numDevices, 0);
637 
638  //--------------------------------------------------------------------------
639  // Create devices and atom filer
640  // numDevices = number of devices we'll be using, possibly different on each node
641  // Use root node to compute the maximum number of devices in use over all nodes
642  thisProxy[0].initializeDevicesAndAtomFiler(new NumDevicesMsg(numDevices));
643  //--------------------------------------------------------------------------
644 
645  if (CkMyNode() == 0) {
646 
647  if (pmePencilType == 3) {
648  // Single block => 3D FFT
649  CProxy_PmePencilXYZMap xyzMap = CProxy_PmePencilXYZMap::ckNew(xPes[0]);
650  CkArrayOptions xyzOpts(1);
651  xyzOpts.setMap(xyzMap);
652  xyzOpts.setAnytimeMigration(false);
653  xyzOpts.setStaticInsertion(true);
654  pmePencilXYZ = CProxy_CudaPmePencilXYZ::ckNew(xyzOpts);
655  pmePencilXYZ[0].initialize(new CudaPmeXYZInitMsg(pmeGrid));
656  thisProxy.recvPencils(pmePencilXYZ);
657  } else if (pmePencilType == 2) {
658  // Blocks in all but y-dimension => 2D FFT
659  CProxy_PmePencilXYMap xyMap = CProxy_PmePencilXYMap::ckNew(xPes);
660  CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes);
661  CkArrayOptions xyOpts(1, 1, pmeGrid.zBlocks);
662  CkArrayOptions zOpts(pmeGrid.xBlocks, 1, 1);
663  xyOpts.setMap(xyMap);
664  zOpts.setMap(zMap);
665  xyOpts.setAnytimeMigration(false);
666  zOpts.setAnytimeMigration(false);
667  xyOpts.setStaticInsertion(true);
668  zOpts.setStaticInsertion(true);
669  pmePencilXY = CProxy_CudaPmePencilXY::ckNew(xyOpts);
670  pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts);
671  // Send pencil proxies to other nodes
672  thisProxy.recvPencils(pmePencilXY, pmePencilZ);
673  pmePencilXY.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap));
674  pmePencilZ.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap));
675  } else {
676  // Blocks in all dimensions => 1D FFT
677  CProxy_PmePencilXMap xMap = CProxy_PmePencilXMap::ckNew(1, 2, pmeGrid.yBlocks, xPes);
678  CProxy_PmePencilXMap yMap = CProxy_PmePencilXMap::ckNew(0, 2, pmeGrid.xBlocks, yPes);
679  CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes);
680  CkArrayOptions xOpts(1, pmeGrid.yBlocks, pmeGrid.zBlocks);
681  CkArrayOptions yOpts(pmeGrid.xBlocks, 1, pmeGrid.zBlocks);
682  CkArrayOptions zOpts(pmeGrid.xBlocks, pmeGrid.yBlocks, 1);
683  xOpts.setMap(xMap);
684  yOpts.setMap(yMap);
685  zOpts.setMap(zMap);
686  xOpts.setAnytimeMigration(false);
687  yOpts.setAnytimeMigration(false);
688  zOpts.setAnytimeMigration(false);
689  xOpts.setStaticInsertion(true);
690  yOpts.setStaticInsertion(true);
691  zOpts.setStaticInsertion(true);
692  pmePencilX = CProxy_CudaPmePencilX::ckNew(xOpts);
693  pmePencilY = CProxy_CudaPmePencilY::ckNew(yOpts);
694  pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts);
695  // Send pencil proxies to other nodes
696  thisProxy.recvPencils(pmePencilX, pmePencilY, pmePencilZ);
697  pmePencilX.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
698  pmePencilY.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
699  pmePencilZ.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
700  }
701  }
702 
703 }
int zBlocks
Definition: PmeBase.h:25
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
#define DebugM(x, y)
Definition: Debug.h:75
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
#define iout
Definition: InfoStream.h:51
int yBlocks
Definition: PmeBase.h:25
int xBlocks
Definition: PmeBase.h:25

◆ initialize_pencils()

void ComputePmeCUDAMgr::initialize_pencils ( CkQdMsg *  msg)

Definition at line 760 of file ComputePmeCUDAMgr.C.

References createStream(), cudaCheck, DebugM, deviceCUDA, endi(), DeviceCUDA::getDeviceIDbyRank(), and DeviceCUDA::getNumDevice().

760  {
761  if (msg != NULL) delete msg;
762 
763  int numDevicesTmp = deviceCUDA->getNumDevice();
764  DebugM(4, "ComputePmeCUDAMgr::initialize_pencils() numDevicesTmp " << numDevicesTmp <<"\n"<< endi);
765  // Initialize device proxies for real-space interfacing
766  for (int i=0;i < ijPencilX.size();i++) {
767  // NOTE: i is here the device ID
768  int deviceID = deviceCUDA->getDeviceIDbyRank(i % numDevicesTmp);
769  deviceProxy[i].ckLocalBranch()->initialize(pmeGrid, ijPencilX[i].i, ijPencilX[i].j,
770  deviceID, pmePencilType, thisProxy, pmeAtomFiler);
771  if (pmePencilType == 1) {
772  deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilX);
773  } else if (pmePencilType == 2) {
774  deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXY);
775  } else {
776  deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXYZ);
777  }
778  }
779 
780  // Use above initialized device proxies for the PME pencils that interface with real-space
781  for (int i=0;i < ijPencilX.size();i++) {
782  if (pmePencilType == 1) {
783  pmePencilX(0, ijPencilX[i].i, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i]));
784  } else if (pmePencilType == 2) {
785  pmePencilXY(0, 0, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i]));
786  } else {
787  pmePencilXYZ[0].initializeDevice(new InitDeviceMsg(deviceProxy[i]));
788  }
789  }
790 
791  // Create extra devices for Y and Z pencils if necessary
792  int n = std::max(ijPencilY.size(), ijPencilZ.size());
793  if (n > ijPencilX.size()) {
794  int nextra = n - ijPencilX.size();
795  extraDevices.resize(nextra);
796  for (int i=0;i < nextra;i++) {
797  extraDevices[i].deviceID = deviceCUDA->getDeviceIDbyRank((i + ijPencilX.size()) % numDevicesTmp);
798  cudaCheck(cudaSetDevice(extraDevices[i].deviceID));
799  createStream(extraDevices[i].stream);
800  }
801  }
802 
803  // Initialize Y pencils
804  for (int i=0;i < ijPencilY.size();i++) {
805  int deviceID;
806  cudaStream_t stream;
807  if (i < ijPencilX.size()) {
808  deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID();
809  stream = deviceProxy[i].ckLocalBranch()->getStream();
810  } else {
811  deviceID = extraDevices[i-ijPencilX.size()].deviceID;
812  stream = extraDevices[i-ijPencilX.size()].stream;
813  }
814  pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy, deviceProxy[i]));
815  }
816 
817  // Initialize Z pencils
818  for (int i=0;i < ijPencilZ.size();i++) {
819  int deviceID;
820  cudaStream_t stream;
821  if (i < ijPencilX.size()) {
822  deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID();
823  stream = deviceProxy[i].ckLocalBranch()->getStream();
824  } else {
825  deviceID = extraDevices[i-ijPencilX.size()].deviceID;
826  stream = extraDevices[i-ijPencilX.size()].stream;
827  }
828  pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy, deviceProxy[i]));
829  }
830 
831 }
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
#define DebugM(x, y)
Definition: Debug.h:75
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
int getNumDevice()
Definition: DeviceCUDA.h:125
void createStream(cudaStream_t &stream)
int getDeviceIDbyRank(int rank)
Definition: DeviceCUDA.h:145
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

◆ isPmeDevice()

bool ComputePmeCUDAMgr::isPmeDevice ( int  deviceID)

Definition at line 611 of file ComputePmeCUDAMgr.C.

References deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), and DeviceCUDA::getNumDevice().

611  {
612  for (int i=0;i < nodeDeviceList.size();i++) {
613  if (deviceCUDA->getDeviceIDbyRank(nodeDeviceList[i].device % deviceCUDA->getNumDevice()) == deviceID) {
614  return true;
615  }
616  }
617  return false;
618 }
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
int getNumDevice()
Definition: DeviceCUDA.h:125
int getDeviceIDbyRank(int rank)
Definition: DeviceCUDA.h:145

◆ isPmeNode()

bool ComputePmeCUDAMgr::isPmeNode ( int  node)

Definition at line 599 of file ComputePmeCUDAMgr.C.

599  {
600  for (int i=0;i < nodeDeviceList.size();i++) {
601  if (nodeDeviceList[i].node == node) {
602  return true;
603  }
604  }
605  return false;
606 }

◆ isPmePe()

bool ComputePmeCUDAMgr::isPmePe ( int  pe)

Definition at line 589 of file ComputePmeCUDAMgr.C.

Referenced by CudaComputeNonbonded::assignPatches().

589  {
590  for (int i=0;i < xPes.size();i++) {
591  if (pe == xPes[i]) return true;
592  }
593  return false;
594 }

◆ Object()

static ComputePmeCUDAMgr* ComputePmeCUDAMgr::Object ( )
inlinestatic

Definition at line 613 of file ComputePmeCUDAMgr.h.

Referenced by CudaComputeNonbonded::assignPatches().

613  {
614  CProxy_ComputePmeCUDAMgr mgrProxy(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
615  return mgrProxy.ckLocalBranch();
616  }

◆ recvAtomFiler()

void ComputePmeCUDAMgr::recvAtomFiler ( CProxy_PmeAtomFiler  filer)

Definition at line 724 of file ComputePmeCUDAMgr.C.

724  {
725  pmeAtomFiler = filer;
726 }

◆ recvAtoms()

void ComputePmeCUDAMgr::recvAtoms ( PmeAtomMsg msg)

Definition at line 968 of file ComputePmeCUDAMgr.C.

References DebugM, endi(), getDevice(), PmeAtomMsg::i, and PmeAtomMsg::j.

968  {
969  DebugM(2, "ComputePmeCUDADevice::recvAtoms\n" << endi);
970  int device = getDevice(msg->i, msg->j);
971  deviceProxy[device].ckLocalBranch()->recvAtoms(msg);
972 }
#define DebugM(x, y)
Definition: Debug.h:75
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
int getDevice(int i, int j)

◆ recvDevices()

void ComputePmeCUDAMgr::recvDevices ( RecvDeviceMsg msg)

Definition at line 728 of file ComputePmeCUDAMgr.C.

References DebugM, RecvDeviceMsg::dev, endi(), NAMD_bug(), and RecvDeviceMsg::numDevicesMax.

728  {
729 
730  numDevicesMax = msg->numDevicesMax;
731  DebugM(4, "ComputePmeCUDAMgr::recvDevices() numDevicesMax " << numDevicesMax <<"\n"<< endi);
732  if (numDevices > numDevicesMax)
733  NAMD_bug("ComputePmeCUDAMgr::recvDevices, numDevices > numDevicesMax");
734  deviceProxy.resize(numDevices);
735  for (int i=0;i < numDevices;i++) {
736  deviceProxy[i] = msg->dev[i];
737  }
738  delete msg;
739 }
#define DebugM(x, y)
Definition: Debug.h:75
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
void NAMD_bug(const char *err_msg)
Definition: common.C:195
CProxy_ComputePmeCUDADevice * dev

◆ recvPencils() [1/3]

void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilXYZ  xyz)

Definition at line 741 of file ComputePmeCUDAMgr.C.

741  {
742  pmePencilXYZ = xyz;
743 }

◆ recvPencils() [2/3]

void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilXY  xy,
CProxy_CudaPmePencilZ  z 
)

Definition at line 745 of file ComputePmeCUDAMgr.C.

References xy.

745  {
746  pmePencilXY = xy;
747  pmePencilZ = z;
748 }
virial xy

◆ recvPencils() [3/3]

void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilX  x,
CProxy_CudaPmePencilY  y,
CProxy_CudaPmePencilZ  z 
)

Definition at line 750 of file ComputePmeCUDAMgr.C.

750  {
751  pmePencilX = x;
752  pmePencilY = y;
753  pmePencilZ = z;
754 }

◆ setupPencils()

void ComputePmeCUDAMgr::setupPencils ( )

Definition at line 418 of file ComputePmeCUDAMgr.C.

References PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, DebugM, deviceCUDA, PmeGrid::dim2, PmeGrid::dim3, endi(), DeviceCUDA::getNumDevice(), iINFO(), iout, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_bug(), Node::Object(), PmeGrid::order, Node::simParameters, simParams, PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by initialize().

418  {
420  DebugM(4, "ComputePmeCUDAMgr::setupPencils\n"<< endi);
421  pmeGrid.K1 = simParams->PMEGridSizeX;
422  pmeGrid.K2 = simParams->PMEGridSizeY;
423  pmeGrid.K3 = simParams->PMEGridSizeZ;
424  pmeGrid.order = simParams->PMEInterpOrder;
425  pmeGrid.dim2 = pmeGrid.K2;
426  pmeGrid.dim3 = 2 * (pmeGrid.K3/2 + 1);
427 
428  // Count the total number of devices assuming all nodes have the same number as this node
429  // NOTE: This should be changed in the future to support heterogeneous nodes!!!
430  int numDevicesTmp = deviceCUDA->getNumDevice();
431 
432  int numDeviceTot = CkNumNodes() * numDevicesTmp;
433  // Use approximately 1/4th of the devices for PME
434  // int numDeviceToUse = std::max(1, numDeviceTot/4);
435  int numDeviceToUse = 1;// THERE CAN BE ONLY 1
436  // there is a correctness problem in the multi-pencil case, so don't
437  // go there until that is fixed and we have a better multi-device
438  // parallelization.
439 
440  DebugM(4, "ComputePmeCUDAMgr::setupPencils numDeviceToUse "<<numDeviceToUse<< " numDeviceTot "<< numDeviceTot <<"\n"<< endi);
441  if (numDeviceToUse < 4) {
442  // 2D Slab
443  pmeGrid.yBlocks = 1;
444  pmeGrid.xBlocks = pmeGrid.zBlocks = numDeviceToUse;
445  } else {
446  // 1D Pencil
447  pmeGrid.yBlocks = (int)sqrt((double)numDeviceToUse);
448  pmeGrid.zBlocks = numDeviceToUse/pmeGrid.yBlocks;
449  pmeGrid.xBlocks = pmeGrid.zBlocks;
450  }
451 
452  if ( simParams->PMEPencilsX > 0 ) pmeGrid.xBlocks = simParams->PMEPencilsX;
453  if ( simParams->PMEPencilsY > 0 ) pmeGrid.yBlocks = simParams->PMEPencilsY;
454  if ( simParams->PMEPencilsZ > 0 ) pmeGrid.zBlocks = simParams->PMEPencilsZ;
455 
456  // Restrict number of pencils to the maximum number
457  restrictToMaxPMEPencils();
458 
459  // Fix pencil numbers if they don't make sense w.r.t. number of devices
460  if (pmeGrid.yBlocks == 1) {
461  // 2D Slab
462  if (pmeGrid.xBlocks > numDeviceTot) pmeGrid.xBlocks = numDeviceTot;
463  if (pmeGrid.zBlocks > numDeviceTot) pmeGrid.zBlocks = numDeviceTot;
464  } else {
465  // 1D Pencil
466  if (pmeGrid.yBlocks*pmeGrid.zBlocks > numDeviceTot ||
467  pmeGrid.xBlocks*pmeGrid.zBlocks > numDeviceTot ||
468  pmeGrid.xBlocks*pmeGrid.yBlocks > numDeviceTot) {
469  pmeGrid.yBlocks = std::min(pmeGrid.yBlocks, (int)sqrt((double)numDeviceTot));
470  pmeGrid.zBlocks = std::min(pmeGrid.zBlocks, numDeviceTot/pmeGrid.yBlocks);
471  }
472  pmeGrid.xBlocks = std::min(pmeGrid.yBlocks, pmeGrid.zBlocks);
473  }
474 
475  // Here (block1, block2, block3) define the size of charge grid pencil in each direction
476  pmeGrid.block1 = ( pmeGrid.K1 + pmeGrid.xBlocks - 1 ) / pmeGrid.xBlocks;
477  pmeGrid.block2 = ( pmeGrid.K2 + pmeGrid.yBlocks - 1 ) / pmeGrid.yBlocks;
478  pmeGrid.block3 = ( pmeGrid.K3 + pmeGrid.zBlocks - 1 ) / pmeGrid.zBlocks;
479 
480  // Determine type of FFT
481  if (pmeGrid.xBlocks == 1 && pmeGrid.yBlocks == 1 && pmeGrid.zBlocks == 1) {
482  // Single block => 3D FFT
483  pmePencilType = 3;
484  iout << iINFO << "Use 3D box decompostion in PME FFT.\n" << endi;
485  } else if (pmeGrid.yBlocks == 1) {
486  // Blocks in all but y-dimension => 2D FFT
487  pmePencilType = 2;
488  iout << iINFO << "Use 2D slab decompostion in PME FFT.\n" << endi;
489  } else {
490  // Blocks in all dimensions => 1D FFT
491  pmePencilType = 1;
492  iout << iINFO << "Use 1D pencil decompostion in PME FFT.\n" << endi;
493  }
494 
495  //--------------------------------------------------------------------------
496  // Map pencils into Pes
497  xPes.resize(pmeGrid.yBlocks*pmeGrid.zBlocks);
498 
499  if (pmePencilType == 1 || pmePencilType == 2) {
500  zPes.resize(pmeGrid.xBlocks*pmeGrid.yBlocks);
501  }
502  if (pmePencilType == 1) {
503  yPes.resize(pmeGrid.xBlocks*pmeGrid.zBlocks);
504  }
505 
506  // i % numDeviceTot = device index
507  // (i % numDeviceTot)/deviceCUDA->getNumDevice() = node index
508  // (i % CkNodeSize(node)) = pe displacement
509  for (int i=0;i < xPes.size();i++) {
510  int node = (i % numDeviceTot)/numDevicesTmp;
511  xPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
512  }
513  for (int i=0;i < yPes.size();i++) {
514  int node = (i % numDeviceTot)/numDevicesTmp;
515  yPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
516  }
517  for (int i=0;i < zPes.size();i++) {
518  int node = (i % numDeviceTot)/numDevicesTmp;
519  zPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
520  }
521 
522  // char peStr[256];
523  // char *p = peStr;
524  // p += sprintf(p, "%2d | xPes", CkMyPe());
525  // for (int i=0;i < xPes.size();i++)
526  // p += sprintf(p, " %d", xPes[i]);
527  // p += sprintf(p, " yPes");
528  // for (int i=0;i < yPes.size();i++)
529  // p += sprintf(p, " %d", yPes[i]);
530  // p += sprintf(p, " zPes");
531  // for (int i=0;i < zPes.size();i++)
532  // p += sprintf(p, " %d", zPes[i]);
533  // fprintf(stderr, "%s | %d %d\n",peStr, CkNodeFirst(CkMyNode()), CkNodeSize(CkMyNode()));
534 
535  //--------------------------------------------------------------------------
536  // Build global node list for x-pencils
537  nodeDeviceList.resize(xPes.size());
538  numDevices = 0;
539  for (int k=0;k < xPes.size();k++) {
540  nodeDeviceList[k].node = CkNodeOf(xPes[k]);
541  nodeDeviceList[k].device = -1;
542  if (nodeDeviceList[k].node == CkMyNode()) {
543  nodeDeviceList[k].device = numDevices++;
544  }
545  }
546 
547  ijPencilX.clear();
548  ijPencilY.clear();
549  ijPencilZ.clear();
550 
551  // Construct list of pencil coordinates (i,j) for each device held by this node
552  for (int k=0;k < xPes.size();k++) {
553  if (CkMyNode() == CkNodeOf(xPes[k])) {
554  IJ ij;
555  ij.i = k % pmeGrid.yBlocks;
556  ij.j = k / pmeGrid.yBlocks;
557  ijPencilX.push_back(ij);
558  }
559  }
560  if (ijPencilX.size() != numDevices)
561  NAMD_bug("ComputePmeCUDAMgr::setupPencils, error setting up x-pencils and devices");
562 
563  int numDevicesY = 0;
564  for (int k=0;k < yPes.size();k++) {
565  if (CkMyNode() == CkNodeOf(yPes[k])) {
566  IJ ij;
567  ij.i = k % pmeGrid.xBlocks;
568  ij.j = k / pmeGrid.xBlocks;
569  ijPencilY.push_back(ij);
570  numDevicesY++;
571  }
572  }
573 
574  int numDevicesZ = 0;
575  for (int k=0;k < zPes.size();k++) {
576  if (CkMyNode() == CkNodeOf(zPes[k])) {
577  IJ ij;
578  ij.i = k % pmeGrid.xBlocks;
579  ij.j = k / pmeGrid.xBlocks;
580  ijPencilZ.push_back(ij);
581  numDevicesZ++;
582  }
583  }
584 }
static Node * Object()
Definition: Node.h:86
int dim2
Definition: PmeBase.h:22
int zBlocks
Definition: PmeBase.h:25
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
int dim3
Definition: PmeBase.h:22
int K2
Definition: PmeBase.h:21
SimParameters * simParameters
Definition: Node.h:181
int K1
Definition: PmeBase.h:21
#define DebugM(x, y)
Definition: Debug.h:75
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
int block1
Definition: PmeBase.h:24
int getNumDevice()
Definition: DeviceCUDA.h:125
#define iout
Definition: InfoStream.h:51
int block2
Definition: PmeBase.h:24
int yBlocks
Definition: PmeBase.h:25
int order
Definition: PmeBase.h:23
void NAMD_bug(const char *err_msg)
Definition: common.C:195
int block3
Definition: PmeBase.h:24
#define simParams
Definition: Output.C:129
int K3
Definition: PmeBase.h:21
int xBlocks
Definition: PmeBase.h:25

◆ skip()

void ComputePmeCUDAMgr::skip ( void  )

Definition at line 953 of file ComputePmeCUDAMgr.C.

References DebugM, and endi().

953  {
954  DebugM(2, "ComputePmeCUDADevice::skip\n" << endi);
955  switch(pmePencilType) {
956  case 1:
957  pmePencilZ.skip();
958  break;
959  case 2:
960  pmePencilZ.skip();
961  break;
962  case 3:
963  pmePencilXYZ[0].skip();
964  break;
965  }
966 }
#define DebugM(x, y)
Definition: Debug.h:75
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54

Member Data Documentation

◆ ComputePmeCUDAMgr_SDAG_CODE

ComputePmeCUDAMgr::ComputePmeCUDAMgr_SDAG_CODE

Definition at line 581 of file ComputePmeCUDAMgr.h.


The documentation for this class was generated from the following files: