NAMD
Classes | Public Member Functions | Static Public Member Functions | Public Attributes | List of all members
ComputePmeCUDAMgr Class Reference

#include <ComputePmeCUDAMgr.h>

Inheritance diagram for ComputePmeCUDAMgr:

Public Member Functions

 ComputePmeCUDAMgr ()
 
 ComputePmeCUDAMgr (CkMigrateMessage *)
 
 ~ComputePmeCUDAMgr ()
 
void setupPencils ()
 
void initialize (CkQdMsg *msg)
 
void initialize_pencils (CkQdMsg *msg)
 
void activate_pencils (CkQdMsg *msg)
 
PmeGrid getPmeGrid ()
 
int getNode (int i, int j)
 
int getDevice (int i, int j)
 
int getDevicePencilY (int i, int j)
 
int getDevicePencilZ (int i, int j)
 
int getDeviceIDPencilX (int i, int j)
 
int getDeviceIDPencilY (int i, int j)
 
int getDeviceIDPencilZ (int i, int j)
 
void recvPencils (CProxy_CudaPmePencilXYZ xyz)
 
void recvPencils (CProxy_CudaPmePencilXY xy, CProxy_CudaPmePencilZ z)
 
void recvPencils (CProxy_CudaPmePencilX x, CProxy_CudaPmePencilY y, CProxy_CudaPmePencilZ z)
 
void createDevicesAndAtomFiler ()
 
void recvDevices (RecvDeviceMsg *msg)
 
void recvAtomFiler (CProxy_PmeAtomFiler filer)
 
void skip ()
 
void recvAtoms (PmeAtomMsg *msg)
 
void getHomePencil (PatchID patchID, int &homey, int &homez)
 
int getHomeNode (PatchID patchID)
 
bool isPmePe (int pe)
 
bool isPmeNode (int node)
 
bool isPmeDevice (int deviceID)
 

Static Public Member Functions

static ComputePmeCUDAMgrObject ()
 

Public Attributes

 ComputePmeCUDAMgr_SDAG_CODE
 

Detailed Description

Definition at line 438 of file ComputePmeCUDAMgr.h.

Constructor & Destructor Documentation

ComputePmeCUDAMgr::ComputePmeCUDAMgr ( )

Definition at line 204 of file ComputePmeCUDAMgr.C.

204  {
205  __sdag_init();
206  numDevices = 0;
207  numTotalPatches = 0;
208  numNodesContributed = 0;
209  numDevicesMax = 0;
210 }
ComputePmeCUDAMgr::ComputePmeCUDAMgr ( CkMigrateMessage *  )

Definition at line 215 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

215  {
216  __sdag_init();
217  NAMD_bug("ComputePmeCUDAMgr cannot be migrated");
218  numDevices = 0;
219  numTotalPatches = 0;
220  numNodesContributed = 0;
221  numDevicesMax = 0;
222 }
void NAMD_bug(const char *err_msg)
Definition: common.C:129
ComputePmeCUDAMgr::~ComputePmeCUDAMgr ( )

Definition at line 227 of file ComputePmeCUDAMgr.C.

References cudaCheck, and stream.

227  {
228  for (int i=0;i < extraDevices.size();i++) {
229  cudaCheck(cudaSetDevice(extraDevices[i].deviceID));
230  cudaCheck(cudaStreamDestroy(extraDevices[i].stream));
231  }
232 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:95

Member Function Documentation

void ComputePmeCUDAMgr::activate_pencils ( CkQdMsg *  msg)

Definition at line 809 of file ComputePmeCUDAMgr.C.

References PmeStartMsg::data, and PmeStartMsg::dataSize.

809  {
810  if (msg != NULL) delete msg;
811 
812  for (int device=0;device < numDevices;device++) {
813  deviceProxy[device].ckLocalBranch()->activate_pencils();
814  }
815 
816  for (int i=0;i < ijPencilY.size();i++) {
817  PmeStartMsg* pmeStartYMsg = new PmeStartMsg();
818  pmeStartYMsg->data = NULL;
819  pmeStartYMsg->dataSize = 0;
820  pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).start(pmeStartYMsg);
821  }
822 
823  for (int i=0;i < ijPencilZ.size();i++) {
824  PmeStartMsg* pmeStartZMsg = new PmeStartMsg();
825  pmeStartZMsg->data = NULL;
826  pmeStartZMsg->dataSize = 0;
827  pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).start(pmeStartZMsg);
828  }
829 
830 }
int dataSize
Definition: PmeSolver.h:104
float * data
Definition: PmeSolver.h:103
void ComputePmeCUDAMgr::createDevicesAndAtomFiler ( )

Definition at line 679 of file ComputePmeCUDAMgr.C.

References RecvDeviceMsg::dev, NAMD_bug(), RecvDeviceMsg::numDevicesMax, and PRIORITY_SIZE.

679  {
680  if (CkMyNode() != 0)
681  NAMD_bug("ComputePmeCUDAMgr::createDevicesAndAtomFiler can only be called on root node");
682 
683  // Root node creates all device proxies
684  // NOTE: Only root node has numDevicesMax
685  RecvDeviceMsg* msg = new (numDevicesMax, PRIORITY_SIZE) RecvDeviceMsg();
686  msg->numDevicesMax = numDevicesMax;
687  for (int i=0;i < numDevicesMax;i++) {
688  CProxy_ComputePmeCUDADevice dev = CProxy_ComputePmeCUDADevice::ckNew();
689  memcpy(&msg->dev[i], &dev, sizeof(CProxy_ComputePmeCUDADevice));
690  }
691  thisProxy.recvDevices(msg);
692 
693  CProxy_PmeAtomFiler filer = CProxy_PmeAtomFiler::ckNew();
694  thisProxy.recvAtomFiler(filer);
695 
696 }
#define PRIORITY_SIZE
Definition: Priorities.h:13
void NAMD_bug(const char *err_msg)
Definition: common.C:129
CProxy_ComputePmeCUDADevice * dev
int ComputePmeCUDAMgr::getDevice ( int  i,
int  j 
)

Definition at line 854 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getDeviceIDPencilX(), and recvAtoms().

854  {
855  if (i < 0 || i >= pmeGrid.yBlocks || j < 0 || j >= pmeGrid.zBlocks)
856  NAMD_bug("ComputePmeCUDAMgr::getDevice, pencil index out of bounds");
857  int ind = i + j*pmeGrid.yBlocks;
858  int device = nodeDeviceList[ind].device;
859  if (device == -1)
860  NAMD_bug("ComputePmeCUDAMgr::getDevice, no device found");
861  return device;
862 }
int zBlocks
Definition: PmeBase.h:22
int yBlocks
Definition: PmeBase.h:22
void NAMD_bug(const char *err_msg)
Definition: common.C:129
int ComputePmeCUDAMgr::getDeviceIDPencilX ( int  i,
int  j 
)

Definition at line 895 of file ComputePmeCUDAMgr.C.

References getDevice().

895  {
896  int device = getDevice(i, j);
897  return deviceProxy[device].ckLocalBranch()->getDeviceID();
898 }
int getDevice(int i, int j)
int ComputePmeCUDAMgr::getDeviceIDPencilY ( int  i,
int  j 
)

Definition at line 903 of file ComputePmeCUDAMgr.C.

References getDevicePencilY().

903  {
904  int device = getDevicePencilY(i, j);
905  return deviceProxy[device].ckLocalBranch()->getDeviceID();
906 }
int getDevicePencilY(int i, int j)
int ComputePmeCUDAMgr::getDeviceIDPencilZ ( int  i,
int  j 
)

Definition at line 911 of file ComputePmeCUDAMgr.C.

References getDevicePencilZ().

911  {
912  int device = getDevicePencilZ(i, j);
913  return deviceProxy[device].ckLocalBranch()->getDeviceID();
914 }
int getDevicePencilZ(int i, int j)
int ComputePmeCUDAMgr::getDevicePencilY ( int  i,
int  j 
)

Definition at line 867 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::zBlocks.

Referenced by getDeviceIDPencilY().

867  {
868  if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.zBlocks)
869  NAMD_bug("ComputePmeCUDAMgr::getDevicePencilY, pencil index out of bounds");
870  for (int device=0;device < ijPencilY.size();device++) {
871  if (ijPencilY[device].i == i && ijPencilY[device].j == j) return device;
872  }
873  char str[256];
874  sprintf(str, "ComputePmeCUDAMgr::getDevicePencilY, no device found at i %d j %d",i,j);
875  NAMD_bug(str);
876  return -1;
877 }
int zBlocks
Definition: PmeBase.h:22
void NAMD_bug(const char *err_msg)
Definition: common.C:129
int xBlocks
Definition: PmeBase.h:22
int ComputePmeCUDAMgr::getDevicePencilZ ( int  i,
int  j 
)

Definition at line 882 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::yBlocks.

Referenced by getDeviceIDPencilZ().

882  {
883  if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.yBlocks)
884  NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, pencil index out of bounds");
885  for (int device=0;device < ijPencilZ.size();device++) {
886  if (ijPencilZ[device].i == i && ijPencilZ[device].j == j) return device;
887  }
888  NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, no device found");
889  return -1;
890 }
int yBlocks
Definition: PmeBase.h:22
void NAMD_bug(const char *err_msg)
Definition: common.C:129
int xBlocks
Definition: PmeBase.h:22
int ComputePmeCUDAMgr::getHomeNode ( PatchID  patchID)

Definition at line 845 of file ComputePmeCUDAMgr.C.

References getHomePencil(), and getNode().

845  {
846  int homey, homez;
847  getHomePencil(patchID, homey, homez);
848  return getNode(homey, homez);
849 }
void getHomePencil(PatchID patchID, int &homey, int &homez)
int getNode(int i, int j)
void ComputePmeCUDAMgr::getHomePencil ( PatchID  patchID,
int &  homey,
int &  homez 
)

Definition at line 238 of file ComputePmeCUDAMgr.C.

References getPencilDim(), PmeGrid::K2, PmeGrid::K3, PatchMap::max_b(), PatchMap::max_c(), PatchMap::min_b(), PatchMap::min_c(), NAMD_bug(), PatchMap::Object(), PmeGrid::order, Perm_X_Y_Z, PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getHomeNode().

238  {
239  PatchMap *patchMap = PatchMap::Object();
240 
241  BigReal miny = patchMap->min_b(patchID);
242  BigReal maxy = patchMap->max_b(patchID);
243 
244  BigReal minz = patchMap->min_c(patchID);
245  BigReal maxz = patchMap->max_c(patchID);
246 
247  // Determine home pencil = pencil with most overlap
248 
249  // Calculate patch grid coordinates
250  int patch_y0 = floor((miny+0.5)*pmeGrid.K2);
251  int patch_y1 = floor((maxy+0.5)*pmeGrid.K2)-1;
252  int patch_z0 = floor((minz+0.5)*pmeGrid.K3);
253  int patch_z1 = floor((maxz+0.5)*pmeGrid.K3)-1;
254 
255  if (patch_y0 < 0 || patch_y1 >= pmeGrid.K2 || patch_z0 < 0 || patch_z1 >= pmeGrid.K3) {
256  NAMD_bug("ComputePmeCUDAMgr::getHomePencil, patch bounds are outside grid bounds");
257  }
258 
259  int maxOverlap = 0;
260  homey = -1;
261  homez = -1;
262  for (int iz=0;iz < pmeGrid.zBlocks;iz++) {
263  for (int iy=0;iy < pmeGrid.yBlocks;iy++) {
264  int pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1;
265  getPencilDim(pmeGrid, Perm_X_Y_Z, iy, iz,
266  pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1);
267 
268  if (pencil_y1 - pencil_y0 < pmeGrid.order || pencil_z1 - pencil_z0 < pmeGrid.order)
269  NAMD_bug("ComputePmeCUDAMgr::getHomePencil, pencil size must be >= PMEInterpOrder");
270 
271  int y0 = std::max(patch_y0, pencil_y0);
272  int y1 = std::min(patch_y1, pencil_y1);
273  int z0 = std::max(patch_z0, pencil_z0);
274  int z1 = std::min(patch_z1, pencil_z1);
275 
276  int overlap = (y1-y0 > 0 && z1-z0 > 0) ? (y1-y0)*(z1-z0) : -1;
277 
278  if (overlap > maxOverlap) {
279  maxOverlap = overlap;
280  homey = iy;
281  homez = iz;
282  }
283  }
284  }
285 
286  if (homey == -1 || homez == -1)
287  NAMD_bug("ComputePmeCUDAMgr::getHomePencil, home pencil not found");
288 }
int zBlocks
Definition: PmeBase.h:22
static PatchMap * Object()
Definition: PatchMap.h:27
BigReal max_c(int pid) const
Definition: PatchMap.h:96
int K2
Definition: PmeBase.h:18
static void getPencilDim(const PmeGrid &pmeGrid, const int permutation, const int jblock, const int kblock, int &i0, int &i1, int &j0, int &j1, int &k0, int &k1)
Definition: PmeSolverUtil.h:29
BigReal min_b(int pid) const
Definition: PatchMap.h:93
int yBlocks
Definition: PmeBase.h:22
int order
Definition: PmeBase.h:20
void NAMD_bug(const char *err_msg)
Definition: common.C:129
BigReal max_b(int pid) const
Definition: PatchMap.h:94
int K3
Definition: PmeBase.h:18
BigReal min_c(int pid) const
Definition: PatchMap.h:95
double BigReal
Definition: common.h:114
int ComputePmeCUDAMgr::getNode ( int  i,
int  j 
)

Definition at line 835 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getHomeNode().

835  {
836  if (i < 0 || i >= pmeGrid.yBlocks || j < 0 || j >= pmeGrid.zBlocks)
837  NAMD_bug("ComputePmeCUDAMgr::getNode, pencil index out of bounds");
838  int ind = i + j*pmeGrid.yBlocks;
839  return nodeDeviceList[ind].node;
840 }
int zBlocks
Definition: PmeBase.h:22
int yBlocks
Definition: PmeBase.h:22
void NAMD_bug(const char *err_msg)
Definition: common.C:129
PmeGrid ComputePmeCUDAMgr::getPmeGrid ( )
inline

Definition at line 448 of file ComputePmeCUDAMgr.h.

Referenced by ComputePmeCUDA::initialize().

448 {return pmeGrid;}
void ComputePmeCUDAMgr::initialize ( CkQdMsg *  msg)

Definition at line 598 of file ComputePmeCUDAMgr.C.

References endi(), iINFO(), iout, setupPencils(), PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.

598  {
599  if (msg != NULL) delete msg;
600 
601  setupPencils();
602 
603  if ( ! CkMyNode() ) {
604  iout << iINFO << "PME using " << pmeGrid.xBlocks << " x " <<
605  pmeGrid.yBlocks << " x " << pmeGrid.zBlocks <<
606  " pencil grid for FFT and reciprocal sum.\n" << endi;
607  }
608 
609  // Initialize list that contains the number of home patches for each device on this manager
610  numHomePatchesList.resize(numDevices, 0);
611 
612  //--------------------------------------------------------------------------
613  // Create devices and atom filer
614  // numDevices = number of devices we'll be using, possibly different on each node
615  // Use root node to compute the maximum number of devices in use over all nodes
616  thisProxy[0].initializeDevicesAndAtomFiler(new NumDevicesMsg(numDevices));
617  //--------------------------------------------------------------------------
618 
619  if (CkMyNode() == 0) {
620 
621  if (pmePencilType == 3) {
622  // Single block => 3D FFT
623  CProxy_PmePencilXYZMap xyzMap = CProxy_PmePencilXYZMap::ckNew(xPes[0]);
624  CkArrayOptions xyzOpts(1);
625  xyzOpts.setMap(xyzMap);
626  xyzOpts.setAnytimeMigration(false);
627  xyzOpts.setStaticInsertion(true);
628  pmePencilXYZ = CProxy_CudaPmePencilXYZ::ckNew(xyzOpts);
629  pmePencilXYZ[0].initialize(new CudaPmeXYZInitMsg(pmeGrid));
630  thisProxy.recvPencils(pmePencilXYZ);
631  } else if (pmePencilType == 2) {
632  // Blocks in all but y-dimension => 2D FFT
633  CProxy_PmePencilXYMap xyMap = CProxy_PmePencilXYMap::ckNew(xPes);
634  CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes);
635  CkArrayOptions xyOpts(1, 1, pmeGrid.zBlocks);
636  CkArrayOptions zOpts(pmeGrid.xBlocks, 1, 1);
637  xyOpts.setMap(xyMap);
638  zOpts.setMap(zMap);
639  xyOpts.setAnytimeMigration(false);
640  zOpts.setAnytimeMigration(false);
641  xyOpts.setStaticInsertion(true);
642  zOpts.setStaticInsertion(true);
643  pmePencilXY = CProxy_CudaPmePencilXY::ckNew(xyOpts);
644  pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts);
645  // Send pencil proxies to other nodes
646  thisProxy.recvPencils(pmePencilXY, pmePencilZ);
647  pmePencilXY.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap));
648  pmePencilZ.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap));
649  } else {
650  // Blocks in all dimensions => 1D FFT
651  CProxy_PmePencilXMap xMap = CProxy_PmePencilXMap::ckNew(1, 2, pmeGrid.yBlocks, xPes);
652  CProxy_PmePencilXMap yMap = CProxy_PmePencilXMap::ckNew(0, 2, pmeGrid.xBlocks, yPes);
653  CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes);
654  CkArrayOptions xOpts(1, pmeGrid.yBlocks, pmeGrid.zBlocks);
655  CkArrayOptions yOpts(pmeGrid.xBlocks, 1, pmeGrid.zBlocks);
656  CkArrayOptions zOpts(pmeGrid.xBlocks, pmeGrid.yBlocks, 1);
657  xOpts.setMap(xMap);
658  yOpts.setMap(yMap);
659  zOpts.setMap(zMap);
660  xOpts.setAnytimeMigration(false);
661  yOpts.setAnytimeMigration(false);
662  zOpts.setAnytimeMigration(false);
663  xOpts.setStaticInsertion(true);
664  yOpts.setStaticInsertion(true);
665  zOpts.setStaticInsertion(true);
666  pmePencilX = CProxy_CudaPmePencilX::ckNew(xOpts);
667  pmePencilY = CProxy_CudaPmePencilY::ckNew(yOpts);
668  pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts);
669  // Send pencil proxies to other nodes
670  thisProxy.recvPencils(pmePencilX, pmePencilY, pmePencilZ);
671  pmePencilX.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
672  pmePencilY.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
673  pmePencilZ.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
674  }
675  }
676 
677 }
int zBlocks
Definition: PmeBase.h:22
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:81
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
#define iout
Definition: InfoStream.h:51
int yBlocks
Definition: PmeBase.h:22
int xBlocks
Definition: PmeBase.h:22
void ComputePmeCUDAMgr::initialize_pencils ( CkQdMsg *  msg)

Definition at line 732 of file ComputePmeCUDAMgr.C.

References createStream(), cudaCheck, deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), DeviceCUDA::getNumDevice(), and stream.

732  {
733  if (msg != NULL) delete msg;
734 
735  int numDevicesTmp = deviceCUDA->getNumDevice();
736 
737  // Initialize device proxies for real-space interfacing
738  for (int i=0;i < ijPencilX.size();i++) {
739  // NOTE: i is here the device ID
740  int deviceID = deviceCUDA->getDeviceIDbyRank(i % numDevicesTmp);
741  deviceProxy[i].ckLocalBranch()->initialize(pmeGrid, ijPencilX[i].i, ijPencilX[i].j,
742  deviceID, pmePencilType, thisProxy, pmeAtomFiler);
743  if (pmePencilType == 1) {
744  deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilX);
745  } else if (pmePencilType == 2) {
746  deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXY);
747  } else {
748  deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXYZ);
749  }
750  }
751 
752  // Use above initialized device proxies for the PME pencils that interface with real-space
753  for (int i=0;i < ijPencilX.size();i++) {
754  if (pmePencilType == 1) {
755  pmePencilX(0, ijPencilX[i].i, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i]));
756  } else if (pmePencilType == 2) {
757  pmePencilXY(0, 0, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i]));
758  } else {
759  pmePencilXYZ[0].initializeDevice(new InitDeviceMsg(deviceProxy[i]));
760  }
761  }
762 
763  // Create extra devices for Y and Z pencils if necessary
764  int n = std::max(ijPencilY.size(), ijPencilZ.size());
765  if (n > ijPencilX.size()) {
766  int nextra = n - ijPencilX.size();
767  extraDevices.resize(nextra);
768  for (int i=0;i < nextra;i++) {
769  extraDevices[i].deviceID = deviceCUDA->getDeviceIDbyRank((i + ijPencilX.size()) % numDevicesTmp);
770  cudaCheck(cudaSetDevice(extraDevices[i].deviceID));
771  createStream(extraDevices[i].stream);
772  }
773  }
774 
775  // Initialize Y pencils
776  for (int i=0;i < ijPencilY.size();i++) {
777  int deviceID;
778  cudaStream_t stream;
779  if (i < ijPencilX.size()) {
780  deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID();
781  stream = deviceProxy[i].ckLocalBranch()->getStream();
782  } else {
783  deviceID = extraDevices[i-ijPencilX.size()].deviceID;
784  stream = extraDevices[i-ijPencilX.size()].stream;
785  }
786  pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy));
787  }
788 
789  // Initialize Z pencils
790  for (int i=0;i < ijPencilZ.size();i++) {
791  int deviceID;
792  cudaStream_t stream;
793  if (i < ijPencilX.size()) {
794  deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID();
795  stream = deviceProxy[i].ckLocalBranch()->getStream();
796  } else {
797  deviceID = extraDevices[i-ijPencilX.size()].deviceID;
798  stream = extraDevices[i-ijPencilX.size()].stream;
799  }
800  pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy));
801  }
802 
803 }
int getNumDevice()
Definition: DeviceCUDA.h:93
__thread cudaStream_t stream
void createStream(cudaStream_t &stream)
int getDeviceIDbyRank(int rank)
Definition: DeviceCUDA.h:113
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
#define cudaCheck(stmt)
Definition: CudaUtils.h:95
bool ComputePmeCUDAMgr::isPmeDevice ( int  deviceID)

Definition at line 585 of file ComputePmeCUDAMgr.C.

References deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), and DeviceCUDA::getNumDevice().

585  {
586  for (int i=0;i < nodeDeviceList.size();i++) {
587  if (deviceCUDA->getDeviceIDbyRank(nodeDeviceList[i].device % deviceCUDA->getNumDevice()) == deviceID) {
588  return true;
589  }
590  }
591  return false;
592 }
int getNumDevice()
Definition: DeviceCUDA.h:93
int getDeviceIDbyRank(int rank)
Definition: DeviceCUDA.h:113
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
bool ComputePmeCUDAMgr::isPmeNode ( int  node)

Definition at line 573 of file ComputePmeCUDAMgr.C.

573  {
574  for (int i=0;i < nodeDeviceList.size();i++) {
575  if (nodeDeviceList[i].node == node) {
576  return true;
577  }
578  }
579  return false;
580 }
bool ComputePmeCUDAMgr::isPmePe ( int  pe)

Definition at line 563 of file ComputePmeCUDAMgr.C.

Referenced by CudaComputeNonbonded::assignPatches().

563  {
564  for (int i=0;i < xPes.size();i++) {
565  if (pe == xPes[i]) return true;
566  }
567  return false;
568 }
static ComputePmeCUDAMgr* ComputePmeCUDAMgr::Object ( )
inlinestatic

Definition at line 472 of file ComputePmeCUDAMgr.h.

Referenced by CudaComputeNonbonded::assignPatches().

472  {
473  CProxy_ComputePmeCUDAMgr mgrProxy(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
474  return mgrProxy.ckLocalBranch();
475  }
void ComputePmeCUDAMgr::recvAtomFiler ( CProxy_PmeAtomFiler  filer)

Definition at line 698 of file ComputePmeCUDAMgr.C.

698  {
699  pmeAtomFiler = filer;
700 }
void ComputePmeCUDAMgr::recvAtoms ( PmeAtomMsg msg)

Definition at line 933 of file ComputePmeCUDAMgr.C.

References getDevice(), PmeAtomMsg::i, and PmeAtomMsg::j.

933  {
934  int device = getDevice(msg->i, msg->j);
935  deviceProxy[device].ckLocalBranch()->recvAtoms(msg);
936 }
int getDevice(int i, int j)
void ComputePmeCUDAMgr::recvDevices ( RecvDeviceMsg msg)

Definition at line 702 of file ComputePmeCUDAMgr.C.

References RecvDeviceMsg::dev, NAMD_bug(), and RecvDeviceMsg::numDevicesMax.

702  {
703  numDevicesMax = msg->numDevicesMax;
704  if (numDevices > numDevicesMax)
705  NAMD_bug("ComputePmeCUDAMgr::recvDevices, numDevices > numDevicesMax");
706  deviceProxy.resize(numDevices);
707  for (int i=0;i < numDevices;i++) {
708  deviceProxy[i] = msg->dev[i];
709  }
710  delete msg;
711 }
void NAMD_bug(const char *err_msg)
Definition: common.C:129
CProxy_ComputePmeCUDADevice * dev
void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilXYZ  xyz)

Definition at line 713 of file ComputePmeCUDAMgr.C.

713  {
714  pmePencilXYZ = xyz;
715 }
void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilXY  xy,
CProxy_CudaPmePencilZ  z 
)

Definition at line 717 of file ComputePmeCUDAMgr.C.

References xy, and z.

717  {
718  pmePencilXY = xy;
719  pmePencilZ = z;
720 }
virial xy
gridSize z
void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilX  x,
CProxy_CudaPmePencilY  y,
CProxy_CudaPmePencilZ  z 
)

Definition at line 722 of file ComputePmeCUDAMgr.C.

References x, y, and z.

722  {
723  pmePencilX = x;
724  pmePencilY = y;
725  pmePencilZ = z;
726 }
gridSize z
gridSize y
gridSize x
void ComputePmeCUDAMgr::setupPencils ( )

Definition at line 400 of file ComputePmeCUDAMgr.C.

References PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, deviceCUDA, PmeGrid::dim2, PmeGrid::dim3, DeviceCUDA::getNumDevice(), if(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_bug(), Node::Object(), PmeGrid::order, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEPencilsX, SimParameters::PMEPencilsY, SimParameters::PMEPencilsZ, Node::simParameters, PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by initialize().

400  {
402 
403  pmeGrid.K1 = simParams->PMEGridSizeX;
404  pmeGrid.K2 = simParams->PMEGridSizeY;
405  pmeGrid.K3 = simParams->PMEGridSizeZ;
406  pmeGrid.order = simParams->PMEInterpOrder;
407  pmeGrid.dim2 = pmeGrid.K2;
408  pmeGrid.dim3 = 2 * (pmeGrid.K3/2 + 1);
409 
410  // Count the total number of devices assuming all nodes have the same number as this node
411  // NOTE: This should be changed in the future to support heterogeneous nodes!!!
412  int numDevicesTmp = deviceCUDA->getNumDevice();
413 
414  int numDeviceTot = CkNumNodes() * numDevicesTmp;
415  // Use approximately 1/4th of the devices for PME
416  int numDeviceToUse = std::max(1, numDeviceTot/4);
417 
418  if (numDeviceToUse < 4) {
419  // 2D Slab
420  pmeGrid.yBlocks = 1;
421  pmeGrid.xBlocks = pmeGrid.zBlocks = numDeviceToUse;
422  } else {
423  // 1D Pencil
424  pmeGrid.yBlocks = (int)sqrt((double)numDeviceToUse);
425  pmeGrid.zBlocks = numDeviceToUse/pmeGrid.yBlocks;
426  pmeGrid.xBlocks = pmeGrid.zBlocks;
427  }
428 
429  if ( simParams->PMEPencilsX > 0 ) pmeGrid.xBlocks = simParams->PMEPencilsX;
430  if ( simParams->PMEPencilsY > 0 ) pmeGrid.yBlocks = simParams->PMEPencilsY;
431  if ( simParams->PMEPencilsZ > 0 ) pmeGrid.zBlocks = simParams->PMEPencilsZ;
432 
433  // Restrict number of pencils to the maximum number
434  restrictToMaxPMEPencils();
435 
436  // Fix pencil numbers if they don't make sense w.r.t. number of devices
437  if (pmeGrid.yBlocks == 1) {
438  // 2D Slab
439  if (pmeGrid.xBlocks > numDeviceTot) pmeGrid.xBlocks = numDeviceTot;
440  if (pmeGrid.zBlocks > numDeviceTot) pmeGrid.zBlocks = numDeviceTot;
441  } else {
442  // 1D Pencil
443  if (pmeGrid.yBlocks*pmeGrid.zBlocks > numDeviceTot ||
444  pmeGrid.xBlocks*pmeGrid.zBlocks > numDeviceTot ||
445  pmeGrid.xBlocks*pmeGrid.yBlocks > numDeviceTot) {
446  pmeGrid.yBlocks = std::min(pmeGrid.yBlocks, (int)sqrt((double)numDeviceTot));
447  pmeGrid.zBlocks = std::min(pmeGrid.zBlocks, numDeviceTot/pmeGrid.yBlocks);
448  }
449  pmeGrid.xBlocks = std::min(pmeGrid.yBlocks, pmeGrid.zBlocks);
450  }
451 
452  // Here (block1, block2, block3) define the size of charge grid pencil in each direction
453  pmeGrid.block1 = ( pmeGrid.K1 + pmeGrid.xBlocks - 1 ) / pmeGrid.xBlocks;
454  pmeGrid.block2 = ( pmeGrid.K2 + pmeGrid.yBlocks - 1 ) / pmeGrid.yBlocks;
455  pmeGrid.block3 = ( pmeGrid.K3 + pmeGrid.zBlocks - 1 ) / pmeGrid.zBlocks;
456 
457  // Determine type of FFT
458  if (pmeGrid.xBlocks == 1 && pmeGrid.yBlocks == 1 && pmeGrid.zBlocks == 1) {
459  // Single block => 3D FFT
460  pmePencilType = 3;
461  } else if (pmeGrid.yBlocks == 1) {
462  // Blocks in all but y-dimension => 2D FFT
463  pmePencilType = 2;
464  } else {
465  // Blocks in all dimensions => 1D FFT
466  pmePencilType = 1;
467  }
468 
469  //--------------------------------------------------------------------------
470  // Map pencils into Pes
471  xPes.resize(pmeGrid.yBlocks*pmeGrid.zBlocks);
472 
473  if (pmePencilType == 1 || pmePencilType == 2) {
474  zPes.resize(pmeGrid.xBlocks*pmeGrid.yBlocks);
475  }
476  if (pmePencilType == 1) {
477  yPes.resize(pmeGrid.xBlocks*pmeGrid.zBlocks);
478  }
479 
480  // i % numDeviceTot = device index
481  // (i % numDeviceTot)/deviceCUDA->getNumDevice() = node index
482  // (i % CkNodeSize(node)) = pe displacement
483  for (int i=0;i < xPes.size();i++) {
484  int node = (i % numDeviceTot)/numDevicesTmp;
485  xPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
486  }
487  for (int i=0;i < yPes.size();i++) {
488  int node = (i % numDeviceTot)/numDevicesTmp;
489  yPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
490  }
491  for (int i=0;i < zPes.size();i++) {
492  int node = (i % numDeviceTot)/numDevicesTmp;
493  zPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
494  }
495 
496  // char peStr[256];
497  // char *p = peStr;
498  // p += sprintf(p, "%2d | xPes", CkMyPe());
499  // for (int i=0;i < xPes.size();i++)
500  // p += sprintf(p, " %d", xPes[i]);
501  // p += sprintf(p, " yPes");
502  // for (int i=0;i < yPes.size();i++)
503  // p += sprintf(p, " %d", yPes[i]);
504  // p += sprintf(p, " zPes");
505  // for (int i=0;i < zPes.size();i++)
506  // p += sprintf(p, " %d", zPes[i]);
507  // fprintf(stderr, "%s | %d %d\n",peStr, CkNodeFirst(CkMyNode()), CkNodeSize(CkMyNode()));
508 
509  //--------------------------------------------------------------------------
510  // Build global node list for x-pencils
511  nodeDeviceList.resize(xPes.size());
512  numDevices = 0;
513  for (int k=0;k < xPes.size();k++) {
514  nodeDeviceList[k].node = CkNodeOf(xPes[k]);
515  nodeDeviceList[k].device = -1;
516  if (nodeDeviceList[k].node == CkMyNode()) {
517  nodeDeviceList[k].device = numDevices++;
518  }
519  }
520 
521  ijPencilX.clear();
522  ijPencilY.clear();
523  ijPencilZ.clear();
524 
525  // Construct list of pencil coordinates (i,j) for each device held by this node
526  for (int k=0;k < xPes.size();k++) {
527  if (CkMyNode() == CkNodeOf(xPes[k])) {
528  IJ ij;
529  ij.i = k % pmeGrid.yBlocks;
530  ij.j = k / pmeGrid.yBlocks;
531  ijPencilX.push_back(ij);
532  }
533  }
534  if (ijPencilX.size() != numDevices)
535  NAMD_bug("ComputePmeCUDAMgr::setupPencils, error setting up x-pencils and devices");
536 
537  int numDevicesY = 0;
538  for (int k=0;k < yPes.size();k++) {
539  if (CkMyNode() == CkNodeOf(yPes[k])) {
540  IJ ij;
541  ij.i = k % pmeGrid.xBlocks;
542  ij.j = k / pmeGrid.xBlocks;
543  ijPencilY.push_back(ij);
544  numDevicesY++;
545  }
546  }
547 
548  int numDevicesZ = 0;
549  for (int k=0;k < zPes.size();k++) {
550  if (CkMyNode() == CkNodeOf(zPes[k])) {
551  IJ ij;
552  ij.i = k % pmeGrid.xBlocks;
553  ij.j = k / pmeGrid.xBlocks;
554  ijPencilZ.push_back(ij);
555  numDevicesZ++;
556  }
557  }
558 }
static Node * Object()
Definition: Node.h:86
int dim2
Definition: PmeBase.h:19
int zBlocks
Definition: PmeBase.h:22
int dim3
Definition: PmeBase.h:19
int K2
Definition: PmeBase.h:18
SimParameters * simParameters
Definition: Node.h:178
int K1
Definition: PmeBase.h:18
int block1
Definition: PmeBase.h:21
int getNumDevice()
Definition: DeviceCUDA.h:93
if(ComputeNonbondedUtil::goMethod==2)
int block2
Definition: PmeBase.h:21
int yBlocks
Definition: PmeBase.h:22
int order
Definition: PmeBase.h:20
void NAMD_bug(const char *err_msg)
Definition: common.C:129
int block3
Definition: PmeBase.h:21
#define simParams
Definition: Output.C:127
int K3
Definition: PmeBase.h:18
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:22
int xBlocks
Definition: PmeBase.h:22
void ComputePmeCUDAMgr::skip ( void  )

Definition at line 919 of file ComputePmeCUDAMgr.C.

919  {
920  switch(pmePencilType) {
921  case 1:
922  pmePencilZ.skip();
923  break;
924  case 2:
925  pmePencilZ.skip();
926  break;
927  case 3:
928  pmePencilXYZ[0].skip();
929  break;
930  }
931 }

Member Data Documentation

ComputePmeCUDAMgr::ComputePmeCUDAMgr_SDAG_CODE

Definition at line 440 of file ComputePmeCUDAMgr.h.


The documentation for this class was generated from the following files: