ComputePmeCUDAMgr Class Reference

#include <ComputePmeCUDAMgr.h>

List of all members.

Public Member Functions

 ComputePmeCUDAMgr ()
 ComputePmeCUDAMgr (CkMigrateMessage *)
 ~ComputePmeCUDAMgr ()
void setupPencils ()
void initialize (CkQdMsg *msg)
void initialize_pencils (CkQdMsg *msg)
void activate_pencils (CkQdMsg *msg)
PmeGrid getPmeGrid ()
int getNode (int i, int j)
int getDevice (int i, int j)
int getDevicePencilY (int i, int j)
int getDevicePencilZ (int i, int j)
int getDeviceIDPencilX (int i, int j)
int getDeviceIDPencilY (int i, int j)
int getDeviceIDPencilZ (int i, int j)
void recvPencils (CProxy_CudaPmePencilXYZ xyz)
void recvPencils (CProxy_CudaPmePencilXY xy, CProxy_CudaPmePencilZ z)
void recvPencils (CProxy_CudaPmePencilX x, CProxy_CudaPmePencilY y, CProxy_CudaPmePencilZ z)
void recvSelfEnergy (PmeSelfEnergyMsg *msg)
void createDevicesAndAtomFiler ()
void recvDevices (RecvDeviceMsg *msg)
void recvAtomFiler (CProxy_PmeAtomFiler filer)
void skip ()
void recvAtoms (PmeAtomMsg *msg)
void getHomePencil (PatchID patchID, int &homey, int &homez)
int getHomeNode (PatchID patchID)
bool isPmePe (int pe)
bool isPmeNode (int node)
bool isPmeDevice (int deviceID)

Static Public Member Functions

static ComputePmeCUDAMgrObject ()

Public Attributes

 ComputePmeCUDAMgr_SDAG_CODE

Classes

struct  ExtraDevice
struct  IJ
struct  NodeDevice


Detailed Description

Definition at line 430 of file ComputePmeCUDAMgr.h.


Constructor & Destructor Documentation

ComputePmeCUDAMgr::ComputePmeCUDAMgr (  ) 

Definition at line 204 of file ComputePmeCUDAMgr.C.

00204                                      {
00205   __sdag_init();
00206   numDevices = 0;
00207   numTotalPatches = 0;
00208   numNodesContributed = 0;
00209   numDevicesMax = 0;
00210 }

ComputePmeCUDAMgr::ComputePmeCUDAMgr ( CkMigrateMessage *   ) 

Definition at line 215 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

00215                                                        {
00216   __sdag_init();
00217   NAMD_bug("ComputePmeCUDAMgr cannot be migrated");
00218   numDevices = 0;
00219   numTotalPatches = 0;
00220   numNodesContributed = 0;
00221   numDevicesMax = 0;
00222 }

ComputePmeCUDAMgr::~ComputePmeCUDAMgr (  ) 

Definition at line 227 of file ComputePmeCUDAMgr.C.

References cudaCheck, and stream.

00227                                       {
00228   for (int i=0;i < extraDevices.size();i++) {
00229     cudaCheck(cudaSetDevice(extraDevices[i].deviceID));
00230     cudaCheck(cudaStreamDestroy(extraDevices[i].stream));
00231   }
00232 }


Member Function Documentation

void ComputePmeCUDAMgr::activate_pencils ( CkQdMsg *  msg  ) 

Definition at line 809 of file ComputePmeCUDAMgr.C.

References j.

00809                                                      {
00810   if (msg != NULL) delete msg;
00811 
00812   for (int device=0;device < numDevices;device++) {
00813     deviceProxy[device].ckLocalBranch()->activate_pencils();
00814   }
00815 
00816   for (int i=0;i < ijPencilY.size();i++) {
00817     PmeStartMsg* pmeStartYMsg = new PmeStartMsg();
00818     pmeStartYMsg->data = NULL;
00819     pmeStartYMsg->dataSize = 0;
00820     pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).start(pmeStartYMsg);
00821   }
00822 
00823   for (int i=0;i < ijPencilZ.size();i++) {
00824     PmeStartMsg* pmeStartZMsg = new PmeStartMsg();
00825     pmeStartZMsg->data = NULL;
00826     pmeStartZMsg->dataSize = 0;
00827     pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).start(pmeStartZMsg);
00828   }
00829 
00830 }

void ComputePmeCUDAMgr::createDevicesAndAtomFiler (  ) 

Definition at line 679 of file ComputePmeCUDAMgr.C.

References RecvDeviceMsg::dev, NAMD_bug(), RecvDeviceMsg::numDevicesMax, and PRIORITY_SIZE.

00679                                                   {
00680   if (CkMyNode() != 0)
00681     NAMD_bug("ComputePmeCUDAMgr::createDevicesAndAtomFiler can only be called on root node");
00682 
00683   // Root node creates all device proxies
00684   // NOTE: Only root node has numDevicesMax
00685   RecvDeviceMsg* msg = new (numDevicesMax, PRIORITY_SIZE) RecvDeviceMsg();
00686   msg->numDevicesMax = numDevicesMax;
00687   for (int i=0;i < numDevicesMax;i++) {
00688     CProxy_ComputePmeCUDADevice dev = CProxy_ComputePmeCUDADevice::ckNew();
00689     memcpy(&msg->dev[i], &dev, sizeof(CProxy_ComputePmeCUDADevice));
00690   }
00691   thisProxy.recvDevices(msg);
00692 
00693   CProxy_PmeAtomFiler filer = CProxy_PmeAtomFiler::ckNew();
00694   thisProxy.recvAtomFiler(filer);
00695 
00696 }

int ComputePmeCUDAMgr::getDevice ( int  i,
int  j 
)

Definition at line 854 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getDeviceIDPencilX(), and recvAtoms().

00854                                              {
00855   if (i < 0 || i >= pmeGrid.yBlocks || j < 0 || j >= pmeGrid.zBlocks)
00856     NAMD_bug("ComputePmeCUDAMgr::getDevice, pencil index out of bounds");
00857   int ind = i + j*pmeGrid.yBlocks;
00858   int device = nodeDeviceList[ind].device;
00859   if (device == -1)
00860     NAMD_bug("ComputePmeCUDAMgr::getDevice, no device found");
00861   return device;
00862 }

int ComputePmeCUDAMgr::getDeviceIDPencilX ( int  i,
int  j 
)

Definition at line 895 of file ComputePmeCUDAMgr.C.

References getDevice().

00895                                                       {
00896   int device = getDevice(i, j);
00897   return deviceProxy[device].ckLocalBranch()->getDeviceID();
00898 }

int ComputePmeCUDAMgr::getDeviceIDPencilY ( int  i,
int  j 
)

Definition at line 903 of file ComputePmeCUDAMgr.C.

References getDevicePencilY().

00903                                                       {
00904   int device = getDevicePencilY(i, j);
00905   return deviceProxy[device].ckLocalBranch()->getDeviceID();
00906 }

int ComputePmeCUDAMgr::getDeviceIDPencilZ ( int  i,
int  j 
)

Definition at line 911 of file ComputePmeCUDAMgr.C.

References getDevicePencilZ().

00911                                                       {
00912   int device = getDevicePencilZ(i, j);
00913   return deviceProxy[device].ckLocalBranch()->getDeviceID();
00914 }

int ComputePmeCUDAMgr::getDevicePencilY ( int  i,
int  j 
)

Definition at line 867 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::zBlocks.

Referenced by getDeviceIDPencilY().

00867                                                     {
00868   if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.zBlocks)
00869     NAMD_bug("ComputePmeCUDAMgr::getDevicePencilY, pencil index out of bounds");
00870   for (int device=0;device < ijPencilY.size();device++) {
00871     if (ijPencilY[device].i == i && ijPencilY[device].j == j) return device;
00872   }
00873   char str[256];
00874   sprintf(str, "ComputePmeCUDAMgr::getDevicePencilY, no device found at i %d j %d",i,j);
00875   NAMD_bug(str);
00876   return -1;
00877 }

int ComputePmeCUDAMgr::getDevicePencilZ ( int  i,
int  j 
)

Definition at line 882 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::xBlocks, and PmeGrid::yBlocks.

Referenced by getDeviceIDPencilZ().

00882                                                     {
00883   if (i < 0 || i >= pmeGrid.xBlocks || j < 0 || j >= pmeGrid.yBlocks)
00884     NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, pencil index out of bounds");
00885   for (int device=0;device < ijPencilZ.size();device++) {
00886     if (ijPencilZ[device].i == i && ijPencilZ[device].j == j) return device;
00887   }
00888   NAMD_bug("ComputePmeCUDAMgr::getDevicePencilZ, no device found");
00889   return -1;
00890 }

int ComputePmeCUDAMgr::getHomeNode ( PatchID  patchID  ) 

Definition at line 845 of file ComputePmeCUDAMgr.C.

References getHomePencil(), and getNode().

00845                                                   {
00846   int homey, homez;
00847   getHomePencil(patchID, homey, homez);
00848   return getNode(homey, homez);
00849 }

void ComputePmeCUDAMgr::getHomePencil ( PatchID  patchID,
int &  homey,
int &  homez 
)

Definition at line 238 of file ComputePmeCUDAMgr.C.

References getPencilDim(), PmeGrid::K2, PmeGrid::K3, PatchMap::max_b(), PatchMap::max_c(), PatchMap::min_b(), PatchMap::min_c(), NAMD_bug(), PatchMap::Object(), Perm_X_Y_Z, PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getHomeNode().

00238                                                                              {
00239   PatchMap *patchMap = PatchMap::Object();
00240 
00241   BigReal miny = patchMap->min_b(patchID);
00242   BigReal maxy = patchMap->max_b(patchID);
00243 
00244   BigReal minz = patchMap->min_c(patchID);
00245   BigReal maxz = patchMap->max_c(patchID);
00246 
00247   // Determine home pencil = pencil with most overlap
00248   
00249   // Calculate patch grid coordinates
00250   int patch_y0 = floor((miny+0.5)*pmeGrid.K2);
00251   int patch_y1 = floor((maxy+0.5)*pmeGrid.K2)-1;
00252   int patch_z0 = floor((minz+0.5)*pmeGrid.K3);
00253   int patch_z1 = floor((maxz+0.5)*pmeGrid.K3)-1;
00254 
00255   if (patch_y0 < 0 || patch_y1 >= pmeGrid.K2 || patch_z0 < 0 || patch_z1 >= pmeGrid.K3) {
00256     NAMD_bug("ComputePmeCUDAMgr::getHomePencil, patch bounds are outside grid bounds");
00257   }
00258 
00259   int maxOverlap = 0;
00260   homey = -1;
00261   homez = -1;
00262   for (int iz=0;iz < pmeGrid.zBlocks;iz++) {
00263     for (int iy=0;iy < pmeGrid.yBlocks;iy++) {
00264       int pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1;
00265       getPencilDim(pmeGrid, Perm_X_Y_Z, iy, iz,
00266         pencil_x0, pencil_x1, pencil_y0, pencil_y1, pencil_z0, pencil_z1);
00267 
00268       if (pencil_y1 - pencil_y0 < pmeGrid.order || pencil_z1 - pencil_z0 < pmeGrid.order)
00269         NAMD_bug("ComputePmeCUDAMgr::getHomePencil, pencil size must be >= PMEInterpOrder");
00270 
00271       int y0 = std::max(patch_y0, pencil_y0);
00272       int y1 = std::min(patch_y1, pencil_y1);
00273       int z0 = std::max(patch_z0, pencil_z0);
00274       int z1 = std::min(patch_z1, pencil_z1);
00275 
00276       int overlap = (y1-y0 > 0 && z1-z0 > 0) ? (y1-y0)*(z1-z0) : -1;
00277 
00278       if (overlap > maxOverlap) {
00279         maxOverlap = overlap;
00280         homey = iy;
00281         homez = iz;
00282       }
00283     }
00284   }
00285 
00286   if (homey == -1 || homez == -1)
00287     NAMD_bug("ComputePmeCUDAMgr::getHomePencil, home pencil not found");
00288 }

int ComputePmeCUDAMgr::getNode ( int  i,
int  j 
)

Definition at line 835 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by getHomeNode().

00835                                            {
00836   if (i < 0 || i >= pmeGrid.yBlocks || j < 0 || j >= pmeGrid.zBlocks)
00837     NAMD_bug("ComputePmeCUDAMgr::getNode, pencil index out of bounds");
00838   int ind = i + j*pmeGrid.yBlocks;
00839   return nodeDeviceList[ind].node;
00840 }

PmeGrid ComputePmeCUDAMgr::getPmeGrid (  )  [inline]

Definition at line 440 of file ComputePmeCUDAMgr.h.

Referenced by ComputePmeCUDA::initialize().

00440 {return pmeGrid;}

void ComputePmeCUDAMgr::initialize ( CkQdMsg *  msg  ) 

Definition at line 598 of file ComputePmeCUDAMgr.C.

References endi(), iINFO(), iout, setupPencils(), PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.

00598                                                {
00599         if (msg != NULL) delete msg;
00600 
00601   setupPencils();
00602 
00603   if ( ! CkMyNode() ) {
00604     iout << iINFO << "PME using " << pmeGrid.xBlocks << " x " <<
00605       pmeGrid.yBlocks << " x " << pmeGrid.zBlocks <<
00606       " pencil grid for FFT and reciprocal sum.\n" << endi;
00607   }
00608 
00609   // Initialize list that contains the number of home patches for each device on this manager
00610   numHomePatchesList.resize(numDevices, 0);
00611 
00612   //--------------------------------------------------------------------------
00613   // Create devices and atom filer
00614   // numDevices = number of devices we'll be using, possibly different on each node
00615   // Use root node to compute the maximum number of devices in use over all nodes
00616   thisProxy[0].initializeDevicesAndAtomFiler(new NumDevicesMsg(numDevices));
00617   //--------------------------------------------------------------------------
00618 
00619   if (CkMyNode() == 0) {
00620 
00621     if (pmePencilType == 3) {
00622                 // Single block => 3D FFT
00623       CProxy_PmePencilXYZMap xyzMap = CProxy_PmePencilXYZMap::ckNew(xPes[0]);
00624       CkArrayOptions xyzOpts(1);
00625       xyzOpts.setMap(xyzMap);
00626       xyzOpts.setAnytimeMigration(false);
00627       xyzOpts.setStaticInsertion(true);
00628       pmePencilXYZ = CProxy_CudaPmePencilXYZ::ckNew(xyzOpts);
00629       pmePencilXYZ[0].initialize(new CudaPmeXYZInitMsg(pmeGrid));
00630       thisProxy.recvPencils(pmePencilXYZ);
00631     } else if (pmePencilType == 2) {
00632       // Blocks in all but y-dimension => 2D FFT
00633       CProxy_PmePencilXYMap xyMap = CProxy_PmePencilXYMap::ckNew(xPes);
00634       CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes);
00635       CkArrayOptions xyOpts(1, 1, pmeGrid.zBlocks);
00636       CkArrayOptions zOpts(pmeGrid.xBlocks, 1, 1);
00637       xyOpts.setMap(xyMap);
00638       zOpts.setMap(zMap);
00639       xyOpts.setAnytimeMigration(false);
00640       zOpts.setAnytimeMigration(false);
00641       xyOpts.setStaticInsertion(true);
00642       zOpts.setStaticInsertion(true);
00643       pmePencilXY = CProxy_CudaPmePencilXY::ckNew(xyOpts);
00644       pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts);
00645       // Send pencil proxies to other nodes
00646       thisProxy.recvPencils(pmePencilXY, pmePencilZ);
00647       pmePencilXY.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap));
00648       pmePencilZ.initialize(new CudaPmeXYInitMsg(pmeGrid, pmePencilXY, pmePencilZ, xyMap, zMap));
00649     } else {
00650                 // Blocks in all dimensions => 1D FFT
00651       CProxy_PmePencilXMap xMap = CProxy_PmePencilXMap::ckNew(1, 2, pmeGrid.yBlocks, xPes);
00652       CProxy_PmePencilXMap yMap = CProxy_PmePencilXMap::ckNew(0, 2, pmeGrid.xBlocks, yPes);
00653       CProxy_PmePencilXMap zMap = CProxy_PmePencilXMap::ckNew(0, 1, pmeGrid.xBlocks, zPes);
00654       CkArrayOptions xOpts(1, pmeGrid.yBlocks, pmeGrid.zBlocks);
00655       CkArrayOptions yOpts(pmeGrid.xBlocks, 1, pmeGrid.zBlocks);
00656                 CkArrayOptions zOpts(pmeGrid.xBlocks, pmeGrid.yBlocks, 1);
00657       xOpts.setMap(xMap);
00658       yOpts.setMap(yMap);
00659       zOpts.setMap(zMap);
00660       xOpts.setAnytimeMigration(false);
00661       yOpts.setAnytimeMigration(false);
00662       zOpts.setAnytimeMigration(false);
00663       xOpts.setStaticInsertion(true);
00664       yOpts.setStaticInsertion(true);
00665       zOpts.setStaticInsertion(true);
00666       pmePencilX = CProxy_CudaPmePencilX::ckNew(xOpts);
00667       pmePencilY = CProxy_CudaPmePencilY::ckNew(yOpts);
00668       pmePencilZ = CProxy_CudaPmePencilZ::ckNew(zOpts);
00669       // Send pencil proxies to other nodes
00670       thisProxy.recvPencils(pmePencilX, pmePencilY, pmePencilZ);
00671       pmePencilX.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
00672       pmePencilY.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
00673       pmePencilZ.initialize(new CudaPmeXInitMsg(pmeGrid, pmePencilX, pmePencilY, pmePencilZ, xMap, yMap, zMap));
00674     }
00675   }
00676 
00677 }

void ComputePmeCUDAMgr::initialize_pencils ( CkQdMsg *  msg  ) 

Definition at line 732 of file ComputePmeCUDAMgr.C.

References createStream(), cudaCheck, deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), DeviceCUDA::getNumDevice(), j, and stream.

00732                                                        {
00733   if (msg != NULL) delete msg;
00734 
00735   int numDevicesTmp = deviceCUDA->getNumDevice();
00736 
00737   // Initialize device proxies for real-space interfacing
00738   for (int i=0;i < ijPencilX.size();i++) {
00739     // NOTE: i is here the device ID
00740     int deviceID = deviceCUDA->getDeviceIDbyRank(i % numDevicesTmp);
00741     deviceProxy[i].ckLocalBranch()->initialize(pmeGrid, ijPencilX[i].i, ijPencilX[i].j,
00742       deviceID, pmePencilType, thisProxy, pmeAtomFiler);
00743     if (pmePencilType == 1) {
00744       deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilX);
00745     } else if (pmePencilType == 2) {
00746       deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXY);
00747     } else {
00748       deviceProxy[i].ckLocalBranch()->setPencilProxy(pmePencilXYZ);
00749     }
00750   }
00751 
00752   // Use above initialized device proxies for the PME pencils that interface with real-space
00753   for (int i=0;i < ijPencilX.size();i++) {
00754     if (pmePencilType == 1) {
00755       pmePencilX(0, ijPencilX[i].i, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i]));
00756     } else if (pmePencilType == 2) {
00757       pmePencilXY(0, 0, ijPencilX[i].j).initializeDevice(new InitDeviceMsg(deviceProxy[i]));
00758     } else {
00759       pmePencilXYZ[0].initializeDevice(new InitDeviceMsg(deviceProxy[i]));
00760     }
00761   }
00762 
00763   // Create extra devices for Y and Z pencils if necessary
00764   int n = std::max(ijPencilY.size(), ijPencilZ.size());
00765   if (n > ijPencilX.size()) {
00766     int nextra = n - ijPencilX.size();
00767     extraDevices.resize(nextra);
00768     for (int i=0;i < nextra;i++) {
00769       extraDevices[i].deviceID = (i + ijPencilX.size())  % numDevicesTmp;
00770       cudaCheck(cudaSetDevice(extraDevices[i].deviceID));
00771       createStream(extraDevices[i].stream);
00772     }
00773   }
00774 
00775   // Initialize Y pencils
00776   for (int i=0;i < ijPencilY.size();i++) {
00777     int deviceID;
00778     cudaStream_t stream;
00779     if (i < ijPencilX.size()) {
00780       deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID();
00781       stream   = deviceProxy[i].ckLocalBranch()->getStream();
00782     } else {
00783       deviceID = extraDevices[i-ijPencilX.size()].deviceID;
00784       stream   = extraDevices[i-ijPencilX.size()].stream;
00785     }
00786     pmePencilY(ijPencilY[i].i, 0, ijPencilY[i].j).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy));
00787   }
00788 
00789   // Initialize Z pencils
00790   for (int i=0;i < ijPencilZ.size();i++) {
00791     int deviceID;
00792     cudaStream_t stream;
00793     if (i < ijPencilX.size()) {
00794       deviceID = deviceProxy[i].ckLocalBranch()->getDeviceID();
00795       stream   = deviceProxy[i].ckLocalBranch()->getStream();
00796     } else {
00797       deviceID = extraDevices[i-ijPencilX.size()].deviceID;
00798       stream   = extraDevices[i-ijPencilX.size()].stream;
00799     }
00800     pmePencilZ(ijPencilZ[i].i, ijPencilZ[i].j, 0).initializeDevice(new InitDeviceMsg2(deviceID, stream, thisProxy));
00801   }
00802 
00803 }

bool ComputePmeCUDAMgr::isPmeDevice ( int  deviceID  ) 

Definition at line 585 of file ComputePmeCUDAMgr.C.

References deviceCUDA, DeviceCUDA::getDeviceIDbyRank(), and DeviceCUDA::getNumDevice().

00585                                                 {
00586   for (int i=0;i < nodeDeviceList.size();i++) {
00587     if (deviceCUDA->getDeviceIDbyRank(nodeDeviceList[i].device % deviceCUDA->getNumDevice()) == deviceID) {
00588       return true;
00589     }
00590   }
00591   return false;
00592 }

bool ComputePmeCUDAMgr::isPmeNode ( int  node  ) 

Definition at line 573 of file ComputePmeCUDAMgr.C.

00573                                           {
00574   for (int i=0;i < nodeDeviceList.size();i++) {
00575     if (nodeDeviceList[i].node == node) {
00576       return true;
00577     }
00578   }
00579   return false;
00580 }

bool ComputePmeCUDAMgr::isPmePe ( int  pe  ) 

Definition at line 563 of file ComputePmeCUDAMgr.C.

Referenced by CudaComputeNonbonded::assignPatches().

00563                                       {
00564   for (int i=0;i < xPes.size();i++) {
00565     if (pe == xPes[i]) return true;
00566   }
00567   return false;
00568 }

static ComputePmeCUDAMgr* ComputePmeCUDAMgr::Object (  )  [inline, static]

Definition at line 465 of file ComputePmeCUDAMgr.h.

Referenced by CudaComputeNonbonded::assignPatches().

00465                                      {
00466     CProxy_ComputePmeCUDAMgr mgrProxy(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
00467     return mgrProxy.ckLocalBranch();    
00468   }

void ComputePmeCUDAMgr::recvAtomFiler ( CProxy_PmeAtomFiler  filer  ) 

Definition at line 698 of file ComputePmeCUDAMgr.C.

00698                                                                {
00699   pmeAtomFiler = filer;
00700 }

void ComputePmeCUDAMgr::recvAtoms ( PmeAtomMsg msg  ) 

Definition at line 933 of file ComputePmeCUDAMgr.C.

References getDevice(), PmeAtomMsg::i, and PmeAtomMsg::j.

00933                                                  {
00934   int device = getDevice(msg->i, msg->j);
00935   deviceProxy[device].ckLocalBranch()->recvAtoms(msg);
00936 }

void ComputePmeCUDAMgr::recvDevices ( RecvDeviceMsg msg  ) 

Definition at line 702 of file ComputePmeCUDAMgr.C.

References RecvDeviceMsg::dev, NAMD_bug(), and RecvDeviceMsg::numDevicesMax.

00702                                                       {
00703   numDevicesMax = msg->numDevicesMax;
00704   if (numDevices > numDevicesMax)
00705     NAMD_bug("ComputePmeCUDAMgr::recvDevices, numDevices > numDevicesMax");
00706   deviceProxy.resize(numDevices);
00707   for (int i=0;i < numDevices;i++) {
00708     deviceProxy[i] = msg->dev[i];
00709   }
00710   delete msg;
00711 }

void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilX  x,
CProxy_CudaPmePencilY  y,
CProxy_CudaPmePencilZ  z 
)

Definition at line 722 of file ComputePmeCUDAMgr.C.

00722                                                                                                              {
00723   pmePencilX = x;
00724   pmePencilY = y;
00725   pmePencilZ = z;
00726 }

void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilXY  xy,
CProxy_CudaPmePencilZ  z 
)

Definition at line 717 of file ComputePmeCUDAMgr.C.

00717                                                                                       {
00718   pmePencilXY = xy;
00719   pmePencilZ = z;
00720 }

void ComputePmeCUDAMgr::recvPencils ( CProxy_CudaPmePencilXYZ  xyz  ) 

Definition at line 713 of file ComputePmeCUDAMgr.C.

00713                                                                {
00714   pmePencilXYZ = xyz;
00715 }

void ComputePmeCUDAMgr::recvSelfEnergy ( PmeSelfEnergyMsg msg  ) 

Definition at line 1715 of file ComputePmeCUDAMgr.C.

01715                                                             {
01716   // NOTE: msg is deleted in PmePencilXYZ / PmePencilX
01717   switch(pmePencilType) {
01718     case 1:
01719     pmePencilZ(0,0,0).recvSelfEnergy(msg);
01720     break;
01721     case 2:
01722     pmePencilZ(0,0,0).recvSelfEnergy(msg);
01723     break;
01724     case 3:
01725     pmePencilXYZ[0].recvSelfEnergy(msg);
01726     break;
01727   }  
01728 }

void ComputePmeCUDAMgr::setupPencils (  ) 

Definition at line 400 of file ComputePmeCUDAMgr.C.

References PmeGrid::block1, PmeGrid::block2, PmeGrid::block3, deviceCUDA, PmeGrid::dim2, PmeGrid::dim3, DeviceCUDA::getNumDevice(), if(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_bug(), Node::Object(), PmeGrid::order, Node::simParameters, simParams, PmeGrid::xBlocks, PmeGrid::yBlocks, and PmeGrid::zBlocks.

Referenced by initialize().

00400                                      {
00401   SimParameters *simParams = Node::Object()->simParameters;
00402 
00403   pmeGrid.K1 = simParams->PMEGridSizeX;
00404   pmeGrid.K2 = simParams->PMEGridSizeY;
00405   pmeGrid.K3 = simParams->PMEGridSizeZ;
00406   pmeGrid.order = simParams->PMEInterpOrder;
00407   pmeGrid.dim2 = pmeGrid.K2;
00408   pmeGrid.dim3 = 2 * (pmeGrid.K3/2 + 1);
00409 
00410   // Count the total number of devices assuming all nodes have the same number as this node
00411   // NOTE: This should be changed in the future to support heterogeneous nodes!!!
00412   int numDevicesTmp = deviceCUDA->getNumDevice();
00413 
00414   int numDeviceTot = CkNumNodes() * numDevicesTmp;
00415   // Use approximately 1/4th of the devices for PME
00416   int numDeviceToUse = std::max(1, numDeviceTot/4);
00417 
00418   if (numDeviceToUse < 4) {
00419     // 2D Slab
00420     pmeGrid.yBlocks = 1;
00421     pmeGrid.xBlocks = pmeGrid.zBlocks = numDeviceToUse;
00422   } else {
00423     // 1D Pencil
00424     pmeGrid.yBlocks = (int)sqrt((double)numDeviceToUse);
00425     pmeGrid.zBlocks = numDeviceToUse/pmeGrid.yBlocks;
00426     pmeGrid.xBlocks = pmeGrid.zBlocks;
00427   }
00428 
00429   if ( simParams->PMEPencilsX > 0 ) pmeGrid.xBlocks = simParams->PMEPencilsX;
00430   if ( simParams->PMEPencilsY > 0 ) pmeGrid.yBlocks = simParams->PMEPencilsY;
00431   if ( simParams->PMEPencilsZ > 0 ) pmeGrid.zBlocks = simParams->PMEPencilsZ;
00432 
00433   // Restrict number of pencils to the maximum number
00434   restrictToMaxPMEPencils();
00435 
00436   // Fix pencil numbers if they don't make sense w.r.t. number of devices
00437   if (pmeGrid.yBlocks == 1) {
00438     // 2D Slab
00439     if (pmeGrid.xBlocks > numDeviceTot) pmeGrid.xBlocks = numDeviceTot;
00440     if (pmeGrid.zBlocks > numDeviceTot) pmeGrid.zBlocks = numDeviceTot;
00441   } else {
00442     // 1D Pencil
00443     if (pmeGrid.yBlocks*pmeGrid.zBlocks > numDeviceTot ||
00444       pmeGrid.xBlocks*pmeGrid.zBlocks > numDeviceTot ||
00445       pmeGrid.xBlocks*pmeGrid.yBlocks > numDeviceTot) {
00446       pmeGrid.yBlocks = std::min(pmeGrid.yBlocks, (int)sqrt((double)numDeviceTot));
00447       pmeGrid.zBlocks = std::min(pmeGrid.zBlocks, numDeviceTot/pmeGrid.yBlocks);
00448     }
00449     pmeGrid.xBlocks = std::min(pmeGrid.yBlocks, pmeGrid.zBlocks);
00450   }
00451 
00452   // Here (block1, block2, block3) define the size of charge grid pencil in each direction
00453   pmeGrid.block1 = ( pmeGrid.K1 + pmeGrid.xBlocks - 1 ) / pmeGrid.xBlocks;
00454   pmeGrid.block2 = ( pmeGrid.K2 + pmeGrid.yBlocks - 1 ) / pmeGrid.yBlocks;
00455   pmeGrid.block3 = ( pmeGrid.K3 + pmeGrid.zBlocks - 1 ) / pmeGrid.zBlocks;
00456 
00457   // Determine type of FFT
00458   if (pmeGrid.xBlocks == 1 && pmeGrid.yBlocks == 1 && pmeGrid.zBlocks == 1) {
00459     // Single block => 3D FFT
00460     pmePencilType = 3;
00461   } else if (pmeGrid.yBlocks == 1) {
00462     // Blocks in all but y-dimension => 2D FFT
00463     pmePencilType = 2;
00464   } else {
00465     // Blocks in all dimensions => 1D FFT
00466     pmePencilType = 1;
00467   }
00468 
00469   //--------------------------------------------------------------------------
00470   // Map pencils into Pes
00471   xPes.resize(pmeGrid.yBlocks*pmeGrid.zBlocks);
00472 
00473   if (pmePencilType == 1 || pmePencilType == 2) {
00474     zPes.resize(pmeGrid.xBlocks*pmeGrid.yBlocks);
00475   }
00476   if (pmePencilType == 1) {
00477     yPes.resize(pmeGrid.xBlocks*pmeGrid.zBlocks);
00478   }  
00479 
00480     // i % numDeviceTot                              = device index
00481     // (i % numDeviceTot)/deviceCUDA->getNumDevice() = node index
00482     // (i % CkNodeSize(node))                        = pe displacement
00483   for (int i=0;i < xPes.size();i++) {
00484     int node = (i % numDeviceTot)/numDevicesTmp;
00485     xPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
00486   }
00487   for (int i=0;i < yPes.size();i++) {
00488     int node = (i % numDeviceTot)/numDevicesTmp;
00489     yPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
00490   }
00491   for (int i=0;i < zPes.size();i++) {
00492     int node = (i % numDeviceTot)/numDevicesTmp;
00493     zPes[i] = CkNodeFirst(node) + (i + 0) % CkNodeSize(node);
00494   }
00495 
00496   // char peStr[256];
00497   // char *p = peStr;
00498   // p += sprintf(p, "%2d | xPes", CkMyPe());
00499   // for (int i=0;i < xPes.size();i++)
00500   //   p += sprintf(p, " %d", xPes[i]);
00501   // p += sprintf(p, " yPes");
00502   // for (int i=0;i < yPes.size();i++)
00503   //   p += sprintf(p, " %d", yPes[i]);
00504   // p += sprintf(p, " zPes");
00505   // for (int i=0;i < zPes.size();i++)
00506   //   p += sprintf(p, " %d", zPes[i]);
00507   // fprintf(stderr, "%s | %d %d\n",peStr, CkNodeFirst(CkMyNode()), CkNodeSize(CkMyNode()));
00508 
00509   //--------------------------------------------------------------------------
00510   // Build global node list for x-pencils
00511   nodeDeviceList.resize(xPes.size());
00512   numDevices = 0;
00513   for (int k=0;k < xPes.size();k++) {
00514     nodeDeviceList[k].node = CkNodeOf(xPes[k]);
00515     nodeDeviceList[k].device = -1;
00516     if (nodeDeviceList[k].node == CkMyNode()) {
00517       nodeDeviceList[k].device = numDevices++;
00518     }
00519   }
00520 
00521   ijPencilX.clear();
00522   ijPencilY.clear();
00523   ijPencilZ.clear();
00524 
00525   // Construct list of pencil coordinates (i,j) for each device held by this node
00526   for (int k=0;k < xPes.size();k++) {
00527     if (CkMyNode() == CkNodeOf(xPes[k])) {
00528       IJ ij;
00529       ij.i = k % pmeGrid.yBlocks;
00530       ij.j = k / pmeGrid.yBlocks;
00531       ijPencilX.push_back(ij);
00532     }
00533   }
00534   if (ijPencilX.size() != numDevices)
00535     NAMD_bug("ComputePmeCUDAMgr::setupPencils, error setting up x-pencils and devices");
00536 
00537   int numDevicesY = 0;
00538   for (int k=0;k < yPes.size();k++) {
00539     if (CkMyNode() == CkNodeOf(yPes[k])) {
00540       IJ ij;
00541       ij.i = k % pmeGrid.xBlocks;
00542       ij.j = k / pmeGrid.xBlocks;
00543       ijPencilY.push_back(ij);
00544       numDevicesY++;
00545     }
00546   }
00547 
00548   int numDevicesZ = 0;
00549   for (int k=0;k < zPes.size();k++) {
00550     if (CkMyNode() == CkNodeOf(zPes[k])) {
00551       IJ ij;
00552       ij.i = k % pmeGrid.xBlocks;
00553       ij.j = k / pmeGrid.xBlocks;
00554       ijPencilZ.push_back(ij);
00555       numDevicesZ++;
00556     }
00557   }  
00558 }

void ComputePmeCUDAMgr::skip (  ) 

Definition at line 919 of file ComputePmeCUDAMgr.C.

00919                              {
00920   switch(pmePencilType) {
00921     case 1:
00922     pmePencilZ.skip();
00923     break;
00924     case 2:
00925     pmePencilZ.skip();
00926     break;
00927     case 3:
00928     pmePencilXYZ[0].skip();
00929     break;
00930   }
00931 }


Member Data Documentation

ComputePmeCUDAMgr::ComputePmeCUDAMgr_SDAG_CODE

Definition at line 432 of file ComputePmeCUDAMgr.h.


The documentation for this class was generated from the following files:
Generated on Wed Sep 20 01:17:17 2017 for NAMD by  doxygen 1.4.7