NAMD
ComputePmeCUDADevice Class Reference

#include <ComputePmeCUDAMgr.h>

Inheritance diagram for ComputePmeCUDADevice:

Public Member Functions

 ComputePmeCUDADevice ()
 
 ComputePmeCUDADevice (CkMigrateMessage *m)
 
 ~ComputePmeCUDADevice ()
 
void initialize (PmeGrid &pmeGrid_in, int pencilIndexY_in, int pencilIndexZ_in, int deviceID_in, int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in, CProxy_PmeAtomFiler pmeAtomFiler_in)
 
int getDeviceID ()
 
cudaStream_t getStream ()
 
CProxy_ComputePmeCUDAMgr getMgrProxy ()
 
void setPencilProxy (CProxy_CudaPmePencilXYZ pmePencilXYZ_in)
 
void setPencilProxy (CProxy_CudaPmePencilXY pmePencilXY_in)
 
void setPencilProxy (CProxy_CudaPmePencilX pmePencilX_in)
 
void activate_pencils ()
 
void initializePatches (int numHomePatches_in)
 
void registerNeighbor ()
 
void recvAtoms (PmeAtomMsg *msg)
 
void sendAtomsToNeighbors ()
 
void sendAtomsToNeighbor (int y, int z, int atomIval)
 
void recvAtomsFromNeighbor (PmeAtomPencilMsg *msg)
 
void registerRecvAtomsFromNeighbor ()
 
void spreadCharge ()
 
void gatherForce ()
 
void gatherForceDone (unsigned int iGrid)
 
void sendForcesToNeighbors ()
 
void recvForcesFromNeighbor (PmeForcePencilMsg *msg)
 
void mergeForcesOnPatch (int homePatchIndex)
 
void sendForcesToPatch (PmeForceMsg *forceMsg)
 
void gatherForceDoneSubset (int first, int last)
 
bool isGridEnabled (unsigned int i) const
 

Detailed Description

Definition at line 420 of file ComputePmeCUDAMgr.h.

Constructor & Destructor Documentation

◆ ComputePmeCUDADevice() [1/2]

ComputePmeCUDADevice::ComputePmeCUDADevice ( )

Definition at line 974 of file ComputePmeCUDAMgr.C.

References DebugM, endi(), and NUM_GRID_MAX.

974  {
975  // __sdag_init();
976  numHomePatches = 0;
977 // forceCapacity = 0;
978 // force = NULL;
979  DebugM(4, "ComputePmeCUDADevice::ComputePmeCUDADevice\n" << endi);
980  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
981  pmeRealSpaceComputes[iGrid] = NULL;
982  enabledGrid[iGrid] = false;
983  forces[iGrid] = NULL;
984  forceCapacities[iGrid] = 0;
985  }
986 // pmeRealSpaceCompute = NULL;
987  streamCreated = false;
988  lock_numHomePatchesMerged = CmiCreateLock();
989  lock_numPencils = CmiCreateLock();
990  lock_numNeighborsRecv = CmiCreateLock();
991  lock_recvAtoms = CmiCreateLock();
992  numNeighborsExpected = 0;
993  numStrayAtoms = 0;
994  // Reset counters
995  numNeighborsRecv = 0;
996  numHomePatchesRecv = 0;
997  numHomePatchesMerged = 0;
998  atomI = 0;
999  forceI = 1;
1000 }

◆ ComputePmeCUDADevice() [2/2]

ComputePmeCUDADevice::ComputePmeCUDADevice ( CkMigrateMessage *  m)

Definition at line 1002 of file ComputePmeCUDAMgr.C.

References DebugM, endi(), and NUM_GRID_MAX.

1002  {
1003  // __sdag_init();
1004  numHomePatches = 0;
1005 // forceCapacity = 0;
1006 // force = NULL;
1007  DebugM(4, "ComputePmeCUDADevice::ComputePmeCUDADevice(CkMigrateMessage)\n" << endi);
1008  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1009  pmeRealSpaceComputes[iGrid] = NULL;
1010  enabledGrid[iGrid] = false;
1011  forces[iGrid] = NULL;
1012  forceCapacities[iGrid] = 0;
1013  }
1014  streamCreated = false;
1015  lock_numHomePatchesMerged = CmiCreateLock();
1016  lock_numPencils = CmiCreateLock();
1017  lock_numNeighborsRecv = CmiCreateLock();
1018  lock_recvAtoms = CmiCreateLock();
1019  numNeighborsExpected = 0;
1020  numStrayAtoms = 0;
1021  // Reset counters
1022  numNeighborsRecv = 0;
1023  numHomePatchesRecv = 0;
1024  numHomePatchesMerged = 0;
1025  atomI = 0;
1026  forceI = 1;
1027 }
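
Both constructors also leave the double-buffer indices at atomI = 0 and forceI = 1. These index two banks of per-pencil atom storage: incoming atoms for the next step are filed into bank atomI while forces for the previous step are still being drained from bank forceI, and the two roles are swapped once charge spreading starts (spreadCharge() below lists a swap among its references). A minimal standalone sketch of that pattern, not NAMD code, with purely illustrative names:

#include <cstdio>
#include <utility>
#include <vector>

// Two banks of per-pencil storage: one receives atoms, the other is drained of forces.
struct PencilBank { std::vector<int> atoms; };

int main() {
  PencilBank bank[2];
  int atomI = 0, forceI = 1;               // same initial values as the constructors above
  for (int step = 0; step < 3; ++step) {
    bank[atomI].atoms.push_back(step);     // new atoms land in the atomI bank
    // ... charge spreading / force gathering would consume bank[forceI] here ...
    bank[forceI].atoms.clear();            // drained bank is cleared for reuse
    std::swap(atomI, forceI);              // roles swap for the next step
    printf("after step %d: atomI=%d forceI=%d\n", step, atomI, forceI);
  }
  return 0;
}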

◆ ~ComputePmeCUDADevice()

ComputePmeCUDADevice::~ComputePmeCUDADevice ( )

Definition at line 1029 of file ComputePmeCUDAMgr.C.

References cudaCheck, and NUM_GRID_MAX.

1029  {
1030  if (streamCreated) {
1031  cudaCheck(cudaSetDevice(deviceID));
1032  cudaCheck(cudaStreamDestroy(stream));
1033  }
1034  for (int j=0;j < 2;j++)
1035  for (int i=0;i < pmeAtomStorage[j].size();i++) {
1036  if (pmeAtomStorageAllocatedHere[i]) delete pmeAtomStorage[j][i];
1037  }
1038 // if (force != NULL) deallocate_host<CudaForce>(&force);
1039  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1040  if (pmeRealSpaceComputes[iGrid] != NULL) delete pmeRealSpaceComputes[iGrid];
1041  if (forces[iGrid] != NULL) deallocate_host<CudaForce>(&forces[iGrid]);
1042  enabledGrid[iGrid] = false;
1043  }
1044  CmiDestroyLock(lock_numHomePatchesMerged);
1045  CmiDestroyLock(lock_numPencils);
1046  CmiDestroyLock(lock_numNeighborsRecv);
1047  CmiDestroyLock(lock_recvAtoms);
1048 }

Member Function Documentation

◆ activate_pencils()

void ComputePmeCUDADevice::activate_pencils ( )

Definition at line 1189 of file ComputePmeCUDAMgr.C.

References PmeStartMsg::dataGrid, PmeStartMsg::dataSizes, PmeStartMsg::enabledGrid, and NUM_GRID_MAX.

1189  {
1190  if (pmePencilType == 1) {
1191  PmeStartMsg* pmeStartXMsg = new PmeStartMsg();
1192  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1193  if (enabledGrid[iGrid] == true) {
1194  pmeStartXMsg->dataGrid[iGrid] = pmeRealSpaceComputes[iGrid]->getData();
1195  pmeStartXMsg->dataSizes[iGrid] = pmeRealSpaceComputes[iGrid]->getDataSize();
1196  pmeStartXMsg->enabledGrid[iGrid] = true;
1197  } else {
1198  pmeStartXMsg->dataGrid[iGrid] = NULL;
1199  pmeStartXMsg->dataSizes[iGrid] = 0;
1200  pmeStartXMsg->enabledGrid[iGrid] = false;
1201  }
1202  }
1203  pmePencilX(0, pencilIndexY, pencilIndexZ).start(pmeStartXMsg);
1204  } else if (pmePencilType == 2) {
1205  PmeStartMsg* pmeStartXMsg = new PmeStartMsg();
1206  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1207  if (enabledGrid[iGrid] == true) {
1208  pmeStartXMsg->dataGrid[iGrid] = pmeRealSpaceComputes[iGrid]->getData();
1209  pmeStartXMsg->dataSizes[iGrid] = pmeRealSpaceComputes[iGrid]->getDataSize();
1210  pmeStartXMsg->enabledGrid[iGrid] = true;
1211  } else {
1212  pmeStartXMsg->dataGrid[iGrid] = NULL;
1213  pmeStartXMsg->dataSizes[iGrid] = 0;
1214  pmeStartXMsg->enabledGrid[iGrid] = false;
1215  }
1216  }
1217  pmePencilXY(0, 0, pencilIndexZ).start(pmeStartXMsg);
1218  } else if (pmePencilType == 3) {
1219  PmeStartMsg* pmeStartMsg = new PmeStartMsg();
1220  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1221  if (enabledGrid[iGrid] == true) {
1222  pmeStartMsg->dataGrid[iGrid] = pmeRealSpaceComputes[iGrid]->getData();
1223  pmeStartMsg->dataSizes[iGrid] = pmeRealSpaceComputes[iGrid]->getDataSize();
1224  pmeStartMsg->enabledGrid[iGrid] = true;
1225  } else {
1226  pmeStartMsg->dataGrid[iGrid] = NULL;
1227  pmeStartMsg->dataSizes[iGrid] = 0;
1228  pmeStartMsg->enabledGrid[iGrid] = false;
1229  }
1230  }
1231  pmePencilXYZ[0].start(pmeStartMsg);
1232  }
1233 }

◆ gatherForce()

void ComputePmeCUDADevice::gatherForce ( )

Definition at line 1770 of file ComputePmeCUDAMgr.C.

References CUDA_PME_SPREADCHARGE_EVENT, NUM_GRID_MAX, Node::Object(), Node::simParameters, and simParams.

1770  {
1771  traceUserBracketEvent(CUDA_PME_SPREADCHARGE_EVENT, beforeWalltime, CmiWallTimer());
1772  beforeWalltime = CmiWallTimer();
1773  // gather (i.e. un-grid) forces
1775 // if (simParameters->alchOn) {
1776 // pmeRealSpaceComputes[1]->gatherForce(lattice, force);
1777 // ((CudaPmeRealSpaceCompute*)(pmeRealSpaceComputes[1]))->gatherForceSetCallback(this);
1778 // } else {
1779  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1780  if (enabledGrid[iGrid]) {
1781 // fprintf(stdout, "gatherForce at grid %u\n", iGrid);
1782  pmeRealSpaceComputes[iGrid]->gatherForce(lattice, forces[iGrid]);
1783  // Set callback that will call gatherForceDone() once gatherForce is done
1784  ((CudaPmeRealSpaceCompute*)(pmeRealSpaceComputes[iGrid]))->gatherForceSetCallback(this);
1785  }
1786  }
1787  // ((CudaPmeRealSpaceCompute*)pmeRealSpaceCompute)->waitGatherForceDone();
1788  // gatherForceDone();
1789 // }
1790 }

◆ gatherForceDone()

void ComputePmeCUDADevice::gatherForceDone ( unsigned int  iGrid)

Definition at line 1816 of file ComputePmeCUDAMgr.C.

References CUDA_PME_GATHERFORCE_EVENT, gatherForceDoneLoop(), NUM_GRID_MAX, Node::Object(), sendForcesToNeighbors(), Node::simParameters, and SimParameters::useCkLoop.

1816  {
1817  // CHC: prevent race condition when there are multiple pmeRealSpaceCompute objects
1818  forceReady[iGrid] = 1;
1819  bool all_force_ready = true;
1820 // fprintf(stdout, "gatherForceDone at grid %u\n", iGrid);
1821  // loop over forceReady to check if all forces are gathered
1822  for (unsigned int i = 0; i < NUM_GRID_MAX; ++i) {
1823  if (forceReady[i] == -1) continue;
1824  if (forceReady[i] == 0) all_force_ready = false;
1825  }
1826  if (all_force_ready) {
1827  for (unsigned int i = 0; i < NUM_GRID_MAX; ++i) {
1828  if (forceReady[i] == -1) continue;
1829  if (forceReady[i] == 1) forceReady[i] = 0;
1830  }
1831 // fprintf(stdout, "all force ready\n");
1832  // Primary pencil has the forces
1833 
1834  traceUserBracketEvent(CUDA_PME_GATHERFORCE_EVENT, beforeWalltime, CmiWallTimer());
1835 
1836  // Send forces to neighbors
1837  sendForcesToNeighbors();
1838 
1839 #if CMK_SMP && USE_CKLOOP
1840  int useCkLoop = Node::Object()->simParameters->useCkLoop;
1841  if (useCkLoop >= 1) {
1842  CkLoop_Parallelize(gatherForceDoneLoop, 1, (void *)this, CkMyNodeSize(), 0, numHomePatches-1);
1843  } else
1844 #endif
1845 
1846  {
1847  // Loop through home patches and mark the primary pencil as "done"
1848  for (int homePatchIndex=0;homePatchIndex < numHomePatches;homePatchIndex++) {
1849  bool done = false;
1850  // ----------------------------- lock start ---------------------------
1851  // NOTE: We use a node-wide lock here for the entire numPencils[] array, while
1852  // we would really only need to lock each element; that, however, would require
1853  // numHomePatches locks.
1854  if (pmePencilType != 3) CmiLock(lock_numPencils);
1855  numPencils[forceI][homePatchIndex]--;
1856  if (numPencils[forceI][homePatchIndex] == 0) done = true;
1857  if (pmePencilType != 3) CmiUnlock(lock_numPencils);
1858  // ----------------------------- lock end ---------------------------
1859  if (done) {
1860  // This home patch is done, launch force merging
1861  thisProxy[CkMyNode()].mergeForcesOnPatch(homePatchIndex);
1862  }
1863  }
1864  }
1865 
1866  // In case we have no home patches, clear the primary pencil storage here
1867  if (numHomePatches == 0) {
1868  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1869  pmeAtomStorage[forceI][pp0]->clear();
1870  }
1871  }
1872 }
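
The forceReady[] array above is effectively tri-state: -1 marks a grid that is not enabled (see initialize() below), 0 a gather that is still pending, and 1 a gather that has completed; the body of gatherForceDone(iGrid) only proceeds once every enabled grid has reported. A standalone sketch of that completion check, not NAMD code; the grid count of 5 is just an example:

#include <cstdio>

const int NUM_GRID = 5;

// Mirror of the completion test in gatherForceDone():
//   -1 = grid disabled, 0 = gather pending, 1 = gather finished.
bool allForceReady(const int forceReady[NUM_GRID]) {
  bool all = true;
  for (int i = 0; i < NUM_GRID; ++i) {
    if (forceReady[i] == -1) continue;     // disabled grids never block completion
    if (forceReady[i] == 0) all = false;   // an enabled grid is still gathering
  }
  return all;
}

int main() {
  int forceReady[NUM_GRID] = {0, 0, -1, -1, -1};   // e.g. two enabled grids
  forceReady[0] = 1;
  printf("%d\n", allForceReady(forceReady));       // 0: grid 1 still pending
  forceReady[1] = 1;
  printf("%d\n", allForceReady(forceReady));       // 1: both enabled grids done
  return 0;
}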

◆ gatherForceDoneSubset()

void ComputePmeCUDADevice::gatherForceDoneSubset ( int  first,
int  last 
)

Definition at line 1797 of file ComputePmeCUDAMgr.C.

References mergeForcesOnPatch().

Referenced by gatherForceDoneLoop().

1797  {
1798  for (int homePatchIndex=first;homePatchIndex <= last;homePatchIndex++) {
1799  bool done = false;
1800  // ----------------------------- lock start ---------------------------
1801  // NOTE: We use a node-wide lock here for the entire numPencils[] array, while
1802  // we would really only need to lock each element; that, however, would require
1803  // numHomePatches locks.
1804  if (pmePencilType != 3) CmiLock(lock_numPencils);
1805  numPencils[forceI][homePatchIndex]--;
1806  if (numPencils[forceI][homePatchIndex] == 0) done = true;
1807  if (pmePencilType != 3) CmiUnlock(lock_numPencils);
1808  // ----------------------------- lock end ---------------------------
1809  if (done) {
1810  // This home patch is done, launch force merging
1811  mergeForcesOnPatch(homePatchIndex);
1812  }
1813  }
1814 }
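
gatherForceDoneLoop() (not shown on this page) is the CkLoop body launched from gatherForceDone(), and it ends up calling gatherForceDoneSubset(first, last) on contiguous chunks of the home-patch range [0, numHomePatches-1]. A standalone sketch of that kind of range partitioning, assuming a simple even split; the actual CkLoop scheduling may differ:

#include <cstdio>

// Split [0, n-1] into nChunks contiguous [first, last] ranges and run a body on each.
void forEachChunk(int n, int nChunks, void (*body)(int first, int last)) {
  for (int c = 0; c < nChunks; ++c) {
    int first = (c * n) / nChunks;
    int last  = ((c + 1) * n) / nChunks - 1;
    if (first <= last) body(first, last);
  }
}

void subset(int first, int last) { printf("subset [%d, %d]\n", first, last); }

int main() {
  forEachChunk(10, 4, subset);   // e.g. 10 home patches over 4 workers
  return 0;
}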

◆ getDeviceID()

int ComputePmeCUDADevice::getDeviceID ( )

Definition at line 1159 of file ComputePmeCUDAMgr.C.

1159  {
1160  return deviceID;
1161 }

◆ getMgrProxy()

CProxy_ComputePmeCUDAMgr ComputePmeCUDADevice::getMgrProxy ( )

Definition at line 1163 of file ComputePmeCUDAMgr.C.

1163  {
1164  return mgrProxy;
1165 }

◆ getStream()

cudaStream_t ComputePmeCUDADevice::getStream ( )

Definition at line 1155 of file ComputePmeCUDAMgr.C.

1155  {
1156  return stream;
1157 }

◆ initialize()

void ComputePmeCUDADevice::initialize ( PmeGrid pmeGrid_in,
int  pencilIndexY_in,
int  pencilIndexZ_in,
int  deviceID_in,
int  pmePencilType_in,
CProxy_ComputePmeCUDAMgr  mgrProxy_in,
CProxy_PmeAtomFiler  pmeAtomFiler_in 
)

Definition at line 1050 of file ComputePmeCUDAMgr.C.

References createStream(), cudaCheck, DebugM, endi(), NUM_GRID_MAX, Node::Object(), Node::simParameters, simParams, PmeGrid::yBlocks, and PmeGrid::zBlocks.

1052  {
1053 
1054  deviceID = deviceID_in;
1055  DebugM(4, "ComputePmeCUDADevice::initialize deviceID "<< deviceID <<"\n"<< endi);
1056  cudaCheck(cudaSetDevice(deviceID));
1057  pmePencilType = pmePencilType_in;
1058  pmeGrid = pmeGrid_in;
1059 #ifdef DEBUGM
1060  // pmeGrid.print();
1061 #endif
1062  pencilIndexY = pencilIndexY_in;
1063  pencilIndexZ = pencilIndexZ_in;
1064  mgrProxy = mgrProxy_in;
1065  pmeAtomFiler = pmeAtomFiler_in;
1066  // Size of the neighboring pencil grid, max 3x3
1067  yNBlocks = std::min(pmeGrid.yBlocks, 3);
1068  zNBlocks = std::min(pmeGrid.zBlocks, 3);
1069  // Local pencil is at y=0,z=0
1070  if (yNBlocks == 1) {
1071  ylo = 0;
1072  yhi = 0;
1073  } else if (yNBlocks == 2) {
1074  ylo = -1;
1075  yhi = 0;
1076  } else {
1077  ylo = -1;
1078  yhi = 1;
1079  }
1080  if (zNBlocks == 1) {
1081  zlo = 0;
1082  zhi = 0;
1083  } else if (zNBlocks == 2) {
1084  zlo = -1;
1085  zhi = 0;
1086  } else {
1087  zlo = -1;
1088  zhi = 1;
1089  }
1090 
1091  neighborForcePencilMsgs.resize(yNBlocks*zNBlocks, NULL);
1092  // neighborForcePencils.resize(yNBlocks*zNBlocks);
1093  for (int j=0;j < 2;j++)
1094  homePatchIndexList[j].resize(yNBlocks*zNBlocks);
1095  neighborPatchIndex.resize(yNBlocks*zNBlocks);
1096 
1097  pmeAtomStorageAllocatedHere.resize(yNBlocks*zNBlocks, false);
1098  SimParameters *simParams = Node::Object()->simParameters;
1099  for (int j=0;j < 2;j++) {
1100  pmeAtomStorage[j].resize(yNBlocks*zNBlocks, NULL);
1101  for (int z=zlo;z <= zhi;z++) {
1102  for (int y=ylo;y <= yhi;y++) {
1103  int pp = y-ylo + (z-zlo)*yNBlocks;
1104  int yt = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1105  int zt = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1106  if (y == 0 && z == 0) {
1107  // Primary pencil
1108  pmeAtomStorage[j][pp] = new CudaPmeAtomStorage(pmePencilType != 3);
1109  pmeAtomStorage[j][pp]->setupAlch(*simParams);
1110  } else {
1111  pmeAtomStorage[j][pp] = new CpuPmeAtomStorage(pmePencilType != 3);
1112  pmeAtomStorage[j][pp]->setupAlch(*simParams);
1113  }
1114  pmeAtomStorageAllocatedHere[pp] = true;
1115  }
1116  }
1117  }
1118 
1119  // Create stream for this device
1120  createStream(stream);
1121  streamCreated = true;
1122  // CHC: enable at least 1 grid
1123  // CHC: do we need a different stream?
1124  pmeRealSpaceComputes[0] = new CudaPmeRealSpaceCompute(pmeGrid, pencilIndexY, pencilIndexZ, deviceID, stream);
1125  pmeRealSpaceComputes[0]->setGrid(0);
1126  enabledGrid[0] = true;
1127  if (simParams->alchOn) {
1128  pmeRealSpaceComputes[1] = new CudaPmeRealSpaceCompute(pmeGrid, pencilIndexY, pencilIndexZ, deviceID, stream);
1129  pmeRealSpaceComputes[1]->setGrid(1);
1130  // at least two grids are required for alchemical transformation
1131  enabledGrid[1] = true;
1132  if (simParams->alchDecouple) {
1133  pmeRealSpaceComputes[2] = new CudaPmeRealSpaceCompute(pmeGrid, pencilIndexY, pencilIndexZ, deviceID, stream);
1134  pmeRealSpaceComputes[2]->setGrid(2);
1135  enabledGrid[2] = true;
1136  pmeRealSpaceComputes[3] = new CudaPmeRealSpaceCompute(pmeGrid, pencilIndexY, pencilIndexZ, deviceID, stream);
1137  pmeRealSpaceComputes[3]->setGrid(3);
1138  enabledGrid[3] = true;
1139  }
1140  if (simParams->alchElecLambdaStart || simParams->alchThermIntOn) {
1141  pmeRealSpaceComputes[4] = new CudaPmeRealSpaceCompute(pmeGrid, pencilIndexY, pencilIndexZ, deviceID, stream);
1142  pmeRealSpaceComputes[4]->setGrid(4);
1143  enabledGrid[4] = true;
1144  }
1145  }
1146  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1147  if (enabledGrid[iGrid]) {
1148  forceReady[iGrid] = 0;
1149  } else {
1150  forceReady[iGrid] = -1;
1151  }
1152  }
1153 }
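
The yNBlocks/zNBlocks and ylo..yhi/zlo..zhi window set up above covers the primary pencil plus at most one neighbor on each side, and the per-neighbor storage slot used throughout this class is the linear index pp = (y-ylo) + (z-zlo)*yNBlocks (so the primary pencil is pp0 = (0-ylo) + (0-zlo)*yNBlocks). A standalone sketch of that arithmetic, not NAMD code, just the same index convention applied to an example decomposition:

#include <algorithm>
#include <cstdio>

int main() {
  int yBlocks = 4, zBlocks = 1;              // example pencil decomposition
  int yNBlocks = std::min(yBlocks, 3);       // neighbor window is at most 3 wide
  int zNBlocks = std::min(zBlocks, 3);
  int ylo = (yNBlocks == 1) ? 0 : -1;
  int yhi = (yNBlocks == 3) ? 1 : 0;
  int zlo = (zNBlocks == 1) ? 0 : -1;
  int zhi = (zNBlocks == 3) ? 1 : 0;
  for (int z = zlo; z <= zhi; z++)
    for (int y = ylo; y <= yhi; y++) {
      int pp = (y - ylo) + (z - zlo)*yNBlocks;   // storage slot for this neighbor
      printf("neighbor (y=%d,z=%d) -> pp=%d%s\n", y, z, pp,
             (y == 0 && z == 0) ? "  (primary pencil, pp0)" : "");
    }
  return 0;
}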

◆ initializePatches()

void ComputePmeCUDADevice::initializePatches ( int  numHomePatches_in)

Definition at line 1235 of file ComputePmeCUDAMgr.C.

References PmeGrid::yBlocks, and PmeGrid::zBlocks.

1235  {
1236  numHomePatches = numHomePatches_in;
1237  for (int j=0;j < 2;j++)
1238  numPencils[j].resize(numHomePatches);
1239  for (int j=0;j < 2;j++)
1240  plList[j].resize(numHomePatches);
1241  for (int j=0;j < 2;j++)
1242  homePatchForceMsgs[j].resize(numHomePatches);
1243  // for (int j=0;j < 2;j++)
1244  // numHomeAtoms[j].resize(numHomePatches);
1245  // If we have home patches, register this pencil with the neighbors and with self
1246  if (numHomePatches > 0) {
1247  for (int z=zlo;z <= zhi;z++) {
1248  for (int y=ylo;y <= yhi;y++) {
1249  int yt = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1250  int zt = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1251  int node = mgrProxy.ckLocalBranch()->getNode(yt, zt);
1252  mgrProxy[node].registerNeighbor(yt, zt);
1253  }
1254  }
1255  }
1256 }

◆ isGridEnabled()

bool ComputePmeCUDADevice::isGridEnabled ( unsigned int  i) const

Definition at line 1167 of file ComputePmeCUDAMgr.C.

1167  {
1168  return enabledGrid[i];
1169 }

◆ mergeForcesOnPatch()

void ComputePmeCUDADevice::mergeForcesOnPatch ( int  homePatchIndex)

Definition at line 1982 of file ComputePmeCUDAMgr.C.

References Node::Object(), sendForcesToPatch(), Node::simParameters, simParams, CudaForce::x, CudaForce::y, and CudaForce::z.

Referenced by gatherForceDoneSubset().

1982  {
1983  // We have all the forces for this patch => merge on a single Pe
1984 
1985  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1986 
1987  // Message that goes out to the compute
1988  PmeForceMsg *forceMsg = homePatchForceMsgs[forceI][homePatchIndex];
1989 
1991  if (pmePencilType == 3) {
1992  // 3D box => simple memory copy will do
1993  // Location of forces in the force[] array
1994  int* patchPos = pmeAtomStorage[forceI][pp0]->getPatchPos();
1995  // plList[homePatchIndex] array tells you the location of pencils that are sharing this home patch
1996  int pencilPatchIndex = plList[forceI][homePatchIndex][0].pencilPatchIndex;
1997  int atomStart = (pencilPatchIndex == 0) ? 0 : patchPos[pencilPatchIndex-1];
1998  int atomEnd = patchPos[pencilPatchIndex];
1999  int numAtoms = atomEnd-atomStart;
2000  if (forceMsg->zeroCopy) {
2001  // Zero-copy, just pass the pointer
2002  forceMsg->force = forces[0]+atomStart;
2003  if (simParams->alchOn) {
2004  forceMsg->force2 = forces[1]+atomStart;
2005  if (simParams->alchDecouple) {
2006  forceMsg->force3 = forces[2]+atomStart;
2007  forceMsg->force4 = forces[3]+atomStart;
2008  }
2009  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
2010  forceMsg->force5 = forces[4]+atomStart;
2011  }
2012  }
2013  } else {
2014  memcpy(forceMsg->force, forces[0]+atomStart, numAtoms*sizeof(CudaForce));
2015  if (simParams->alchOn) {
2016  memcpy(forceMsg->force2, forces[1]+atomStart, numAtoms*sizeof(CudaForce));
2017  if (simParams->alchDecouple) {
2018  memcpy(forceMsg->force3, forces[2]+atomStart, numAtoms*sizeof(CudaForce));
2019  memcpy(forceMsg->force4, forces[3]+atomStart, numAtoms*sizeof(CudaForce));
2020  }
2021  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
2022  memcpy(forceMsg->force5, forces[4]+atomStart, numAtoms*sizeof(CudaForce));
2023  }
2024  }
2025  }
2026  } else {
2027 
2028  // Zero force array
2029  // memset(forceMsg->force, 0, numHomeAtoms[forceI][homePatchIndex]*sizeof(CudaForce));
2030  memset(forceMsg->force, 0, forceMsg->numAtoms*sizeof(CudaForce));
2031  if (simParams->alchOn) {
2032  memset(forceMsg->force2, 0, forceMsg->numAtoms*sizeof(CudaForce));
2033  if (simParams->alchDecouple) {
2034  memset(forceMsg->force3, 0, forceMsg->numAtoms*sizeof(CudaForce));
2035  memset(forceMsg->force4, 0, forceMsg->numAtoms*sizeof(CudaForce));
2036  }
2037  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
2038  memset(forceMsg->force5, 0, forceMsg->numAtoms*sizeof(CudaForce));
2039  }
2040  }
2041 
2042  // Store forces from primary pencil
2043  {
2044  int* patchPos = pmeAtomStorage[forceI][pp0]->getPatchPos();
2045  int* index = pmeAtomStorage[forceI][pp0]->getAtomIndex();
2046  int pencilPatchIndex = plList[forceI][homePatchIndex][0].pencilPatchIndex;
2047  int atomStart = (pencilPatchIndex == 0) ? 0 : patchPos[pencilPatchIndex-1];
2048  int atomEnd = patchPos[pencilPatchIndex];
2049  int numAtoms = atomEnd-atomStart;
2050 
2051  // Copy in local forces that are stored in the force[] array
2052  for (int i=0;i < numAtoms;i++) {
2053  forceMsg->force[index[atomStart + i]] = forces[0][atomStart + i];
2054  if (simParams->alchOn) {
2055  forceMsg->force2[index[atomStart + i]] = forces[1][atomStart + i];
2056  if (simParams->alchDecouple) {
2057  forceMsg->force3[index[atomStart + i]] = forces[2][atomStart + i];
2058  forceMsg->force4[index[atomStart + i]] = forces[3][atomStart + i];
2059  }
2060  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
2061  forceMsg->force5[index[atomStart + i]] = forces[4][atomStart + i];
2062  }
2063  }
2064  }
2065 
2066  }
2067 
2068  // Add forces from neighboring pencils
2069  for (int j=1;j < plList[forceI][homePatchIndex].size();j++) {
2070  int pp = plList[forceI][homePatchIndex][j].pp;
2071  int pencilPatchIndex = plList[forceI][homePatchIndex][j].pencilPatchIndex;
2072 
2073  int* patchPos = pmeAtomStorage[forceI][pp]->getPatchPos();
2074  int* index = pmeAtomStorage[forceI][pp]->getAtomIndex();
2075  int atomStart = (pencilPatchIndex == 0) ? 0 : patchPos[pencilPatchIndex-1];
2076  int atomEnd = patchPos[pencilPatchIndex];
2077  int numAtoms = atomEnd-atomStart;
2078  CudaForce *dstForce = forceMsg->force;
2079  // CudaForce *srcForce = neighborForcePencils[pp].force;
2080  CudaForce *dstForce2 = forceMsg->force2;
2081  CudaForce *dstForce3 = forceMsg->force3;
2082  CudaForce *dstForce4 = forceMsg->force4;
2083  CudaForce *dstForce5 = forceMsg->force5;
2084  CudaForce *srcForce = neighborForcePencilMsgs[pp]->force;
2085  CudaForce *srcForce2 = neighborForcePencilMsgs[pp]->force2;
2086  CudaForce *srcForce3 = neighborForcePencilMsgs[pp]->force3;
2087  CudaForce *srcForce4 = neighborForcePencilMsgs[pp]->force4;
2088  CudaForce *srcForce5 = neighborForcePencilMsgs[pp]->force5;
2089 
2090  for (int i=0;i < numAtoms;i++) {
2091  dstForce[index[atomStart + i]].x += srcForce[atomStart + i].x;
2092  dstForce[index[atomStart + i]].y += srcForce[atomStart + i].y;
2093  dstForce[index[atomStart + i]].z += srcForce[atomStart + i].z;
2094  if (simParams->alchOn) {
2095  dstForce2[index[atomStart + i]].x += srcForce2[atomStart + i].x;
2096  dstForce2[index[atomStart + i]].y += srcForce2[atomStart + i].y;
2097  dstForce2[index[atomStart + i]].z += srcForce2[atomStart + i].z;
2098  if (simParams->alchDecouple) {
2099  dstForce3[index[atomStart + i]].x += srcForce3[atomStart + i].x;
2100  dstForce3[index[atomStart + i]].y += srcForce3[atomStart + i].y;
2101  dstForce3[index[atomStart + i]].z += srcForce3[atomStart + i].z;
2102  dstForce4[index[atomStart + i]].x += srcForce4[atomStart + i].x;
2103  dstForce4[index[atomStart + i]].y += srcForce4[atomStart + i].y;
2104  dstForce4[index[atomStart + i]].z += srcForce4[atomStart + i].z;
2105  }
2106  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
2107  dstForce5[index[atomStart + i]].x += srcForce5[atomStart + i].x;
2108  dstForce5[index[atomStart + i]].y += srcForce5[atomStart + i].y;
2109  dstForce5[index[atomStart + i]].z += srcForce5[atomStart + i].z;
2110  }
2111  }
2112  }
2113 
2114  }
2115  }
2116 
2117  // Clear storage
2118  plList[forceI][homePatchIndex].clear();
2119 
2120  // ----------------------------- lock start ---------------------------
2121  // bool done = false;
2122  CmiLock(lock_numHomePatchesMerged);
2123  numHomePatchesMerged++;
2124  if (numHomePatchesMerged == numHomePatches) {
2125  // Reset counter
2126  numHomePatchesMerged = 0;
2127 
2128  // Delete messages
2129  for (int i=0;i < neighborForcePencilMsgs.size();i++) {
2130  if (neighborForcePencilMsgs[i] != NULL) {
2131  delete neighborForcePencilMsgs[i];
2132  neighborForcePencilMsgs[i] = NULL;
2133  }
2134  }
2135 
2136  // Done merging and sending forces => clear storage
2137  for (int pp=0;pp < homePatchIndexList[forceI].size();pp++)
2138  homePatchIndexList[forceI][pp].clear();
2139  for (int pp=0;pp < pmeAtomStorage[forceI].size();pp++)
2140  pmeAtomStorage[forceI][pp]->clear();
2141 
2142  }
2143  CmiUnlock(lock_numHomePatchesMerged);
2144  // ----------------------------- lock end ---------------------------
2145 
2146  // Patch is done => send over to the node that contains the ComputePmeCUDA compute,
2147  // that node will then relay the message to the Pe that originally sent the atoms
2148  int pe = forceMsg->pe;
2149  if (CkNodeOf(pe) != CkMyNode())
2150  thisProxy[CkNodeOf(pe)].sendForcesToPatch(forceMsg);
2151  else
2152  sendForcesToPatch(forceMsg);
2153 
2154 }
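
For a non-3D pencil decomposition the merge above is a scatter-add: forces sit in pencil (filing) order, and the per-atom index[] array from the pencil's atom storage maps each of them back to its slot in the home patch's force message, where neighbor contributions are accumulated on top of the primary pencil's values. A standalone sketch of the neighbor-accumulation step, not NAMD code and without the atomStart offsets used above:

#include <cstdio>

struct Force { float x, y, z; };

// Accumulate forces stored in pencil order into home-patch order via index[].
void mergeNeighbor(Force* dst, const Force* src, const int* index, int n) {
  for (int i = 0; i < n; ++i) {
    dst[index[i]].x += src[i].x;
    dst[index[i]].y += src[i].y;
    dst[index[i]].z += src[i].z;
  }
}

int main() {
  Force dst[3] = {};                        // home-patch order (3 atoms in the patch)
  Force src[2] = {{1, 0, 0}, {0, 2, 0}};    // pencil order (2 atoms landed in this pencil)
  int index[2] = {2, 0};                    // pencil atom i belongs to patch slot index[i]
  mergeNeighbor(dst, src, index, 2);
  printf("%g %g %g\n", dst[0].y, dst[2].x, dst[1].x);   // prints: 2 1 0
  return 0;
}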

◆ recvAtoms()

void ComputePmeCUDADevice::recvAtoms ( PmeAtomMsg msg)

Definition at line 1267 of file ComputePmeCUDAMgr.C.

References PmeAtomMsg::atoms, PmeAtomMsg::compute, PmeForceMsg::compute, PmeAtomMsg::doEnergy, PmeAtomMsg::doVirial, PmeAtomFiler::fileAtoms(), PmeAtomFiler::getAtomIndex(), PmeAtomFiler::getNumAtoms(), PmeAtomMsg::lattice, NAMD_bug(), PmeAtomMsg::numAtoms, PmeForceMsg::numAtoms, PmeForceMsg::numStrayAtoms, Node::Object(), PmeAtomMsg::pe, PmeForceMsg::pe, PRIORITY_SIZE, sendAtomsToNeighbors(), Node::simParameters, simParams, PmeAtomMsg::simulationStep, CudaAtom::x, CudaAtom::y, PmeGrid::yBlocks, CudaAtom::z, PmeGrid::zBlocks, and PmeForceMsg::zeroCopy.

1267  {
1268 
1269  PmeAtomFiler *pmeAtomFilerPtr = pmeAtomFiler[CkMyPe()].ckLocalBranch();
1270  // Store "virial" and "energy" flags
1271  doVirial = msg->doVirial;
1272  doEnergy = msg->doEnergy;
1273  simulationStep = msg->simulationStep;
1274  // Store lattice
1276  lattice = msg->lattice;
1277 
1278  // Primary pencil index
1279  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1280  int p0 = 0;
1281  int pencilPatchIndex[9];
1282  int numStrayAtomsPatch = 0;
1283  if (pmePencilType == 3) {
1284  // 3D box => store atoms directly without index
1285  // NOTE: We don't check for stray atoms here!
1286  if (simParams->alchOn) {
1287  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1288  // only FEP, no alchDecouple and alchElecLambdaStart == 0, use 2 grids
1289  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2});
1290  }
1291  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1292  // FEP with alchDecouple, use 4 grids
1293  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4});
1294  }
1295  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1296  // FEP with alchDecouple and alchElecLambdaStart > 0, use 5 grids
1297  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4, msg->chargeFactors5});
1298  }
1299  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1300  // FEP without alchDecouple and alchElecLambdaStart > 0, use 3 grids (1,2,5)
1301  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, NULL, NULL, msg->chargeFactors5});
1302  }
1303  if (simParams->alchThermIntOn && !simParams->alchDecouple) {
1304  // TI estimator without alchDecouple, use 3 grids (1,2,5)
1305  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, NULL, NULL, msg->chargeFactors5});
1306  }
1307  if (simParams->alchThermIntOn && simParams->alchDecouple) {
1308  // TI estimator with alchDecouple, use all grids
1309  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4, msg->chargeFactors5});
1310  }
1311  } else {
1312  // no alchemistry
1313  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{});
1314  }
1315  } else {
1316 
1317  // File atoms
1318  pmeAtomFilerPtr->fileAtoms(msg->numAtoms, msg->atoms, lattice, pmeGrid,
1319  pencilIndexY, pencilIndexZ, ylo, yhi, zlo, zhi);
1320 
1321  // Loop through pencils and add atoms to pencil atom lists
1322  // NOTE: we only store to neighboring pencil if there are atoms to store
1323  int numAtomsCheck = 0;
1324  for (int p=0;p < 9;p++) {
1325 
1326  int y = (p % 3);
1327  int z = (p / 3);
1328 
1329  int pp = y + z*yNBlocks;
1330  int numAtoms = pmeAtomFilerPtr->getNumAtoms(p);
1331  if (pp == pp0) p0 = p;
1332  if (pp == pp0 || numAtoms > 0) {
1333  if (pmeGrid.yBlocks == 1 && pmeGrid.zBlocks == 1 && (y != 0 || z != 0))
1334  NAMD_bug("ComputePmeCUDADevice::recvAtoms, problem with atom filing");
1335  int* index = pmeAtomFilerPtr->getAtomIndex(p);
1336  if (simParams->alchOn) {
1337  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1338  // only FEP, no alchDecouple and alchElecLambdaStart == 0, use 2 grids
1339  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2});
1340  }
1341  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1342  // FEP with alchDecouple, use 4 grids
1343  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4});
1344  }
1345  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1346  // FEP without alchDecouple and alchElecLambdaStart > 0, use 3 grids (1,2,5)
1347  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, NULL, NULL, msg->chargeFactors5});
1348  }
1349  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1350  // FEP with alchDecouple and alchElecLambdaStart > 0, use 5 grids
1351  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4, msg->chargeFactors5});
1352  }
1353  if (simParams->alchThermIntOn && !simParams->alchDecouple) {
1354  // TI estimator without alchDecouple, use 3 grids (1,2,5)
1355  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, NULL, NULL, msg->chargeFactors5});
1356  }
1357  if (simParams->alchThermIntOn && simParams->alchDecouple) {
1358  // TI estimator with alchDecouple, use all grids
1359  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4, msg->chargeFactors5});
1360  }
1361  } else {
1362  // no alchemistry
1363  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index, std::vector<float*>{});
1364  }
1365  // Number of patches in this storage tells you how many home patches contributed and
1366  // homePatchIndex (pe) tells you which patch contributed
1367  numAtomsCheck += numAtoms;
1368  }
1369  }
1370 
1371  // Deal with stray atoms
1372  numStrayAtomsPatch = pmeAtomFilerPtr->getNumAtoms(9);
1373  if (numStrayAtomsPatch > 0) {
1374  int* index = pmeAtomFilerPtr->getAtomIndex(9);
1375  CkPrintf("%d stray charges detected. Up to 10 listed below (index in patch, x, y, z):\n", numStrayAtomsPatch);
1376  for (int i=0;i < std::min(numStrayAtomsPatch, 10);i++) {
1377  int j = index[i];
1378  CkPrintf("%d %f %f %f\n", j, msg->atoms[j].x, msg->atoms[j].y, msg->atoms[j].z);
1379  }
1380  }
1381 
1382  if (numAtomsCheck + numStrayAtomsPatch < msg->numAtoms)
1383  NAMD_bug("ComputePmeCUDADevice::recvAtoms, missing atoms");
1384  }
1385 
1386  // Create storage for home patch forces
1387  PmeForceMsg *forceMsg;
1388  if (pmePencilType == 3 && CkNodeOf(msg->pe) == CkMyNode()) {
1389  // 3D FFT and compute resides on the same node => use zero-copy forces
1390  // CHC: forces are zero-copy so do we need this for alchDecouple?
1391  forceMsg = new (0, 0, 0, 0, 0, PRIORITY_SIZE) PmeForceMsg();
1392  forceMsg->zeroCopy = true;
1393  } else {
1394  const int alchGrid = simParams->alchOn ? 1 : 0;
1395  const int alchDecoupleGrid = simParams->alchDecouple ? 1: 0;
1396  const int alchSoftCoreOrTI = (simParams->alchElecLambdaStart > 0 || simParams->alchThermIntOn) ? 1 : 0;
1397  forceMsg = new (msg->numAtoms, alchGrid * msg->numAtoms,
1398  alchDecoupleGrid * msg->numAtoms, alchDecoupleGrid * msg->numAtoms,
1399  alchSoftCoreOrTI * msg->numAtoms, PRIORITY_SIZE) PmeForceMsg();
1400  forceMsg->zeroCopy = false;
1401  }
1402  forceMsg->numAtoms = msg->numAtoms;
1403  forceMsg->pe = msg->pe;
1404  forceMsg->compute = msg->compute;
1405  forceMsg->numStrayAtoms = numStrayAtomsPatch;
1406 
1407  bool done = false;
1408  // ----------------------------- lock start ---------------------------
1409  // Only after writing has finished do we get homePatchIndex.
1410  // This guarantees that for whichever thread receives "done=true", writing has finished on
1411  // ALL threads.
1412  CmiLock(lock_recvAtoms);
1413  numStrayAtoms += numStrayAtomsPatch;
1414  // Secure homePatchIndex. All writes after this must be inside lock-region
1415  int homePatchIndex = numHomePatchesRecv;
1416  // Store primary pencil first
1417  plList[atomI][homePatchIndex].push_back(PencilLocation(pp0, pencilPatchIndex[p0]));
1418  if (pmePencilType != 3) {
1419  // Go back to through neighboring pencils and store "homePatchIndex"
1420  for (int p=0;p < 9;p++) {
1421 
1422  int y = (p % 3);
1423  int z = (p / 3);
1424 
1425  int pp = y + z*yNBlocks;
1426  int numAtoms = pmeAtomFilerPtr->getNumAtoms(p);
1427  if (pp != pp0 && numAtoms > 0) {
1428  homePatchIndexList[atomI][pp].push_back(homePatchIndex);
1429  // plList[0...numHomePatches-1] = for each home patch stores the location of pencils that are
1430  // sharing it
1431  // plList[homePatchIndex].size() tells the number of pencils that the home patch is shared with
1432  plList[atomI][homePatchIndex].push_back(PencilLocation(pp, pencilPatchIndex[p]));
1433  }
1434  }
1435  }
1436  homePatchForceMsgs[atomI][homePatchIndex] = forceMsg;
1437  // numHomeAtoms[atomI][homePatchIndex] = msg->numAtoms;
1438  // Set the number of pencils contributing to this home patch
1439  numPencils[atomI][homePatchIndex] = plList[atomI][homePatchIndex].size();
1440  //
1441  numHomePatchesRecv++;
1442  if (numHomePatchesRecv == numHomePatches) {
1443  // Reset counter
1444  numHomePatchesRecv = 0;
1445  done = true;
1446  }
1447  CmiUnlock(lock_recvAtoms);
1448  // ----------------------------- lock end ---------------------------
1449 
1450  // plList[atomI][homePatchIndex] array tells you the location of pencils that are sharing this home patch
1451 
1452  delete msg;
1453 
1454  if (done) {
1455  // Pencil has received all home patches and writing to memory is done => send atoms to neighbors
1456  sendAtomsToNeighbors();
1457  }
1458 }
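
The alchemical branches above differ only in which charge-factor arrays accompany the atoms, i.e. in how many of the up-to-five PME grids are in use. A compact restatement of that selection, not NAMD code; it assumes, as the branches above do, that alchOn implies either alchFepOn or alchThermIntOn, and uses 1-based grid numbers to match the comments such as "use 3 grids (1,2,5)":

#include <cstdio>

// Which grids carry charge factors, following the branches in recvAtoms().
void usedGrids(bool alchOn, bool tiOn, bool decouple, bool elecLambdaStart, bool used[6]) {
  for (int g = 0; g <= 5; ++g) used[g] = false;
  if (!alchOn) { used[1] = true; return; }      // no alchemistry: one grid, no factors
  used[1] = used[2] = true;                     // the two end-state grids are always used
  if (decouple) used[3] = used[4] = true;       // alchDecouple adds two more grids
  if (elecLambdaStart || tiOn) used[5] = true;  // alchElecLambdaStart > 0 or TI adds grid 5
}

int main() {
  bool u[6];
  usedGrids(true, false, false, true, u);       // FEP, no alchDecouple, alchElecLambdaStart > 0
  for (int g = 1; g <= 5; ++g) if (u[g]) printf("grid %d ", g);
  printf("\n");                                 // prints: grid 1 grid 2 grid 5
  return 0;
}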

◆ recvAtomsFromNeighbor()

void ComputePmeCUDADevice::recvAtomsFromNeighbor ( PmeAtomPencilMsg msg)

Definition at line 1574 of file ComputePmeCUDAMgr.C.

References PmeAtomPencilMsg::atoms, PmeAtomPencilMsg::doEnergy, PmeAtomPencilMsg::doVirial, PmeAtomPencilMsg::lattice, NAMD_bug(), PmeAtomPencilMsg::numAtoms, Node::Object(), registerRecvAtomsFromNeighbor(), Node::simParameters, simParams, PmeAtomPencilMsg::simulationStep, PmeAtomPencilMsg::srcY, PmeAtomPencilMsg::srcZ, PmeGrid::yBlocks, and PmeGrid::zBlocks.

1574  {
1575  // Store into primary pencil
1576  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1577  // Compute pencil index relative to primary pencil
1578  int y = msg->srcY - pencilIndexY;
1579  if (y < ylo) y += pmeGrid.yBlocks;
1580  if (y > yhi) y -= pmeGrid.yBlocks;
1581  int z = msg->srcZ - pencilIndexZ;
1582  if (z < zlo) z += pmeGrid.zBlocks;
1583  if (z > zhi) z -= pmeGrid.zBlocks;
1584  if (y < ylo || y > yhi || z < zlo || z > zhi || (y == 0 && z == 0)) {
1585  NAMD_bug("ComputePmeCUDADevice::recvAtomsFromNeighbor, pencil index outside bounds");
1586  }
1587  // Read energy and virial flags
1588  doEnergy = msg->doEnergy;
1589  doVirial = msg->doVirial;
1590  simulationStep = msg->simulationStep;
1591  // Read lattice
1592  lattice = msg->lattice;
1593  // Pencil index where atoms came from
1594  int pp = y-ylo + (z-zlo)*yNBlocks;
1595  // Store atoms and mark down the patch index where these atoms were added
1596  SimParameters *simParams = Node::Object()->simParameters;
1597  if (simParams->alchOn) {
1598  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1599  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2});
1600  }
1601  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1602  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4});
1603  }
1604  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1605  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4, msg->chargeFactors5});
1606  }
1607  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1608  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, 0, 0, msg->chargeFactors5});
1609  }
1610  if (simParams->alchThermIntOn && !simParams->alchDecouple) {
1611  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, 0, 0, msg->chargeFactors5});
1612  }
1613  if (simParams->alchThermIntOn && simParams->alchDecouple) {
1614  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{msg->chargeFactors1, msg->chargeFactors2, msg->chargeFactors3, msg->chargeFactors4, msg->chargeFactors5});
1615  }
1616  } else {
1617  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms, std::vector<float*>{});
1618  }
1619 
1620  delete msg;
1621 
1622  registerRecvAtomsFromNeighbor();
1623 }
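
The srcY/srcZ arithmetic above is the inverse of the periodic wrap-around used on the sending side (sendAtomsToNeighbor() and sendForcesToNeighbors() below): the sender converts a relative neighbor offset into an absolute pencil index modulo yBlocks/zBlocks, and the receiver maps that absolute source index back into its own -1..+1 neighbor window. A standalone sketch of both directions along y, not NAMD code:

#include <cstdio>

int main() {
  int yBlocks = 4;
  // Sender: pencil 0 sends to its y = -1 neighbor.
  int senderY = 0, y = -1;
  int yt = (senderY + y + yBlocks) % yBlocks;     // absolute destination index, wraps to 3
  // Receiver (pencil yt): recover where the atoms came from, relative to itself.
  int receiverY = yt;
  int rel = senderY - receiverY;                  // senderY plays the role of msg->srcY
  if (rel < -1) rel += yBlocks;                   // ylo == -1 in this example
  if (rel >  1) rel -= yBlocks;                   // yhi == +1 in this example
  printf("destination yt=%d, source seen by receiver at relative y=%d\n", yt, rel);
  return 0;
}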

◆ recvForcesFromNeighbor()

void ComputePmeCUDADevice::recvForcesFromNeighbor ( PmeForcePencilMsg msg)

Definition at line 1927 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), PmeForcePencilMsg::srcY, PmeForcePencilMsg::srcZ, PmeGrid::yBlocks, and PmeGrid::zBlocks.

1927  {
1928 
1929  // Source pencil index
1930  int y = msg->srcY - pencilIndexY;
1931  if (y < ylo) y += pmeGrid.yBlocks;
1932  if (y > yhi) y -= pmeGrid.yBlocks;
1933  int z = msg->srcZ - pencilIndexZ;
1934  if (z < zlo) z += pmeGrid.zBlocks;
1935  if (z > zhi) z -= pmeGrid.zBlocks;
1936 
1937  if (y < ylo || y > yhi || z < zlo || z > zhi || (y == 0 && z == 0)) {
1938  NAMD_bug("ComputePmeCUDADevice::recvForcesFromNeighbor, pencil index outside bounds");
1939  }
1940 
1941  // Source pencil
1942  int pp = y-ylo + (z-zlo)*yNBlocks;
1943 
1944  // Store message (deleted in mergeForcesOnPatch)
1945  neighborForcePencilMsgs[pp] = msg;
1946 
1947  // neighborForcePencils[pp].force = new CudaForce[msg->numAtoms];
1948  // memcpy(neighborForcePencils[pp].force, msg->force, sizeof(CudaForce)*msg->numAtoms);
1949  // neighborForcePencils[pp].numAtoms = msg->numAtoms;
1950  // neighborForcePencils[pp].y = msg->y;
1951  // neighborForcePencils[pp].z = msg->z;
1952  // neighborForcePencils[pp].srcY = msg->srcY;
1953  // neighborForcePencils[pp].srcZ = msg->srcZ;
1954  // delete msg;
1955 
1956  // numPatches = number of home patches this pencil has
1957  int numPatches = pmeAtomStorage[forceI][pp]->getNumPatches();
1958  if (numPatches != homePatchIndexList[forceI][pp].size()) {
1959  NAMD_bug("ComputePmeCUDADevice::recvForcesFromNeighbor, numPatches incorrect");
1960  }
1961  for (int i=0;i < numPatches;i++) {
1962  // this pencil contributed to home patch with index "homePatchIndex"
1963  int homePatchIndex = homePatchIndexList[forceI][pp][i];
1964  // ----------------------------- lock start ---------------------------
1965  // NOTE: We use a node-wide lock here for the entire numPencils[] array, while
1966  // we would really only need to lock each element; that, however, would require
1967  // numHomePatches locks.
1968  bool done = false;
1969  CmiLock(lock_numPencils);
1970  numPencils[forceI][homePatchIndex]--;
1971  if (numPencils[forceI][homePatchIndex] == 0) done = true;
1972  CmiUnlock(lock_numPencils);
1973  // ----------------------------- lock end ---------------------------
1974  if (done) {
1975  // This home patch is done, launch force merging
1976  thisProxy[CkMyNode()].mergeForcesOnPatch(homePatchIndex);
1977  }
1978  }
1979 
1980 }
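
Each home patch keeps a count, numPencils[forceI][homePatchIndex], of how many pencils contributed atoms to it; recvForcesFromNeighbor() above (and gatherForceDone() for the primary pencil) decrements that count, and whichever contribution reaches zero launches mergeForcesOnPatch(). A standalone sketch of that countdown, with std::mutex standing in for the CmiLock used above; not NAMD code:

#include <cstdio>
#include <mutex>

// Decrement-to-zero countdown: true is returned to exactly one contributor.
struct PatchCountdown {
  std::mutex lock;
  int remaining;
  explicit PatchCountdown(int contributingPencils) : remaining(contributingPencils) {}
  bool contribute() {
    std::lock_guard<std::mutex> guard(lock);
    return --remaining == 0;
  }
};

int main() {
  PatchCountdown patch(3);             // primary pencil plus two neighbor pencils
  for (int pencil = 0; pencil < 3; ++pencil)
    if (patch.contribute()) printf("pencil %d triggers mergeForcesOnPatch()\n", pencil);
  return 0;
}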

◆ registerNeighbor()

void ComputePmeCUDADevice::registerNeighbor ( )

Definition at line 1258 of file ComputePmeCUDAMgr.C.

1258  {
1259  CmiLock(lock_numHomePatchesMerged);
1260  numNeighborsExpected++;
1261  CmiUnlock(lock_numHomePatchesMerged);
1262 }

◆ registerRecvAtomsFromNeighbor()

void ComputePmeCUDADevice::registerRecvAtomsFromNeighbor ( )

Definition at line 1625 of file ComputePmeCUDAMgr.C.

References spreadCharge().

Referenced by recvAtomsFromNeighbor(), and sendAtomsToNeighbors().

1625  {
1626  // Primary pencil
1627  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1628 
1629  bool done = false;
1630  // ----------------------------- lock start ---------------------------
1631  CmiLock(lock_numNeighborsRecv);
1632  numNeighborsRecv++;
1633  if (numNeighborsRecv == numNeighborsExpected) {
1634  // Reset counter
1635  numNeighborsRecv = 0;
1636  done = true;
1637  }
1638  CmiUnlock(lock_numNeighborsRecv);
1639  // ----------------------------- lock end ---------------------------
1640 
1641  if (done) {
1642  // Primary pencil has received all atoms and writing has finished => spread charge
1643  spreadCharge();
1644  }
1645 }
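
This is the increment-and-reset counterpart of the per-patch countdown sketched after recvForcesFromNeighbor() above: every arrival bumps numNeighborsRecv under a lock, and exactly the arrival that completes the expected count resets the counter and starts charge spreading. A standalone sketch, again with std::mutex standing in for CmiLock; not NAMD code:

#include <cstdio>
#include <mutex>

// Increment-to-expected barrier: true is returned to exactly one caller per round.
struct PhaseCounter {
  std::mutex lock;
  int received = 0;
  int expected;
  explicit PhaseCounter(int n) : expected(n) {}
  bool arrive() {
    std::lock_guard<std::mutex> guard(lock);
    if (++received == expected) { received = 0; return true; }
    return false;
  }
};

int main() {
  PhaseCounter neighbors(3);             // e.g. numNeighborsExpected == 3
  for (int i = 0; i < 3; ++i)
    if (neighbors.arrive()) printf("arrival %d triggers spreadCharge()\n", i);
  return 0;
}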

◆ sendAtomsToNeighbor()

void ComputePmeCUDADevice::sendAtomsToNeighbor ( int  y,
int  z,
int  atomIval 
)

Definition at line 1478 of file ComputePmeCUDAMgr.C.

References PmeAtomPencilMsg::atoms, PmeAtomPencilMsg::chargeFactors1, PmeAtomPencilMsg::chargeFactors2, PmeAtomPencilMsg::chargeFactors3, PmeAtomPencilMsg::chargeFactors4, PmeAtomPencilMsg::chargeFactors5, PmeAtomPencilMsg::doEnergy, PmeAtomPencilMsg::doVirial, PmeAtomPencilMsg::lattice, PmeAtomPencilMsg::numAtoms, Node::Object(), PRIORITY_SIZE, Node::simParameters, simParams, PmeAtomPencilMsg::simulationStep, PmeAtomPencilMsg::srcY, PmeAtomPencilMsg::srcZ, PmeAtomPencilMsg::y, PmeGrid::yBlocks, PmeAtomPencilMsg::z, and PmeGrid::zBlocks.

1478  {
1479  // Pencil index
1480  int pp = y-ylo + (z-zlo)*yNBlocks;
1481  // This neighbor pencil is done, finish it up before accessing it
1482  pmeAtomStorage[atomIval][pp]->finish();
1483  // Compute destination neighbor pencil index (yt,zt)
1484  int yt = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1485  int zt = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1486  int numAtoms = pmeAtomStorage[atomIval][pp]->getNumAtoms();
1487  CudaAtom* atoms = pmeAtomStorage[atomIval][pp]->getAtoms();
1488  SimParameters *simParams = Node::Object()->simParameters;
1489  PmeAtomPencilMsg* msgPencil;
1490  if (simParams->alchOn) {
1491  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1492  msgPencil = new (numAtoms, numAtoms, numAtoms, 0, 0, 0, PRIORITY_SIZE) PmeAtomPencilMsg;
1493  float* chargeFactors1 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(0);
1494  float* chargeFactors2 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(1);
1495  memcpy(msgPencil->chargeFactors1, chargeFactors1, numAtoms*sizeof(float));
1496  memcpy(msgPencil->chargeFactors2, chargeFactors2, numAtoms*sizeof(float));
1497  }
1498  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == false)) {
1499  msgPencil = new (numAtoms, numAtoms, numAtoms, numAtoms, numAtoms, 0, PRIORITY_SIZE) PmeAtomPencilMsg;
1500  float* chargeFactors1 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(0);
1501  float* chargeFactors2 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(1);
1502  float* chargeFactors3 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(2);
1503  float* chargeFactors4 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(3);
1504  memcpy(msgPencil->chargeFactors1, chargeFactors1, numAtoms*sizeof(float));
1505  memcpy(msgPencil->chargeFactors2, chargeFactors2, numAtoms*sizeof(float));
1506  memcpy(msgPencil->chargeFactors3, chargeFactors3, numAtoms*sizeof(float));
1507  memcpy(msgPencil->chargeFactors4, chargeFactors4, numAtoms*sizeof(float));
1508  }
1509  if (simParams->alchFepOn && simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1510  msgPencil = new (numAtoms, numAtoms, numAtoms, numAtoms, numAtoms, numAtoms, PRIORITY_SIZE) PmeAtomPencilMsg;
1511  float* chargeFactors1 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(0);
1512  float* chargeFactors2 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(1);
1513  float* chargeFactors3 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(2);
1514  float* chargeFactors4 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(3);
1515  float* chargeFactors5 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(4);
1516  memcpy(msgPencil->chargeFactors1, chargeFactors1, numAtoms*sizeof(float));
1517  memcpy(msgPencil->chargeFactors2, chargeFactors2, numAtoms*sizeof(float));
1518  memcpy(msgPencil->chargeFactors3, chargeFactors3, numAtoms*sizeof(float));
1519  memcpy(msgPencil->chargeFactors4, chargeFactors4, numAtoms*sizeof(float));
1520  memcpy(msgPencil->chargeFactors5, chargeFactors5, numAtoms*sizeof(float));
1521  }
1522  if (simParams->alchFepOn && !simParams->alchDecouple && (bool(simParams->alchElecLambdaStart) == true)) {
1523  msgPencil = new (numAtoms, numAtoms, numAtoms, 0, 0, numAtoms, PRIORITY_SIZE) PmeAtomPencilMsg;
1524  float* chargeFactors1 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(0);
1525  float* chargeFactors2 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(1);
1526  float* chargeFactors5 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(4);
1527  memcpy(msgPencil->chargeFactors1, chargeFactors1, numAtoms*sizeof(float));
1528  memcpy(msgPencil->chargeFactors2, chargeFactors2, numAtoms*sizeof(float));
1529  memcpy(msgPencil->chargeFactors5, chargeFactors5, numAtoms*sizeof(float));
1530  }
1531  if (simParams->alchThermIntOn && !simParams->alchDecouple) {
1532  msgPencil = new (numAtoms, numAtoms, numAtoms, 0, 0, numAtoms, PRIORITY_SIZE) PmeAtomPencilMsg;
1533  float* chargeFactors1 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(0);
1534  float* chargeFactors2 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(1);
1535  float* chargeFactors5 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(4);
1536  memcpy(msgPencil->chargeFactors1, chargeFactors1, numAtoms*sizeof(float));
1537  memcpy(msgPencil->chargeFactors2, chargeFactors2, numAtoms*sizeof(float));
1538  memcpy(msgPencil->chargeFactors5, chargeFactors5, numAtoms*sizeof(float));
1539  }
1540  if (simParams->alchThermIntOn && simParams->alchDecouple) {
1541  msgPencil = new (numAtoms, numAtoms, numAtoms, numAtoms, numAtoms, numAtoms, PRIORITY_SIZE) PmeAtomPencilMsg;
1542  float* chargeFactors1 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(0);
1543  float* chargeFactors2 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(1);
1544  float* chargeFactors3 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(2);
1545  float* chargeFactors4 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(3);
1546  float* chargeFactors5 = pmeAtomStorage[atomIval][pp]->getAtomElecFactors(4);
1547  memcpy(msgPencil->chargeFactors1, chargeFactors1, numAtoms*sizeof(float));
1548  memcpy(msgPencil->chargeFactors2, chargeFactors2, numAtoms*sizeof(float));
1549  memcpy(msgPencil->chargeFactors3, chargeFactors3, numAtoms*sizeof(float));
1550  memcpy(msgPencil->chargeFactors4, chargeFactors4, numAtoms*sizeof(float));
1551  memcpy(msgPencil->chargeFactors5, chargeFactors5, numAtoms*sizeof(float));
1552  }
1553  } else {
1554  msgPencil = new (numAtoms, 0, 0, 0, 0, 0, PRIORITY_SIZE) PmeAtomPencilMsg;
1555  }
1556  memcpy(msgPencil->atoms, atoms, numAtoms*sizeof(CudaAtom));
1557  msgPencil->numAtoms = numAtoms;
1558  // Store destination pencil index
1559  msgPencil->y = yt;
1560  msgPencil->z = zt;
1561  // Store source pencil index
1562  msgPencil->srcY = pencilIndexY;
1563  msgPencil->srcZ = pencilIndexZ;
1564  // Store energy and virial flags
1565  msgPencil->doEnergy = doEnergy;
1566  msgPencil->doVirial = doVirial;
1567  msgPencil->simulationStep = simulationStep;
1568  // Store lattice
1569  msgPencil->lattice = lattice;
1570  int node = mgrProxy.ckLocalBranch()->getNode(yt, zt);
1571  mgrProxy[node].recvAtomsFromNeighbor(msgPencil);
1572 }

◆ sendAtomsToNeighbors()

void ComputePmeCUDADevice::sendAtomsToNeighbors ( )

Definition at line 1463 of file ComputePmeCUDAMgr.C.

References registerRecvAtomsFromNeighbor().

Referenced by recvAtoms().

1463  {
1464  for (int z=zlo;z <= zhi;z++) {
1465  for (int y=ylo;y <= yhi;y++) {
1466  // Only send to neighbors, not self
1467  if (y != 0 || z != 0) {
1468  // NOTE: Must send the atomI value since it will change in spreadCharge(), which might occur
1469  // before these sends have been performed
1470  thisProxy[CkMyNode()].sendAtomsToNeighbor(y, z, atomI);
1471  }
1472  }
1473  }
1474  // Register primary pencil
1475  registerRecvAtomsFromNeighbor();
1476 }

◆ sendForcesToNeighbors()

void ComputePmeCUDADevice::sendForcesToNeighbors ( )

Definition at line 1877 of file ComputePmeCUDAMgr.C.

References PmeForcePencilMsg::force, PmeForcePencilMsg::force2, PmeForcePencilMsg::force3, PmeForcePencilMsg::force4, PmeForcePencilMsg::force5, PmeForcePencilMsg::numAtoms, Node::Object(), PRIORITY_SIZE, Node::simParameters, simParams, PmeForcePencilMsg::srcY, PmeForcePencilMsg::srcZ, PmeForcePencilMsg::y, PmeGrid::yBlocks, PmeForcePencilMsg::z, and PmeGrid::zBlocks.

Referenced by gatherForceDone().

1877  {
1878  // Primary pencil has the forces
1879  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1880  int* patchPos = pmeAtomStorage[forceI][pp0]->getPatchPos();
1881  SimParameters *simParams = Node::Object()->simParameters;
1882  const int alchGrid = simParams->alchOn ? 1 : 0;
1883  const int alchDecoupleGrid = simParams->alchDecouple ? 1: 0;
1884  const int alchSoftCoreOrTI = (simParams->alchElecLambdaStart > 0 || simParams->alchThermIntOn) ? 1 : 0;
1885  // Loop through neighboring pencils
1886  for (int z=zlo;z <= zhi;z++) {
1887  for (int y=ylo;y <= yhi;y++) {
1888  // Only send to neighbors, not self
1889  if (y != 0 || z != 0) {
1890  int pp = y-ylo + (z-zlo)*yNBlocks;
1891  int patchIndex = neighborPatchIndex[pp];
1892  int atomStart = (patchIndex == 0) ? 0 : patchPos[patchIndex-1];
1893  int atomEnd = patchPos[patchIndex];
1894  int natom = atomEnd-atomStart;
1895  // copy forces
1896  PmeForcePencilMsg *msg;
1897  msg = new (natom, alchGrid * natom, alchDecoupleGrid * natom,
1898  alchDecoupleGrid * natom, alchSoftCoreOrTI * natom,
1899  PRIORITY_SIZE) PmeForcePencilMsg;
1900  msg->numAtoms = natom;
1901  memcpy(msg->force, forces[0]+atomStart, natom*sizeof(CudaForce));
1902  if (simParams->alchOn) {
1903  memcpy(msg->force2, forces[1]+atomStart, natom*sizeof(CudaForce));
1904  if (simParams->alchDecouple) {
1905  memcpy(msg->force3, forces[2]+atomStart, natom*sizeof(CudaForce));
1906  memcpy(msg->force4, forces[3]+atomStart, natom*sizeof(CudaForce));
1907  }
1908  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
1909  memcpy(msg->force5, forces[4]+atomStart, natom*sizeof(CudaForce));
1910  }
1911  }
1912  // Calculate destination pencil index (dstY, dstZ) for this neighbor
1913  int dstY = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1914  int dstZ = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1915  int node = mgrProxy.ckLocalBranch()->getNode(dstY, dstZ);
1916  msg->y = dstY;
1917  msg->z = dstZ;
1918  // Store source pencil index
1919  msg->srcY = pencilIndexY;
1920  msg->srcZ = pencilIndexZ;
1921  mgrProxy[node].recvForcesFromNeighbor(msg);
1922  }
1923  }
1924  }
1925 }
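
The destination pencil index wraps periodically across the pencil grid: adding yBlocks (or zBlocks) before taking the modulo keeps the result non-negative when the neighbor offset is negative. A minimal sketch of that index arithmetic follows; wrapPencilIndex is an illustrative helper, not a NAMD function.

#include <cassert>

// Periodic wrap of a pencil index plus a signed neighbor offset.
// Adding nBlocks before the modulo keeps the result in [0, nBlocks),
// even for negative offsets (e.g. index 0, offset -1 -> nBlocks - 1).
static int wrapPencilIndex(int index, int offset, int nBlocks) {
  return (index + offset + nBlocks) % nBlocks;
}

int main() {
  assert(wrapPencilIndex(0, -1, 4) == 3);  // wraps below zero
  assert(wrapPencilIndex(3,  1, 4) == 0);  // wraps past the last pencil
  assert(wrapPencilIndex(2,  0, 4) == 2);  // zero offset maps to self
  return 0;
}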

◆ sendForcesToPatch()

void ComputePmeCUDADevice::sendForcesToPatch ( PmeForceMsg forceMsg)

Definition at line 2156 of file ComputePmeCUDAMgr.C.

References PmeForceMsg::compute, Compute::localWorkMsg, PmeForceMsg::pe, and ComputePmeCUDA::storePmeForceMsg().

Referenced by mergeForcesOnPatch().

2156  {
2157  // Now we're on the node that owns PE "pe", so the "compute" pointer is valid
2158  int pe = forceMsg->pe;
2159  ComputePmeCUDA *compute = forceMsg->compute;
2160 
2161  // Store message for use in ComputePmeCUDA, where it'll also be deleted.
2162  if (compute->storePmeForceMsg(forceMsg)) {
2163  // Enqueue on the pe that sent the atoms in the first place
2164  LocalWorkMsg *lmsg = compute->localWorkMsg;
2165  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2166  wdProxy[pe].enqueuePme(lmsg);
2167  }
2168 }
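
The work is enqueued only when storePmeForceMsg() returns true, which presumably signals that the compute object now holds all the force messages it was waiting for. Below is a minimal sketch of that "collect messages, trigger once complete" pattern under that assumption; PendingForces and ForceMsg are illustrative placeholders, not the ComputePmeCUDA implementation.

#include <cstddef>
#include <vector>

// Illustrative "store until complete, then fire once" pattern.
struct ForceMsg { /* payload omitted */ };

class PendingForces {
public:
  explicit PendingForces(std::size_t expected) : expected_(expected) {}
  // Returns true exactly once: when the last expected message arrives.
  bool store(ForceMsg *msg) {
    stored_.push_back(msg);
    return stored_.size() == expected_;
  }
private:
  std::size_t expected_;
  std::vector<ForceMsg*> stored_;
};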

◆ setPencilProxy() [1/3]

void ComputePmeCUDADevice::setPencilProxy ( CProxy_CudaPmePencilXYZ  pmePencilXYZ_in)

Definition at line 1171 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

1171  {
1172  if (pmePencilType != 3)
1173  NAMD_bug("ComputePmeCUDADevice::setPencilProxy(1), invalid pmePencilType");
1174  pmePencilXYZ = pmePencilXYZ_in;
1175 }

◆ setPencilProxy() [2/3]

void ComputePmeCUDADevice::setPencilProxy ( CProxy_CudaPmePencilXY  pmePencilXY_in)

Definition at line 1177 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

1177  {
1178  if (pmePencilType != 2)
1179  NAMD_bug("ComputePmeCUDADevice::setPencilProxy(2), invalid pmePencilType");
1180  pmePencilXY = pmePencilXY_in;
1181 }

◆ setPencilProxy() [3/3]

void ComputePmeCUDADevice::setPencilProxy ( CProxy_CudaPmePencilX  pmePencilX_in)

Definition at line 1183 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

1183  {
1184  if (pmePencilType != 1)
1185  NAMD_bug("ComputePmeCUDADevice::setPencilProxy(3), invalid pmePencilType");
1186  pmePencilX = pmePencilX_in;
1187 }
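
Each overload accepts the proxy matching one decomposition mode, and the stored pmePencilType selects which proxy is used later (see the switch in spreadCharge()): 1 = X pencils, 2 = XY pencils, 3 = a single XYZ solver. A minimal sketch of that guard-and-store pattern follows; PencilMode, PencilHolder, and the int stand-ins for the Charm++ proxies are illustrative, not NAMD types.

#include <stdexcept>

// Illustrative guard-and-store pattern for mutually exclusive proxy setters.
// PencilMode values mirror pmePencilType: 1 = X, 2 = XY, 3 = XYZ.
enum class PencilMode { X = 1, XY = 2, XYZ = 3 };

class PencilHolder {
public:
  explicit PencilHolder(PencilMode mode) : mode_(mode) {}
  void setXYZ(int proxy) { require(PencilMode::XYZ); xyz_ = proxy; }
  void setXY (int proxy) { require(PencilMode::XY);  xy_  = proxy; }
  void setX  (int proxy) { require(PencilMode::X);   x_   = proxy; }
private:
  void require(PencilMode expected) const {
    if (mode_ != expected)
      throw std::runtime_error("setPencilProxy: proxy does not match pencil type");
  }
  PencilMode mode_;
  int xyz_ = 0, xy_ = 0, x_ = 0;  // stand-ins for the Charm++ proxies
};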

◆ spreadCharge()

void ComputePmeCUDADevice::spreadCharge ( )

Definition at line 1647 of file ComputePmeCUDAMgr.C.

References PmeRunMsg::doEnergy, PmeRunMsg::doVirial, PmeRunMsg::lattice, NUM_GRID_MAX, PmeRunMsg::numStrayAtoms, Node::Object(), CudaAtom::q, Node::simParameters, simParams, PmeRunMsg::simulationStep, msm::swap(), CudaAtom::x, CudaAtom::y, and CudaAtom::z.

Referenced by registerRecvAtomsFromNeighbor().

1647  {
1648  // Spread charges in primary pencil
1649  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1650  // Primary pencil is done, finish it up before accessing it
1651  // (clearing is done in mergeForcesOnPatch)
1652  pmeAtomStorage[atomI][pp0]->finish();
1653  // Get the number of atoms and pointer to atoms
1654  int numAtoms = pmeAtomStorage[atomI][pp0]->getNumAtoms();
1655  CudaAtom* atoms = pmeAtomStorage[atomI][pp0]->getAtoms();
1656  SimParameters *simParams = Node::Object()->simParameters;
1657  CudaAtom* atoms2 = NULL;
1658  CudaAtom* atoms3 = NULL;
1659  CudaAtom* atoms4 = NULL;
1660  CudaAtom* atoms5 = NULL;
1661  float* chargeFactors1 = NULL;
1662  float* chargeFactors2 = NULL;
1663  float* chargeFactors3 = NULL;
1664  float* chargeFactors4 = NULL;
1665  float* chargeFactors5 = NULL;
1666  if (simParams->alchOn) {
1667  chargeFactors1 = pmeAtomStorage[atomI][pp0]->getAtomElecFactors(0);
1668  chargeFactors2 = pmeAtomStorage[atomI][pp0]->getAtomElecFactors(1);
1669  allocate_host<CudaAtom>(&atoms2, numAtoms);
1670  if (simParams->alchDecouple) {
1671  chargeFactors3 = pmeAtomStorage[atomI][pp0]->getAtomElecFactors(2);
1672  chargeFactors4 = pmeAtomStorage[atomI][pp0]->getAtomElecFactors(3);
1673  allocate_host<CudaAtom>(&atoms3, numAtoms);
1674  allocate_host<CudaAtom>(&atoms4, numAtoms);
1675  }
1676  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
1677  chargeFactors5 = pmeAtomStorage[atomI][pp0]->getAtomElecFactors(4);
1678  allocate_host<CudaAtom>(&atoms5, numAtoms);
1679  }
1680  }
1681  // Flip atomI <-> forceI
1682  std::swap(atomI, forceI);
1683  // Re-allocate force buffer if needed
1684  reallocate_host<CudaForce>(&forces[0], &forceCapacities[0], numAtoms, 1.5f);
1685  if (simParams->alchOn) {
1686  reallocate_host<CudaForce>(&forces[1], &forceCapacities[1], numAtoms, 1.5f);
1687  if (simParams->alchDecouple) {
1688  reallocate_host<CudaForce>(&forces[2], &forceCapacities[2], numAtoms, 1.5f);
1689  reallocate_host<CudaForce>(&forces[3], &forceCapacities[3], numAtoms, 1.5f);
1690  }
1691  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
1692  reallocate_host<CudaForce>(&forces[4], &forceCapacities[4], numAtoms, 1.5f);
1693  }
1694  }
1695  // Setup patches and atoms
1696  // Lattice lattice = simParams->lattice;
1697  if (simParams->alchOn) {
1698  for (int i = 0; i < numAtoms; ++i) {
1699  // copy atoms and scale the charges with factors
1700  atoms2[i].x = atoms[i].x;
1701  atoms2[i].y = atoms[i].y;
1702  atoms2[i].z = atoms[i].z;
1703  atoms2[i].q = atoms[i].q * chargeFactors2[i];
1704  if (simParams->alchDecouple) {
1705  atoms3[i].x = atoms[i].x;
1706  atoms3[i].y = atoms[i].y;
1707  atoms3[i].z = atoms[i].z;
1708  atoms3[i].q = atoms[i].q * chargeFactors3[i];
1709  atoms4[i].x = atoms[i].x;
1710  atoms4[i].y = atoms[i].y;
1711  atoms4[i].z = atoms[i].z;
1712  atoms4[i].q = atoms[i].q * chargeFactors4[i];
1713  }
1714  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
1715  atoms5[i].x = atoms[i].x;
1716  atoms5[i].y = atoms[i].y;
1717  atoms5[i].z = atoms[i].z;
1718  atoms5[i].q = atoms[i].q * chargeFactors5[i];
1719  }
1720  atoms[i].q *= chargeFactors1[i];
1721  }
1722  pmeRealSpaceComputes[0]->copyAtoms(numAtoms, atoms);
1723  pmeRealSpaceComputes[1]->copyAtoms(numAtoms, atoms2);
1724  if (simParams->alchDecouple) {
1725  pmeRealSpaceComputes[2]->copyAtoms(numAtoms, atoms3);
1726  pmeRealSpaceComputes[3]->copyAtoms(numAtoms, atoms4);
1727  deallocate_host<CudaAtom>(&atoms4);
1728  deallocate_host<CudaAtom>(&atoms3);
1729  }
1730  if (bool(simParams->alchElecLambdaStart) == true || simParams->alchThermIntOn) {
1731  pmeRealSpaceComputes[4]->copyAtoms(numAtoms, atoms5);
1732  deallocate_host<CudaAtom>(&atoms5);
1733  }
1734  deallocate_host<CudaAtom>(&atoms2);
1735  } else {
1736  pmeRealSpaceComputes[0]->copyAtoms(numAtoms, atoms);
1737  }
1738  // Spread charge
1739  beforeWalltime = CmiWallTimer();
1740  for (unsigned int iGrid = 0; iGrid < NUM_GRID_MAX; ++iGrid) {
1741  if (enabledGrid[iGrid] == true) {
1742  pmeRealSpaceComputes[iGrid]->spreadCharge(lattice);
1743  }
1744  }
1745  // Send "charge grid ready to PME solver"
1746  PmeRunMsg *pmeRunMsg = new PmeRunMsg();
1747  pmeRunMsg->doVirial = doVirial;
1748  pmeRunMsg->doEnergy = doEnergy;
1749  pmeRunMsg->simulationStep = simulationStep;
1750  pmeRunMsg->lattice = lattice;
1751  pmeRunMsg->numStrayAtoms = numStrayAtoms;
1752  // Reset stray atom counter
1753  numStrayAtoms = 0;
1754  switch(pmePencilType) {
1755  case 1:
1756  pmePencilX(0, pencilIndexY, pencilIndexZ).chargeGridReady(pmeRunMsg);
1757  break;
1758  case 2:
1759  pmePencilXY(0, 0, pencilIndexZ).chargeGridReady(pmeRunMsg);
1760  break;
1761  case 3:
1762  pmePencilXYZ[0].chargeGridReady(pmeRunMsg);
1763  break;
1764  }
1765 }
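
When alchemical transformations are enabled, the atom positions are duplicated for each extra grid and only the charge is rescaled by that grid's electrostatic factor before copyAtoms() is called. A minimal sketch of that scaling step follows; the Atom struct and scaleCharges helper are illustrative, not NAMD's CudaAtom handling.

#include <cstddef>
#include <vector>

// Illustrative per-grid charge scaling: positions are copied unchanged,
// charges are multiplied by the grid-specific electrostatic factor.
struct Atom { float x, y, z, q; };

static std::vector<Atom> scaleCharges(const std::vector<Atom> &atoms,
                                      const std::vector<float> &factors) {
  std::vector<Atom> scaled(atoms.size());
  for (std::size_t i = 0; i < atoms.size(); ++i) {
    scaled[i] = atoms[i];                   // same position
    scaled[i].q = atoms[i].q * factors[i];  // grid-specific charge
  }
  return scaled;
}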

The documentation for this class was generated from the following files:

ComputePmeCUDAMgr.h
ComputePmeCUDAMgr.C