NAMD
ComputePmeCUDADevice Class Reference

#include <ComputePmeCUDAMgr.h>

Inheritance diagram for ComputePmeCUDADevice:

Public Member Functions

 ComputePmeCUDADevice ()
 
 ComputePmeCUDADevice (CkMigrateMessage *m)
 
 ~ComputePmeCUDADevice ()
 
void initialize (PmeGrid &pmeGrid_in, int pencilIndexY_in, int pencilIndexZ_in, int deviceID_in, int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in, CProxy_PmeAtomFiler pmeAtomFiler_in)
 
int getDeviceID ()
 
cudaStream_t getStream ()
 
CProxy_ComputePmeCUDAMgr getMgrProxy ()
 
void setPencilProxy (CProxy_CudaPmePencilXYZ pmePencilXYZ_in)
 
void setPencilProxy (CProxy_CudaPmePencilXY pmePencilXY_in)
 
void setPencilProxy (CProxy_CudaPmePencilX pmePencilX_in)
 
void activate_pencils ()
 
void initializePatches (int numHomePatches_in)
 
void registerNeighbor ()
 
void recvAtoms (PmeAtomMsg *msg)
 
void sendAtomsToNeighbors ()
 
void sendAtomsToNeighbor (int y, int z, int atomIval)
 
void recvAtomsFromNeighbor (PmeAtomPencilMsg *msg)
 
void registerRecvAtomsFromNeighbor ()
 
void spreadCharge ()
 
void gatherForce ()
 
void gatherForceDone ()
 
void sendForcesToNeighbors ()
 
void recvForcesFromNeighbor (PmeForcePencilMsg *msg)
 
void mergeForcesOnPatch (int homePatchIndex)
 
void sendForcesToPatch (PmeForceMsg *forceMsg)
 
void gatherForceDoneSubset (int first, int last)
 

Detailed Description

Definition at line 297 of file ComputePmeCUDAMgr.h.

Constructor & Destructor Documentation

ComputePmeCUDADevice::ComputePmeCUDADevice ( )

Definition at line 938 of file ComputePmeCUDAMgr.C.

938  {
939  // __sdag_init();
940  numHomePatches = 0;
941  forceCapacity = 0;
942  force = NULL;
943  pmeRealSpaceCompute = NULL;
944  streamCreated = false;
945  lock_numHomePatchesMerged = CmiCreateLock();
946  lock_numPencils = CmiCreateLock();
947  lock_numNeighborsRecv = CmiCreateLock();
948  lock_recvAtoms = CmiCreateLock();
949  numNeighborsExpected = 0;
950  numStrayAtoms = 0;
951  // Reset counters
952  numNeighborsRecv = 0;
953  numHomePatchesRecv = 0;
954  numHomePatchesMerged = 0;
955  atomI = 0;
956  forceI = 1;
957 }
ComputePmeCUDADevice::ComputePmeCUDADevice ( CkMigrateMessage *  m)

Definition at line 959 of file ComputePmeCUDAMgr.C.

959  {
960  // __sdag_init();
961  numHomePatches = 0;
962  forceCapacity = 0;
963  force = NULL;
964  pmeRealSpaceCompute = NULL;
965  streamCreated = false;
966  lock_numHomePatchesMerged = CmiCreateLock();
967  lock_numPencils = CmiCreateLock();
968  lock_numNeighborsRecv = CmiCreateLock();
969  lock_recvAtoms = CmiCreateLock();
970  numNeighborsExpected = 0;
971  numStrayAtoms = 0;
972  // Reset counters
973  numNeighborsRecv = 0;
974  numHomePatchesRecv = 0;
975  numHomePatchesMerged = 0;
976  atomI = 0;
977  forceI = 1;
978 }
ComputePmeCUDADevice::~ComputePmeCUDADevice ( )

Definition at line 980 of file ComputePmeCUDAMgr.C.

References cudaCheck.

980  {
981  if (streamCreated) {
982  cudaCheck(cudaSetDevice(deviceID));
983  cudaCheck(cudaStreamDestroy(stream));
984  }
985  for (int j=0;j < 2;j++)
986  for (int i=0;i < pmeAtomStorage[j].size();i++) {
987  if (pmeAtomStorageAllocatedHere[i]) delete pmeAtomStorage[j][i];
988  }
989  if (force != NULL) deallocate_host<CudaForce>(&force);
990  if (pmeRealSpaceCompute != NULL) delete pmeRealSpaceCompute;
991  CmiDestroyLock(lock_numHomePatchesMerged);
992  CmiDestroyLock(lock_numPencils);
993  CmiDestroyLock(lock_numNeighborsRecv);
994  CmiDestroyLock(lock_recvAtoms);
995 }

Member Function Documentation

void ComputePmeCUDADevice::activate_pencils ( )

Definition at line 1097 of file ComputePmeCUDAMgr.C.

References PmeStartMsg::data, PmeStartMsg::dataSize, PmeRealSpaceCompute::getData(), and PmeRealSpaceCompute::getDataSize().

1097  {
1098  if (pmePencilType == 1) {
1099  PmeStartMsg* pmeStartXMsg = new PmeStartMsg();
1100  pmeStartXMsg->data = pmeRealSpaceCompute->getData();
1101  pmeStartXMsg->dataSize = pmeRealSpaceCompute->getDataSize();
1102  pmePencilX(0, pencilIndexY, pencilIndexZ).start(pmeStartXMsg);
1103  } else if (pmePencilType == 2) {
1104  PmeStartMsg* pmeStartXMsg = new PmeStartMsg();
1105  pmeStartXMsg->data = pmeRealSpaceCompute->getData();
1106  pmeStartXMsg->dataSize = pmeRealSpaceCompute->getDataSize();
1107  pmePencilXY(0, 0, pencilIndexZ).start(pmeStartXMsg);
1108  } else if (pmePencilType == 3) {
1109  PmeStartMsg* pmeStartMsg = new PmeStartMsg();
1110  pmeStartMsg->data = pmeRealSpaceCompute->getData();
1111  pmeStartMsg->dataSize = pmeRealSpaceCompute->getDataSize();
1112  pmePencilXYZ[0].start(pmeStartMsg);
1113  }
1114 }
void ComputePmeCUDADevice::gatherForce ( )

Definition at line 1415 of file ComputePmeCUDAMgr.C.

References CUDA_PME_SPREADCHARGE_EVENT, and PmeRealSpaceCompute::gatherForce().

1415  {
1416  traceUserBracketEvent(CUDA_PME_SPREADCHARGE_EVENT, beforeWalltime, CmiWallTimer());
1417  beforeWalltime = CmiWallTimer();
1418  // (already have the updated lattice)
1419  pmeRealSpaceCompute->gatherForce(lattice, force);
1420  // Set callback that will call gatherForceDone() once gatherForce is done
1421  ((CudaPmeRealSpaceCompute*)pmeRealSpaceCompute)->gatherForceSetCallback(this);
1422  // ((CudaPmeRealSpaceCompute*)pmeRealSpaceCompute)->waitGatherForceDone();
1423  // gatherForceDone();
1424 }
void ComputePmeCUDADevice::gatherForceDone ( )

Definition at line 1450 of file ComputePmeCUDAMgr.C.

References CUDA_PME_GATHERFORCE_EVENT, gatherForceDoneLoop(), Node::Object(), sendForcesToNeighbors(), Node::simParameters, and SimParameters::useCkLoop.

1450  {
1451  // Primary pencil has the forces
1452 
1453  traceUserBracketEvent(CUDA_PME_GATHERFORCE_EVENT, beforeWalltime, CmiWallTimer());
1454 
1455  // Send forces to neighbors
1456  sendForcesToNeighbors();
1457 
1458 #if CMK_SMP && USE_CKLOOP
1459  int useCkLoop = Node::Object()->simParameters->useCkLoop;
1460  if (useCkLoop >= 1) {
1461  CkLoop_Parallelize(gatherForceDoneLoop, 1, (void *)this, CkMyNodeSize(), 0, numHomePatches-1);
1462  } else
1463 #endif
1464 
1465  {
1466  // Loop through home patches and mark the primary pencil as "done"
1467  for (int homePatchIndex=0;homePatchIndex < numHomePatches;homePatchIndex++) {
1468  bool done = false;
1469  // ----------------------------- lock start ---------------------------
1470  // NOTE: We use a node-wide lock here for the entire numPencils[] array, while
1471  // we really only need to lock each element. That would, however, require
1472  // numHomePatches locks.
1473  if (pmePencilType != 3) CmiLock(lock_numPencils);
1474  numPencils[forceI][homePatchIndex]--;
1475  if (numPencils[forceI][homePatchIndex] == 0) done = true;
1476  if (pmePencilType != 3) CmiUnlock(lock_numPencils);
1477  // ----------------------------- lock end ---------------------------
1478  if (done) {
1479  // This home patch is done, launch force merging
1480  thisProxy[CkMyNode()].mergeForcesOnPatch(homePatchIndex);
1481  }
1482  }
1483  }
1484 
1485  // In case we have no home patches, clear the primary pencil storage here
1486  if (numHomePatches == 0) {
1487  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1488  pmeAtomStorage[forceI][pp0]->clear();
1489  }
1490 
1491 }
void ComputePmeCUDADevice::gatherForceDoneSubset ( int  first,
int  last 
)

Definition at line 1431 of file ComputePmeCUDAMgr.C.

References mergeForcesOnPatch().

Referenced by gatherForceDoneLoop().

1431  {
1432  for (int homePatchIndex=first;homePatchIndex <= last;homePatchIndex++) {
1433  bool done = false;
1434  // ----------------------------- lock start ---------------------------
1435  // NOTE: We use a node-wide lock here for the entire numPencils[] array, while
1436  // we really only need to lock each element. That would, however, require
1437  // numHomePatches locks.
1438  if (pmePencilType != 3) CmiLock(lock_numPencils);
1439  numPencils[forceI][homePatchIndex]--;
1440  if (numPencils[forceI][homePatchIndex] == 0) done = true;
1441  if (pmePencilType != 3) CmiUnlock(lock_numPencils);
1442  // ----------------------------- lock end ---------------------------
1443  if (done) {
1444  // This home patch is done, launch force merging
1445  mergeForcesOnPatch(homePatchIndex);
1446  }
1447  }
1448 }
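gatherForceDone() and gatherForceDoneSubset() share the same pattern: a node-level lock protects a per-home-patch countdown of outstanding pencils, and whichever thread drives a counter to zero launches force merging for that patch. The following is a simplified, self-contained sketch of that pattern, with std::mutex standing in for CmiLock and a placeholder merge routine; the names and counts are illustrative only, not taken from the NAMD source.

  #include <cstdio>
  #include <mutex>
  #include <vector>

  std::mutex lock_numPencils;            // stands in for CmiCreateLock()/CmiLock()
  std::vector<int> numPencils;           // pencils still outstanding per home patch

  void mergeForcesOnPatch(int homePatchIndex) {   // placeholder for the real merge
    printf("patch %d done, merging forces\n", homePatchIndex);
  }

  // Called once per contributing pencil for every home patch in [first, last]
  void markPencilDone(int first, int last) {
    for (int homePatchIndex = first; homePatchIndex <= last; homePatchIndex++) {
      bool done = false;
      {
        std::lock_guard<std::mutex> guard(lock_numPencils);
        if (--numPencils[homePatchIndex] == 0) done = true;
      }
      if (done) mergeForcesOnPatch(homePatchIndex);
    }
  }

  int main() {
    numPencils.assign(3, 2);   // 3 home patches, each shared by 2 pencils
    markPencilDone(0, 2);      // first pencil reports in: nothing merges yet
    markPencilDone(0, 2);      // second pencil reports in: all patches merge
    return 0;
  }

The single shared lock is a deliberate trade-off noted in the source comments: one lock for the whole numPencils[] array instead of one lock per home patch.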
int ComputePmeCUDADevice::getDeviceID ( )

Definition at line 1071 of file ComputePmeCUDAMgr.C.

1071  {
1072  return deviceID;
1073 }
CProxy_ComputePmeCUDAMgr ComputePmeCUDADevice::getMgrProxy ( )

Definition at line 1075 of file ComputePmeCUDAMgr.C.

1075  {
1076  return mgrProxy;
1077 }
cudaStream_t ComputePmeCUDADevice::getStream ( )

Definition at line 1067 of file ComputePmeCUDAMgr.C.

1067  {
1068  return stream;
1069 }
void ComputePmeCUDADevice::initialize ( PmeGrid pmeGrid_in,
int  pencilIndexY_in,
int  pencilIndexZ_in,
int  deviceID_in,
int  pmePencilType_in,
CProxy_ComputePmeCUDAMgr  mgrProxy_in,
CProxy_PmeAtomFiler  pmeAtomFiler_in 
)

Definition at line 997 of file ComputePmeCUDAMgr.C.

References createStream(), cudaCheck, y, PmeGrid::yBlocks, z, and PmeGrid::zBlocks.

999  {
1000 
1001  deviceID = deviceID_in;
1002  cudaCheck(cudaSetDevice(deviceID));
1003  pmePencilType = pmePencilType_in;
1004  pmeGrid = pmeGrid_in;
1005  pencilIndexY = pencilIndexY_in;
1006  pencilIndexZ = pencilIndexZ_in;
1007  mgrProxy = mgrProxy_in;
1008  pmeAtomFiler = pmeAtomFiler_in;
1009  // Size of the neighboring pencil grid, max 3x3
1010  yNBlocks = std::min(pmeGrid.yBlocks, 3);
1011  zNBlocks = std::min(pmeGrid.zBlocks, 3);
1012  // Local pencil is at y=0,z=0
1013  if (yNBlocks == 1) {
1014  ylo = 0;
1015  yhi = 0;
1016  } else if (yNBlocks == 2) {
1017  ylo = -1;
1018  yhi = 0;
1019  } else {
1020  ylo = -1;
1021  yhi = 1;
1022  }
1023  if (zNBlocks == 1) {
1024  zlo = 0;
1025  zhi = 0;
1026  } else if (zNBlocks == 2) {
1027  zlo = -1;
1028  zhi = 0;
1029  } else {
1030  zlo = -1;
1031  zhi = 1;
1032  }
1033 
1034  neighborForcePencilMsgs.resize(yNBlocks*zNBlocks, NULL);
1035  // neighborForcePencils.resize(yNBlocks*zNBlocks);
1036  for (int j=0;j < 2;j++)
1037  homePatchIndexList[j].resize(yNBlocks*zNBlocks);
1038  neighborPatchIndex.resize(yNBlocks*zNBlocks);
1039 
1040  pmeAtomStorageAllocatedHere.resize(yNBlocks*zNBlocks, false);
1041  for (int j=0;j < 2;j++) {
1042  pmeAtomStorage[j].resize(yNBlocks*zNBlocks, NULL);
1043  for (int z=zlo;z <= zhi;z++) {
1044  for (int y=ylo;y <= yhi;y++) {
1045  int pp = y-ylo + (z-zlo)*yNBlocks;
1046  int yt = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1047  int zt = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1048  if (y == 0 && z == 0) {
1049  // Primary pencil
1050  pmeAtomStorage[j][pp] = new CudaPmeAtomStorage(pmePencilType != 3);
1051  } else {
1052  pmeAtomStorage[j][pp] = new CpuPmeAtomStorage(pmePencilType != 3);
1053  }
1054  pmeAtomStorageAllocatedHere[pp] = true;
1055  }
1056  }
1057  }
1058 
1059  // Create stream for this device
1060  createStream(stream);
1061  streamCreated = true;
1062  pmeRealSpaceCompute = new CudaPmeRealSpaceCompute(pmeGrid, pencilIndexY, pencilIndexZ,
1063  deviceID, stream);
1064 
1065 }
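The neighbor bookkeeping set up here maps the (at most 3x3) block of pencils surrounding the primary pencil to a flat index pp = (y-ylo) + (z-zlo)*yNBlocks, and wraps absolute pencil coordinates periodically across the grid. Below is a minimal standalone C++ sketch of that arithmetic; the grid sizes and pencil indices are hypothetical, not taken from a real simulation.

  #include <algorithm>
  #include <cstdio>

  int main() {
    // Hypothetical grid decomposition: 4 x 5 pencils in (y, z)
    int yBlocks = 4, zBlocks = 5;
    int pencilIndexY = 0, pencilIndexZ = 4;   // this device's primary pencil

    // Size of the neighboring pencil grid, at most 3 x 3 (as in initialize())
    int yNBlocks = std::min(yBlocks, 3);
    int zNBlocks = std::min(zBlocks, 3);
    int ylo = (yNBlocks == 1) ? 0 : -1;
    int yhi = (yNBlocks <= 2) ? 0 : 1;
    int zlo = (zNBlocks == 1) ? 0 : -1;
    int zhi = (zNBlocks <= 2) ? 0 : 1;

    for (int z = zlo; z <= zhi; z++) {
      for (int y = ylo; y <= yhi; y++) {
        // Flat index into the per-device neighbor arrays
        int pp = (y - ylo) + (z - zlo) * yNBlocks;
        // Absolute pencil coordinates with periodic wrap-around
        int yt = (pencilIndexY + y + yBlocks) % yBlocks;
        int zt = (pencilIndexZ + z + zBlocks) % zBlocks;
        printf("offset (%2d,%2d) -> pp=%d, absolute pencil (%d,%d)\n", y, z, pp, yt, zt);
      }
    }
    return 0;
  }

The same flat index pp is used throughout the class to address pmeAtomStorage, neighborPatchIndex, and neighborForcePencilMsgs.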
void ComputePmeCUDADevice::initializePatches ( int  numHomePatches_in)

Definition at line 1116 of file ComputePmeCUDAMgr.C.

References y, PmeGrid::yBlocks, z, and PmeGrid::zBlocks.

1116  {
1117  numHomePatches = numHomePatches_in;
1118  for (int j=0;j < 2;j++)
1119  numPencils[j].resize(numHomePatches);
1120  for (int j=0;j < 2;j++)
1121  plList[j].resize(numHomePatches);
1122  for (int j=0;j < 2;j++)
1123  homePatchForceMsgs[j].resize(numHomePatches);
1124  // for (int j=0;j < 2;j++)
1125  // numHomeAtoms[j].resize(numHomePatches);
1126  // If we have home patches, register this pencil with the neighbors and with self
1127  if (numHomePatches > 0) {
1128  for (int z=zlo;z <= zhi;z++) {
1129  for (int y=ylo;y <= yhi;y++) {
1130  int yt = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1131  int zt = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1132  int node = mgrProxy.ckLocalBranch()->getNode(yt, zt);
1133  mgrProxy[node].registerNeighbor(yt, zt);
1134  }
1135  }
1136  }
1137 }
void ComputePmeCUDADevice::mergeForcesOnPatch ( int  homePatchIndex)

Definition at line 1584 of file ComputePmeCUDAMgr.C.

References sendForcesToPatch(), CudaForce::x, CudaForce::y, and CudaForce::z.

Referenced by gatherForceDoneSubset().

1584  {
1585  // We have all the forces for this patch => merge on a single Pe
1586 
1587  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1588 
1589  // Message that goes out to the compute
1590  PmeForceMsg *forceMsg = homePatchForceMsgs[forceI][homePatchIndex];
1591 
1592  if (pmePencilType == 3) {
1593  // 3D box => simple memory copy will do
1594  // Location of forces in the force[] array
1595  int* patchPos = pmeAtomStorage[forceI][pp0]->getPatchPos();
1596  // plList[homePatchIndex] array tells you the location of pencils that are sharing this home patch
1597  int pencilPatchIndex = plList[forceI][homePatchIndex][0].pencilPatchIndex;
1598  int atomStart = (pencilPatchIndex == 0) ? 0 : patchPos[pencilPatchIndex-1];
1599  int atomEnd = patchPos[pencilPatchIndex];
1600  int numAtoms = atomEnd-atomStart;
1601  if (forceMsg->zeroCopy) {
1602  // Zero-copy, just pass the pointer
1603  forceMsg->force = force+atomStart;
1604  } else {
1605  memcpy(forceMsg->force, force+atomStart, numAtoms*sizeof(CudaForce));
1606  }
1607  } else {
1608 
1609  // Zero force array
1610  // memset(forceMsg->force, 0, numHomeAtoms[forceI][homePatchIndex]*sizeof(CudaForce));
1611  memset(forceMsg->force, 0, forceMsg->numAtoms*sizeof(CudaForce));
1612 
1613  // Store forces from primary pencil
1614  {
1615  int* patchPos = pmeAtomStorage[forceI][pp0]->getPatchPos();
1616  int* index = pmeAtomStorage[forceI][pp0]->getAtomIndex();
1617  int pencilPatchIndex = plList[forceI][homePatchIndex][0].pencilPatchIndex;
1618  int atomStart = (pencilPatchIndex == 0) ? 0 : patchPos[pencilPatchIndex-1];
1619  int atomEnd = patchPos[pencilPatchIndex];
1620  int numAtoms = atomEnd-atomStart;
1621 
1622  // Copy in local forces that are stored in the force[] array
1623  for (int i=0;i < numAtoms;i++) {
1624  forceMsg->force[index[atomStart + i]] = force[atomStart + i];
1625  }
1626 
1627  }
1628 
1629  // Add forces from neighboring pencils
1630  for (int j=1;j < plList[forceI][homePatchIndex].size();j++) {
1631  int pp = plList[forceI][homePatchIndex][j].pp;
1632  int pencilPatchIndex = plList[forceI][homePatchIndex][j].pencilPatchIndex;
1633 
1634  int* patchPos = pmeAtomStorage[forceI][pp]->getPatchPos();
1635  int* index = pmeAtomStorage[forceI][pp]->getAtomIndex();
1636  int atomStart = (pencilPatchIndex == 0) ? 0 : patchPos[pencilPatchIndex-1];
1637  int atomEnd = patchPos[pencilPatchIndex];
1638  int numAtoms = atomEnd-atomStart;
1639  CudaForce *dstForce = forceMsg->force;
1640  // CudaForce *srcForce = neighborForcePencils[pp].force;
1641  CudaForce *srcForce = neighborForcePencilMsgs[pp]->force;
1642 
1643  for (int i=0;i < numAtoms;i++) {
1644  dstForce[index[atomStart + i]].x += srcForce[atomStart + i].x;
1645  dstForce[index[atomStart + i]].y += srcForce[atomStart + i].y;
1646  dstForce[index[atomStart + i]].z += srcForce[atomStart + i].z;
1647  }
1648 
1649  }
1650  }
1651 
1652  // Clear storage
1653  plList[forceI][homePatchIndex].clear();
1654 
1655  // ----------------------------- lock start ---------------------------
1656  // bool done = false;
1657  CmiLock(lock_numHomePatchesMerged);
1658  numHomePatchesMerged++;
1659  if (numHomePatchesMerged == numHomePatches) {
1660  // Reset counter
1661  numHomePatchesMerged = 0;
1662 
1663  // Delete messages
1664  for (int i=0;i < neighborForcePencilMsgs.size();i++) {
1665  if (neighborForcePencilMsgs[i] != NULL) {
1666  delete neighborForcePencilMsgs[i];
1667  neighborForcePencilMsgs[i] = NULL;
1668  }
1669  }
1670 
1671  // Done merging and sending forces => clear storage
1672  for (int pp=0;pp < homePatchIndexList[forceI].size();pp++)
1673  homePatchIndexList[forceI][pp].clear();
1674  for (int pp=0;pp < pmeAtomStorage[forceI].size();pp++)
1675  pmeAtomStorage[forceI][pp]->clear();
1676 
1677  }
1678  CmiUnlock(lock_numHomePatchesMerged);
1679  // ----------------------------- lock end ---------------------------
1680 
1681  // Patch is done => send over to the node that contains the ComputePmeCUDA compute,
 1682  // this node will then relay the message to the Pe that originally sent the atoms
1683  int pe = forceMsg->pe;
1684  if (CkNodeOf(pe) != CkMyNode())
1685  thisProxy[CkNodeOf(pe)].sendForcesToPatch(forceMsg);
1686  else
1687  sendForcesToPatch(forceMsg);
1688 
1689 }
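The neighbor-pencil branch above scatters pencil-ordered forces back into home-patch order through the per-pencil atom index array, accumulating contributions atom by atom. Here is a small self-contained sketch of that scatter-add, using a simplified CudaForce struct and made-up data (the real code assigns for the primary pencil and accumulates for neighbors).

  #include <cstdio>
  #include <cstring>
  #include <vector>

  struct CudaForce { float x, y, z; };   // simplified stand-in for the real type

  int main() {
    // Home patch with 4 atoms; forces start zeroed as in the non-XYZ branch
    std::vector<CudaForce> patchForce(4);
    std::memset(patchForce.data(), 0, patchForce.size()*sizeof(CudaForce));

    // A pencil holds this patch's atoms at slots [atomStart, atomEnd);
    // index[] maps each slot back to the atom's position in the patch.
    CudaForce pencilForce[] = { {1,0,0}, {0,2,0}, {0,0,3}, {4,0,0} };
    int index[]             = { 2, 0, 3, 1 };
    int atomStart = 0, atomEnd = 4;

    for (int i = 0; i < atomEnd - atomStart; i++) {
      patchForce[index[atomStart + i]].x += pencilForce[atomStart + i].x;
      patchForce[index[atomStart + i]].y += pencilForce[atomStart + i].y;
      patchForce[index[atomStart + i]].z += pencilForce[atomStart + i].z;
    }

    for (size_t i = 0; i < patchForce.size(); i++)
      printf("atom %zu: (%g, %g, %g)\n", i, patchForce[i].x, patchForce[i].y, patchForce[i].z);
    return 0;
  }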
void ComputePmeCUDADevice::recvAtoms ( PmeAtomMsg msg)

Definition at line 1148 of file ComputePmeCUDAMgr.C.

References PmeAtomMsg::atoms, PmeAtomMsg::compute, PmeForceMsg::compute, PmeAtomMsg::doEnergy, PmeAtomMsg::doVirial, PmeAtomFiler::fileAtoms(), PmeAtomFiler::getAtomIndex(), PmeAtomFiler::getNumAtoms(), PmeAtomMsg::lattice, NAMD_bug(), PmeAtomMsg::numAtoms, PmeForceMsg::numAtoms, PmeForceMsg::numStrayAtoms, PmeAtomMsg::pe, PmeForceMsg::pe, PRIORITY_SIZE, sendAtomsToNeighbors(), CudaAtom::x, CudaAtom::y, y, PmeGrid::yBlocks, CudaAtom::z, z, PmeGrid::zBlocks, and PmeForceMsg::zeroCopy.

1148  {
1149 
1150  PmeAtomFiler *pmeAtomFilerPtr = pmeAtomFiler[CkMyPe()].ckLocalBranch();
1151  // Store "virial" and "energy" flags
1152  doVirial = msg->doVirial;
1153  doEnergy = msg->doEnergy;
1154  // Store lattice
1155  lattice = msg->lattice;
1156 
1157  // Primary pencil index
1158  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1159  int p0 = 0;
1160  int pencilPatchIndex[9];
1161  int numStrayAtomsPatch = 0;
1162  if (pmePencilType == 3) {
1163  // 3D box => store atoms directly without index
1164  // NOTE: We don't check for stray atoms here!
1165  pencilPatchIndex[p0] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms);
1166  } else {
1167 
1168  // File atoms
1169  pmeAtomFilerPtr->fileAtoms(msg->numAtoms, msg->atoms, lattice, pmeGrid,
1170  pencilIndexY, pencilIndexZ, ylo, yhi, zlo, zhi);
1171 
1172  // Loop through pencils and add atoms to pencil atom lists
1173  // NOTE: we only store to neighboring pencil if there are atoms to store
1174  int numAtomsCheck = 0;
1175  for (int p=0;p < 9;p++) {
1176 
1177  int y = (p % 3);
1178  int z = (p / 3);
1179 
1180  int pp = y + z*yNBlocks;
1181  int numAtoms = pmeAtomFilerPtr->getNumAtoms(p);
1182  if (pp == pp0) p0 = p;
1183  if (pp == pp0 || numAtoms > 0) {
1184  if (pmeGrid.yBlocks == 1 && pmeGrid.zBlocks == 1 && (y != 0 || z != 0))
1185  NAMD_bug("ComputePmeCUDADevice::recvAtoms, problem with atom filing");
1186  int* index = pmeAtomFilerPtr->getAtomIndex(p);
1187  pencilPatchIndex[p] = pmeAtomStorage[atomI][pp]->addAtomsWithIndex(numAtoms, msg->atoms, index);
1188  // Number of patches in this storage tells you how many home patches contributed and
1189  // homePatchIndex (pe) tells you which patch contributed
1190  numAtomsCheck += numAtoms;
1191  }
1192  }
1193 
1194  // Deal with stray atoms
1195  numStrayAtomsPatch = pmeAtomFilerPtr->getNumAtoms(9);
1196  if (numStrayAtomsPatch > 0) {
1197  int* index = pmeAtomFilerPtr->getAtomIndex(9);
1198  CkPrintf("%d stray charges detected. Up to 10 listed below (index in patch, x, y, z):\n", numStrayAtomsPatch);
1199  for (int i=0;i < std::min(numStrayAtomsPatch, 10);i++) {
1200  int j = index[i];
1201  CkPrintf("%d %f %f %f\n", j, msg->atoms[j].x, msg->atoms[j].y, msg->atoms[j].z);
1202  }
1203  }
1204 
1205  if (numAtomsCheck + numStrayAtomsPatch < msg->numAtoms)
1206  NAMD_bug("ComputePmeCUDADevice::recvAtoms, missing atoms");
1207  }
1208 
1209  // Create storage for home patch forces
1210  PmeForceMsg *forceMsg;
1211  if (pmePencilType == 3 && CkNodeOf(msg->pe) == CkMyNode()) {
1212  // 3D FFT and compute resides on the same node => use zero-copy forces
1213  forceMsg = new (0, PRIORITY_SIZE) PmeForceMsg();
1214  forceMsg->zeroCopy = true;
1215  } else {
1216  forceMsg = new (msg->numAtoms, PRIORITY_SIZE) PmeForceMsg();
1217  forceMsg->zeroCopy = false;
1218  }
1219  forceMsg->numAtoms = msg->numAtoms;
1220  forceMsg->pe = msg->pe;
1221  forceMsg->compute = msg->compute;
1222  forceMsg->numStrayAtoms = numStrayAtomsPatch;
1223 
1224  bool done = false;
1225  // ----------------------------- lock start ---------------------------
1226  // Only after writing has finished, we get homePatchIndex
 1227  // This guarantees that for whichever thread receives "done=true", writing has finished on
1228  // ALL threads.
1229  CmiLock(lock_recvAtoms);
1230  numStrayAtoms += numStrayAtomsPatch;
1231  // Secure homePatchIndex. All writes after this must be inside lock-region
1232  int homePatchIndex = numHomePatchesRecv;
1233  // Store primary pencil first
1234  plList[atomI][homePatchIndex].push_back(PencilLocation(pp0, pencilPatchIndex[p0]));
1235  if (pmePencilType != 3) {
1236  // Go back to through neighboring pencils and store "homePatchIndex"
1237  for (int p=0;p < 9;p++) {
1238 
1239  int y = (p % 3);
1240  int z = (p / 3);
1241 
1242  int pp = y + z*yNBlocks;
1243  int numAtoms = pmeAtomFilerPtr->getNumAtoms(p);
1244  if (pp != pp0 && numAtoms > 0) {
1245  homePatchIndexList[atomI][pp].push_back(homePatchIndex);
1246  // plList[0...numHomePatches-1] = for each home patch stores the location of pencils that are
1247  // sharing it
1248  // plList[homePatchIndex].size() tells the number of pencils that the home patch is shared with
1249  plList[atomI][homePatchIndex].push_back(PencilLocation(pp, pencilPatchIndex[p]));
1250  }
1251  }
1252  }
1253  homePatchForceMsgs[atomI][homePatchIndex] = forceMsg;
1254  // numHomeAtoms[atomI][homePatchIndex] = msg->numAtoms;
1255  // Set the number of pencils contributing to this home patch
1256  numPencils[atomI][homePatchIndex] = plList[atomI][homePatchIndex].size();
1257  //
1258  numHomePatchesRecv++;
1259  if (numHomePatchesRecv == numHomePatches) {
1260  // Reset counter
1261  numHomePatchesRecv = 0;
1262  done = true;
1263  }
1264  CmiUnlock(lock_recvAtoms);
1265  // ----------------------------- lock end ---------------------------
1266 
1267  // plList[atomI][homePatchIndex] array tells you the location of pencils that are sharing this home patch
1268 
1269  delete msg;
1270 
1271  if (done) {
1272  // Pencil has received all home patches and writing to memory is done => send atoms to neighbors
 1273  sendAtomsToNeighbors();
 1274  }
1275 }
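Conceptually, recvAtoms() files each home patch's atoms into one of up to nine pencils (the 3x3 neighborhood around the primary pencil, indexed as p with y = p % 3 and z = p / 3), and anything landing outside that neighborhood is counted as a stray in slot 9, mirroring getNumAtoms(9) above. The sketch below is a simplified, hypothetical stand-in for PmeAtomFiler::fileAtoms; it assumes each atom has already been reduced to the pencil (y, z) it belongs to and omits the real coordinate-to-pencil mapping.

  #include <cstdio>
  #include <utility>
  #include <vector>

  int main() {
    int yBlocks = 6, zBlocks = 6;               // hypothetical pencil grid
    int pencilIndexY = 2, pencilIndexZ = 3;     // primary pencil of this device

    // Hypothetical atoms, already reduced to the pencil (y, z) they fall in
    std::vector<std::pair<int,int>> atomPencil = { {2,3}, {3,3}, {1,2}, {5,0}, {2,4} };

    std::vector<std::vector<int>> file(10);     // 9 neighborhood slots + strays
    for (int i = 0; i < (int)atomPencil.size(); i++) {
      // Offset from the primary pencil, wrapped to the nearest periodic image (simplified)
      int dy = atomPencil[i].first  - pencilIndexY;
      int dz = atomPencil[i].second - pencilIndexZ;
      if (dy >  yBlocks/2) dy -= yBlocks;
      if (dy < -yBlocks/2) dy += yBlocks;
      if (dz >  zBlocks/2) dz -= zBlocks;
      if (dz < -zBlocks/2) dz += zBlocks;
      if (dy >= -1 && dy <= 1 && dz >= -1 && dz <= 1) {
        int p = (dy + 1) + (dz + 1) * 3;        // p in 0..8, y = p % 3, z = p / 3
        file[p].push_back(i);
      } else {
        file[9].push_back(i);                   // stray atom
      }
    }
    for (int p = 0; p < 10; p++)
      printf("slot %d: %zu atoms\n", p, file[p].size());
    return 0;
  }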
void ComputePmeCUDADevice::recvAtomsFromNeighbor ( PmeAtomPencilMsg msg)

Definition at line 1323 of file ComputePmeCUDAMgr.C.

References PmeAtomPencilMsg::atoms, PmeAtomPencilMsg::doEnergy, PmeAtomPencilMsg::doVirial, PmeAtomPencilMsg::lattice, NAMD_bug(), PmeAtomPencilMsg::numAtoms, registerRecvAtomsFromNeighbor(), PmeAtomPencilMsg::srcY, PmeAtomPencilMsg::srcZ, y, PmeGrid::yBlocks, z, and PmeGrid::zBlocks.

1323  {
1324  // Store into primary pencil
1325  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1326  // Compute pencil index relative to primary pencil
1327  int y = msg->srcY - pencilIndexY;
1328  if (y < ylo) y += pmeGrid.yBlocks;
1329  if (y > yhi) y -= pmeGrid.yBlocks;
1330  int z = msg->srcZ - pencilIndexZ;
1331  if (z < zlo) z += pmeGrid.zBlocks;
1332  if (z > zhi) z -= pmeGrid.zBlocks;
1333  if (y < ylo || y > yhi || z < zlo || z > zhi || (y == 0 && z == 0)) {
1334  NAMD_bug("ComputePmeCUDADevice::recvAtomsFromNeighbor, pencil index outside bounds");
1335  }
1336  // Read energy and virial flags
1337  doEnergy = msg->doEnergy;
1338  doVirial = msg->doVirial;
1339  // Read lattice
1340  lattice = msg->lattice;
1341  // Pencil index where atoms came from
1342  int pp = y-ylo + (z-zlo)*yNBlocks;
1343  // Store atoms and mark down the patch index where these atoms were added
1344  neighborPatchIndex[pp] = pmeAtomStorage[atomI][pp0]->addAtoms(msg->numAtoms, msg->atoms);
1345 
1346  delete msg;
1347 
 1348  registerRecvAtomsFromNeighbor();
 1349 }
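The first step above recovers the sender's offset relative to the primary pencil, wrapping across the periodic pencil grid and rejecting anything outside the 3x3 neighborhood. Here is a minimal standalone sketch of that index recovery; the grid sizes and source indices are hypothetical.

  #include <cstdio>

  int main() {
    int yBlocks = 4, zBlocks = 5;
    int pencilIndexY = 0, pencilIndexZ = 4;     // primary pencil
    int ylo = -1, yhi = 1, zlo = -1, zhi = 1;   // full 3x3 neighborhood
    int yNBlocks = 3;

    int srcY = 3, srcZ = 0;                     // source pencil of the message

    int y = srcY - pencilIndexY;
    if (y < ylo) y += yBlocks;
    if (y > yhi) y -= yBlocks;
    int z = srcZ - pencilIndexZ;
    if (z < zlo) z += zBlocks;
    if (z > zhi) z -= zBlocks;

    if (y < ylo || y > yhi || z < zlo || z > zhi || (y == 0 && z == 0)) {
      printf("pencil index outside bounds\n");  // NAMD_bug() in the real code
      return 1;
    }
    int pp = (y - ylo) + (z - zlo) * yNBlocks;  // slot in the neighbor arrays
    printf("source (%d,%d) is offset (%d,%d), slot pp=%d\n", srcY, srcZ, y, z, pp);
    return 0;
  }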
void ComputePmeCUDADevice::recvForcesFromNeighbor ( PmeForcePencilMsg msg)

Definition at line 1529 of file ComputePmeCUDAMgr.C.

References NAMD_bug(), numPatches, PmeForcePencilMsg::srcY, PmeForcePencilMsg::srcZ, y, PmeGrid::yBlocks, z, and PmeGrid::zBlocks.

1529  {
1530 
1531  // Source pencil index
1532  int y = msg->srcY - pencilIndexY;
1533  if (y < ylo) y += pmeGrid.yBlocks;
1534  if (y > yhi) y -= pmeGrid.yBlocks;
1535  int z = msg->srcZ - pencilIndexZ;
1536  if (z < zlo) z += pmeGrid.zBlocks;
1537  if (z > zhi) z -= pmeGrid.zBlocks;
1538 
1539  if (y < ylo || y > yhi || z < zlo || z > zhi || (y == 0 && z == 0)) {
1540  NAMD_bug("ComputePmeCUDADevice::recvForcesFromNeighbor, pencil index outside bounds");
1541  }
1542 
1543  // Source pencil
1544  int pp = y-ylo + (z-zlo)*yNBlocks;
1545 
1546  // Store message (deleted in mergeForcesOnPatch)
1547  neighborForcePencilMsgs[pp] = msg;
1548 
1549  // neighborForcePencils[pp].force = new CudaForce[msg->numAtoms];
1550  // memcpy(neighborForcePencils[pp].force, msg->force, sizeof(CudaForce)*msg->numAtoms);
1551  // neighborForcePencils[pp].numAtoms = msg->numAtoms;
1552  // neighborForcePencils[pp].y = msg->y;
1553  // neighborForcePencils[pp].z = msg->z;
1554  // neighborForcePencils[pp].srcY = msg->srcY;
1555  // neighborForcePencils[pp].srcZ = msg->srcZ;
1556  // delete msg;
1557 
1558  // numPatches = number of home patches this pencil has
1559  int numPatches = pmeAtomStorage[forceI][pp]->getNumPatches();
1560  if (numPatches != homePatchIndexList[forceI][pp].size()) {
1561  NAMD_bug("ComputePmeCUDADevice::recvForcesFromNeighbor, numPatches incorrect");
1562  }
1563  for (int i=0;i < numPatches;i++) {
1564  // this pencil contributed to home patch with index "homePatchIndex"
1565  int homePatchIndex = homePatchIndexList[forceI][pp][i];
1566  // ----------------------------- lock start ---------------------------
 1567  // NOTE: We use a node-wide lock here for the entire numPencils[] array, while
 1568  // we really only need to lock each element. That would, however, require
 1569  // numHomePatches locks.
1570  bool done = false;
1571  CmiLock(lock_numPencils);
1572  numPencils[forceI][homePatchIndex]--;
1573  if (numPencils[forceI][homePatchIndex] == 0) done = true;
1574  CmiUnlock(lock_numPencils);
1575  // ----------------------------- lock end ---------------------------
1576  if (done) {
1577  // This home patch is done, launch force merging
1578  thisProxy[CkMyNode()].mergeForcesOnPatch(homePatchIndex);
1579  }
1580  }
1581 
1582 }
void ComputePmeCUDADevice::registerNeighbor ( )

Definition at line 1139 of file ComputePmeCUDAMgr.C.

1139  {
1140  CmiLock(lock_numHomePatchesMerged);
1141  numNeighborsExpected++;
1142  CmiUnlock(lock_numHomePatchesMerged);
1143 }
void ComputePmeCUDADevice::registerRecvAtomsFromNeighbor ( )

Definition at line 1351 of file ComputePmeCUDAMgr.C.

References spreadCharge().

Referenced by recvAtomsFromNeighbor(), and sendAtomsToNeighbors().

1351  {
1352  // Primary pencil
1353  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1354 
1355  bool done = false;
1356  // ----------------------------- lock start ---------------------------
1357  CmiLock(lock_numNeighborsRecv);
1358  numNeighborsRecv++;
1359  if (numNeighborsRecv == numNeighborsExpected) {
1360  // Reset counter
1361  numNeighborsRecv = 0;
1362  done = true;
1363  }
1364  CmiUnlock(lock_numNeighborsRecv);
1365  // ----------------------------- lock end ---------------------------
1366 
1367  if (done) {
1368  // Primary pencil has received all atoms and writing has finished => spread charge
1369  spreadCharge();
1370  }
1371 }
void ComputePmeCUDADevice::sendAtomsToNeighbor ( int  y,
int  z,
int  atomIval 
)

Definition at line 1295 of file ComputePmeCUDAMgr.C.

References atoms, PmeAtomPencilMsg::atoms, PmeAtomPencilMsg::doEnergy, PmeAtomPencilMsg::doVirial, PmeAtomPencilMsg::lattice, PmeAtomPencilMsg::numAtoms, PRIORITY_SIZE, PmeAtomPencilMsg::srcY, PmeAtomPencilMsg::srcZ, PmeAtomPencilMsg::y, PmeGrid::yBlocks, PmeAtomPencilMsg::z, and PmeGrid::zBlocks.

1295  {
1296  // Pencil index
1297  int pp = y-ylo + (z-zlo)*yNBlocks;
1298  // This neighbor pencil is done, finish it up before accessing it
1299  pmeAtomStorage[atomIval][pp]->finish();
1300  // Compute destination neighbor pencil index (yt,zt)
1301  int yt = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1302  int zt = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1303  int numAtoms = pmeAtomStorage[atomIval][pp]->getNumAtoms();
1304  CudaAtom* atoms = pmeAtomStorage[atomIval][pp]->getAtoms();
1305  PmeAtomPencilMsg* msgPencil = new (numAtoms, PRIORITY_SIZE) PmeAtomPencilMsg;
1306  memcpy(msgPencil->atoms, atoms, numAtoms*sizeof(CudaAtom));
1307  msgPencil->numAtoms = numAtoms;
1308  // Store destination pencil index
1309  msgPencil->y = yt;
1310  msgPencil->z = zt;
1311  // Store source pencil index
1312  msgPencil->srcY = pencilIndexY;
1313  msgPencil->srcZ = pencilIndexZ;
1314  // Store energy and virial flags
1315  msgPencil->doEnergy = doEnergy;
1316  msgPencil->doVirial = doVirial;
1317  // Store lattice
1318  msgPencil->lattice = lattice;
1319  int node = mgrProxy.ckLocalBranch()->getNode(yt, zt);
1320  mgrProxy[node].recvAtomsFromNeighbor(msgPencil);
1321 }
void ComputePmeCUDADevice::sendAtomsToNeighbors ( )

Definition at line 1280 of file ComputePmeCUDAMgr.C.

References registerRecvAtomsFromNeighbor(), y, and z.

Referenced by recvAtoms().

1280  {
1281  for (int z=zlo;z <= zhi;z++) {
1282  for (int y=ylo;y <= yhi;y++) {
1283  // Only send to neighbors, not self
1284  if (y != 0 || z != 0) {
1285  // NOTE: Must send atomI -value since this will change in spreadCharge(), which might occur
1286  // before these sends have been performed
1287  thisProxy[CkMyNode()].sendAtomsToNeighbor(y, z, atomI);
1288  }
1289  }
1290  }
1291  // Register primary pencil
 1292  registerRecvAtomsFromNeighbor();
 1293 }
void ComputePmeCUDADevice::sendForcesToNeighbors ( )

Definition at line 1496 of file ComputePmeCUDAMgr.C.

References PmeForcePencilMsg::force, PmeForcePencilMsg::numAtoms, PRIORITY_SIZE, PmeForcePencilMsg::srcY, PmeForcePencilMsg::srcZ, PmeForcePencilMsg::y, y, PmeGrid::yBlocks, PmeForcePencilMsg::z, z, and PmeGrid::zBlocks.

Referenced by gatherForceDone().

1496  {
1497  // Primary pencil has the forces
1498  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1499  int* patchPos = pmeAtomStorage[forceI][pp0]->getPatchPos();
1500  // Loop through neighboring pencils
1501  for (int z=zlo;z <= zhi;z++) {
1502  for (int y=ylo;y <= yhi;y++) {
1503  // Only send to neighbors, not self
1504  if (y != 0 || z != 0) {
1505  int pp = y-ylo + (z-zlo)*yNBlocks;
1506  int patchIndex = neighborPatchIndex[pp];
1507  int atomStart = (patchIndex == 0) ? 0 : patchPos[patchIndex-1];
1508  int atomEnd = patchPos[patchIndex];
1509  int natom = atomEnd-atomStart;
1510  // copy forces
 1511  PmeForcePencilMsg *msg = new (natom, PRIORITY_SIZE) PmeForcePencilMsg;
 1512  msg->numAtoms = natom;
1513  memcpy(msg->force, force+atomStart, natom*sizeof(CudaForce));
1514  // Calculate destination pencil index (dstY, dstZ) for this neighbor
1515  int dstY = (pencilIndexY + y + pmeGrid.yBlocks) % pmeGrid.yBlocks;
1516  int dstZ = (pencilIndexZ + z + pmeGrid.zBlocks) % pmeGrid.zBlocks;
1517  int node = mgrProxy.ckLocalBranch()->getNode(dstY, dstZ);
1518  msg->y = dstY;
1519  msg->z = dstZ;
1520  // Store source pencil index
1521  msg->srcY = pencilIndexY;
1522  msg->srcZ = pencilIndexZ;
1523  mgrProxy[node].recvForcesFromNeighbor(msg);
1524  }
1525  }
1526  }
1527 }
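Both this routine and mergeForcesOnPatch() locate a patch's atoms through the array returned by getPatchPos(), which holds the running sum of atom counts per stored patch (i.e., each patch's end offset). A small sketch of how the atom range is recovered from it, with made-up counts:

  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<int> patchPos = { 12, 30, 41 };  // 3 patches: 12, 18, 11 atoms

    for (int patchIndex = 0; patchIndex < (int)patchPos.size(); patchIndex++) {
      int atomStart = (patchIndex == 0) ? 0 : patchPos[patchIndex - 1];
      int atomEnd   = patchPos[patchIndex];
      int natom     = atomEnd - atomStart;
      printf("patch %d: atoms [%d, %d), %d atoms\n", patchIndex, atomStart, atomEnd, natom);
    }
    return 0;
  }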
void ComputePmeCUDADevice::sendForcesToPatch ( PmeForceMsg forceMsg)

Definition at line 1691 of file ComputePmeCUDAMgr.C.

References PmeForceMsg::compute, Compute::localWorkMsg, PmeForceMsg::pe, and ComputePmeCUDA::storePmeForceMsg().

Referenced by mergeForcesOnPatch().

1691  {
1692  // Now we're on the node that has Pe, hence "compute" -pointer is valid
1693  int pe = forceMsg->pe;
1694  ComputePmeCUDA *compute = forceMsg->compute;
1695 
1696  // Store message for use in ComputePmeCUDA, where it'll also be deleted.
1697  if (compute->storePmeForceMsg(forceMsg)) {
1698  // Enqueue on the pe that sent the atoms in the first place
1699  LocalWorkMsg *lmsg = compute->localWorkMsg;
1700  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
1701  wdProxy[pe].enqueuePme(lmsg);
1702  }
1703 }
void ComputePmeCUDADevice::setPencilProxy ( CProxy_CudaPmePencilXYZ  pmePencilXYZ_in)

Definition at line 1079 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

1079  {
1080  if (pmePencilType != 3)
1081  NAMD_bug("ComputePmeCUDADevice::setPencilProxy(1), invalid pmePencilType");
1082  pmePencilXYZ = pmePencilXYZ_in;
1083 }
void ComputePmeCUDADevice::setPencilProxy ( CProxy_CudaPmePencilXY  pmePencilXY_in)

Definition at line 1085 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

1085  {
1086  if (pmePencilType != 2)
1087  NAMD_bug("ComputePmeCUDADevice::setPencilProxy(2), invalid pmePencilType");
1088  pmePencilXY = pmePencilXY_in;
1089 }
void ComputePmeCUDADevice::setPencilProxy ( CProxy_CudaPmePencilX  pmePencilX_in)

Definition at line 1091 of file ComputePmeCUDAMgr.C.

References NAMD_bug().

1091  {
1092  if (pmePencilType != 1)
1093  NAMD_bug("ComputePmeCUDADevice::setPencilProxy(3), invalid pmePencilType");
1094  pmePencilX = pmePencilX_in;
1095 }
void ComputePmeCUDADevice::spreadCharge ( )

Definition at line 1373 of file ComputePmeCUDAMgr.C.

References atoms, PmeRealSpaceCompute::copyAtoms(), PmeRunMsg::doEnergy, PmeRunMsg::doVirial, PmeRunMsg::lattice, PmeRunMsg::numStrayAtoms, PmeRealSpaceCompute::spreadCharge(), and msm::swap().

Referenced by registerRecvAtomsFromNeighbor().

1373  {
1374  // Spread charges in primary pencil
1375  int pp0 = 0-ylo + (0-zlo)*yNBlocks;
1376  // Primary pencil is done, finish it up before accessing it
1377  // (clearing is done in mergeForcesOnPatch)
1378  pmeAtomStorage[atomI][pp0]->finish();
1379  // Get the number of atoms and pointer to atoms
1380  int numAtoms = pmeAtomStorage[atomI][pp0]->getNumAtoms();
1381  CudaAtom* atoms = pmeAtomStorage[atomI][pp0]->getAtoms();
1382  // Flip atomI <-> forceI
1383  std::swap(atomI, forceI);
1384  // Re-allocate force buffer if needed
1385  reallocate_host<CudaForce>(&force, &forceCapacity, numAtoms, 1.5f);
1386  // (already have the updated lattice)
1387  pmeRealSpaceCompute->copyAtoms(numAtoms, atoms);
1388  // Spread charge
1389  beforeWalltime = CmiWallTimer();
1390  pmeRealSpaceCompute->spreadCharge(lattice);
1391  // Send "charge grid ready to PME solver"
1392  PmeRunMsg *pmeRunMsg = new PmeRunMsg();
1393  pmeRunMsg->doVirial = doVirial;
1394  pmeRunMsg->doEnergy = doEnergy;
1395  pmeRunMsg->lattice = lattice;
1396  pmeRunMsg->numStrayAtoms = numStrayAtoms;
1397  // Reset stray atom counter
1398  numStrayAtoms = 0;
1399  switch(pmePencilType) {
1400  case 1:
1401  pmePencilX(0, pencilIndexY, pencilIndexZ).chargeGridReady(pmeRunMsg);
1402  break;
1403  case 2:
1404  pmePencilXY(0, 0, pencilIndexZ).chargeGridReady(pmeRunMsg);
1405  break;
1406  case 3:
1407  pmePencilXYZ[0].chargeGridReady(pmeRunMsg);
1408  break;
1409  }
1410 }
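The std::swap of atomI and forceI implements a simple double buffer: while forces for the current step are gathered and merged out of the forceI side, atoms for the next step can already be received into the atomI side. A minimal sketch of that index flip over a few steps:

  #include <cstdio>
  #include <utility>

  int main() {
    int atomI = 0, forceI = 1;   // initial values, as in the constructor

    for (int step = 0; step < 3; step++) {
      printf("step %d: receive atoms into buffer %d\n", step, atomI);
      std::swap(atomI, forceI);  // done at spread-charge time
      printf("step %d: gather/merge forces from buffer %d\n", step, forceI);
    }
    return 0;
  }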

The documentation for this class was generated from the following files:
ComputePmeCUDAMgr.h
ComputePmeCUDAMgr.C