NAMD
CudaPmeRealSpaceCompute Class Reference

#include <CudaPmeSolverUtil.h>

Inheritance: CudaPmeRealSpaceCompute inherits PmeRealSpaceCompute.

Public Member Functions

 CudaPmeRealSpaceCompute (PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
 
 ~CudaPmeRealSpaceCompute ()
 
void copyAtoms (const int numAtoms, const CudaAtom *atoms)
 
void spreadCharge (Lattice &lattice)
 
void gatherForce (Lattice &lattice, CudaForce *force)
 
void gatherForceSetCallback (ComputePmeCUDADevice *devicePtr_in)
 
void waitGatherForceDone ()
 
- Public Member Functions inherited from PmeRealSpaceCompute
 PmeRealSpaceCompute (PmeGrid pmeGrid, const int jblock, const int kblock, unsigned int grid=0)
 
virtual ~PmeRealSpaceCompute ()
 
float * getData ()
 
int getDataSize ()
 
void setGrid (unsigned int i)
 

Additional Inherited Members

- Static Public Member Functions inherited from PmeRealSpaceCompute
static double calcGridCoord (const double x, const double recip11, const int nfftx)
 
static void calcGridCoord (const double x, const double y, const double z, const double recip11, const double recip22, const double recip33, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
 
static void calcGridCoord (const float x, const float y, const float z, const float recip11, const float recip22, const float recip33, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
 
static void calcGridCoord (const float x, const float y, const float z, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
 
static void calcGridCoord (const double x, const double y, const double z, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
 
- Protected Attributes inherited from PmeRealSpaceCompute
int numAtoms
 
PmeGrid pmeGrid
 
int y0
 
int z0
 
int xsize
 
int ysize
 
int zsize
 
int dataSize
 
float * data
 
const int jblock
 
const int kblock
 
unsigned int multipleGridIndex
 

Detailed Description

Definition at line 112 of file CudaPmeSolverUtil.h.

Constructor & Destructor Documentation

◆ CudaPmeRealSpaceCompute()

CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)

Definition at line 542 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, PmeRealSpaceCompute::dataSize, NAMD_bug(), PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::ysize, and PmeRealSpaceCompute::zsize.

543  :
544  PmeRealSpaceCompute(pmeGrid, jblock, kblock), deviceID(deviceID), stream(stream) {
545  if (dataSize < xsize*ysize*zsize)
546  NAMD_bug("CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute, insufficient dataSize");
547  cudaCheck(cudaSetDevice(deviceID));
548  d_atomsCapacity = 0;
549  d_atoms = NULL;
550  d_forceCapacity = 0;
551  d_force = NULL;
552  #ifdef NAMD_CUDA
553  tex_data = NULL;
554  tex_data_len = 0;
555  #else
556  grid_data = NULL;
557  grid_data_len = 0;
558  #endif
559  allocate_device<float>(&data, dataSize);
560  setupGridData(data, xsize*ysize*zsize);
561  cudaCheck(cudaEventCreate(&gatherForceEvent));
562 }
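
The constructor allocates the device-side grid buffer and creates gatherForceEvent, which later tracks force gathering. Below is a minimal caller sketch, assuming a populated PmeGrid, host-side CudaAtom/CudaForce buffers, and a single-block decomposition (jblock = kblock = 0); this is illustrative usage, not code from the NAMD sources, and the reciprocal-space solve between spreading and gathering is elided.

#include <cuda_runtime.h>
#include "CudaPmeSolverUtil.h"  // CudaPmeRealSpaceCompute, CudaAtom, CudaForce, cudaCheck

void runRealSpacePass(PmeGrid pmeGrid, Lattice &lattice, int numAtoms,
                      const CudaAtom *atoms, CudaForce *forces) {
  cudaStream_t stream;
  cudaCheck(cudaStreamCreate(&stream));

  // One compute covering the whole grid on device 0.
  CudaPmeRealSpaceCompute rsc(pmeGrid, 0, 0, /*deviceID=*/0, stream);

  rsc.copyAtoms(numAtoms, atoms);    // asynchronous H->D upload of atom data
  rsc.spreadCharge(lattice);         // spread charges onto the real-space grid
  // ... FFT, reciprocal-space solve, and inverse FFT would run here ...
  rsc.gatherForce(lattice, forces);  // gather forces, copy them back to the host
  rsc.waitGatherForceDone();         // block until gatherForceEvent has fired

  cudaCheck(cudaStreamDestroy(stream));
}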

◆ ~CudaPmeRealSpaceCompute()

CudaPmeRealSpaceCompute::~CudaPmeRealSpaceCompute()

Definition at line 567 of file CudaPmeSolverUtil.C.

References cudaCheck, and PmeRealSpaceCompute::data.

567  {
568  cudaCheck(cudaSetDevice(deviceID));
569  if (d_atoms != NULL) deallocate_device<CudaAtom>(&d_atoms);
570  if (d_force != NULL) deallocate_device<CudaForce>(&d_force);
571  // if (d_patches != NULL) deallocate_device<PatchInfo>(&d_patches);
572  // deallocate_device<double>(&d_selfEnergy);
573  deallocate_device<float>(&data);
574  cudaCheck(cudaEventDestroy(gatherForceEvent));
575 }

Member Function Documentation

◆ copyAtoms()

void CudaPmeRealSpaceCompute::copyAtoms(const int numAtoms, const CudaAtom *atoms) [virtual]

Implements PmeRealSpaceCompute.

Definition at line 598 of file CudaPmeSolverUtil.C.

References cudaCheck, and PmeRealSpaceCompute::numAtoms.

598  {
599  cudaCheck(cudaSetDevice(deviceID));
600  this->numAtoms = numAtoms;
601 
602  // Reallocate device arrays as necessary
603  reallocate_device<CudaAtom>(&d_atoms, &d_atomsCapacity, numAtoms, 1.5f);
604 
605  // Copy atom data to device
606  copy_HtoD<CudaAtom>(atoms, d_atoms, numAtoms, stream);
607 }
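
reallocate_device grows the buffer only when the requested element count exceeds the stored capacity, over-allocating by the given factor (1.5 here) so that small step-to-step changes in atom count do not force a reallocation on every call. A minimal sketch of that pattern, with illustrative names rather than the actual NAMD helpers; note the old contents are discarded, which is acceptable here because the atoms are rewritten immediately afterwards:

#include <cuda_runtime.h>

template <typename T>
void reallocateDevice(T **ptr, int *capacity, int required, float factor) {
  if (required <= *capacity) return;     // still fits: reuse the buffer
  if (*ptr != nullptr) cudaFree(*ptr);   // old contents are not preserved
  *capacity = (int)(required * factor);  // over-allocate to absorb future growth
  cudaMalloc((void **)ptr, *capacity * sizeof(T));  // error checking elided
}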

◆ gatherForce()

void CudaPmeRealSpaceCompute::gatherForce(Lattice &lattice, CudaForce *force) [virtual]

Implements PmeRealSpaceCompute.

Definition at line 763 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, gather_force(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_EVENT_START, NAMD_EVENT_STOP, PmeRealSpaceCompute::numAtoms, PmeGrid::order, PmeRealSpaceCompute::pmeGrid, PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::y0, PmeGrid::yBlocks, PmeRealSpaceCompute::ysize, PmeRealSpaceCompute::z0, PmeGrid::zBlocks, and PmeRealSpaceCompute::zsize.

763  {
764  cudaCheck(cudaSetDevice(deviceID));
765 
766  NAMD_EVENT_START(1, NamdProfileEvent::GATHER_FORCE);
767 
768  // Re-allocate force array if needed
769  reallocate_device<CudaForce>(&d_force, &d_forceCapacity, numAtoms, 1.5f);
770 
771 #ifdef TESTPID
772  if (1) {
773  fprintf(stderr, "AP gather force arguments\n");
774  fprintf(stderr, "numAtoms = %d\n", numAtoms);
775  fprintf(stderr, "pmeGrid.K1 = %d\n", pmeGrid.K1);
776  fprintf(stderr, "pmeGrid.K2 = %d\n", pmeGrid.K2);
777  fprintf(stderr, "pmeGrid.K3 = %d\n", pmeGrid.K3);
778  fprintf(stderr, "xsize = %d\n", xsize);
779  fprintf(stderr, "ysize = %d\n", ysize);
780  fprintf(stderr, "zsize = %d\n", zsize);
781  fprintf(stderr, "y0 = %d\n", y0);
782  fprintf(stderr, "z0 = %d\n", z0);
783  fprintf(stderr, "(pmeGrid.yBlocks == 1) = %d\n", (pmeGrid.yBlocks == 1));
784  fprintf(stderr, "(pmeGrid.zBlocks == 1) = %d\n", (pmeGrid.zBlocks == 1));
785  fprintf(stderr, "pmeGrid.order = %d\n", pmeGrid.order);
786  fprintf(stderr, "gridTexObj = %p\n", gridTexObj);
787  }
788 #endif
789  // The patch-level PME kernels are only used for the GPU-resident code path. The default constructor
790  // of PatchLevelPmeData will initialize the compatibility variables to false, so the patch-level kernels
791  // won't be used here.
792  PatchLevelPmeData patchLevelPmeData;
793  gather_force(patchLevelPmeData,
794  (const float4*)d_atoms, numAtoms,
795  pmeGrid.K1, pmeGrid.K2, pmeGrid.K3,
796  xsize, ysize, zsize, xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
797  data, pmeGrid.order, (float3*)d_force,
798 #ifdef NAMD_CUDA
799  gridTexObj,
800 #endif
801  stream);
802 #ifdef TESTPID
803  if (1) {
804  cudaCheck(cudaStreamSynchronize(stream));
805  fprintf(stderr, "AP GATHER FORCE\n");
806  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
807  float *xyz;
808  int natoms = numAtoms;
809  allocate_host<float>(&xyz, 3*natoms);
810  copy_DtoH<float>((float*)d_force, xyz, 3*natoms, stream);
811  cudaCheck(cudaStreamSynchronize(stream));
812  TestArray_write<float>("gather_force_good.bin",
813  "gather force good", xyz, 3*natoms);
814  deallocate_host<float>(&xyz);
815  }
816 #endif
817 
818  copy_DtoH<CudaForce>(d_force, force, numAtoms, stream);
819 
820  cudaCheck(cudaEventRecord(gatherForceEvent, stream));
821 
822  NAMD_EVENT_STOP(1, NamdProfileEvent::GATHER_FORCE);
823 }
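
Recording gatherForceEvent right after the asynchronous device-to-host copy marks a completion point in the stream, so callers can later wait on exactly this work (waitGatherForceDone) or poll it (the callback path). A short self-contained sketch of the idiom using only the standard CUDA runtime API, with illustrative buffer names:

#include <cuda_runtime.h>
#include "CudaUtils.h"  // NAMD's cudaCheck macro

// Queue an async D->H copy, mark it with an event, then block on the event.
void copyForcesAndWait(CudaForce *h_force, const CudaForce *d_force,
                       int numAtoms, cudaStream_t stream) {
  cudaEvent_t done;
  cudaCheck(cudaEventCreate(&done));
  cudaCheck(cudaMemcpyAsync(h_force, d_force, numAtoms * sizeof(CudaForce),
                            cudaMemcpyDeviceToHost, stream));
  cudaCheck(cudaEventRecord(done, stream));  // completion marker in the stream
  cudaCheck(cudaEventSynchronize(done));     // waits for the copy (and prior work)
  cudaCheck(cudaEventDestroy(done));
}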

◆ gatherForceSetCallback()

void CudaPmeRealSpaceCompute::gatherForceSetCallback(ComputePmeCUDADevice *devicePtr_in)

Definition at line 715 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

715  {
716  cudaCheck(cudaSetDevice(deviceID));
717  devicePtr = devicePtr_in;
718  checkCount = 0;
719  CcdCallBacksReset(0, CmiWallTimer());
720  // Schedule the completion-check callback to fire after 0.1 ms
721  CcdCallFnAfter(cuda_gatherforce_check, this, 0.1);
722 }
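
cuda_gatherforce_check is presumably a Converse timer callback that polls gatherForceEvent and re-arms itself every 0.1 ms until the event has fired. A hedged sketch of that polling test with the standard cudaEventQuery API (the surrounding scheduling and notification steps are omitted):

#include <cuda_runtime.h>

// Returns true once the gather and its D->H copy have completed; a timer
// callback would re-invoke this until it does.
bool gatherForceDone(cudaEvent_t gatherForceEvent) {
  cudaError_t err = cudaEventQuery(gatherForceEvent);
  if (err == cudaSuccess) return true;         // event fired: forces are ready
  if (err == cudaErrorNotReady) return false;  // still in flight: poll again later
  // any other value is a genuine CUDA error (NAMD routes these through cudaCheck)
  return false;
}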

◆ spreadCharge()

void CudaPmeRealSpaceCompute::spreadCharge(Lattice &lattice) [virtual]

Implements PmeRealSpaceCompute.

Definition at line 612 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_EVENT_START, NAMD_EVENT_STOP, PmeRealSpaceCompute::numAtoms, PmeGrid::order, PmeRealSpaceCompute::pmeGrid, spread_charge(), PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::y0, PmeGrid::yBlocks, PmeRealSpaceCompute::ysize, PmeRealSpaceCompute::z0, PmeGrid::zBlocks, and PmeRealSpaceCompute::zsize.

612  {
613  cudaCheck(cudaSetDevice(deviceID));
614 #if 0
615  if (1) {
616  static int step = 0;
617  float *xyzq;
618  int natoms = numAtoms;
619  allocate_host<float>(&xyzq, 4*natoms);
620  copy_DtoH<float>((float *)d_atoms, xyzq, 4*natoms, stream);
621  cudaCheck(cudaStreamSynchronize(stream));
622  char fname[64], remark[64];
623  sprintf(fname, "pme_atoms_xyzq_soa_%d.bin", step);
624  sprintf(remark, "SOA PME atoms xyzq, step %d\n", step);
625  TestArray_write<float>(fname, remark, xyzq, 4*natoms);
626  deallocate_host<float>(&xyzq);
627  step += 2;
628  }
629 #endif
630 
631  NAMD_EVENT_START(1, NamdProfileEvent::SPREAD_CHARGE);
632 
633  // Clear grid
634  clear_device_array<float>(data, xsize*ysize*zsize, stream);
635 
636 #if defined(TESTPID)
637  fprintf(stderr, "Calling spread_charge with parameters:\n");
638  fprintf(stderr, "numAtoms = %d\n", numAtoms);
639  fprintf(stderr, "pmeGrid.K1 = %d\n", pmeGrid.K1);
640  fprintf(stderr, "pmeGrid.K2 = %d\n", pmeGrid.K2);
641  fprintf(stderr, "pmeGrid.K3 = %d\n", pmeGrid.K3);
642  fprintf(stderr, "xsize = %d\n", xsize);
643  fprintf(stderr, "ysize = %d\n", ysize);
644  fprintf(stderr, "zsize = %d\n", zsize);
645  fprintf(stderr, "y0 = %d\n", y0);
646  fprintf(stderr, "z0 = %d\n", z0);
647  fprintf(stderr, "(pmeGrid.yBlocks == 1) = %d\n", (pmeGrid.yBlocks == 1));
648  fprintf(stderr, "(pmeGrid.zBlocks == 1) = %d\n", (pmeGrid.zBlocks == 1));
649  fprintf(stderr, "pmeGrid.order = %d\n", pmeGrid.order);
650 #endif
651  spread_charge((const float4*)d_atoms, numAtoms,
652  pmeGrid.K1, pmeGrid.K2, pmeGrid.K3, xsize, ysize, zsize,
653  xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
654  data, pmeGrid.order, stream);
655 #ifdef TESTPID
656  if (1) {
657  cudaCheck(cudaStreamSynchronize(stream));
658  fprintf(stderr, "AP SPREAD CHARGES\n");
659  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
660  float *xyzq;
661  allocate_host<float>(&xyzq, 4*numAtoms);
662  copy_DtoH<float>((float *)d_atoms, xyzq, 4*numAtoms, stream);
663  int gridlen = pmeGrid.K1 * pmeGrid.K2 * pmeGrid.K3;
664  float *grid;
665  allocate_host<float>(&grid, gridlen);
666  copy_DtoH<float>(data, grid, gridlen, stream);
667  cudaCheck(cudaStreamSynchronize(stream));
668  TestArray_write<float>("xyzq_good.bin", "xyzq good", xyzq, 4*numAtoms);
669  TestArray_write<float>("charge_grid_good.bin", "charge grid good",
670  grid, gridlen);
671  deallocate_host<float>(&xyzq);
672  deallocate_host<float>(&grid);
673  }
674 #endif
675 
676  // ncall++;
677 
678  // if (ncall == 1) writeRealToDisk(data, xsize*ysize*zsize, "data.txt");
679  NAMD_EVENT_STOP(1, NamdProfileEvent::SPREAD_CHARGE);
680 }
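
The grid is cleared at the start of every call because the spread kernel accumulates charge into it rather than overwriting it. clear_device_array is NAMD's helper; for reference, a sketch of the equivalent operation with the plain CUDA runtime:

#include <cuda_runtime.h>
#include "CudaUtils.h"  // NAMD's cudaCheck macro

// Zero this block's xsize*ysize*zsize sub-grid asynchronously; stream ordering
// ensures the clear finishes before the subsequent spread kernel runs.
void clearGrid(float *data, int xsize, int ysize, int zsize, cudaStream_t stream) {
  size_t bytes = sizeof(float) * (size_t)xsize * (size_t)ysize * (size_t)zsize;
  cudaCheck(cudaMemsetAsync(data, 0, bytes, stream));
}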

◆ waitGatherForceDone()

void CudaPmeRealSpaceCompute::waitGatherForceDone()

Definition at line 724 of file CudaPmeSolverUtil.C.

References cudaCheck.

724  {
725  cudaCheck(cudaSetDevice(deviceID));
726  cudaCheck(cudaEventSynchronize(gatherForceEvent));
727 }

The documentation for this class was generated from the following files:

CudaPmeSolverUtil.h
CudaPmeSolverUtil.C