NAMD
CudaPmeRealSpaceCompute Class Reference

#include <CudaPmeSolverUtil.h>

Inheritance diagram for CudaPmeRealSpaceCompute:
PmeRealSpaceCompute

Public Member Functions

 CudaPmeRealSpaceCompute (PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
 
 ~CudaPmeRealSpaceCompute ()
 
void copyAtoms (const int numAtoms, const CudaAtom *atoms)
 
void spreadCharge (Lattice &lattice)
 
void gatherForce (Lattice &lattice, CudaForce *force)
 
void gatherForceSetCallback (ComputePmeCUDADevice *devicePtr_in)
 
void waitGatherForceDone ()
 
- Public Member Functions inherited from PmeRealSpaceCompute
 PmeRealSpaceCompute (PmeGrid pmeGrid, const int jblock, const int kblock, unsigned int grid=0)
 
virtual ~PmeRealSpaceCompute ()
 
float * getData ()
 
int getDataSize ()
 
void setGrid (unsigned int i)
 

Additional Inherited Members

- Static Public Member Functions inherited from PmeRealSpaceCompute
static double calcGridCoord (const double x, const double recip11, const int nfftx)
 
static void calcGridCoord (const double x, const double y, const double z, const double recip11, const double recip22, const double recip33, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
 
static void calcGridCoord (const float x, const float y, const float z, const float recip11, const float recip22, const float recip33, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
 
static void calcGridCoord (const float x, const float y, const float z, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
 
static void calcGridCoord (const double x, const double y, const double z, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
 
- Protected Attributes inherited from PmeRealSpaceCompute
int numAtoms
 
PmeGrid pmeGrid
 
int y0
 
int z0
 
int xsize
 
int ysize
 
int zsize
 
int dataSize
 
float * data
 
const int jblock
 
const int kblock
 
unsigned int multipleGridIndex
 

Detailed Description

CudaPmeRealSpaceCompute implements the real-space part of PME on a single CUDA device: it uploads atoms, spreads their charges onto the PME grid, and gathers per-atom forces back from the grid, tracking completion with a CUDA event.

Definition at line 112 of file CudaPmeSolverUtil.h.
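
The expected call sequence, as a hedged sketch (the names rsc, cudaAtoms, lattice, and forces are illustrative; the FFT and reciprocal-space solve between spreading and gathering happen elsewhere in the solver):

  // Hedged usage sketch; error handling and the reciprocal-space solve omitted.
  rsc.copyAtoms(numAtoms, cudaAtoms);  // upload positions and charges to the device
  rsc.spreadCharge(lattice);           // atoms -> charge grid, enqueued on rsc's stream
  // ... FFT and reciprocal-space solve on the grid data happen elsewhere ...
  rsc.gatherForce(lattice, forces);    // grid -> per-atom forces (asynchronous)
  rsc.waitGatherForceDone();           // block until the forces are valid on the host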

Constructor & Destructor Documentation

◆ CudaPmeRealSpaceCompute()

CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute ( PmeGrid  pmeGrid,
const int  jblock,
const int  kblock,
int  deviceID,
cudaStream_t  stream 
)

Definition at line 545 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, PmeRealSpaceCompute::dataSize, NAMD_bug(), PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::ysize, and PmeRealSpaceCompute::zsize.

546  :
547  PmeRealSpaceCompute(pmeGrid, jblock, kblock), deviceID(deviceID), stream(stream) {
548  if (dataSize < xsize*ysize*zsize)
549  NAMD_bug("CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute, insufficient dataSize");
550  cudaCheck(cudaSetDevice(deviceID));
551  d_atomsCapacity = 0;
552  d_atoms = NULL;
553  d_forceCapacity = 0;
554  d_force = NULL;
555  #ifdef NAMD_CUDA
556  tex_data = NULL;
557  tex_data_len = 0;
558  #else
559  grid_data = NULL;
560  grid_data_len = 0;
561  #endif
562  allocate_device<float>(&data, dataSize);
563  setupGridData(data, xsize*ysize*zsize);
564  cudaCheck(cudaEventCreate(&gatherForceEvent));
565 }
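For orientation, a minimal construction sketch (hypothetical, not from the NAMD sources; it assumes a fully configured PmeGrid named pmeGrid and a valid deviceID):

  // pmeGrid is assumed to be set up elsewhere (K1/K2/K3, order, yBlocks/zBlocks).
  // jblock = kblock = 0 selects the only block of an undecomposed grid.
  cudaStream_t stream;
  cudaCheck(cudaStreamCreate(&stream));
  CudaPmeRealSpaceCompute rsc(pmeGrid, 0, 0, deviceID, stream);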

◆ ~CudaPmeRealSpaceCompute()

CudaPmeRealSpaceCompute::~CudaPmeRealSpaceCompute ( )

Definition at line 570 of file CudaPmeSolverUtil.C.

References cudaCheck, and PmeRealSpaceCompute::data.

570  {
571  cudaCheck(cudaSetDevice(deviceID));
572  if (d_atoms != NULL) deallocate_device<CudaAtom>(&d_atoms);
573  if (d_force != NULL) deallocate_device<CudaForce>(&d_force);
574  // if (d_patches != NULL) deallocate_device<PatchInfo>(&d_patches);
575  // deallocate_device<double>(&d_selfEnergy);
576  deallocate_device<float>(&data);
577  cudaCheck(cudaEventDestroy(gatherForceEvent));
578 }

Member Function Documentation

◆ copyAtoms()

void CudaPmeRealSpaceCompute::copyAtoms ( const int  numAtoms,
const CudaAtom *  atoms 
)
virtual

Implements PmeRealSpaceCompute.

Definition at line 601 of file CudaPmeSolverUtil.C.

References cudaCheck, and PmeRealSpaceCompute::numAtoms.

601  {
602  cudaCheck(cudaSetDevice(deviceID));
603  this->numAtoms = numAtoms;
604 
605  // Reallocate device arrays as necessary
606  reallocate_device<CudaAtom>(&d_atoms, &d_atomsCapacity, numAtoms, 1.5f);
607 
608  // Copy atom data to device
609  copy_HtoD<CudaAtom>(atoms, d_atoms, numAtoms, stream);
610 }
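The 1.5f passed to reallocate_device is an over-allocation factor. A standalone sketch of the same grow-on-demand pattern (illustrative only; NAMD's reallocate_device in CudaUtils.h differs in detail), assuming old contents need not be preserved because copyAtoms overwrites the buffer immediately:

  #include <cuda_runtime.h>

  template <typename T>
  void reallocate_device_sketch(T **pp, int *capacity, int requested, float fac) {
    if (requested <= *capacity) return;         // existing buffer is large enough
    if (*pp != NULL) cudaFree(*pp);             // contents are about to be overwritten
    *capacity = (int)(fac * (float)requested);  // grow past the request to amortize
    cudaMalloc((void **)pp, (size_t)(*capacity) * sizeof(T));
  }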

◆ gatherForce()

void CudaPmeRealSpaceCompute::gatherForce ( Lattice &  lattice,
CudaForce *  force 
)
virtual

Implements PmeRealSpaceCompute.

Definition at line 766 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, gather_force(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_EVENT_START, NAMD_EVENT_STOP, PmeRealSpaceCompute::numAtoms, PmeGrid::order, PmeRealSpaceCompute::pmeGrid, PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::y0, PmeGrid::yBlocks, PmeRealSpaceCompute::ysize, PmeRealSpaceCompute::z0, PmeGrid::zBlocks, and PmeRealSpaceCompute::zsize.

766  {
767  cudaCheck(cudaSetDevice(deviceID));
768 
769  NAMD_EVENT_START(1, NamdProfileEvent::GATHER_FORCE);
770 
771  // Re-allocate force array if needed
772  reallocate_device<CudaForce>(&d_force, &d_forceCapacity, numAtoms, 1.5f);
773 
774 #ifdef TESTPID
775  if (1) {
776  fprintf(stderr, "AP gather force arguments\n");
777  fprintf(stderr, "numAtoms = %d\n", numAtoms);
778  fprintf(stderr, "pmeGrid.K1 = %d\n", pmeGrid.K1);
779  fprintf(stderr, "pmeGrid.K2 = %d\n", pmeGrid.K2);
780  fprintf(stderr, "pmeGrid.K3 = %d\n", pmeGrid.K3);
781  fprintf(stderr, "xsize = %d\n", xsize);
782  fprintf(stderr, "ysize = %d\n", ysize);
783  fprintf(stderr, "zsize = %d\n", zsize);
784  fprintf(stderr, "y0 = %d\n", y0);
785  fprintf(stderr, "z0 = %d\n", z0);
786  fprintf(stderr, "(pmeGrid.yBlocks == 1) = %d\n", (pmeGrid.yBlocks == 1));
787  fprintf(stderr, "(pmeGrid.zBlocks == 1) = %d\n", (pmeGrid.zBlocks == 1));
788  fprintf(stderr, "pmeGrid.order = %d\n", pmeGrid.order);
789  fprintf(stderr, "gridTexObj = %p\n", gridTexObj);
790  }
791 #endif
792  // The patch-level PME kernels are only used for the GPU-resident code path. The default constructor
793  // of PatchLevelPmeData will initialize the compatibility variables to false, so the patch-level kernels
794  // won't be used here.
795  PatchLevelPmeData patchLevelPmeData;
796  gather_force(patchLevelPmeData,
797  (const float4*)d_atoms, numAtoms,
798  pmeGrid.K1, pmeGrid.K2, pmeGrid.K3,
799  xsize, ysize, zsize, xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
800  data, pmeGrid.order, (float3*)d_force,
801 #ifdef NAMD_CUDA
802  gridTexObj,
803 #endif
804  stream);
805 #ifdef TESTPID
806  if (1) {
807  cudaCheck(cudaStreamSynchronize(stream));
808  fprintf(stderr, "AP GATHER FORCE\n");
809  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
810  float *xyz;
811  int natoms = numAtoms;
812  allocate_host<float>(&xyz, 3*natoms);
813  copy_DtoH<float>((float*)d_force, xyz, 3*natoms, stream);
814  cudaCheck(cudaStreamSynchronize(stream));
815  TestArray_write<float>("gather_force_good.bin",
816  "gather force good", xyz, 3*natoms);
817  deallocate_host<float>(&xyz);
818  }
819 #endif
820 
821  copy_DtoH<CudaForce>(d_force, force, numAtoms, stream);
822 
823  cudaCheck(cudaEventRecord(gatherForceEvent, stream));
824 
825  NAMD_EVENT_STOP(1, NamdProfileEvent::GATHER_FORCE);
826 }
Referenced kernel wrapper:

void gather_force(const PatchLevelPmeData patchLevelPmeData, const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, const float *data, const int order, float3 *force, const cudaTextureObject_t gridTexObj, cudaStream_t stream)
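gatherForce() only enqueues work: the gather_force kernel, the device-to-host copy of d_force, and gatherForceEvent are all placed on the same stream, so the call returns before the forces are valid on the host. A hedged sketch of that enqueue-then-record protocol in isolation (names are illustrative):

  #include <cuda_runtime.h>

  void enqueue_and_mark(CudaForce *d_force, CudaForce *h_force, int numAtoms,
                        cudaEvent_t doneEvent, cudaStream_t stream) {
    // ... the gather_force kernel launch would be enqueued on `stream` here ...
    cudaMemcpyAsync(h_force, d_force, numAtoms * sizeof(CudaForce),
                    cudaMemcpyDeviceToHost, stream);
    cudaEventRecord(doneEvent, stream);  // fires once the kernel and copy finish
    // The host must cudaEventSynchronize(doneEvent), or poll cudaEventQuery(),
    // before reading h_force; cf. waitGatherForceDone() below.
  }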

◆ gatherForceSetCallback()

void CudaPmeRealSpaceCompute::gatherForceSetCallback ( ComputePmeCUDADevice *  devicePtr_in)

Definition at line 718 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

718  {
719  cudaCheck(cudaSetDevice(deviceID));
720  devicePtr = devicePtr_in;
721  checkCount = 0;
722  CcdCallBacksReset(0, CmiWallTimer());
723  // Set the call back at 0.1ms
724  CcdCallFnAfter(cuda_gatherforce_check, this, 0.1);
725 }
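The scheduled function cuda_gatherforce_check is not shown on this page. A plausible sketch of such a poll (hypothetical name and body), assuming it queries the gather-force event without blocking and re-arms the 0.1 ms timer until the event has fired:

  #include <cuda_runtime.h>

  // Hypothetical poll; the real cuda_gatherforce_check differs in detail.
  void cuda_gatherforce_check_sketch(void *arg, double walltime) {
    cudaEvent_t ev = *(cudaEvent_t *)arg;
    if (cudaEventQuery(ev) == cudaSuccess) {
      // event has fired: notify the waiting ComputePmeCUDADevice here
    } else {
      // not done yet: poll again in 0.1 ms without blocking the scheduler
      CcdCallFnAfter(cuda_gatherforce_check_sketch, arg, 0.1);
    }
  }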

◆ spreadCharge()

void CudaPmeRealSpaceCompute::spreadCharge ( Lattice &  lattice)
virtual

Implements PmeRealSpaceCompute.

Definition at line 615 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_EVENT_START, NAMD_EVENT_STOP, PmeRealSpaceCompute::numAtoms, PmeGrid::order, PmeRealSpaceCompute::pmeGrid, spread_charge(), PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::y0, PmeGrid::yBlocks, PmeRealSpaceCompute::ysize, PmeRealSpaceCompute::z0, PmeGrid::zBlocks, and PmeRealSpaceCompute::zsize.

615  {
616  cudaCheck(cudaSetDevice(deviceID));
617 #if 0
618  if (1) {
619  static int step = 0;
620  float *xyzq;
621  int natoms = numAtoms;
622  allocate_host<float>(&xyzq, 4*natoms);
623  copy_DtoH<float>((float *)d_atoms, xyzq, 4*natoms, stream);
624  cudaCheck(cudaStreamSynchronize(stream));
625  char fname[64], remark[64];
626  sprintf(fname, "pme_atoms_xyzq_soa_%d.bin", step);
627  sprintf(remark, "SOA PME atoms xyzq, step %d\n", step);
628  TestArray_write<float>(fname, remark, xyzq, 4*natoms);
629  deallocate_host<float>(&xyzq);
630  step += 2;
631  }
632 #endif
633 
634  NAMD_EVENT_START(1, NamdProfileEvent::SPREAD_CHARGE);
635 
636  // Clear grid
637  clear_device_array<float>(data, xsize*ysize*zsize, stream);
638 
639 #if defined(TESTPID)
640  fprintf(stderr, "Calling spread_charge with parameters:\n");
641  fprintf(stderr, "numAtoms = %d\n", numAtoms);
642  fprintf(stderr, "pmeGrid.K1 = %d\n", pmeGrid.K1);
643  fprintf(stderr, "pmeGrid.K2 = %d\n", pmeGrid.K2);
644  fprintf(stderr, "pmeGrid.K3 = %d\n", pmeGrid.K3);
645  fprintf(stderr, "xsize = %d\n", xsize);
646  fprintf(stderr, "ysize = %d\n", ysize);
647  fprintf(stderr, "zsize = %d\n", zsize);
648  fprintf(stderr, "y0 = %d\n", y0);
649  fprintf(stderr, "z0 = %d\n", z0);
650  fprintf(stderr, "(pmeGrid.yBlocks == 1) = %d\n", (pmeGrid.yBlocks == 1));
651  fprintf(stderr, "(pmeGrid.zBlocks == 1) = %d\n", (pmeGrid.zBlocks == 1));
652  fprintf(stderr, "pmeGrid.order = %d\n", pmeGrid.order);
653 #endif
654  spread_charge((const float4*)d_atoms, numAtoms,
655  pmeGrid.K1, pmeGrid.K2, pmeGrid.K3,
656  xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
657  data, pmeGrid.order, stream);
658 #ifdef TESTPID
659  if (1) {
660  cudaCheck(cudaStreamSynchronize(stream));
661  fprintf(stderr, "AP SPREAD CHARGES\n");
662  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
663  float *xyzq;
664  allocate_host<float>(&xyzq, 4*numAtoms);
665  copy_DtoH<float>((float *)d_atoms, xyzq, 4*numAtoms, stream);
666  int gridlen = pmeGrid.K1 * pmeGrid.K2 * pmeGrid.K3;
667  float *grid;
668  allocate_host<float>(&grid, gridlen);
669  copy_DtoH<float>(data, grid, gridlen, stream);
670  cudaCheck(cudaStreamSynchronize(stream));
671  TestArray_write<float>("xyzq_good.bin", "xyzq good", xyzq, 4*numAtoms);
672  TestArray_write<float>("charge_grid_good.bin", "charge grid good",
673  grid, gridlen);
674  deallocate_host<float>(&xyzq);
675  deallocate_host<float>(&grid);
676  }
677 #endif
678 
679  // ncall++;
680 
681  // if (ncall == 1) writeRealToDisk(data, xsize*ysize*zsize, "data.txt");
682  NAMD_EVENT_STOP(1, NamdProfileEvent::SPREAD_CHARGE);
683 }
Referenced kernel wrapper:

void spread_charge(const float4 *atoms, const int numAtoms, const int nfftx, const int nffty, const int nfftz, const int xsize, const int ysize, const int zsize, const int xdim, const int y00, const int z00, const bool periodicY, const bool periodicZ, float *data, const int order, cudaStream_t stream)
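spread_charge deposits each charge onto pmeGrid.order B-spline points per dimension around the atom's fractional grid coordinate. The inherited calcGridCoord overloads compute that coordinate; a hedged sketch of the mapping (illustrative, not the exact NAMD arithmetic):

  #include <math.h>

  // Maps a Cartesian x into [0, nfftx) fractional grid units. recip11 is the
  // (1,1) element of the reciprocal lattice; for an orthorhombic cell, 1/Lx.
  static double calcGridCoord_sketch(double x, double recip11, int nfftx) {
    double w = x * recip11;    // fractional coordinate, may lie outside [0,1)
    w -= floor(w);             // wrap into [0,1) (periodic image)
    return (double)nfftx * w;  // scale to grid units
  }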

◆ waitGatherForceDone()

void CudaPmeRealSpaceCompute::waitGatherForceDone ( )

Definition at line 727 of file CudaPmeSolverUtil.C.

References cudaCheck.

727  {
728  cudaCheck(cudaSetDevice(deviceID));
729  cudaCheck(cudaEventSynchronize(gatherForceEvent));
730 }

The documentation for this class was generated from the following files:
CudaPmeSolverUtil.h
CudaPmeSolverUtil.C