NAMD
CudaPmeRealSpaceCompute Class Reference

#include <CudaPmeSolverUtil.h>

Inheritance: CudaPmeRealSpaceCompute inherits PmeRealSpaceCompute.

Public Member Functions

 CudaPmeRealSpaceCompute (PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
 
 ~CudaPmeRealSpaceCompute ()
 
void copyAtoms (const int numAtoms, const CudaAtom *atoms)
 
void spreadCharge (Lattice &lattice)
 
void gatherForce (Lattice &lattice, CudaForce *force)
 
void gatherForceSetCallback (ComputePmeCUDADevice *devicePtr_in)
 
void waitGatherForceDone ()
 
- Public Member Functions inherited from PmeRealSpaceCompute
 PmeRealSpaceCompute (PmeGrid pmeGrid, const int jblock, const int kblock, unsigned int grid=0)
 
virtual ~PmeRealSpaceCompute ()
 
float * getData ()
 
int getDataSize ()
 
void setGrid (unsigned int i)
 

Additional Inherited Members

- Static Public Member Functions inherited from PmeRealSpaceCompute
static double calcGridCoord (const double x, const double recip11, const int nfftx)
 
static void calcGridCoord (const double x, const double y, const double z, const double recip11, const double recip22, const double recip33, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
 
static void calcGridCoord (const float x, const float y, const float z, const float recip11, const float recip22, const float recip33, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
 
static void calcGridCoord (const float x, const float y, const float z, const int nfftx, const int nffty, const int nfftz, float &frx, float &fry, float &frz)
 
static void calcGridCoord (const double x, const double y, const double z, const int nfftx, const int nffty, const int nfftz, double &frx, double &fry, double &frz)
 
- Protected Attributes inherited from PmeRealSpaceCompute
int numAtoms
 
PmeGrid pmeGrid
 
int y0
 
int z0
 
int xsize
 
int ysize
 
int zsize
 
int dataSize
 
float * data
 
const int jblock
 
const int kblock
 
unsigned int multipleGridIndex
 

Detailed Description

Definition at line 112 of file CudaPmeSolverUtil.h.

Constructor & Destructor Documentation

◆ CudaPmeRealSpaceCompute()

CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)

Definition at line 542 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, PmeRealSpaceCompute::dataSize, NAMD_bug(), PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::ysize, and PmeRealSpaceCompute::zsize.

543  :
544  PmeRealSpaceCompute(pmeGrid, jblock, kblock), deviceID(deviceID), stream(stream) {
545  if (dataSize < xsize*ysize*zsize)
546  NAMD_bug("CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute, insufficient dataSize");
547  cudaCheck(cudaSetDevice(deviceID));
548  d_atomsCapacity = 0;
549  d_atoms = NULL;
550  d_forceCapacity = 0;
551  d_force = NULL;
552  #ifdef NAMD_CUDA
553  tex_data = NULL;
554  tex_data_len = 0;
555  #else
556  grid_data = NULL;
557  grid_data_len = 0;
558  #endif
559  allocate_device<float>(&data, dataSize);
560  setupGridData(data, xsize*ysize*zsize);
561  cudaCheck(cudaEventCreate(&gatherForceEvent));
562 }
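
The constructor allocates the device-side grid buffer and creates gatherForceEvent, which later tracks force gathering. Below is a minimal caller sketch, assuming a populated PmeGrid, host-side CudaAtom/CudaForce buffers, and a single-block decomposition (jblock = kblock = 0); this is illustrative usage, not code from the NAMD sources, and the reciprocal-space solve between spreading and gathering is elided.

#include <cuda_runtime.h>
#include "CudaPmeSolverUtil.h"  // CudaPmeRealSpaceCompute, CudaAtom, CudaForce, cudaCheck

void runRealSpacePass(PmeGrid pmeGrid, Lattice &lattice, int numAtoms,
                      const CudaAtom *atoms, CudaForce *forces) {
  cudaStream_t stream;
  cudaCheck(cudaStreamCreate(&stream));

  // One compute covering the whole grid on device 0.
  CudaPmeRealSpaceCompute rsc(pmeGrid, 0, 0, /*deviceID=*/0, stream);

  rsc.copyAtoms(numAtoms, atoms);    // asynchronous H->D upload of atom data
  rsc.spreadCharge(lattice);         // spread charges onto the real-space grid
  // ... FFT, reciprocal-space solve, and inverse FFT would run here ...
  rsc.gatherForce(lattice, forces);  // gather forces, copy them back to the host
  rsc.waitGatherForceDone();         // block until gatherForceEvent has fired

  cudaCheck(cudaStreamDestroy(stream));
}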

◆ ~CudaPmeRealSpaceCompute()

CudaPmeRealSpaceCompute::~CudaPmeRealSpaceCompute()

Definition at line 567 of file CudaPmeSolverUtil.C.

References cudaCheck, and PmeRealSpaceCompute::data.

567  {
568  cudaCheck(cudaSetDevice(deviceID));
569  if (d_atoms != NULL) deallocate_device<CudaAtom>(&d_atoms);
570  if (d_force != NULL) deallocate_device<CudaForce>(&d_force);
571  // if (d_patches != NULL) deallocate_device<PatchInfo>(&d_patches);
572  // deallocate_device<double>(&d_selfEnergy);
573  deallocate_device<float>(&data);
574  cudaCheck(cudaEventDestroy(gatherForceEvent));
575 }

Member Function Documentation

◆ copyAtoms()

void CudaPmeRealSpaceCompute::copyAtoms(const int numAtoms, const CudaAtom *atoms) [virtual]

Implements PmeRealSpaceCompute.

Definition at line 598 of file CudaPmeSolverUtil.C.

References cudaCheck, and PmeRealSpaceCompute::numAtoms.

598  {
599  cudaCheck(cudaSetDevice(deviceID));
600  this->numAtoms = numAtoms;
601 
602  // Reallocate device arrays as necessary
603  reallocate_device<CudaAtom>(&d_atoms, &d_atomsCapacity, numAtoms, 1.5f);
604 
605  // Copy atom data to device
606  copy_HtoD<CudaAtom>(atoms, d_atoms, numAtoms, stream);
607 }
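
reallocate_device grows the buffer only when the requested element count exceeds the stored capacity, over-allocating by the given factor (1.5 here) so that small step-to-step changes in atom count do not force a reallocation on every call. A minimal sketch of that pattern, with illustrative names rather than the actual NAMD helpers; note the old contents are discarded, which is acceptable here because the atoms are rewritten immediately afterwards:

#include <cuda_runtime.h>

template <typename T>
void reallocateDevice(T **ptr, int *capacity, int required, float factor) {
  if (required <= *capacity) return;     // still fits: reuse the buffer
  if (*ptr != nullptr) cudaFree(*ptr);   // old contents are not preserved
  *capacity = (int)(required * factor);  // over-allocate to absorb future growth
  cudaMalloc((void **)ptr, *capacity * sizeof(T));  // error checking elided
}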

◆ gatherForce()

void CudaPmeRealSpaceCompute::gatherForce(Lattice &lattice, CudaForce *force) [virtual]

Implements PmeRealSpaceCompute.

Definition at line 763 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, gather_force(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_EVENT_START, NAMD_EVENT_STOP, PmeRealSpaceCompute::numAtoms, PmeGrid::order, PmeRealSpaceCompute::pmeGrid, PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::y0, PmeGrid::yBlocks, PmeRealSpaceCompute::ysize, PmeRealSpaceCompute::z0, PmeGrid::zBlocks, and PmeRealSpaceCompute::zsize.

763  {
764  cudaCheck(cudaSetDevice(deviceID));
765 
766  NAMD_EVENT_START(1, NamdProfileEvent::GATHER_FORCE);
767 
768  // Re-allocate force array if needed
769  reallocate_device<CudaForce>(&d_force, &d_forceCapacity, numAtoms, 1.5f);
770 
771 #ifdef TESTPID
772  if (1) {
773  fprintf(stderr, "AP gather force arguments\n");
774  fprintf(stderr, "numAtoms = %d\n", numAtoms);
775  fprintf(stderr, "pmeGrid.K1 = %d\n", pmeGrid.K1);
776  fprintf(stderr, "pmeGrid.K2 = %d\n", pmeGrid.K2);
777  fprintf(stderr, "pmeGrid.K3 = %d\n", pmeGrid.K3);
778  fprintf(stderr, "xsize = %d\n", xsize);
779  fprintf(stderr, "ysize = %d\n", ysize);
780  fprintf(stderr, "zsize = %d\n", zsize);
781  fprintf(stderr, "y0 = %d\n", y0);
782  fprintf(stderr, "z0 = %d\n", z0);
783  fprintf(stderr, "(pmeGrid.yBlocks == 1) = %d\n", (pmeGrid.yBlocks == 1));
784  fprintf(stderr, "(pmeGrid.zBlocks == 1) = %d\n", (pmeGrid.zBlocks == 1));
785  fprintf(stderr, "pmeGrid.order = %d\n", pmeGrid.order);
786  fprintf(stderr, "gridTexObj = %p\n", gridTexObj);
787  }
788 #endif
789  // The patch-level PME kernels are only used for the GPU-resident code path. The default constructor
790  // of PatchLevelPmeData will initialize the compatibility variables to false, so the patch-level kernels
791  // won't be used here.
792  PatchLevelPmeData patchLevelPmeData;
793  gather_force(patchLevelPmeData,
794  (const float4*)d_atoms, numAtoms,
795  pmeGrid.K1, pmeGrid.K2, pmeGrid.K3,
796  xsize, ysize, zsize, xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
797  data, pmeGrid.order, (float3*)d_force,
798 #ifdef NAMD_CUDA
799  gridTexObj,
800 #endif
801  stream);
802 #ifdef TESTPID
803  if (1) {
804  cudaCheck(cudaStreamSynchronize(stream));
805  fprintf(stderr, "AP GATHER FORCE\n");
806  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
807  float *xyz;
808  int natoms = numAtoms;
809  allocate_host<float>(&xyz, 3*natoms);
810  copy_DtoH<float>((float*)d_force, xyz, 3*natoms, stream);
811  cudaCheck(cudaStreamSynchronize(stream));
812  TestArray_write<float>("gather_force_good.bin",
813  "gather force good", xyz, 3*natoms);
814  deallocate_host<float>(&xyz);
815  }
816 #endif
817 
818  copy_DtoH<CudaForce>(d_force, force, numAtoms, stream);
819 
820  cudaCheck(cudaEventRecord(gatherForceEvent, stream));
821 
822  NAMD_EVENT_STOP(1, NamdProfileEvent::GATHER_FORCE);
823 }
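
Recording gatherForceEvent right after the asynchronous device-to-host copy marks a completion point in the stream, so callers can later wait on exactly this work (waitGatherForceDone) or poll it (the callback path). A short self-contained sketch of the idiom using only the standard CUDA runtime API, with illustrative buffer names:

#include <cuda_runtime.h>
#include "CudaUtils.h"  // NAMD's cudaCheck macro

// Queue an async D->H copy, mark it with an event, then block on the event.
void copyForcesAndWait(CudaForce *h_force, const CudaForce *d_force,
                       int numAtoms, cudaStream_t stream) {
  cudaEvent_t done;
  cudaCheck(cudaEventCreate(&done));
  cudaCheck(cudaMemcpyAsync(h_force, d_force, numAtoms * sizeof(CudaForce),
                            cudaMemcpyDeviceToHost, stream));
  cudaCheck(cudaEventRecord(done, stream));  // completion marker in the stream
  cudaCheck(cudaEventSynchronize(done));     // waits for the copy (and prior work)
  cudaCheck(cudaEventDestroy(done));
}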

◆ gatherForceSetCallback()

void CudaPmeRealSpaceCompute::gatherForceSetCallback(ComputePmeCUDADevice *devicePtr_in)

Definition at line 715 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

715  {
716  cudaCheck(cudaSetDevice(deviceID));
717  devicePtr = devicePtr_in;
718  checkCount = 0;
719  CcdCallBacksReset(0, CmiWallTimer());
720  // Schedule the completion-check callback to fire after 0.1 ms
721  CcdCallFnAfter(cuda_gatherforce_check, this, 0.1);
722 }
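
cuda_gatherforce_check is presumably a Converse timer callback that polls gatherForceEvent and re-arms itself every 0.1 ms until the event has fired. A hedged sketch of that polling test with the standard cudaEventQuery API (the surrounding scheduling and notification steps are omitted):

#include <cuda_runtime.h>

// Returns true once the gather and its D->H copy have completed; a timer
// callback would re-invoke this until it does.
bool gatherForceDone(cudaEvent_t gatherForceEvent) {
  cudaError_t err = cudaEventQuery(gatherForceEvent);
  if (err == cudaSuccess) return true;         // event fired: forces are ready
  if (err == cudaErrorNotReady) return false;  // still in flight: poll again later
  // any other value is a genuine CUDA error (NAMD routes these through cudaCheck)
  return false;
}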

◆ spreadCharge()

void CudaPmeRealSpaceCompute::spreadCharge(Lattice &lattice) [virtual]

Implements PmeRealSpaceCompute.

Definition at line 612 of file CudaPmeSolverUtil.C.

References cudaCheck, PmeRealSpaceCompute::data, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_EVENT_START, NAMD_EVENT_STOP, PmeRealSpaceCompute::numAtoms, PmeGrid::order, PmeRealSpaceCompute::pmeGrid, spread_charge(), PmeRealSpaceCompute::xsize, PmeRealSpaceCompute::y0, PmeGrid::yBlocks, PmeRealSpaceCompute::ysize, PmeRealSpaceCompute::z0, PmeGrid::zBlocks, and PmeRealSpaceCompute::zsize.

612  {
613  cudaCheck(cudaSetDevice(deviceID));
614 #if 0
615  if (1) {
616  static int step = 0;
617  float *xyzq;
618  int natoms = numAtoms;
619  allocate_host<float>(&xyzq, 4*natoms);
620  copy_DtoH<float>((float *)d_atoms, xyzq, 4*natoms, stream);
621  cudaCheck(cudaStreamSynchronize(stream));
622  char fname[64], remark[64];
623  sprintf(fname, "pme_atoms_xyzq_soa_%d.bin", step);
624  sprintf(remark, "SOA PME atoms xyzq, step %d\n", step);
625  TestArray_write<float>(fname, remark, xyzq, 4*natoms);
626  deallocate_host<float>(&xyzq);
627  step += 2;
628  }
629 #endif
630 
631  NAMD_EVENT_START(1, NamdProfileEvent::SPREAD_CHARGE);
632 
633  // Clear grid
634  clear_device_array<float>(data, xsize*ysize*zsize, stream);
635 
636 #if defined(TESTPID)
637  fprintf(stderr, "Calling spread_charge with parameters:\n");
638  fprintf(stderr, "numAtoms = %d\n", numAtoms);
639  fprintf(stderr, "pmeGrid.K1 = %d\n", pmeGrid.K1);
640  fprintf(stderr, "pmeGrid.K2 = %d\n", pmeGrid.K2);
641  fprintf(stderr, "pmeGrid.K3 = %d\n", pmeGrid.K3);
642  fprintf(stderr, "xsize = %d\n", xsize);
643  fprintf(stderr, "ysize = %d\n", ysize);
644  fprintf(stderr, "zsize = %d\n", zsize);
645  fprintf(stderr, "y0 = %d\n", y0);
646  fprintf(stderr, "z0 = %d\n", z0);
647  fprintf(stderr, "(pmeGrid.yBlocks == 1) = %d\n", (pmeGrid.yBlocks == 1));
648  fprintf(stderr, "(pmeGrid.zBlocks == 1) = %d\n", (pmeGrid.zBlocks == 1));
649  fprintf(stderr, "pmeGrid.order = %d\n", pmeGrid.order);
650 #endif
651  spread_charge((const float4*)d_atoms, numAtoms,
652  pmeGrid.K1, pmeGrid.K2, pmeGrid.K3, xsize, ysize, zsize,
653  xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
654  data, pmeGrid.order, stream);
655 #ifdef TESTPID
656  if (1) {
657  cudaCheck(cudaStreamSynchronize(stream));
658  fprintf(stderr, "AP SPREAD CHARGES\n");
659  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
660  float *xyzq;
661  allocate_host<float>(&xyzq, 4*numAtoms);
662  copy_DtoH<float>((float *)d_atoms, xyzq, 4*numAtoms, stream);
663  int gridlen = pmeGrid.K1 * pmeGrid.K2 * pmeGrid.K3;
664  float *grid;
665  allocate_host<float>(&grid, gridlen);
666  copy_DtoH<float>(data, grid, gridlen, stream);
667  cudaCheck(cudaStreamSynchronize(stream));
668  TestArray_write<float>("xyzq_good.bin", "xyzq good", xyzq, 4*numAtoms);
669  TestArray_write<float>("charge_grid_good.bin", "charge grid good",
670  grid, gridlen);
671  deallocate_host<float>(&xyzq);
672  deallocate_host<float>(&grid);
673  }
674 #endif
675 
676  // ncall++;
677 
678  // if (ncall == 1) writeRealToDisk(data, xsize*ysize*zsize, "data.txt");
679  NAMD_EVENT_STOP(1, NamdProfileEvent::SPREAD_CHARGE);
680 }
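
The grid is cleared at the start of every call because the spread kernel accumulates charge into it rather than overwriting it. clear_device_array is NAMD's helper; for reference, a sketch of the equivalent operation with the plain CUDA runtime:

#include <cuda_runtime.h>
#include "CudaUtils.h"  // NAMD's cudaCheck macro

// Zero this block's xsize*ysize*zsize sub-grid asynchronously; stream ordering
// ensures the clear finishes before the subsequent spread kernel runs.
void clearGrid(float *data, int xsize, int ysize, int zsize, cudaStream_t stream) {
  size_t bytes = sizeof(float) * (size_t)xsize * (size_t)ysize * (size_t)zsize;
  cudaCheck(cudaMemsetAsync(data, 0, bytes, stream));
}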

◆ waitGatherForceDone()

void CudaPmeRealSpaceCompute::waitGatherForceDone()

Definition at line 724 of file CudaPmeSolverUtil.C.

References cudaCheck.

724  {
725  cudaCheck(cudaSetDevice(deviceID));
726  cudaCheck(cudaEventSynchronize(gatherForceEvent));
727 }

The documentation for this class was generated from the following files:

CudaPmeSolverUtil.h
CudaPmeSolverUtil.C