NAMD
Classes | Public Member Functions | List of all members
CudaPmeKSpaceCompute Class Reference

#include <CudaPmeSolverUtil.h>

Inheritance diagram for CudaPmeKSpaceCompute:
PmeKSpaceCompute

Public Member Functions

 CudaPmeKSpaceCompute (PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream, unsigned int iGrid=0)
 
 ~CudaPmeKSpaceCompute ()
 
void solve (Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)
 
void waitEnergyAndVirial ()
 
double getEnergy ()
 
void getVirial (double *virial)
 
void energyAndVirialSetCallback (CudaPmePencilXYZ *pencilPtr)
 
void energyAndVirialSetCallback (CudaPmePencilZ *pencilPtr)
 
- Public Member Functions inherited from PmeKSpaceCompute
 PmeKSpaceCompute (PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, unsigned int multipleGridIndex=0)
 
virtual ~PmeKSpaceCompute ()
 
virtual void setGrid (unsigned int iGrid)
 

Additional Inherited Members

- Protected Attributes inherited from PmeKSpaceCompute
PmeGrid pmeGrid
 
double * bm1
 
double * bm2
 
double * bm3
 
double kappa
 
const int permutation
 
const int jblock
 
const int kblock
 
int size1
 
int size2
 
int size3
 
int j0
 
int k0
 
unsigned int multipleGridIndex
 

Detailed Description

Definition at line 73 of file CudaPmeSolverUtil.h.

Constructor & Destructor Documentation

◆ CudaPmeKSpaceCompute()

CudaPmeKSpaceCompute::CudaPmeKSpaceCompute ( PmeGrid  pmeGrid,
const int  permutation,
const int  jblock,
const int  kblock,
double  kappa,
int  deviceID,
cudaStream_t  stream,
unsigned int  iGrid = 0 
)

Definition at line 233 of file CudaPmeSolverUtil.C.

References PmeKSpaceCompute::bm1, PmeKSpaceCompute::bm2, PmeKSpaceCompute::bm3, cudaCheck, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, and PmeKSpaceCompute::pmeGrid.

234  :
236  deviceID(deviceID), stream(stream) {
237 
238  cudaCheck(cudaSetDevice(deviceID));
239 
240  // Copy bm1 -> prefac_x on GPU memory
241  float *bm1f = new float[pmeGrid.K1];
242  float *bm2f = new float[pmeGrid.K2];
243  float *bm3f = new float[pmeGrid.K3];
244  for (int i=0;i < pmeGrid.K1;i++) bm1f[i] = (float)bm1[i];
245  for (int i=0;i < pmeGrid.K2;i++) bm2f[i] = (float)bm2[i];
246  for (int i=0;i < pmeGrid.K3;i++) bm3f[i] = (float)bm3[i];
247  allocate_device<float>(&d_bm1, pmeGrid.K1);
248  allocate_device<float>(&d_bm2, pmeGrid.K2);
249  allocate_device<float>(&d_bm3, pmeGrid.K3);
250  copy_HtoD_sync<float>(bm1f, d_bm1, pmeGrid.K1);
251  copy_HtoD_sync<float>(bm2f, d_bm2, pmeGrid.K2);
252  copy_HtoD_sync<float>(bm3f, d_bm3, pmeGrid.K3);
253  delete [] bm1f;
254  delete [] bm2f;
255  delete [] bm3f;
256  allocate_device<EnergyVirial>(&d_energyVirial, 1);
257  allocate_host<EnergyVirial>(&h_energyVirial, 1);
258  // cudaCheck(cudaEventCreateWithFlags(&copyEnergyVirialEvent, cudaEventDisableTiming));
259  cudaCheck(cudaEventCreate(&copyEnergyVirialEvent));
260  // ncall = 0;
261 }
int K2
Definition: PmeBase.h:21
int K1
Definition: PmeBase.h:21
int K3
Definition: PmeBase.h:21
const int permutation
#define cudaCheck(stmt)
Definition: CudaUtils.h:233
PmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, unsigned int multipleGridIndex=0)

◆ ~CudaPmeKSpaceCompute()

CudaPmeKSpaceCompute::~CudaPmeKSpaceCompute ( )

Definition at line 263 of file CudaPmeSolverUtil.C.

References cudaCheck.

263  {
264  cudaCheck(cudaSetDevice(deviceID));
265  deallocate_device<float>(&d_bm1);
266  deallocate_device<float>(&d_bm2);
267  deallocate_device<float>(&d_bm3);
268  deallocate_device<EnergyVirial>(&d_energyVirial);
269  deallocate_host<EnergyVirial>(&h_energyVirial);
270  cudaCheck(cudaEventDestroy(copyEnergyVirialEvent));
271 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:233

Member Function Documentation

◆ energyAndVirialSetCallback() [1/2]

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilXYZ pencilPtr)

Definition at line 474 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

474  {
475  cudaCheck(cudaSetDevice(deviceID));
476  pencilXYZPtr = pencilPtr;
477  pencilZPtr = NULL;
478  checkCount = 0;
479  CcdCallBacksReset(0, CmiWallTimer());
480  // Set the call back at 0.1ms
481  CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
482 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:233
void CcdCallBacksReset(void *ignored, double curWallTime)

◆ energyAndVirialSetCallback() [2/2]

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilZ pencilPtr)

Definition at line 484 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

484  {
485  cudaCheck(cudaSetDevice(deviceID));
486  pencilXYZPtr = NULL;
487  pencilZPtr = pencilPtr;
488  checkCount = 0;
489  CcdCallBacksReset(0, CmiWallTimer());
490  // Set the call back at 0.1ms
491  CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
492 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:233
void CcdCallBacksReset(void *ignored, double curWallTime)

◆ getEnergy()

double CudaPmeKSpaceCompute::getEnergy ( )
virtual

Implements PmeKSpaceCompute.

Definition at line 494 of file CudaPmeSolverUtil.C.

494  {
495  return h_energyVirial->energy;
496 }

◆ getVirial()

void CudaPmeKSpaceCompute::getVirial ( double *  virial)
virtual

Implements PmeKSpaceCompute.

Definition at line 498 of file CudaPmeSolverUtil.C.

References Perm_cX_Y_Z, Perm_Z_cX_Y, and PmeKSpaceCompute::permutation.

498  {
499  if (permutation == Perm_Z_cX_Y) {
500  // h_energyVirial->virial is storing ZZ, ZX, ZY, XX, XY, YY
501  virial[0] = h_energyVirial->virial[3];
502  virial[1] = h_energyVirial->virial[4];
503  virial[2] = h_energyVirial->virial[1];
504 
505  virial[3] = h_energyVirial->virial[4];
506  virial[4] = h_energyVirial->virial[5];
507  virial[5] = h_energyVirial->virial[2];
508 
509  virial[6] = h_energyVirial->virial[1];
510  virial[7] = h_energyVirial->virial[7];  /* NOTE(review): virial[] stores only 6 entries (ZZ,ZX,ZY,XX,XY,YY — see comment above), so index 7 is out of bounds; the symmetric ZY term should presumably read virial[2], matching virial[5] — confirm against CudaPmeSolverUtil.C */
511  virial[8] = h_energyVirial->virial[0];
512  } else if (permutation == Perm_cX_Y_Z) {
513  // h_energyVirial->virial is storing XX, XY, XZ, YY, YZ, ZZ
514  virial[0] = h_energyVirial->virial[0];
515  virial[1] = h_energyVirial->virial[1];
516  virial[2] = h_energyVirial->virial[2];
517 
518  virial[3] = h_energyVirial->virial[1];
519  virial[4] = h_energyVirial->virial[3];
520  virial[5] = h_energyVirial->virial[4];
521 
522  virial[6] = h_energyVirial->virial[2];
523  virial[7] = h_energyVirial->virial[4];
524  virial[8] = h_energyVirial->virial[5];
525  }
526 #if 0
527  fprintf(stderr, "AP PME VIRIAL =\n"
528  " %g %g %g\n %g %g %g\n %g %g %g\n",
529  virial[0], virial[1], virial[2], virial[3], virial[4],
530  virial[5], virial[6], virial[7], virial[8]);
531 #endif
532 }
const int permutation

◆ solve()

void CudaPmeKSpaceCompute::solve ( Lattice lattice,
const bool  doEnergy,
const bool  doVirial,
float *  data 
)
virtual

Implements PmeKSpaceCompute.

Definition at line 273 of file CudaPmeSolverUtil.C.

References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), PmeKSpaceCompute::bm1, PmeKSpaceCompute::bm2, PmeKSpaceCompute::bm3, Lattice::c(), Lattice::c_r(), cudaCheck, PmeKSpaceCompute::j0, PmeKSpaceCompute::k0, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PmeKSpaceCompute::kappa, NAMD_bug(), Perm_cX_Y_Z, Perm_Z_cX_Y, PmeKSpaceCompute::permutation, PmeKSpaceCompute::pmeGrid, scalar_sum(), PmeKSpaceCompute::size1, PmeKSpaceCompute::size2, PmeKSpaceCompute::size3, Lattice::volume(), Vector::x, Vector::y, and Vector::z.

273  {
274 #if 0
275  // Check lattice to make sure it is updating for constant pressure
276  fprintf(stderr, "K-SPACE LATTICE %g %g %g %g %g %g %g %g %g\n",
277  lattice.a().x, lattice.a().y, lattice.a().z,
278  lattice.b().x, lattice.b().y, lattice.b().z,
279  lattice.c().x, lattice.c().y, lattice.c().z);
280 #endif
281  cudaCheck(cudaSetDevice(deviceID));
282 
283  const bool doEnergyVirial = (doEnergy || doVirial);
284 
285  int nfft1, nfft2, nfft3;
286  float *prefac1, *prefac2, *prefac3;
287 
288  BigReal volume = lattice.volume();
289  Vector a_r = lattice.a_r();
290  Vector b_r = lattice.b_r();
291  Vector c_r = lattice.c_r();
292  float recip1x, recip1y, recip1z;
293  float recip2x, recip2y, recip2z;
294  float recip3x, recip3y, recip3z;
295 
296  if (permutation == Perm_Z_cX_Y) {
297  // Z, X, Y
298  nfft1 = pmeGrid.K3;
299  nfft2 = pmeGrid.K1;
300  nfft3 = pmeGrid.K2;
301  prefac1 = d_bm3;
302  prefac2 = d_bm1;
303  prefac3 = d_bm2;
304  recip1x = c_r.z;
305  recip1y = c_r.x;
306  recip1z = c_r.y;
307  recip2x = a_r.z;
308  recip2y = a_r.x;
309  recip2z = a_r.y;
310  recip3x = b_r.z;
311  recip3y = b_r.x;
312  recip3z = b_r.y;
313  } else if (permutation == Perm_cX_Y_Z) {
314  // X, Y, Z
315  nfft1 = pmeGrid.K1;
316  nfft2 = pmeGrid.K2;
317  nfft3 = pmeGrid.K3;
318  prefac1 = d_bm1;
319  prefac2 = d_bm2;
320  prefac3 = d_bm3;
321  recip1x = a_r.x;
322  recip1y = a_r.y;
323  recip1z = a_r.z;
324  recip2x = b_r.x;
325  recip2y = b_r.y;
326  recip2z = b_r.z;
327  recip3x = c_r.x;
328  recip3y = c_r.y;
329  recip3z = c_r.z;
330  } else {
331  NAMD_bug("CudaPmeKSpaceCompute::solve, invalid permutation");
332  }
333 
334  // ncall++;
335  // if (ncall == 1) {
336  // char filename[256];
337  // sprintf(filename,"dataf_%d_%d.txt",jblock,kblock);
338  // writeComplexToDisk((float2*)data, size1*size2*size3, filename, stream);
339  // }
340 
341  // if (ncall == 1) {
342  // float2* h_data = new float2[size1*size2*size3];
343  // float2* d_data = (float2*)data;
344  // copy_DtoH<float2>(d_data, h_data, size1*size2*size3, stream);
345  // cudaCheck(cudaStreamSynchronize(stream));
346  // FILE *handle = fopen("dataf.txt", "w");
347  // for (int z=0;z < pmeGrid.K3;z++) {
348  // for (int y=0;y < pmeGrid.K2;y++) {
349  // for (int x=0;x < pmeGrid.K1/2+1;x++) {
350  // int i;
351  // if (permutation == Perm_cX_Y_Z) {
352  // i = x + y*size1 + z*size1*size2;
353  // } else {
354  // i = z + x*size1 + y*size1*size2;
355  // }
356  // fprintf(handle, "%f %f\n", h_data[i].x, h_data[i].y);
357  // }
358  // }
359  // }
360  // fclose(handle);
361  // delete [] h_data;
362  // }
363 
364  // Clear energy and virial array if needed
365  if (doEnergyVirial) clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
366 
367 #ifdef TESTPID
368  if (1) {
369  cudaCheck(cudaStreamSynchronize(stream));
370  fprintf(stderr, "AP calling scalar sum\n");
371  fprintf(stderr, "(permutation == Perm_cX_Y_Z) = %s\n",
372  (permutation == Perm_cX_Y_Z ? "true" : "false"));
373  fprintf(stderr, "nfft1=%d nfft2=%d nfft3=%d\n", nfft1, nfft2, nfft3);
374  fprintf(stderr, "size1=%d size2=%d size3=%d\n", size1, size2, size3);
375  fprintf(stderr, "kappa=%g\n", kappa);
376  fprintf(stderr, "recip1x=%g recip1y=%g recip1z=%g\n",
377  (double)recip1x, (double)recip1y, (double)recip1z);
378  fprintf(stderr, "recip2x=%g recip2y=%g recip2z=%g\n",
379  (double)recip2x, (double)recip2y, (double)recip2z);
380  fprintf(stderr, "recip3x=%g recip3y=%g recip3z=%g\n",
381  (double)recip3x, (double)recip3y, (double)recip3z);
382  fprintf(stderr, "volume=%g\n", volume);
383  fprintf(stderr, "j0=%d k0=%d\n", j0, k0);
384  float *bm1, *bm2, *bm3;
385  allocate_host<float>(&bm1, nfft1);
386  allocate_host<float>(&bm2, nfft2);
387  allocate_host<float>(&bm3, nfft3);
388  copy_DtoH<float>(prefac1, bm1, nfft1, stream);
389  copy_DtoH<float>(prefac2, bm2, nfft2, stream);
390  copy_DtoH<float>(prefac3, bm3, nfft3, stream);
391  TestArray_write<float>("bm1_good.bin", "structure factor bm1 good",
392  bm1, nfft1);
393  TestArray_write<float>("bm2_good.bin", "structure factor bm2 good",
394  bm2, nfft2);
395  TestArray_write<float>("bm3_good.bin", "structure factor bm3 good",
396  bm3, nfft3);
397  deallocate_host<float>(&bm1);
398  deallocate_host<float>(&bm2);
399  deallocate_host<float>(&bm3);
400  }
401 #endif
402 
403  scalar_sum(permutation == Perm_cX_Y_Z, nfft1, nfft2, nfft3, size1, size2, size3, kappa,
404  recip1x, recip1y, recip1z, recip2x, recip2y, recip2z, recip3x, recip3y, recip3z,
405  volume, prefac1, prefac2, prefac3, j0, k0, doEnergyVirial,
406  &d_energyVirial->energy, d_energyVirial->virial, (float2*)data,
407  stream);
408 #ifdef TESTPID
409  if (1) {
410  cudaCheck(cudaStreamSynchronize(stream));
411  fprintf(stderr, "AP SCALAR SUM\n");
412  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
413  int m = 2 * (nfft1/2 + 1) * nfft2 * nfft3;
414  float *tran = 0;
415  allocate_host<float>(&tran, m);
416  copy_DtoH<float>((float*)data, tran, m, stream);
417  cudaCheck(cudaStreamSynchronize(stream));
418  TestArray_write<float>("tran_potential_grid_good.bin",
419  "transformed potential grid good", tran, m);
420  deallocate_host<float>(&tran);
421  }
422 #endif
423 
424  // Copy energy and virial to host if needed
425  if (doEnergyVirial) {
426  copy_DtoH<EnergyVirial>(d_energyVirial, h_energyVirial, 1, stream);
427  cudaCheck(cudaEventRecord(copyEnergyVirialEvent, stream));
428  // cudaCheck(cudaStreamSynchronize(stream));
429  }
430 
431 }
NAMD_HOST_DEVICE Vector c() const
Definition: Lattice.h:270
Definition: Vector.h:72
int K2
Definition: PmeBase.h:21
int K1
Definition: PmeBase.h:21
BigReal z
Definition: Vector.h:74
void scalar_sum(const bool orderXYZ, const int nfft1, const int nfft2, const int nfft3, const int size1, const int size2, const int size3, const double kappa, const float recip1x, const float recip1y, const float recip1z, const float recip2x, const float recip2y, const float recip2z, const float recip3x, const float recip3y, const float recip3z, const double volume, const float *prefac1, const float *prefac2, const float *prefac3, const int k2_00, const int k3_00, const bool doEnergyVirial, double *energy, double *virial, float2 *data, cudaStream_t stream)
void NAMD_bug(const char *err_msg)
Definition: common.C:195
BigReal x
Definition: Vector.h:74
NAMD_HOST_DEVICE BigReal volume(void) const
Definition: Lattice.h:293
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286
NAMD_HOST_DEVICE Vector b() const
Definition: Lattice.h:269
int K3
Definition: PmeBase.h:21
const int permutation
BigReal y
Definition: Vector.h:74
#define cudaCheck(stmt)
Definition: CudaUtils.h:233
NAMD_HOST_DEVICE Vector a() const
Definition: Lattice.h:268
double BigReal
Definition: common.h:123

◆ waitEnergyAndVirial()

void CudaPmeKSpaceCompute::waitEnergyAndVirial ( )

The documentation for this class was generated from the following files: