CudaPmeKSpaceCompute Class Reference

#include <CudaPmeSolverUtil.h>

Inheritance diagram for CudaPmeKSpaceCompute:

PmeKSpaceCompute List of all members.

Public Member Functions

 CudaPmeKSpaceCompute (PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream)
 ~CudaPmeKSpaceCompute ()
void solve (Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)
double getEnergy ()
void getVirial (double *virial)
void energyAndVirialSetCallback (CudaPmePencilXYZ *pencilPtr)
void energyAndVirialSetCallback (CudaPmePencilZ *pencilPtr)

Classes

struct  EnergyVirial

Detailed Description

Definition at line 59 of file CudaPmeSolverUtil.h.


Constructor & Destructor Documentation

CudaPmeKSpaceCompute::CudaPmeKSpaceCompute ( PmeGrid  pmeGrid,
const int  permutation,
const int  jblock,
const int  kblock,
double  kappa,
int  deviceID,
cudaStream_t  stream 
)

Definition at line 201 of file CudaPmeSolverUtil.C.

References PmeKSpaceCompute::bm1, PmeKSpaceCompute::bm2, PmeKSpaceCompute::bm3, cudaCheck, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, and PmeKSpaceCompute::pmeGrid.

00202                                                                                        : 
00203   PmeKSpaceCompute(pmeGrid, permutation, jblock, kblock, kappa),
00204   deviceID(deviceID), stream(stream) {
00205 
00206   cudaCheck(cudaSetDevice(deviceID));
00207 
00208   // Copy bm1 -> prefac_x on GPU memory
00209   float *bm1f = new float[pmeGrid.K1];
00210   float *bm2f = new float[pmeGrid.K2];
00211   float *bm3f = new float[pmeGrid.K3];
00212   for (int i=0;i < pmeGrid.K1;i++) bm1f[i] = (float)bm1[i];
00213   for (int i=0;i < pmeGrid.K2;i++) bm2f[i] = (float)bm2[i];
00214   for (int i=0;i < pmeGrid.K3;i++) bm3f[i] = (float)bm3[i];
00215   allocate_device<float>(&d_bm1, pmeGrid.K1);
00216   allocate_device<float>(&d_bm2, pmeGrid.K2);
00217   allocate_device<float>(&d_bm3, pmeGrid.K3);
00218   copy_HtoD_sync<float>(bm1f, d_bm1, pmeGrid.K1);
00219   copy_HtoD_sync<float>(bm2f, d_bm2, pmeGrid.K2);
00220   copy_HtoD_sync<float>(bm3f, d_bm3, pmeGrid.K3);
00221   delete [] bm1f;
00222   delete [] bm2f;
00223   delete [] bm3f;
00224   allocate_device<EnergyVirial>(&d_energyVirial, 1);
00225   allocate_host<EnergyVirial>(&h_energyVirial, 1);
00226   // cudaCheck(cudaEventCreateWithFlags(&copyEnergyVirialEvent, cudaEventDisableTiming));
00227   cudaCheck(cudaEventCreate(&copyEnergyVirialEvent));
00228   // ncall = 0;
00229 }

CudaPmeKSpaceCompute::~CudaPmeKSpaceCompute (  ) 

Definition at line 231 of file CudaPmeSolverUtil.C.

References cudaCheck.

00231                                             {
00232   cudaCheck(cudaSetDevice(deviceID));
00233   deallocate_device<float>(&d_bm1);
00234   deallocate_device<float>(&d_bm2);
00235   deallocate_device<float>(&d_bm3);
00236   deallocate_device<EnergyVirial>(&d_energyVirial);
00237   deallocate_host<EnergyVirial>(&h_energyVirial);
00238   cudaCheck(cudaEventDestroy(copyEnergyVirialEvent));
00239 }


Member Function Documentation

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilZ pencilPtr  ) 

Definition at line 388 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

00388                                                                                {
00389   cudaCheck(cudaSetDevice(deviceID));
00390   pencilXYZPtr = NULL;
00391   pencilZPtr = pencilPtr;
00392   checkCount = 0;
00393   CcdCallBacksReset(0, CmiWallTimer());
00394   // Set the call back at 0.1ms
00395   CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
00396 }

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilXYZ pencilPtr  ) 

Definition at line 378 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

00378                                                                                  {
00379   cudaCheck(cudaSetDevice(deviceID));
00380   pencilXYZPtr = pencilPtr;
00381   pencilZPtr = NULL;
00382   checkCount = 0;
00383   CcdCallBacksReset(0, CmiWallTimer());
00384   // Set the call back at 0.1ms
00385   CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
00386 }

double CudaPmeKSpaceCompute::getEnergy (  )  [virtual]

Implements PmeKSpaceCompute.

Definition at line 398 of file CudaPmeSolverUtil.C.

00398                                        {
00399   return h_energyVirial->energy;
00400 }

void CudaPmeKSpaceCompute::getVirial ( double *  virial  )  [virtual]

Implements PmeKSpaceCompute.

Definition at line 402 of file CudaPmeSolverUtil.C.

References Perm_cX_Y_Z, Perm_Z_cX_Y, and PmeKSpaceCompute::permutation.

00402                                                    {
00403   if (permutation == Perm_Z_cX_Y) {
00404     // h_energyVirial->virial is storing ZZ, ZX, ZY, XX, XY, YY
00405     virial[0] = h_energyVirial->virial[3];
00406     virial[1] = h_energyVirial->virial[4];
00407     virial[2] = h_energyVirial->virial[1];
00408 
00409     virial[3] = h_energyVirial->virial[4];
00410     virial[4] = h_energyVirial->virial[5];
00411     virial[5] = h_energyVirial->virial[2];
00412 
00413     virial[6] = h_energyVirial->virial[1];
00414     virial[7] = h_energyVirial->virial[7];
00415     virial[8] = h_energyVirial->virial[0];
00416   } else if (permutation == Perm_cX_Y_Z) {
00417     // h_energyVirial->virial is storing XX, XY, XZ, YY, YZ, ZZ
00418     virial[0] = h_energyVirial->virial[0];
00419     virial[1] = h_energyVirial->virial[1];
00420     virial[2] = h_energyVirial->virial[2];
00421 
00422     virial[3] = h_energyVirial->virial[1];
00423     virial[4] = h_energyVirial->virial[3];
00424     virial[5] = h_energyVirial->virial[4];
00425 
00426     virial[6] = h_energyVirial->virial[2];
00427     virial[7] = h_energyVirial->virial[4];
00428     virial[8] = h_energyVirial->virial[5];
00429   }
00430 }

void CudaPmeKSpaceCompute::solve ( Lattice lattice,
const bool  doEnergy,
const bool  doVirial,
float *  data 
) [virtual]

Implements PmeKSpaceCompute.

Definition at line 241 of file CudaPmeSolverUtil.C.

References Lattice::a_r(), Lattice::b_r(), Lattice::c_r(), cudaCheck, PmeKSpaceCompute::j0, PmeKSpaceCompute::k0, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PmeKSpaceCompute::kappa, NAMD_bug(), Perm_cX_Y_Z, Perm_Z_cX_Y, PmeKSpaceCompute::permutation, PmeKSpaceCompute::pmeGrid, scalar_sum(), PmeKSpaceCompute::size1, PmeKSpaceCompute::size2, PmeKSpaceCompute::size3, Lattice::volume(), Vector::x, Vector::y, and Vector::z.

00241                                                                                                         {
00242   cudaCheck(cudaSetDevice(deviceID));
00243 
00244   const bool doEnergyVirial = (doEnergy || doVirial);
00245 
00246   int nfft1, nfft2, nfft3;
00247   float *prefac1, *prefac2, *prefac3;
00248 
00249   BigReal volume = lattice.volume();
00250   Vector a_r = lattice.a_r();
00251   Vector b_r = lattice.b_r();
00252   Vector c_r = lattice.c_r();
00253   float recip1x, recip1y, recip1z;
00254   float recip2x, recip2y, recip2z;
00255   float recip3x, recip3y, recip3z;
00256 
00257   if (permutation == Perm_Z_cX_Y) {
00258     // Z, X, Y
00259     nfft1 = pmeGrid.K3;
00260     nfft2 = pmeGrid.K1;
00261     nfft3 = pmeGrid.K2;
00262     prefac1 = d_bm3;
00263     prefac2 = d_bm1;
00264     prefac3 = d_bm2;
00265     recip1x = c_r.z;
00266     recip1y = c_r.x;
00267     recip1z = c_r.y;
00268     recip2x = a_r.z;
00269     recip2y = a_r.x;
00270     recip2z = a_r.y;
00271     recip3x = b_r.z;
00272     recip3y = b_r.x;
00273     recip3z = b_r.y;
00274   } else if (permutation == Perm_cX_Y_Z) {
00275     // X, Y, Z
00276     nfft1 = pmeGrid.K1;
00277     nfft2 = pmeGrid.K2;
00278     nfft3 = pmeGrid.K3;
00279     prefac1 = d_bm1;
00280     prefac2 = d_bm2;
00281     prefac3 = d_bm3;
00282     recip1x = a_r.x;
00283     recip1y = a_r.y;
00284     recip1z = a_r.z;
00285     recip2x = b_r.x;
00286     recip2y = b_r.y;
00287     recip2z = b_r.z;
00288     recip3x = c_r.x;
00289     recip3y = c_r.y;
00290     recip3z = c_r.z;
00291   } else {
00292     NAMD_bug("CudaPmeKSpaceCompute::solve, invalid permutation");
00293   }
00294 
00295   // ncall++;
00296   // if (ncall == 1) {
00297   //   char filename[256];
00298   //   sprintf(filename,"dataf_%d_%d.txt",jblock,kblock);
00299   //   writeComplexToDisk((float2*)data, size1*size2*size3, filename, stream);
00300   // }
00301 
00302   // if (ncall == 1) {
00303   //   float2* h_data = new float2[size1*size2*size3];
00304   //   float2* d_data = (float2*)data;
00305   //   copy_DtoH<float2>(d_data, h_data, size1*size2*size3, stream);
00306   //   cudaCheck(cudaStreamSynchronize(stream));
00307   //   FILE *handle = fopen("dataf.txt", "w");
00308   //   for (int z=0;z < pmeGrid.K3;z++) {
00309   //     for (int y=0;y < pmeGrid.K2;y++) {
00310   //       for (int x=0;x < pmeGrid.K1/2+1;x++) {
00311   //         int i;
00312   //         if (permutation == Perm_cX_Y_Z) {
00313   //           i = x + y*size1 + z*size1*size2;
00314   //         } else {
00315   //           i = z + x*size1 + y*size1*size2;
00316   //         }
00317   //         fprintf(handle, "%f %f\n", h_data[i].x, h_data[i].y);
00318   //       }
00319   //     }
00320   //   }
00321   //   fclose(handle);
00322   //   delete [] h_data;
00323   // }
00324 
00325   // Clear energy and virial array if needed
00326   if (doEnergyVirial) clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
00327 
00328   scalar_sum(permutation == Perm_cX_Y_Z, nfft1, nfft2, nfft3, size1, size2, size3, kappa,
00329     recip1x, recip1y, recip1z, recip2x, recip2y, recip2z, recip3x, recip3y, recip3z,
00330     volume, prefac1, prefac2, prefac3, j0, k0, doEnergyVirial,
00331     &d_energyVirial->energy, d_energyVirial->virial, (float2*)data, 
00332     stream);
00333 
00334   // Copy energy and virial to host if needed
00335   if (doEnergyVirial) {
00336     copy_DtoH<EnergyVirial>(d_energyVirial, h_energyVirial, 1, stream);
00337     cudaCheck(cudaEventRecord(copyEnergyVirialEvent, stream));
00338     // cudaCheck(cudaStreamSynchronize(stream));
00339   }
00340 
00341 }


The documentation for this class was generated from the following files:
Generated on Mon Sep 25 01:17:17 2017 for NAMD by  doxygen 1.4.7