CudaPmeKSpaceCompute Class Reference

#include <CudaPmeSolverUtil.h>

Inheritance diagram for CudaPmeKSpaceCompute:

PmeKSpaceCompute List of all members.

Public Member Functions

 CudaPmeKSpaceCompute (PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream)
 ~CudaPmeKSpaceCompute ()
void solve (Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)
double getEnergy ()
void getVirial (double *virial)
void energyAndVirialSetCallback (CudaPmePencilXYZ *pencilPtr)
void energyAndVirialSetCallback (CudaPmePencilZ *pencilPtr)

Classes

struct  EnergyVirial

Detailed Description

Definition at line 59 of file CudaPmeSolverUtil.h.


Constructor & Destructor Documentation

CudaPmeKSpaceCompute::CudaPmeKSpaceCompute ( PmeGrid  pmeGrid,
const int  permutation,
const int  jblock,
const int  kblock,
double  kappa,
int  deviceID,
cudaStream_t  stream 
)

Definition at line 191 of file CudaPmeSolverUtil.C.

References PmeKSpaceCompute::bm1, PmeKSpaceCompute::bm2, PmeKSpaceCompute::bm3, cudaCheck, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, and PmeKSpaceCompute::pmeGrid.

00192                                                                                        : 
00193   PmeKSpaceCompute(pmeGrid, permutation, jblock, kblock, kappa),
00194   deviceID(deviceID), stream(stream) {
00195 
00196   cudaCheck(cudaSetDevice(deviceID));
00197 
00198   // Copy bm1 -> prefac_x on GPU memory
00199   float *bm1f = new float[pmeGrid.K1];
00200   float *bm2f = new float[pmeGrid.K2];
00201   float *bm3f = new float[pmeGrid.K3];
00202   for (int i=0;i < pmeGrid.K1;i++) bm1f[i] = (float)bm1[i];
00203   for (int i=0;i < pmeGrid.K2;i++) bm2f[i] = (float)bm2[i];
00204   for (int i=0;i < pmeGrid.K3;i++) bm3f[i] = (float)bm3[i];
00205   allocate_device<float>(&d_bm1, pmeGrid.K1);
00206   allocate_device<float>(&d_bm2, pmeGrid.K2);
00207   allocate_device<float>(&d_bm3, pmeGrid.K3);
00208   copy_HtoD_sync<float>(bm1f, d_bm1, pmeGrid.K1);
00209   copy_HtoD_sync<float>(bm2f, d_bm2, pmeGrid.K2);
00210   copy_HtoD_sync<float>(bm3f, d_bm3, pmeGrid.K3);
00211   delete [] bm1f;
00212   delete [] bm2f;
00213   delete [] bm3f;
00214   allocate_device<EnergyVirial>(&d_energyVirial, 1);
00215   allocate_host<EnergyVirial>(&h_energyVirial, 1);
00216   // cudaCheck(cudaEventCreateWithFlags(&copyEnergyVirialEvent, cudaEventDisableTiming));
00217   cudaCheck(cudaEventCreate(&copyEnergyVirialEvent));
00218   // ncall = 0;
00219 }

CudaPmeKSpaceCompute::~CudaPmeKSpaceCompute (  ) 

Definition at line 221 of file CudaPmeSolverUtil.C.

References cudaCheck.

00221                                             {
00222   cudaCheck(cudaSetDevice(deviceID));
00223   deallocate_device<float>(&d_bm1);
00224   deallocate_device<float>(&d_bm2);
00225   deallocate_device<float>(&d_bm3);
00226   deallocate_device<EnergyVirial>(&d_energyVirial);
00227   deallocate_host<EnergyVirial>(&h_energyVirial);
00228   cudaCheck(cudaEventDestroy(copyEnergyVirialEvent));
00229 }


Member Function Documentation

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilZ pencilPtr  ) 

Definition at line 378 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

00378                                                                                {
00379   cudaCheck(cudaSetDevice(deviceID));
00380   pencilXYZPtr = NULL;
00381   pencilZPtr = pencilPtr;
00382   checkCount = 0;
00383   CcdCallBacksReset(0, CmiWallTimer());
00384   // Set the call back at 0.1ms
00385   CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
00386 }

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilXYZ pencilPtr  ) 

Definition at line 368 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

00368                                                                                  {
00369   cudaCheck(cudaSetDevice(deviceID));
00370   pencilXYZPtr = pencilPtr;
00371   pencilZPtr = NULL;
00372   checkCount = 0;
00373   CcdCallBacksReset(0, CmiWallTimer());
00374   // Set the call back at 0.1ms
00375   CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
00376 }

double CudaPmeKSpaceCompute::getEnergy (  )  [virtual]

Implements PmeKSpaceCompute.

Definition at line 388 of file CudaPmeSolverUtil.C.

00388                                        {
00389   return h_energyVirial->energy;
00390 }

void CudaPmeKSpaceCompute::getVirial ( double *  virial  )  [virtual]

Implements PmeKSpaceCompute.

Definition at line 392 of file CudaPmeSolverUtil.C.

References Perm_cX_Y_Z, Perm_Z_cX_Y, and PmeKSpaceCompute::permutation.

00392                                                    {
00393   if (permutation == Perm_Z_cX_Y) {
00394     // h_energyVirial->virial is storing ZZ, ZX, ZY, XX, XY, YY
00395     virial[0] = h_energyVirial->virial[3];
00396     virial[1] = h_energyVirial->virial[4];
00397     virial[2] = h_energyVirial->virial[1];
00398 
00399     virial[3] = h_energyVirial->virial[4];
00400     virial[4] = h_energyVirial->virial[5];
00401     virial[5] = h_energyVirial->virial[2];
00402 
00403     virial[6] = h_energyVirial->virial[1];
00404     virial[7] = h_energyVirial->virial[7];
00405     virial[8] = h_energyVirial->virial[0];
00406   } else if (permutation == Perm_cX_Y_Z) {
00407     // h_energyVirial->virial is storing XX, XY, XZ, YY, YZ, ZZ
00408     virial[0] = h_energyVirial->virial[0];
00409     virial[1] = h_energyVirial->virial[1];
00410     virial[2] = h_energyVirial->virial[2];
00411 
00412     virial[3] = h_energyVirial->virial[1];
00413     virial[4] = h_energyVirial->virial[3];
00414     virial[5] = h_energyVirial->virial[4];
00415 
00416     virial[6] = h_energyVirial->virial[2];
00417     virial[7] = h_energyVirial->virial[4];
00418     virial[8] = h_energyVirial->virial[5];
00419   }
00420 }

void CudaPmeKSpaceCompute::solve ( Lattice lattice,
const bool  doEnergy,
const bool  doVirial,
float *  data 
) [virtual]

Implements PmeKSpaceCompute.

Definition at line 231 of file CudaPmeSolverUtil.C.

References Lattice::a_r(), Lattice::b_r(), Lattice::c_r(), cudaCheck, PmeKSpaceCompute::j0, PmeKSpaceCompute::k0, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PmeKSpaceCompute::kappa, NAMD_bug(), Perm_cX_Y_Z, Perm_Z_cX_Y, PmeKSpaceCompute::permutation, PmeKSpaceCompute::pmeGrid, scalar_sum(), PmeKSpaceCompute::size1, PmeKSpaceCompute::size2, PmeKSpaceCompute::size3, Lattice::volume(), Vector::x, Vector::y, and Vector::z.

00231                                                                                                         {
00232   cudaCheck(cudaSetDevice(deviceID));
00233 
00234   const bool doEnergyVirial = (doEnergy || doVirial);
00235 
00236   int nfft1, nfft2, nfft3;
00237   float *prefac1, *prefac2, *prefac3;
00238 
00239   BigReal volume = lattice.volume();
00240   Vector a_r = lattice.a_r();
00241   Vector b_r = lattice.b_r();
00242   Vector c_r = lattice.c_r();
00243   float recip1x, recip1y, recip1z;
00244   float recip2x, recip2y, recip2z;
00245   float recip3x, recip3y, recip3z;
00246 
00247   if (permutation == Perm_Z_cX_Y) {
00248     // Z, X, Y
00249     nfft1 = pmeGrid.K3;
00250     nfft2 = pmeGrid.K1;
00251     nfft3 = pmeGrid.K2;
00252     prefac1 = d_bm3;
00253     prefac2 = d_bm1;
00254     prefac3 = d_bm2;
00255     recip1x = c_r.z;
00256     recip1y = c_r.x;
00257     recip1z = c_r.y;
00258     recip2x = a_r.z;
00259     recip2y = a_r.x;
00260     recip2z = a_r.y;
00261     recip3x = b_r.z;
00262     recip3y = b_r.x;
00263     recip3z = b_r.y;
00264   } else if (permutation == Perm_cX_Y_Z) {
00265     // X, Y, Z
00266     nfft1 = pmeGrid.K1;
00267     nfft2 = pmeGrid.K2;
00268     nfft3 = pmeGrid.K3;
00269     prefac1 = d_bm1;
00270     prefac2 = d_bm2;
00271     prefac3 = d_bm3;
00272     recip1x = a_r.x;
00273     recip1y = a_r.y;
00274     recip1z = a_r.z;
00275     recip2x = b_r.x;
00276     recip2y = b_r.y;
00277     recip2z = b_r.z;
00278     recip3x = c_r.x;
00279     recip3y = c_r.y;
00280     recip3z = c_r.z;
00281   } else {
00282     NAMD_bug("CudaPmeKSpaceCompute::solve, invalid permutation");
00283   }
00284 
00285   // ncall++;
00286   // if (ncall == 1) {
00287   //   char filename[256];
00288   //   sprintf(filename,"dataf_%d_%d.txt",jblock,kblock);
00289   //   writeComplexToDisk((float2*)data, size1*size2*size3, filename, stream);
00290   // }
00291 
00292   // if (ncall == 1) {
00293   //   float2* h_data = new float2[size1*size2*size3];
00294   //   float2* d_data = (float2*)data;
00295   //   copy_DtoH<float2>(d_data, h_data, size1*size2*size3, stream);
00296   //   cudaCheck(cudaStreamSynchronize(stream));
00297   //   FILE *handle = fopen("dataf.txt", "w");
00298   //   for (int z=0;z < pmeGrid.K3;z++) {
00299   //     for (int y=0;y < pmeGrid.K2;y++) {
00300   //       for (int x=0;x < pmeGrid.K1/2+1;x++) {
00301   //         int i;
00302   //         if (permutation == Perm_cX_Y_Z) {
00303   //           i = x + y*size1 + z*size1*size2;
00304   //         } else {
00305   //           i = z + x*size1 + y*size1*size2;
00306   //         }
00307   //         fprintf(handle, "%f %f\n", h_data[i].x, h_data[i].y);
00308   //       }
00309   //     }
00310   //   }
00311   //   fclose(handle);
00312   //   delete [] h_data;
00313   // }
00314 
00315   // Clear energy and virial array if needed
00316   if (doEnergyVirial) clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
00317 
00318   scalar_sum(permutation == Perm_cX_Y_Z, nfft1, nfft2, nfft3, size1, size2, size3, kappa,
00319     recip1x, recip1y, recip1z, recip2x, recip2y, recip2z, recip3x, recip3y, recip3z,
00320     volume, prefac1, prefac2, prefac3, j0, k0, doEnergyVirial,
00321     &d_energyVirial->energy, d_energyVirial->virial, (float2*)data, 
00322     stream);
00323 
00324   // Copy energy and virial to host if needed
00325   if (doEnergyVirial) {
00326     copy_DtoH<EnergyVirial>(d_energyVirial, h_energyVirial, 1, stream);
00327     cudaCheck(cudaEventRecord(copyEnergyVirialEvent, stream));
00328     // cudaCheck(cudaStreamSynchronize(stream));
00329   }
00330 
00331 }


The documentation for this class was generated from the following files:
Generated on Tue Oct 16 01:17:19 2018 for NAMD by  doxygen 1.4.7