Difference for src/CudaPmeSolverUtil.C from version 1.2 to 1.3

version 1.2version 1.3
Line 199
Line 199
 //########################################################################### //###########################################################################
  
 CudaPmeKSpaceCompute::CudaPmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, CudaPmeKSpaceCompute::CudaPmeKSpaceCompute(PmeGrid pmeGrid, const int permutation,
   const int jblock, const int kblock, double kappa, int cuda_arch, int deviceID, cudaStream_t stream) :    const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream) : 
   PmeKSpaceCompute(pmeGrid, permutation, jblock, kblock, kappa),   PmeKSpaceCompute(pmeGrid, permutation, jblock, kblock, kappa),
   cuda_arch(cuda_arch), deviceID(deviceID), stream(stream) {   deviceID(deviceID), stream(stream) {
  
   cudaCheck(cudaSetDevice(deviceID));   cudaCheck(cudaSetDevice(deviceID));
  
Line 245
Line 245
  
   int nfft1, nfft2, nfft3;   int nfft1, nfft2, nfft3;
   float *prefac1, *prefac2, *prefac3;   float *prefac1, *prefac2, *prefac3;
   float recip1, recip2, recip3; 
  
   Vector recip1v = lattice.a_r();   BigReal volume = lattice.volume();
   Vector recip2v = lattice.b_r();   Vector a_r = lattice.a_r();
   Vector recip3v = lattice.c_r();   Vector b_r = lattice.b_r();
   double recip[9] = {recip1v.x, recip1v.y, recip1v.z,   Vector c_r = lattice.c_r();
     recip2v.x, recip2v.y, recip2v.z,   float recip1x, recip1y, recip1z;
     recip3v.x, recip3v.y, recip3v.z};   float recip2x, recip2y, recip2z;
    float recip3x, recip3y, recip3z;
  
   if (permutation == Perm_Z_cX_Y) {   if (permutation == Perm_Z_cX_Y) {
     // Z, X, Y     // Z, X, Y
Line 262
Line 262
     prefac1 = d_bm3;     prefac1 = d_bm3;
     prefac2 = d_bm1;     prefac2 = d_bm1;
     prefac3 = d_bm2;     prefac3 = d_bm2;
     recip1 = (float)recip[8];     recip1x = c_r.z;
     recip2 = (float)recip[0];     recip1y = c_r.x;
     recip3 = (float)recip[4];     recip1z = c_r.y;
      recip2x = a_r.z;
      recip2y = a_r.x;
      recip2z = a_r.y;
      recip3x = b_r.z;
      recip3y = b_r.x;
      recip3z = b_r.y;
   } else if (permutation == Perm_cX_Y_Z) {   } else if (permutation == Perm_cX_Y_Z) {
     // X, Y, Z     // X, Y, Z
     nfft1 = pmeGrid.K1;     nfft1 = pmeGrid.K1;
Line 273
Line 279
     prefac1 = d_bm1;     prefac1 = d_bm1;
     prefac2 = d_bm2;     prefac2 = d_bm2;
     prefac3 = d_bm3;     prefac3 = d_bm3;
     recip1 = (float)recip[0];     recip1x = a_r.x;
     recip2 = (float)recip[4];     recip1y = a_r.y;
     recip3 = (float)recip[8];     recip1z = a_r.z;
      recip2x = b_r.x;
      recip2y = b_r.y;
      recip2z = b_r.z;
      recip3x = c_r.x;
      recip3y = c_r.y;
      recip3z = c_r.z;
   } else {   } else {
     NAMD_bug("CudaPmeKSpaceCompute::solve, invalid permutation");     NAMD_bug("CudaPmeKSpaceCompute::solve, invalid permutation");
   }   }
  
   ortho = (recip[1] == 0.0 && recip[2] == 0.0 && recip[3] == 0.0 && 
     recip[5] == 0.0 && recip[6] == 0.0 && recip[7] == 0.0); 
  
   if (!ortho) 
     cudaNAMD_bug("CudaPmeKSpaceCompute::solve, only orthorhombic boxes are currently supported"); 
  
   // ncall++;   // ncall++;
   // if (ncall == 1) {   // if (ncall == 1) {
   //   char filename[256];   //   char filename[256];
Line 319
Line 325
   // Clear energy and virial array if needed   // Clear energy and virial array if needed
   if (doEnergyVirial) clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);   if (doEnergyVirial) clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
  
   scalar_sum_ortho(permutation == Perm_cX_Y_Z, nfft1, nfft2, nfft3, size1, size2, size3, kappa,   scalar_sum(permutation == Perm_cX_Y_Z, nfft1, nfft2, nfft3, size1, size2, size3, kappa,
     recip1, recip2, recip3, prefac1, prefac2, prefac3, j0, k0, doEnergyVirial,     recip1x, recip1y, recip1z, recip2x, recip2y, recip2z, recip3x, recip3y, recip3z,
     &d_energyVirial->energy, d_energyVirial->virial, (float2*)data, cuda_arch, stream);     volume, prefac1, prefac2, prefac3, j0, k0, doEnergyVirial,
      &d_energyVirial->energy, d_energyVirial->virial, (float2*)data, 
      stream);
  
   // Copy energy and virial to host if needed   // Copy energy and virial to host if needed
   if (doEnergyVirial) {   if (doEnergyVirial) {
Line 330
Line 338
     // cudaCheck(cudaStreamSynchronize(stream));     // cudaCheck(cudaStreamSynchronize(stream));
   }   }
  
   // if (ncall == 1) { 
   //   float2* h_data = new float2[size1*size2*size3]; 
   //   float2* d_data = (float2*)data; 
   //   copy_DtoH<float2>(d_data, h_data, size1*size2*size3, stream); 
   //   cudaCheck(cudaStreamSynchronize(stream)); 
   //   FILE *handle = fopen("datafs.txt", "w"); 
   //   for (int i=0;i < size1*size2*size3;i++) { 
   //     fprintf(handle, "%f %f\n", h_data[i].x, h_data[i].y); 
   //   } 
   //   fclose(handle); 
   //   delete [] h_data; 
   // } 
  
   //fprintf(stderr, "CudaPmeKSpaceCompute::solve()\n"); 
 } }
  
 // void CudaPmeKSpaceCompute::waitEnergyAndVirial() { // void CudaPmeKSpaceCompute::waitEnergyAndVirial() {
Line 406
Line 400
 } }
  
 void CudaPmeKSpaceCompute::getVirial(double *virial) { void CudaPmeKSpaceCompute::getVirial(double *virial) {
   if (ortho) { 
     if (permutation == Perm_Z_cX_Y) {     if (permutation == Perm_Z_cX_Y) {
       // h_energyVirial->virial is storing ZZ, ZX, ZY, XX, XY, YY       // h_energyVirial->virial is storing ZZ, ZX, ZY, XX, XY, YY
       virial[0] = h_energyVirial->virial[3];       virial[0] = h_energyVirial->virial[3];
Line 434
Line 427
       virial[7] = h_energyVirial->virial[4];       virial[7] = h_energyVirial->virial[4];
       virial[8] = h_energyVirial->virial[5];       virial[8] = h_energyVirial->virial[5];
     }     }
   } else { 
     NAMD_bug("CudaPmeKSpaceCompute::getVirial, only orthorhombic boxes are currently supported"); 
   } 
 } }
  
  
Line 448
Line 438
 // Class constructor // Class constructor
 // //
 CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute(PmeGrid pmeGrid, CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute(PmeGrid pmeGrid,
   const int jblock, const int kblock, int cuda_arch, int deviceID, cudaStream_t stream) :    const int jblock, const int kblock, int deviceID, cudaStream_t stream) : 
   PmeRealSpaceCompute(pmeGrid, jblock, kblock), cuda_arch(cuda_arch), deviceID(deviceID), stream(stream) {   PmeRealSpaceCompute(pmeGrid, jblock, kblock), deviceID(deviceID), stream(stream) {
   if (dataSize < xsize*ysize*zsize)   if (dataSize < xsize*ysize*zsize)
     NAMD_bug("CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute, insufficient dataSize");     NAMD_bug("CudaPmeRealSpaceCompute::CudaPmeRealSpaceCompute, insufficient dataSize");
   cudaCheck(cudaSetDevice(deviceID));   cudaCheck(cudaSetDevice(deviceID));
Line 514
Line 504
 // //
 void CudaPmeRealSpaceCompute::spreadCharge(Lattice &lattice) { void CudaPmeRealSpaceCompute::spreadCharge(Lattice &lattice) {
   cudaCheck(cudaSetDevice(deviceID));   cudaCheck(cudaSetDevice(deviceID));
   Vector recip1v = lattice.a_r(); 
   Vector recip2v = lattice.b_r(); 
   Vector recip3v = lattice.c_r(); 
   double recip[9] = {recip1v.x, recip1v.y, recip1v.z, 
     recip2v.x, recip2v.y, recip2v.z, 
     recip3v.x, recip3v.y, recip3v.z}; 
  
   // Clear grid   // Clear grid
   clear_device_array<float>(data, xsize*ysize*zsize, stream);   clear_device_array<float>(data, xsize*ysize*zsize, stream);
  
   bool ortho = (recip[1] == 0.0 && recip[2] == 0.0 && recip[3] == 0.0 &&   spread_charge((const float4*)d_atoms, numAtoms,
     recip[5] == 0.0 && recip[6] == 0.0 && recip[7] == 0.0);     pmeGrid.K1, pmeGrid.K2, pmeGrid.K3, xsize, ysize, zsize,
  
   if (!ortho) { 
     cudaNAMD_bug("CudaPmeRealSpaceCompute::spreadCharge, only orthorhombic boxes are currently supported"); 
   } 
  
   float recip1 = (float)recip[0]; 
   float recip2 = (float)recip[4]; 
   float recip3 = (float)recip[8]; 
  
   spread_charge_ortho((const float4*)d_atoms, numAtoms, 
     recip1, recip2, recip3, pmeGrid.K1, pmeGrid.K2, pmeGrid.K3, xsize, ysize, zsize, 
     xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),     xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
     data, pmeGrid.order, stream);     data, pmeGrid.order, stream);
  
Line 587
Line 560
 } }
  
 void CudaPmeRealSpaceCompute::setupGridTexture(float* data, int data_len) { void CudaPmeRealSpaceCompute::setupGridTexture(float* data, int data_len) {
   if (cuda_arch >= 350 || (tex_data == data && tex_data_len == data_len)) return;   if (tex_data == data && tex_data_len == data_len) return;
   tex_data = data;   tex_data = data;
   tex_data_len = data_len;   tex_data_len = data_len;
 #ifndef DISABLE_CUDA_TEXTURE_OBJECTS 
   // Use texture objects   // Use texture objects
   cudaResourceDesc resDesc;   cudaResourceDesc resDesc;
   memset(&resDesc, 0, sizeof(resDesc));   memset(&resDesc, 0, sizeof(resDesc));
Line 603
Line 575
   memset(&texDesc, 0, sizeof(texDesc));   memset(&texDesc, 0, sizeof(texDesc));
   texDesc.readMode = cudaReadModeElementType;   texDesc.readMode = cudaReadModeElementType;
   cudaCheck(cudaCreateTextureObject(&gridTexObj, &resDesc, &texDesc, NULL));   cudaCheck(cudaCreateTextureObject(&gridTexObj, &resDesc, &texDesc, NULL));
 #else 
   bindGridTexture(data, data_len); 
 #endif 
 } }
  
 void CudaPmeRealSpaceCompute::gatherForce(Lattice &lattice, CudaForce* force) { void CudaPmeRealSpaceCompute::gatherForce(Lattice &lattice, CudaForce* force) {
Line 614
Line 583
   // Re-allocate force array if needed   // Re-allocate force array if needed
   reallocate_device<CudaForce>(&d_force, &d_forceCapacity, numAtoms, 1.5f);   reallocate_device<CudaForce>(&d_force, &d_forceCapacity, numAtoms, 1.5f);
  
   Vector recip1v = lattice.a_r();   gather_force((const float4*)d_atoms, numAtoms,
   Vector recip2v = lattice.b_r();     pmeGrid.K1, pmeGrid.K2, pmeGrid.K3,
   Vector recip3v = lattice.c_r(); 
   double recip[9] = {recip1v.x, recip1v.y, recip1v.z, 
     recip2v.x, recip2v.y, recip2v.z, 
     recip3v.x, recip3v.y, recip3v.z}; 
  
   bool ortho = (recip[1] == 0.0 && recip[2] == 0.0 && recip[3] == 0.0 && 
     recip[5] == 0.0 && recip[6] == 0.0 && recip[7] == 0.0); 
  
   if (!ortho) { 
     cudaNAMD_bug("CudaPmeRealSpaceCompute::gatherForce, only orthorhombic boxes are currently supported"); 
   } 
  
   float recip1 = (float)recip[0]; 
   float recip2 = (float)recip[4]; 
   float recip3 = (float)recip[8]; 
  
   gather_force_ortho((const float4*)d_atoms, numAtoms, 
     recip1, recip2, recip3, pmeGrid.K1, pmeGrid.K2, pmeGrid.K3, 
     xsize, ysize, zsize, xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),     xsize, ysize, zsize, xsize, y0, z0, (pmeGrid.yBlocks == 1), (pmeGrid.zBlocks == 1),
     data, pmeGrid.order, (float3*)d_force,      data, pmeGrid.order, (float3*)d_force, 
 #ifndef DISABLE_CUDA_TEXTURE_OBJECTS 
     gridTexObj,     gridTexObj,
 #endif 
     stream);     stream);
  
   copy_DtoH<CudaForce>(d_force, force, numAtoms, stream);   copy_DtoH<CudaForce>(d_force, force, numAtoms, stream);


Legend:
Removed in v.1.2 
changed lines
 Added in v.1.3



Made by using version 1.53 of cvs2html