Difference for src/ComputeNonbondedBase2.h from version 1.52 to 1.53

version 1.52 | version 1.53
Line 68
Line 68
 #else #else
 #ifdef PRAGMA_SIMD #ifdef PRAGMA_SIMD
 #ifndef TABENERGYFLAG #ifndef TABENERGYFLAG
 #pragma simd assert SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z) PAIR(reduction(+:virial_xx,virial_xy,virial_xz,virial_yy,virial_yz,virial_zz))) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \ #pragma simd assert SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z)) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \
              FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) PAIR(reduction(+:fullElectVirial_xx,fullElectVirial_xy,fullElectVirial_xz,fullElectVirial_yy,fullElectVirial_yz,fullElectVirial_zz)) ENERGY(reduction(+:fullElectEnergy)))              FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) ENERGY(reduction(+:fullElectEnergy)))
 #endif #endif
 #pragma loop_count avg=100 #pragma loop_count avg=100
 #else // PRAGMA_SIMD #else // PRAGMA_SIMD
Line 490
Line 490
 #ifndef NAMD_CUDA #ifndef NAMD_CUDA
 #ifndef  A2_QPX #ifndef  A2_QPX
       register BigReal tmp_x = force_r * p_ij_x;       register BigReal tmp_x = force_r * p_ij_x;
       PAIR( virial_xx += tmp_x * p_ij_x; ) 
       PAIR( virial_xy += tmp_x * p_ij_y; ) 
       PAIR( virial_xz += tmp_x * p_ij_z; ) 
  
       f_i_x += tmp_x;       f_i_x += tmp_x;
       f_j->x -= tmp_x;       f_j->x -= tmp_x;
  
       register BigReal tmp_y = force_r * p_ij_y;       register BigReal tmp_y = force_r * p_ij_y;
       PAIR( virial_yy += tmp_y * p_ij_y; ) 
       PAIR( virial_yz += tmp_y * p_ij_z; ) 
       f_i_y += tmp_y;       f_i_y += tmp_y;
       f_j->y -= tmp_y;       f_j->y -= tmp_y;
              
       register BigReal tmp_z = force_r * p_ij_z;       register BigReal tmp_z = force_r * p_ij_z;
       PAIR( virial_zz += tmp_z * p_ij_z; ) 
       f_i_z += tmp_z;       f_i_z += tmp_z;
       f_j->z -= tmp_z;       f_j->z -= tmp_z;
 #else #else
Line 512
Line 505
       vector4double tmp_v = vec_mul(force_rv, p_ij_v);       vector4double tmp_v = vec_mul(force_rv, p_ij_v);
       f_i_v = vec_add(f_i_v, tmp_v);       f_i_v = vec_add(f_i_v, tmp_v);
  
       PAIR ( 
      vector4double tmp_xv = vec_splat(tmp_v, 0); 
      vector4double tmp_yv = vec_splat(tmp_v, 1); 
      virial_v0 = vec_madd(tmp_xv, p_ij_v, virial_v0); 
      virial_v1 = vec_madd(tmp_yv, p_ij_v, virial_v1); 
      virial_v2 = vec_madd(tmp_v,  p_ij_v, virial_v2); 
      ) 
  
 #define tmp_x   vec_extract(tmp_v, 0) #define tmp_x   vec_extract(tmp_v, 0)
 #define tmp_y   vec_extract(tmp_v, 1) #define tmp_y   vec_extract(tmp_v, 1)
 #define tmp_z   vec_extract(tmp_v, 2) #define tmp_z   vec_extract(tmp_v, 2)
Line 706
Line 691
       {       {
 #ifndef  A2_QPX #ifndef  A2_QPX
       register BigReal ftmp_x = fullforce_r * p_ij_x;       register BigReal ftmp_x = fullforce_r * p_ij_x;
       PAIR( fullElectVirial_xx += ftmp_x * p_ij_x; ) 
       PAIR( fullElectVirial_xy += ftmp_x * p_ij_y; ) 
       PAIR( fullElectVirial_xz += ftmp_x * p_ij_z; ) 
       fullf_i_x += ftmp_x;       fullf_i_x += ftmp_x;
       fullf_j->x -= ftmp_x;       fullf_j->x -= ftmp_x;
       register BigReal ftmp_y = fullforce_r * p_ij_y;       register BigReal ftmp_y = fullforce_r * p_ij_y;
       PAIR( fullElectVirial_yy += ftmp_y * p_ij_y; ) 
       PAIR( fullElectVirial_yz += ftmp_y * p_ij_z; ) 
       fullf_i_y += ftmp_y;       fullf_i_y += ftmp_y;
       fullf_j->y -= ftmp_y;       fullf_j->y -= ftmp_y;
       register BigReal ftmp_z = fullforce_r * p_ij_z;       register BigReal ftmp_z = fullforce_r * p_ij_z;
       PAIR( fullElectVirial_zz += ftmp_z * p_ij_z; ) 
       fullf_i_z += ftmp_z;       fullf_i_z += ftmp_z;
       fullf_j->z -= ftmp_z;       fullf_j->z -= ftmp_z;
 #else #else
Line 725
Line 704
       vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v);       vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v);
       fullf_i_v = vec_add(fullf_i_v, ftmp_v);       fullf_i_v = vec_add(fullf_i_v, ftmp_v);
              
       PAIR ( 
      vector4double ftmp_xv = vec_splat(ftmp_v, 0); 
      vector4double ftmp_yv = vec_splat(ftmp_v, 1); 
      fullvirial_v0 = vec_madd(ftmp_xv, p_ij_v, fullvirial_v0); 
      fullvirial_v1 = vec_madd(ftmp_yv, p_ij_v, fullvirial_v1); 
      fullvirial_v2 = vec_madd(ftmp_v,  p_ij_v, fullvirial_v2); 
      ) 
   
 #define ftmp_x  vec_extract(ftmp_v, 0) #define ftmp_x  vec_extract(ftmp_v, 0)
 #define ftmp_y  vec_extract(ftmp_v, 1) #define ftmp_y  vec_extract(ftmp_v, 1)
 #define ftmp_z  vec_extract(ftmp_v, 2) #define ftmp_z  vec_extract(ftmp_v, 2)


Legend:
Removed in v.1.52
Changed lines
Added in v.1.53



Made by using version 1.53 of cvs2html