| version 1.52 | version 1.53 |
|---|---|
| | |
| #else | #else |
| #ifdef PRAGMA_SIMD | #ifdef PRAGMA_SIMD |
| #ifndef TABENERGYFLAG | #ifndef TABENERGYFLAG |
| #pragma simd assert SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z) PAIR(reduction(+:virial_xx,virial_xy,virial_xz,virial_yy,virial_yz,virial_zz))) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \ | #pragma simd assert SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z)) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \ |
| FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) PAIR(reduction(+:fullElectVirial_xx,fullElectVirial_xy,fullElectVirial_xz,fullElectVirial_yy,fullElectVirial_yz,fullElectVirial_zz)) ENERGY(reduction(+:fullElectEnergy))) | FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) ENERGY(reduction(+:fullElectEnergy))) |
| #endif | #endif |
| #pragma loop_count avg=100 | #pragma loop_count avg=100 |
| #else // PRAGMA_SIMD | #else // PRAGMA_SIMD |
| | |
| #ifndef NAMD_CUDA | #ifndef NAMD_CUDA |
| #ifndef A2_QPX | #ifndef A2_QPX |
| register BigReal tmp_x = force_r * p_ij_x; | register BigReal tmp_x = force_r * p_ij_x; |
| PAIR( virial_xx += tmp_x * p_ij_x; ) | |
| PAIR( virial_xy += tmp_x * p_ij_y; ) | |
| PAIR( virial_xz += tmp_x * p_ij_z; ) | |
| | |
| f_i_x += tmp_x; | f_i_x += tmp_x; |
| f_j->x -= tmp_x; | f_j->x -= tmp_x; |
| | |
| register BigReal tmp_y = force_r * p_ij_y; | register BigReal tmp_y = force_r * p_ij_y; |
| PAIR( virial_yy += tmp_y * p_ij_y; ) | |
| PAIR( virial_yz += tmp_y * p_ij_z; ) | |
| f_i_y += tmp_y; | f_i_y += tmp_y; |
| f_j->y -= tmp_y; | f_j->y -= tmp_y; |
| | |
| register BigReal tmp_z = force_r * p_ij_z; | register BigReal tmp_z = force_r * p_ij_z; |
| PAIR( virial_zz += tmp_z * p_ij_z; ) | |
| f_i_z += tmp_z; | f_i_z += tmp_z; |
| f_j->z -= tmp_z; | f_j->z -= tmp_z; |
| #else | #else |
| | |
| vector4double tmp_v = vec_mul(force_rv, p_ij_v); | vector4double tmp_v = vec_mul(force_rv, p_ij_v); |
| f_i_v = vec_add(f_i_v, tmp_v); | f_i_v = vec_add(f_i_v, tmp_v); |
| | |
| PAIR ( | |
| vector4double tmp_xv = vec_splat(tmp_v, 0); | |
| vector4double tmp_yv = vec_splat(tmp_v, 1); | |
| virial_v0 = vec_madd(tmp_xv, p_ij_v, virial_v0); | |
| virial_v1 = vec_madd(tmp_yv, p_ij_v, virial_v1); | |
| virial_v2 = vec_madd(tmp_v, p_ij_v, virial_v2); | |
| ) | |
| | |
| #define tmp_x vec_extract(tmp_v, 0) | #define tmp_x vec_extract(tmp_v, 0) |
| #define tmp_y vec_extract(tmp_v, 1) | #define tmp_y vec_extract(tmp_v, 1) |
| #define tmp_z vec_extract(tmp_v, 2) | #define tmp_z vec_extract(tmp_v, 2) |
| | |
| { | { |
| #ifndef A2_QPX | #ifndef A2_QPX |
| register BigReal ftmp_x = fullforce_r * p_ij_x; | register BigReal ftmp_x = fullforce_r * p_ij_x; |
| PAIR( fullElectVirial_xx += ftmp_x * p_ij_x; ) | |
| PAIR( fullElectVirial_xy += ftmp_x * p_ij_y; ) | |
| PAIR( fullElectVirial_xz += ftmp_x * p_ij_z; ) | |
| fullf_i_x += ftmp_x; | fullf_i_x += ftmp_x; |
| fullf_j->x -= ftmp_x; | fullf_j->x -= ftmp_x; |
| register BigReal ftmp_y = fullforce_r * p_ij_y; | register BigReal ftmp_y = fullforce_r * p_ij_y; |
| PAIR( fullElectVirial_yy += ftmp_y * p_ij_y; ) | |
| PAIR( fullElectVirial_yz += ftmp_y * p_ij_z; ) | |
| fullf_i_y += ftmp_y; | fullf_i_y += ftmp_y; |
| fullf_j->y -= ftmp_y; | fullf_j->y -= ftmp_y; |
| register BigReal ftmp_z = fullforce_r * p_ij_z; | register BigReal ftmp_z = fullforce_r * p_ij_z; |
| PAIR( fullElectVirial_zz += ftmp_z * p_ij_z; ) | |
| fullf_i_z += ftmp_z; | fullf_i_z += ftmp_z; |
| fullf_j->z -= ftmp_z; | fullf_j->z -= ftmp_z; |
| #else | #else |
| | |
| vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v); | vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v); |
| fullf_i_v = vec_add(fullf_i_v, ftmp_v); | fullf_i_v = vec_add(fullf_i_v, ftmp_v); |
| | |
| PAIR ( | |
| vector4double ftmp_xv = vec_splat(ftmp_v, 0); | |
| vector4double ftmp_yv = vec_splat(ftmp_v, 1); | |
| fullvirial_v0 = vec_madd(ftmp_xv, p_ij_v, fullvirial_v0); | |
| fullvirial_v1 = vec_madd(ftmp_yv, p_ij_v, fullvirial_v1); | |
| fullvirial_v2 = vec_madd(ftmp_v, p_ij_v, fullvirial_v2); | |
| ) | |
| | |
| #define ftmp_x vec_extract(ftmp_v, 0) | #define ftmp_x vec_extract(ftmp_v, 0) |
| #define ftmp_y vec_extract(ftmp_v, 1) | #define ftmp_y vec_extract(ftmp_v, 1) |
| #define ftmp_z vec_extract(ftmp_v, 2) | #define ftmp_z vec_extract(ftmp_v, 2) |
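
What 1.53 removes here is every per-pair virial multiply-add (the left-column `PAIR( virial_* += … )` and `PAIR( fullElectVirial_* += … )` lines, plus the QPX `vec_madd` blocks) together with the matching `reduction(+:virial_*)` and `reduction(+:fullElectVirial_*)` clauses in the `#pragma simd` assert, leaving only the force and energy reductions in the hot loop. Below is a minimal sketch of the equivalent bookkeeping, assuming the virial is instead reconstructed once, after the pair loop has finalized the forces, from net per-atom forces and positions; `Vec3` and `accumulateVirial` are hypothetical illustration names, not NAMD's actual types.

```cpp
#include <cstddef>

// Hypothetical flat 3-vector; NAMD's real force/position types differ.
struct Vec3 { double x, y, z; };

// Accumulate the six independent components of the symmetric virial tensor
// as sum_i f_i (outer product) r_i. For purely internal pairwise forces
// obeying Newton's third law, this post-loop sum equals the per-pair sum of
// f_ij (outer product) (r_i - r_j) that the 1.52 inner loop accumulated term
// by term (positions must be taken consistently, e.g. unwrapped within the
// patch pair, for the identity to hold under periodic boundaries).
void accumulateVirial(const Vec3 *f, const Vec3 *r, std::size_t n,
                      double v[6]) {
  for (std::size_t i = 0; i < n; ++i) {
    v[0] += f[i].x * r[i].x;  // xx
    v[1] += f[i].x * r[i].y;  // xy
    v[2] += f[i].x * r[i].z;  // xz
    v[3] += f[i].y * r[i].y;  // yy
    v[4] += f[i].y * r[i].z;  // yz
    v[5] += f[i].z * r[i].z;  // zz
  }
}
```

Moving the virial out of the pair loop shrinks the SIMD reduction set to just the force and energy accumulators, which is visible directly in the slimmer 1.53 pragma on the first rows of the diff.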