ComputeNonbondedBase2KNL.h

Go to the documentation of this file.
00001 
00007 #ifndef KNL_MAKE_DEPENDS_INCLUDE
00008 
00009 #if 0
00010 #if (VDW_SWITCH_MODE == VDW_SWITCH_MODE_FORCE) || (VDW_SWITCH_MODE == VDW_SWITCH_MODE_MARTINI)
00011 // REMOVE WHEN r2list NO LONGER NEEDED!
00012 #pragma ivdep
00013 for (k=0; k<npairi; ++k) {      
00014   r2list[k] = r2list_f[k] + r2_delta;
00015 }
00016 #endif
00017 #endif
00018 
00019 EXCLUDED( FAST( foo bar ) )
00020 EXCLUDED( MODIFIED( foo bar ) )
00021 EXCLUDED( NORMAL( foo bar ) )
00022 NORMAL( MODIFIED( foo bar ) )
00023 ALCHPAIR( NOT_ALCHPAIR( foo bar ) )
00024 
00025 EXCLUDED( foo bar )
00026 MODIFIED( foo bar )
00027 ALCHPAIR( foo bar )
00028 TABENERGY( foo bar )
00029 NOFAST( foo bar )
00030 
00031 #pragma ivdep
00032 
00033 #if ( FULL( EXCLUDED( SHORT( 1+ ) ) ) 0 ) 
00034 // avoid bug in Intel 15.0 compiler
00035 #pragma novector
00036 #else
00037 #ifdef PRAGMA_SIMD
00038 #ifndef TABENERGYFLAG
00039 #if __INTEL_COMPILER_BUILD_DATE == 20160721
00040 #warning disabled pragma simd on innner loop due to compiler segfault
00041 #else
00042 #pragma simd assert SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z)) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \
00043              FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) ENERGY(reduction(+:fullElectEnergy)))
00044 #endif
00045 #endif
00046 #pragma loop_count avg=100
00047 #else // PRAGMA_SIMD
00048 #pragma loop_count avg=4
00049 #endif // PRAGMA_SIMD
00050 #endif
00051     for (k=0; k<npairi; ++k) {      
00052 
00053       const float r2 = r2list_f[k];
00054       const float r_1 = 1.f / sqrtf(r2);
00055       const float r_2 = r_1 * r_1;
00056       const float knl_table_r_1 = r_1 > 1.f ? 1.f : r_1;
00057       const float knl_table_f = (KNL_TABLE_SIZE-2) * knl_table_r_1;
00058       const int knl_table_i = knl_table_f;
00059       const float knl_diff = knl_table_f - knl_table_i;
00060 
00061       const int j = pairlisti[k];
00062       //register const CompAtom *p_j = p_1 + j;
00063 #define p_j (p_1+j)
00064 #define pFlt_j (pFlt_1+j)
00065 
00066 #if (VDW_SWITCH_MODE == VDW_SWITCH_MODE_FORCE) || (VDW_SWITCH_MODE == VDW_SWITCH_MODE_MARTINI)
00067 #if 0
00068       int table_i = (r2iilist[2*k] >> 14) + r2_delta_expc;  // table_i >= 0 
00069       
00070       float diffa = r2list[k] - r2_table[table_i];
00071       //const BigReal* const table_four_i = table_four + 16*table_i;
00072 #define table_four_i (table_four + 16*table_i)
00073 #endif
00074 #endif
00075 
00076       //const LJTable::TableEntry * lj_pars = 
00077       //        lj_row + 2 * p_j->vdwType MODIFIED(+ 1);
00078       const int lj_index = 2 * pFlt_j->vdwType MODIFIED(+ 1);
00079 #define lj_pars (lj_row+lj_index)
00080       
00081 #if ( SHORT( 1+ ) 0 ) 
00082       //Force *f_j = f_1 + j;
00083 #define f_j (f_1+j)
00084 #endif
00085         
00086 #if ( FULL( 1+ ) 0 )
00087       //Force *fullf_j = fullf_1 + j;
00088 #define fullf_j (fullf_1+j)
00089 #endif
00090 
00091       float kqq = kq_i_f * p_j->charge;
00092 
00093       LES( float lambda_pair = lambda_table_i[p_j->partition]; )
00094 
00095       register const float p_ij_x = xlist[k];
00096       register const float p_ij_y = ylist[k];
00097       register const float p_ij_z = zlist[k];
00098 
00099       const float A = scaling_f * lj_pars->A;
00100       const float B = scaling_f * lj_pars->B;
00101 
00102 #if VDW_SWITCH_MODE == VDW_SWITCH_MODE_FORCE
00103       { int vdw_switch_mode_force; }  // for preprocessor debugging only
00104       float vdw_b = 0.f;
00105       {
00106         const float r_6 = r_2 * r_2 * r_2;
00107         float vdwa_energy, vdwb_energy, vdwa_gradient, vdwb_gradient;
00108         // from Steinbach & Brooks, JCC 15, pgs 667-683, 1994, eqns 10-13
00109         if ( r2 > switchOn2_f ) {
00110           const float tmpa = r_6 - cutoff_6_f;
00111           vdwa_energy = k_vdwa_f * tmpa * tmpa;
00112           const float tmpb = r_1 * r_2 - cutoff_3_f;
00113           vdwb_energy = k_vdwb_f * tmpb * tmpb;
00114           vdwa_gradient = -6.f * k_vdwa_f * tmpa * r_2 * r_6;
00115           vdwb_gradient = -3.f * k_vdwb_f * tmpb * r_2 * r_2 * r_1;
00116         } else {
00117           const float r_12 = r_6 * r_6;
00118           vdwa_energy = r_12 + v_vdwa_f;
00119           vdwb_energy = r_6 + v_vdwb_f;
00120           vdwa_gradient = -6.f * r_2 * r_12;
00121           vdwb_gradient = -3.f * r_2 * r_6;
00122         }
00123         vdw_b = -2.f * ( A * vdwa_gradient - B * vdwb_gradient );
00124         ENERGY(
00125           vdwEnergy += A * vdwa_energy - B * vdwb_energy;
00126         )
00127       }
00128 #elif VDW_SWITCH_MODE == VDW_SWITCH_MODE_MARTINI
00129       { int vdw_switch_mode_martini; }  // for preprocessor debugging only
00130       float vdw_b = 0.f;
00131       {
00132         const float r = r2 * r_1;
00133         const float r12 = (r-switchOn_f)*(r-switchOn_f);
00134         const float r13 = (r-switchOn_f)*(r-switchOn_f)*(r-switchOn_f);
00135 
00136         ENERGY(
00137           const float LJshifttempA = -(1.f/3.f)*A12_f*r13 - (1.f/4.f)*B12_f*r12*r12 - C12_f;
00138           const float LJshifttempB = -(1.f/3.f)*A6_f*r13 - (1.f/4.f)*B6_f*r12*r12 - C6_f;
00139           const float shiftValA = ( r > switchOn_f ? LJshifttempA : -C12_f);
00140           const float shiftValB = ( r > switchOn_f ? LJshifttempB : -C6_f);
00141         )
00142 
00143         const float LJdshifttempA = -A12_f*r12 - B12_f*r13;
00144         const float LJdshifttempB = -A6_f*r12 - B6_f*r13;
00145         const float dshiftValA = ( r > switchOn_f ? LJdshifttempA*0.5f*r_1 : 0 );
00146         const float dshiftValB = ( r > switchOn_f ? LJdshifttempB*0.5f*r_1 : 0 );
00147 
00148         const float r_6 = r_2 * r_2 * r_2;
00149         const float r_12 = r_6 * r_6;
00150 
00151         ENERGY(
00152           const float vdwa_energy = r_12 + shiftValA;
00153           const float vdwb_energy = r_6 + shiftValB;
00154         )
00155 
00156         const float vdwa_gradient = -6.f * r_2 * r_12 + dshiftValA ;
00157         const float vdwb_gradient = -3.f * r_2 * r_6 + dshiftValB;
00158 
00159         vdw_b = -2.f * ( A * vdwa_gradient - B * vdwb_gradient );
00160         ENERGY(
00161           vdwEnergy += A * vdwa_energy - B * vdwb_energy;
00162         )
00163       }
00164 #elif VDW_SWITCH_MODE == VDW_SWITCH_MODE_ENERGY
00165       { int vdw_switch_mode_energy; }  // for preprocessor debugging only
00166       float vdw_b = 0.f;
00167       {
00168         const float r_6 = r_2 * r_2 * r_2;
00169         const float r_12 = r_6 * r_6;
00170         const float c2 = cutoff2_f-r2;
00171         const float c4 = c2*(c3_f-2.f*c2);
00172         const float switchVal =         // used for Lennard-Jones
00173                         ( r2 > switchOn2_f ? c2*c4*c1_f : 1.f );
00174         const float dSwitchVal =        // d switchVal / d r2
00175                         ( r2 > switchOn2_f ? 2.f*c1_f*(c2*c2-c4) : 0.f );
00176         const float vdwa_gradient = ( dSwitchVal - 6.f * switchVal * r_2 ) * r_12;
00177         const float vdwb_gradient = ( dSwitchVal - 3.f * switchVal * r_2 ) * r_6;
00178         vdw_b = -2.f * ( A * vdwa_gradient - B * vdwb_gradient );
00179         ENERGY(
00180           vdwEnergy += switchVal * ( A * r_12 - B * r_6 );
00181         )
00182       }
00183 #else
00184 #error VDW_SWITCH_MODE not recognized
00185 #endif  // VDW_SWITCH_MODE
00186 
00187 #if ( SHORT(1+) 0 ) // Short-range electrostatics
00188 
00189       NORMAL(
00190       float fast_b = kqq * ( knl_fast_grad_table[knl_table_i] * (1.f-knl_diff) +
00191                              knl_fast_grad_table[knl_table_i+1] * knl_diff );
00192       )
00193 
00194       {
00195       ENERGY(
00196         float fast_val = kqq * ( knl_fast_ener_table[knl_table_i] * (1.f-knl_diff) +
00197                                  knl_fast_ener_table[knl_table_i+1] * knl_diff );
00198         electEnergy -=  LAM(lambda_pair *) fast_val;
00199       ) //ENERGY
00200       }
00201 
00202       // Combined short-range electrostatics and VdW force:
00203         fast_b += vdw_b;
00204 
00205       float fast_dir = fast_b;
00206 
00207       float force_r =  LAM(lambda_pair *) fast_dir;
00208           
00209       BigReal tmp_x = force_r * p_ij_x;
00210       f_i_x += tmp_x;
00211       f_j->x -= tmp_x;
00212 
00213       BigReal tmp_y = force_r * p_ij_y;
00214       f_i_y += tmp_y;
00215       f_j->y -= tmp_y;
00216       
00217       BigReal tmp_z = force_r * p_ij_z;
00218       f_i_z += tmp_z;
00219       f_j->z -= tmp_z;
00220 
00221 #endif // SHORT
00222 
00223 #if ( FULL( 1+ ) 0 )
00224   #if ( SHORT( 1+ ) 0 )
00225       float slow_b = kqq * ( knl_scor_grad_table[knl_table_i] * (1.f-knl_diff) +
00226                              knl_scor_grad_table[knl_table_i+1] * knl_diff );
00227       ENERGY(
00228         float slow_val = kqq * ( knl_scor_ener_table[knl_table_i] * (1.f-knl_diff) +
00229                                  knl_scor_ener_table[knl_table_i+1] * knl_diff );
00230       )
00231   #else
00232       float slow_b = kqq * ( knl_corr_grad_table[knl_table_i] * (1.f-knl_diff) +
00233                              knl_corr_grad_table[knl_table_i+1] * knl_diff );
00234       ENERGY(
00235         float slow_val = kqq * ( knl_corr_ener_table[knl_table_i] * (1.f-knl_diff) +
00236                                  knl_corr_ener_table[knl_table_i+1] * knl_diff );
00237       )
00238   #endif
00239 
00240       ENERGY(
00241         fullElectEnergy -= LAM(lambda_pair *) slow_val;
00242       ) // ENERGY
00243           
00244 #if     (NOSHORT(1+) 0)
00245         slow_b += vdw_b;
00246 #endif
00247 
00248       register float slow_dir = slow_b;
00249       float fullforce_r = slow_dir LAM(* lambda_pair);
00250           
00251       {
00252       BigReal ftmp_x = fullforce_r * p_ij_x;
00253       fullf_i_x += ftmp_x;
00254       fullf_j->x -= ftmp_x;
00255       BigReal ftmp_y = fullforce_r * p_ij_y;
00256       fullf_i_y += ftmp_y;
00257       fullf_j->y -= ftmp_y;
00258       BigReal ftmp_z = fullforce_r * p_ij_z;
00259       fullf_i_z += ftmp_z;
00260       fullf_j->z -= ftmp_z;
00261       }
00262 #endif //FULL
00263 
00264    } // for pairlist
00265 
00266 #undef p_j
00267 #undef lj_pars
00268 #undef table_four_i
00269 #undef slow_i
00270 #undef f_j
00271 #undef fullf_j
00272 
00273 #endif // KNL_MAKE_DEPENDS_INCLUDE
00274 

Generated on Tue Nov 21 01:17:11 2017 for NAMD by  doxygen 1.4.7