#if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
#include <emmintrin.h>
#if defined(__INTEL_COMPILER)
#define __align(X) __declspec(align(X) )
#elif defined(__GNUC__) || defined(__PGI)
#define __align(X) __attribute__((aligned(X) ))
#define __align(X) __declspec(align(X) )
#define UNLIKELY(X) __builtin_expect((X), 0)
#define UNLIKELY(X) (X)

#ifdef DEFINITION // (
#if NAMD_ComputeNonbonded_SortAtoms != 0
#define NAME CLASSNAME(calc)
#define CLASS ComputeNonbondedPair
#define CLASSNAME(X) ENERGYNAME( X ## _pair )
#define CLASS ComputeNonbondedSelf
#define CLASSNAME(X) ENERGYNAME( X ## _self )
#define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
#define ENERGYNAME(X) SLOWONLYNAME( X )
#define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
#define SLOWONLYNAME(X) MERGEELECTNAME( X )
#undef MERGEELECTNAME
#define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
#define MERGEELECTNAME(X) FULLELECTNAME( X )
#define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect )
#define FULLELECTNAME(X) TABENERGYNAME( X )
#define TABENERGYNAME(X) FEPNAME( X ## _tabener )
#define TABENERGY(X) X
#define NOTABENERGY(X)
#define TABENERGYNAME(X) FEPNAME( X )
#define NOTABENERGY(X) X
#define FEPNAME(X) LAST( X )
#define NOT_ALCHPAIR(X) X
#define FEPNAME(X) LAST( X ## _fep )
#define FEPNAME(X) LAST( X ## _ti )
#define FEPNAME(X) LAST( X ## _les )
#define FEPNAME(X) LAST( X ## _int )
#define FEPNAME(X) LAST( X ## _pprof )
#define FEPNAME(X) LAST( X ## _go )
#define KNL_MAKE_DEPENDS_INCLUDE
#undef KNL_MAKE_DEPENDS_INCLUDE

#if ( TABENERGY(1+) FEP(1+) TI(1+) INT(1+) LES(1+) GO(1+) PPROF(1+) NOFAST(1+) 0 )
#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
#define COMPONENT_DOTPRODUCT(A,B) ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))

  int exclChecksum = 0;
  BigReal goEnergyNonnative = 0; ) )

  const float offset_x_f = params->offset_f.x;
  const float offset_y_f = params->offset_f.y;
  const float offset_z_f = params->offset_f.z;

  register const BigReal plcutoff2 = \
  register const BigReal groupplcutoff2 = \

  const float scaling_f = scaling;
  const float c1_f = c1;
  const float c3_f = c3;

  const BigReal switchfactor = 1./(diff*diff*diff);
  BigReal lambdaDown = 1 - alchLambda;

  int pswitchTable[5*5];
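  // pswitchTable maps the alchemical partition pair (ip, jp) to a pairlist
  // class used later in this file: 0 = unmodified, 1-4 = the alchemically
  // scaled lists A1-A4, and 99 flags partition combinations that are not
  // permitted to interact.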
  for ( int ip=0; ip<5; ++ip ) {
    for ( int jp=0; jp<5; ++jp ) {
      pswitchTable[ip+5*jp] = 0;
      if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+5*jp] = 1;
      if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+5*jp] = 2;
      if ((ip==3 && jp==0) || (ip==0 && jp==3)) pswitchTable[ip+5*jp] = 3;
      if ((ip==4 && jp==0) || (ip==0 && jp==4)) pswitchTable[ip+5*jp] = 4;

      if (ip && jp && (abs(ip - jp) != 2)) pswitchTable[ip+5*jp] = 99;

      if ((ip == 1 && jp == 1) || (ip == 1 && jp == 3) || (ip == 3 && jp == 1)) pswitchTable[ip+5*jp] = 1;
      if ((ip == 2 && jp == 2) || (ip == 2 && jp == 4) || (ip == 4 && jp == 2)) pswitchTable[ip+5*jp] = 2;
      if (ip == 3 && jp == 3) pswitchTable[ip+5*jp] = 3;
      if (ip == 4 && jp == 4) pswitchTable[ip+5*jp] = 4;

      if (ip == 1 && jp == 1) pswitchTable[ip+5*jp] = 0;
      if (ip == 2 && jp == 2) pswitchTable[ip+5*jp] = 0;
  BigReal fullElectEnergy_ti_2 = 0; )

  const int i_upper = params->numAtoms[0];
  register const int j_upper = params->numAtoms[1];

  KNL( const CompAtomFlt *pFlt_0 = params->pFlt[0]; )
  KNL( const CompAtomFlt *pFlt_1 = params->pFlt[1]; )

  char * excl_flags_buff = 0;
  const int32 * full_excl = 0;
  const int32 * mod_excl = 0;

  plint *pairlistn_save;  int npairn;
  plint *pairlistx_save;  int npairx;
  plint *pairlistm_save;  int npairm;

  plint *pairlistnA1_save;  int npairnA1;
  plint *pairlistxA1_save;  int npairxA1;
  plint *pairlistmA1_save;  int npairmA1;
  plint *pairlistnA2_save;  int npairnA2;
  plint *pairlistxA2_save;  int npairxA2;
  plint *pairlistmA2_save;  int npairmA2;
  plint *pairlistnA3_save;  int npairnA3;
  plint *pairlistxA3_save;  int npairxA3;
  plint *pairlistmA3_save;  int npairmA3;
  plint *pairlistnA4_save;  int npairnA4;
  plint *pairlistxA4_save;  int npairxA4;
  plint *pairlistmA4_save;  int npairmA4;

  int arraysize = j_upper+5;

  union { double f; int32 i[2]; } byte_order_test;
  byte_order_test.f = 1.0;
  int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
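  // byte_order_test distinguishes endianness at run time: storing 1.0 leaves
  // the nonzero exponent bits in i[0] on big-endian hosts and in i[1] on
  // little-endian hosts, so r2iilist ends up pointing at the high (exponent)
  // 32-bit word of each double in r2list.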
  if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
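  // Optional atom sorting (pair computes only): project each nonbonded group
  // onto a sort direction and keep the projections in SortEntry arrays, so
  // the group search below can be limited to a window of width
  // atomSort_windowRadius = sqrt(groupplcutoff2) around atom i's projection.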
#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )

  register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
  register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;

  int p_0_sortValues_len = 0;
  int p_1_sortValues_len = 0;
  int p_1_sortValues_fixg_len = 0;

  BigReal atomSort_windowRadius = sqrt(groupplcutoff2);

  if (savePairlists || !usePairlists) {

    register int nbgs = p_1->nonbondedGroupSize;
    register BigReal p_x = p_1->position.x;
    register BigReal p_y = p_1->position.y;
    register BigReal p_z = p_1->position.z;
    register int index = 0;

    for ( register int j = nbgs; j < j_upper; j += nbgs) {
      register const CompAtom* p_j_next = p_1 + j;

      register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
      p_1_sortValStorePtr->index = index;
      p_1_sortValStorePtr->sortValue = sortVal;
      p_1_sortValues_len++;

      register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
      p_1_sortValStorePtr->index = index;
      p_1_sortValStorePtr->sortValue = sortVal;
      p_1_sortValues_len++;

#if 0 // Selection Sort
#elif 0 // Bubble Sort
#if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0

    register int nbgs = p_0->nonbondedGroupSize;
    register BigReal p_x = p_0->position.x + offset_x;
    register BigReal p_y = p_0->position.y + offset_y;
    register BigReal p_z = p_0->position.z + offset_z;
    register int index = 0;

    for ( register int i = nbgs; i < i_upper; i += nbgs) {
      register const CompAtom* p_i_next = p_0 + i;

      register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
      *p_0_sortValStorePtr = sortVal;

      register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
      *p_0_sortValStorePtr = sortVal;

    p_0_sortValues_len = i_upper;

#endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
#if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )

  if ( savePairlists || ! usePairlists ) {

#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )

    register int fixg = 0;
    for ( int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
      register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
      register int p_1_index = p_1_sortEntry->index;
      if (!pExt_1[p_1_index].groupFixed) {
        register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
        p_1_sortEntry_fixg->index = p_1_sortEntry->index;
        p_1_sortValues_fixg_len++;
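    // Build grouplist (index of the first atom of each nonbonded group in p_1)
    // and fixglist (the subset of those groups that are not entirely fixed).
    // The last entry of each list is duplicated as a sentinel so the
    // two-at-a-time loops below can safely read one element past the end.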
    for ( j = 0; j < j_upper; ++j ) {
      if ( p_1[j].nonbondedGroupSize ) {
    if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];

    for ( g = 0; g < g_upper; ++g ) {
      if ( ! pExt_1[j].groupFixed ) {
        fixglist[fixg++] = j;
    if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
#endif // NAMD_ComputeNonbonded_SortAtoms != 0

    NAMD_bug("pairlist i_upper mismatch!");

  int pairlistoffset=0;

#if ( SHORT( FAST( 1+ ) ) 0 )
#define fullf_1 fullf_0

  int groupCount = params->minPart;

  if ( savePairlists || ! usePairlists ) {

  PAIR( for ( ; i < (i_upper); ) )
  SELF( for ( i=0; i < (i_upper- 1); i++ ) )

    KNL( const CompAtomFlt &pFlt_i = pFlt_0[i]; )

    PAIR( if (savePairlists || ! usePairlists){ )

    __dcbt((void *) &(p_0[i]));

    groupCount = maxPart;

    register const BigReal p_i_x_f = pFlt_i.position.x + offset_x_f;
    register const BigReal p_i_y_f = pFlt_i.position.y + offset_y_f;
    register const BigReal p_i_z_f = pFlt_i.position.z + offset_z_f;

    if ( savePairlists || ! usePairlists ) {

#ifdef MEM_OPT_VERSION
      const int excl_min = pExt_i.id + exclcheck->min;
      const int excl_max = pExt_i.id + exclcheck->max;
      const int excl_min = exclcheck->min;
      const int excl_max = exclcheck->max;

      const char * excl_flags_var;
      if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;

#ifndef MEM_OPT_VERSION
#define REZERO_EXCL_FLAGS_BUFF if ( excl_flags_buff ) { \
  nl = full_excl[0] + 1; \
  for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0; \
  nl = mod_excl[0] + 1; \
  for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0; \

        int oldsize = wa.size();
        memset( (void*) &wa[oldsize], 0, wa.size() - oldsize);
        excl_flags_buff = &wa[0];

        nl = full_excl[0] + 1;
        for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
        nl = mod_excl[0] + 1;
        for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
        excl_flags_var = excl_flags_buff;

      const char * const excl_flags = excl_flags_var;

      const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );

      while ( g_lower < g_upper &&
              grouplist[g_lower] < j_hgroup ) ++g_lower;
      while ( fixg_lower < fixg_upper &&
              fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;

      for ( j = i + 1; j < j_hgroup; ++j ) {
        pairlist[pairlistindex++] = j;

      register int *pli = pairlist + pairlistindex;
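      // With sorted atoms, binary-search the SortEntry array for the first
      // group whose sort value lies beyond p_i_sortValue + atomSort_windowRadius;
      // only entries below that bound need an explicit distance test.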
#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )

      register BigReal p_i_sortValue = p_0_sortValues[i];
      const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
      register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );

      register int lower = 0;
      register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
      while ((upper - lower) > 1) {
        register int j = ((lower + upper) >> 1);
        if (jSortVal < p_i_sortValue_plus_windowRadius) {
      const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;

      register int *gli = goodglist;
      const int *glist = ( groupfixed ? fixglist : grouplist );
      SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
      const int gu = ( groupfixed ? fixg_upper : g_upper );
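      // Group-level cutoff test: collect into goodglist every candidate group
      // whose representative atom lies within groupplcutoff2 of atom i.  The
      // SSE2 path below tests two groups per iteration; a scalar two-way
      // unrolled loop and a SIMD remainder loop cover the other cases.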
#if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)

#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
      register SortEntry* sortEntry0 = sortValues + g;
      register SortEntry* sortEntry1 = sortValues + g + 1;
      register int jprev0 = sortEntry0->index;
      register int jprev1 = sortEntry1->index;
      register int jprev0 = glist[g    ];
      register int jprev1 = glist[g + 1];

      __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
      __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
      __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);

      const __m128d P_I_X = _mm_set1_pd(p_i_x);
      const __m128d P_I_Y = _mm_set1_pd(p_i_y);
      const __m128d P_I_Z = _mm_set1_pd(p_i_z);

      for ( ; g < gu - 2; g +=2 ) {

        __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
        __m128d R2_01 = _mm_mul_pd(T_01, T_01);
        T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
        R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
        T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
        R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));

#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
        sortEntry0 = sortValues + g;
        sortEntry1 = sortValues + g + 1;
        jprev0 = sortEntry0->index;
        jprev1 = sortEntry1->index;

        PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
        PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
        PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);

        __align(16) double r2_01[2];
        _mm_store_pd(r2_01, R2_01);
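        // Branchless compaction: test0/test1 are 0 or 1, so writing j0 at hu
        // and j1 at hu + test0, then advancing hu by test0 + test1, keeps only
        // the entries that passed the cutoff without branching.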
        bool test0 = ( r2_01[0] < groupplcutoff2 );
        bool test1 = ( r2_01[1] < groupplcutoff2 );

        goodglist [ hu         ] = j0;
        goodglist [ hu + test0 ] = j1;
        hu += test0 + test1;

#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
      register SortEntry* sortEntry0 = sortValues + g;
      register SortEntry* sortEntry1 = sortValues + g + 1;
      register int jprev0 = sortEntry0->index;
      register int jprev1 = sortEntry1->index;
      register int jprev0 = glist[g    ];
      register int jprev1 = glist[g + 1];

      register BigReal pj_x_0, pj_x_1;
      register BigReal pj_y_0, pj_y_1;
      register BigReal pj_z_0, pj_z_1;
      register BigReal t_0, t_1, r2_0, r2_1;

      pj_x_0 = p_1[jprev0].position.x;
      pj_x_1 = p_1[jprev1].position.x;
      pj_y_0 = p_1[jprev0].position.y;
      pj_y_1 = p_1[jprev1].position.y;
      pj_z_0 = p_1[jprev0].position.z;
      pj_z_1 = p_1[jprev1].position.z;

      for ( ; g < gu - 2; g +=2 ) {

        t_0 = p_i_x - pj_x_0;
        t_1 = p_i_x - pj_x_1;
        t_0 = p_i_y - pj_y_0;
        t_1 = p_i_y - pj_y_1;
        t_0 = p_i_z - pj_z_0;
        t_1 = p_i_z - pj_z_1;

#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
        sortEntry0 = sortValues + g;
        sortEntry1 = sortValues + g + 1;
        jprev0 = sortEntry0->index;
        jprev1 = sortEntry1->index;
        jprev1 = glist[g+1];

        pj_x_0 = p_1[jprev0].position.x;
        pj_x_1 = p_1[jprev1].position.x;
        pj_y_0 = p_1[jprev0].position.y;
        pj_y_1 = p_1[jprev1].position.y;
        pj_z_0 = p_1[jprev0].position.z;
        pj_z_1 = p_1[jprev1].position.z;

        bool test0 = ( r2_0 < groupplcutoff2 );
        bool test1 = ( r2_1 < groupplcutoff2 );

        goodglist [ hu         ] = j0;
        goodglist [ hu + test0 ] = j1;
        hu += test0 + test1;
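      // Remainder loop over the leftover groups, vectorized with OpenMP SIMD;
      // the ordered-simd monotonic(hu:1) clause preserves the in-order append
      // into goodglist.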
#pragma omp simd simdlen(16)
      for (g = gp; g < gu; g++) {

#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
        register SortEntry* sortEntry = sortValues + g;
        register int j = sortEntry->index;

        BigReal p_j_x = p_1[j].position.x;
        BigReal p_j_y = p_1[j].position.y;
        BigReal p_j_z = p_1[j].position.z;

#pragma omp ordered simd monotonic(hu:1)
        if ( r2 <= groupplcutoff2 )
          goodglist[hu ++] = j;

      for ( int h=0; h<hu; ++h ) {
        int j = goodglist[h];
        int nbgs = p_1[j].nonbondedGroupSize;

      pairlistindex = pli - pairlist;

      if ( pairlistindex ) {
        pairlist[pairlistindex] = pairlist[pairlistindex-1];
      else pairlistoffset++;
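    // Atom-level pairlist construction: expand the surviving groups into
    // per-atom candidates, route them through the QM, alchemical (FEP/TI),
    // and fixed-atom filters, and keep only atoms within plcutoff2.  Atoms
    // whose id falls inside the exclusion-check range go to pairlist2 for the
    // exclusion lookup further below.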
    const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );

    register int *pli = pairlist2;
    register plint *plin = pairlistn;

    for ( int k=pairlistoffset; k<pairlistindex; k++) {

      if (qmGroup_i > 0) {
        if (qmGroup_i == qmGroup_j) {

    int npair2_int = pli - pairlist2;
    for ( int k=0; k<npair2_int; k++) {

      BigReal p_j_x = p_1[j].position.x;
      BigReal p_j_y = p_1[j].position.y;
      BigReal p_j_z = p_1[j].position.z;

      if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
        int atom2 = pExt_1[j].id;
        if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;

    if (ifep_type != 1) { PAIR(++i;) continue; }
    for ( int k=pairlistoffset; k<pairlistindex; k++) {
      const int jfep_type = p_1[j].partition;
      if (jfep_type == 1) {

    if (ifep_type != 1 && ifep_type != 2) { PAIR(++i;) continue; }
    for ( int k=pairlistoffset; k<pairlistindex; k++) {
      const int jfep_type = p_1[j].partition;
      if (ifep_type + jfep_type == 3) {

    int npair2_int = pli - pairlist2;
    for ( int k=0; k<npair2_int; k++) {

      BigReal p_j_x = p_1[j].position.x;
      BigReal p_j_y = p_1[j].position.y;
      BigReal p_j_z = p_1[j].position.z;

      if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
        int atom2 = pExt_1[j].id;
        if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;

    for ( int k=pairlistoffset; k<pairlistindex; k++) {

      BigReal p_j_x = p_1[j].position.x;
      BigReal p_j_y = p_1[j].position.y;
      BigReal p_j_z = p_1[j].position.z;

      if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
        int atom2 = pExt_1[j].id;
        if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;

    int k = pairlistoffset;
    int ku = pairlistindex;

#if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)

    register int jprev0 = pairlist [k    ];
    register int jprev1 = pairlist [k + 1];

    __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
    __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
    __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);

    const __m128d P_I_X = _mm_set1_pd(p_i_x);
    const __m128d P_I_Y = _mm_set1_pd(p_i_y);
    const __m128d P_I_Z = _mm_set1_pd(p_i_z);

    int atom2_0 = pExt_1[jprev0].id;
    int atom2_1 = pExt_1[jprev1].id;

    for ( ; k < ku - 2; k +=2 ) {

      __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
      __m128d R2_01 = _mm_mul_pd(T_01, T_01);
      T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
      R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
      T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
      R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));

      jprev0 = pairlist[k];
      jprev1 = pairlist[k+1];

      PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
      PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
      PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);

      __align(16) double r2_01[2];
      _mm_store_pd(r2_01, R2_01);

      if (r2_01[0] <= plcutoff2) {
        if ( UNLIKELY( atom2_0 >= excl_min && atom2_0 <= excl_max ))
      atom2_0 = pExt_1[jprev0].id;

      if (r2_01[1] <= plcutoff2) {
        if ( UNLIKELY( atom2_1 >= excl_min && atom2_1 <= excl_max ))
      atom2_1 = pExt_1[jprev1].id;

    register int jprev0 = pairlist [k];
    register int jprev1 = pairlist [k + 1];

    register BigReal pj_x_0, pj_x_1;
    register BigReal pj_y_0, pj_y_1;
    register BigReal pj_z_0, pj_z_1;
    register BigReal t_0, t_1, r2_0, r2_1;

    pj_x_0 = p_1[jprev0].position.x;
    pj_x_1 = p_1[jprev1].position.x;
    pj_y_0 = p_1[jprev0].position.y;
    pj_y_1 = p_1[jprev1].position.y;
    pj_z_0 = p_1[jprev0].position.z;
    pj_z_1 = p_1[jprev1].position.z;

    int atom2_0 = pExt_1[jprev0].id;
    int atom2_1 = pExt_1[jprev1].id;

    for ( ; k < ku - 2; k +=2 ) {

      t_0 = p_i_x - pj_x_0;
      t_1 = p_i_x - pj_x_1;
      t_0 = p_i_y - pj_y_0;
      t_1 = p_i_y - pj_y_1;
      t_0 = p_i_z - pj_z_0;
      t_1 = p_i_z - pj_z_1;

      jprev0 = pairlist[k];
      jprev1 = pairlist[k+1];

      pj_x_0 = p_1[jprev0].position.x;
      pj_x_1 = p_1[jprev1].position.x;
      pj_y_0 = p_1[jprev0].position.y;
      pj_y_1 = p_1[jprev1].position.y;
      pj_z_0 = p_1[jprev0].position.z;
      pj_z_1 = p_1[jprev1].position.z;

      if (r2_0 <= plcutoff2) {
        if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
      atom2_0 = pExt_1[jprev0].id;

      if (r2_1 <= plcutoff2) {
        if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
      atom2_1 = pExt_1[jprev1].id;

#pragma omp simd simdlen(16)
    for (k = kp; k < ku; k++) {
      int j = pairlist[k];
      int atom2 = pExt_1[j].id;

      BigReal p_j_x = p_1[j].position.x;
      BigReal p_j_y = p_1[j].position.y;
      BigReal p_j_z = p_1[j].position.z;

#pragma omp ordered simd monotonic(plin:1, pli:1)
      if (r2 <= plcutoff2) {
        if ( atom2 >= excl_min && atom2 <= excl_max )

    if ( plin == pairlistn && pli == pairlist2 ) {
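    // Split the candidate pairs by exclusion status -- normal (n), modified
    // 1-4 (m), and fully excluded (x) -- and, when alchemy is active, by
    // alchemical class A1-A4.  The switch key excl_flag + 3*pswitch encodes
    // both properties in a single value.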
    plint *plix = pairlistx;
    plint *plim = pairlistm;

    plint *plinA1 = pairlistnA1;
    plint *plixA1 = pairlistxA1;
    plint *plimA1 = pairlistmA1;
    plint *plinA2 = pairlistnA2;
    plint *plixA2 = pairlistxA2;
    plint *plimA2 = pairlistmA2;
    plint *plinA3 = pairlistnA3;
    plint *plixA3 = pairlistxA3;
    plint *plimA3 = pairlistmA3;
    plint *plinA4 = pairlistnA4;
    plint *plixA4 = pairlistxA4;
    plint *plimA4 = pairlistmA4;

    int unsortedNpairn = plin - pairlistn;
    for ( k=0; k<unsortedNpairn; ++k ) {
      int j = pairlistn[k];
      switch(pswitchTable[p_i_partition + 5*(p_1[j].partition)]) {
        case 0: *(plin++) = j;   break;
        case 1: *(plinA1++) = j; break;
        case 2: *(plinA2++) = j; break;
        case 3: *(plinA3++) = j; break;
        case 4: *(plinA4++) = j; break;

    int npair2 = pli - pairlist2;
    for (k=0; k < npair2; ++k ) {
      int j = pairlist2[k];
      int atom2 = pExt_1[j].id;
      int excl_flag = excl_flags[atom2];
      ALCH( int pswitch = pswitchTable[p_i_partition + 5*(p_1[j].partition)]; )
      switch ( excl_flag ALCH( + 3 * pswitch ) ) {
        case 0: *(plin++) = j;   break;
        case 1: *(plix++) = j;   break;
        case 2: *(plim++) = j;   break;
        case 3: *(plinA1++) = j; break;
        case 6: *(plinA2++) = j; break;
        case 9: *(plinA3++) = j; break;
        case 12: *(plinA4++) = j; break;
        case 5: *(plimA1++) = j; break;
        case 8: *(plimA2++) = j; break;
        case 11: *(plimA3++) = j; break;
        case 14: *(plimA4++) = j; break;
        case 4: *(plixA1++) = j; break;
        case 7: *(plixA2++) = j; break;
        case 10: *(plixA3++) = j; break;
        case 13: *(plixA4++) = j; break;

    npairn = plin - pairlistn;
    pairlistn_save = pairlistn;
    pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
    pairlists.newsize(npairn + 1);

    npairx = plix - pairlistx;
    pairlistx_save = pairlists.newlist();
    for ( k=0; k<npairx; ++k ) {
      pairlistx_save[k] = pairlistx[k];
    pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
    pairlists.newsize(npairx + 1);

    npairm = plim - pairlistm;
    pairlistm_save = pairlists.newlist();
    for ( k=0; k<npairm; ++k ) {
      pairlistm_save[k] = pairlistm[k];
    pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
    pairlists.newsize(npairm + 1);
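    // PAIRLISTFROMARRAY copies each temporary pairlist into the persistent
    // pairlists container and appends one sentinel entry (a copy of the last
    // index, or -1 when the list is empty), matching the manual handling of
    // the n/x/m lists above.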
#define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
  (NPAIRS) = (PL2) - (PL1); \
  (PLSAVE) = pairlists.newlist(); \
  for ( k=0; k<(NPAIRS); ++k ) { \
    (PLSAVE)[k] = (PL1)[k]; \
  (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
  pairlists.newsize((NPAIRS) + 1); \

    PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
    PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
    PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
    PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
    PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
    PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
    PAIRLISTFROMARRAY(npairnA3,pairlistnA3,plinA3,pairlistnA3_save);
    PAIRLISTFROMARRAY(npairxA3,pairlistxA3,plixA3,pairlistxA3_save);
    PAIRLISTFROMARRAY(npairmA3,pairlistmA3,plimA3,pairlistmA3_save);
    PAIRLISTFROMARRAY(npairnA4,pairlistnA4,plinA4,pairlistnA4_save);
    PAIRLISTFROMARRAY(npairxA4,pairlistxA4,plixA4,pairlistxA4_save);
    PAIRLISTFROMARRAY(npairmA4,pairlistmA4,plimA4,pairlistmA4_save);
#undef PAIRLISTFROMARRAY

  if ( ! savePairlists ) pairlists.reset();
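  // When reusing saved pairlists, read each list back with nextlist(); the
  // stored length includes the sentinel entry, hence the --npair* corrections.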
  pairlists.nextlist(&pairlistn_save,&npairn);  --npairn;
  pairlists.nextlist(&pairlistx_save,&npairx);  --npairx;
  pairlists.nextlist(&pairlistm_save,&npairm);  --npairm;

  pairlists.nextlist(&pairlistnA1_save,&npairnA1);  --npairnA1;
  pairlists.nextlist(&pairlistxA1_save,&npairxA1);  --npairxA1;
  pairlists.nextlist(&pairlistmA1_save,&npairmA1);  --npairmA1;
  pairlists.nextlist(&pairlistnA2_save,&npairnA2);  --npairnA2;
  pairlists.nextlist(&pairlistxA2_save,&npairxA2);  --npairxA2;
  pairlists.nextlist(&pairlistmA2_save,&npairmA2);  --npairmA2;
  pairlists.nextlist(&pairlistnA3_save,&npairnA3);  --npairnA3;
  pairlists.nextlist(&pairlistxA3_save,&npairxA3);  --npairxA3;
  pairlists.nextlist(&pairlistmA3_save,&npairmA3);  --npairmA3;
  pairlists.nextlist(&pairlistnA4_save,&npairnA4);  --npairnA4;
  pairlists.nextlist(&pairlistxA4_save,&npairxA4);  --npairxA4;
  pairlists.nextlist(&pairlistmA4_save,&npairmA4);  --npairmA4;
  ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.

  const float kq_i_f = kq_i;

    p_i_x_f, p_i_y_f, p_i_z_f, pFlt_1, pairlistn_save, npairn, pairlisti,
    r2list_f, xlist, ylist, zlist);

    p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,

  KNL( float drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut); )
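  // NBThole (Drude) correction: for parent-Drude pairs within drudeNbtholeCut,
  // accumulate Thole-screened electrostatic interactions between the four
  // charge pairs atom-atom (aa), atom-Drude (ad, da), and Drude-Drude (dd),
  // and add the corresponding forces to both atoms and both Drude particles.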
  for (k = 0; k < npairi; k++) {
    NOKNL( if (r2list[k] > drudeNbtholeCut2) { continue; } )
    KNL( if (r2list_f[k] > drudeNbtholeCut2) { continue; } )

    const int j = pairlisti[k];

    if ( p_j.hydrogenGroupSize < 2 ) { continue; }

    for (kk = 0; kk < NumNbtholePairParams; kk++) {

      if (((nbthole_array[kk].ind1 == p_i.vdwType) &&
          (nbthole_array[kk].ind2 == p_j.vdwType)) ||
          (nbthole_array[kk].ind1 == p_j.vdwType))) {

    if ( kk < NumNbtholePairParams ) {

      const BigReal aa = nbthole_array[kk].tholeij * powf(aprod, -1.f/6);
      const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
      const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
      const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
      const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;

      Vector raa = (p_0[i].position + params->offset) - p_1[j].position;
      Vector rad = (p_0[i].position + params->offset) - p_1[j+1].position;
      Vector rda = (p_0[i+1].position + params->offset) - p_1[j].position;
      Vector rdd = (p_0[i+1].position + params->offset) - p_1[j+1].position;

      BigReal auaa = aa / raa_invlen;
      BigReal auad = aa / rad_invlen;
      BigReal auda = aa / rda_invlen;
      BigReal audd = aa / rdd_invlen;

      BigReal polyauaa = 1 + 0.5*auaa;
      BigReal polyauad = 1 + 0.5*auad;
      BigReal polyauda = 1 + 0.5*auda;
      BigReal polyaudd = 1 + 0.5*audd;

      ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
      ethole += qqad * rad_invlen * (- polyauad * expauad);
      ethole += qqda * rda_invlen * (- polyauda * expauda);
      ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);

      polyauaa = 1 + auaa*polyauaa;
      polyauad = 1 + auad*polyauad;
      polyauda = 1 + auda*polyauda;
      polyaudd = 1 + audd*polyaudd;

      BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
      BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
      BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
      BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;

      BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
      BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
      BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
      BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);

      Vector faa = -dfaa * raa;
      Vector fad = -dfad * rad;
      Vector fda = -dfda * rda;
      Vector fdd = -dfdd * rdd;

      SHORT(f_net) NOSHORT(fullf_net) += (faa + fad) + (fda + fdd);
      params->ff[0][i] += faa + fad;
      params->ff[0][i+1] += fda + fdd;
      params->ff[1][j] -= faa + fda;
      params->ff[1][j+1] -= fad + fdd;
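  // Lowe-Andersen thermostat: for randomly selected hydrogen-group pairs
  // within loweAndersenCutoff, apply a pairwise momentum-conserving force
  // along the unit separation vector sigma_ij; with loweAndersenUseCOMvelocity
  // the kick is computed from group center-of-mass quantities and distributed
  // over the group atoms in proportion to their masses.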
  KNL( float loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff); )

  int atom_i = pExt_0[i].id;

  if (loweAndersenUseCOMvelocity) {
    pos += v_0[i+l].charge * p_0[i+l].position;

  for (k = 0; k < npairi; k++) {
    NOKNL( if (r2list[k] > loweAndersenCutoff2) { continue; } )
    KNL( if (r2list_f[k] > loweAndersenCutoff2) { continue; } )

    const int j = pairlisti[k];

    if (! p_j.hydrogenGroupSize) { continue; }
    if (rand->uniform() > loweAndersenProb) { continue; }

    Mass mass_j = v_j.charge;
    int atom_j = pExt_1[j].id;

    if (loweAndersenUseCOMvelocity) {
      for ( int l = 0; l < p_j.hydrogenGroupSize; l++) {
        pos += v_1[j+l].charge * p_1[j+l].position;

    Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
    Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;

    if (loweAndersenUseCOMvelocity) {
      BigReal inv_mass_i = 1.0 / mass_i;
      BigReal inv_mass_j = 1.0 / mass_j;
        params->ff[0][i+l] += (v_0[i+l].charge * inv_mass_i) * force;
      for ( int l = 0; l < p_j.hydrogenGroupSize; l++) {
        params->ff[1][j+l] -= (v_1[j+l].charge * inv_mass_j) * force;

      params->ff[0][i] += force;
      params->ff[1][j] -= force;
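  // The inner force/energy kernel (nonbondedbase2.h) is instantiated once per
  // van der Waals truncation scheme; VDW_SWITCH_MODE selects the energy-switch,
  // Martini shift, or force-switch variant of the dispatch case.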
#define VDW_SWITCH_MODE VDW_SWITCH_MODE_ENERGY
  case VDW_SWITCH_MODE:
#undef VDW_SWITCH_MODE

#define VDW_SWITCH_MODE VDW_SWITCH_MODE_MARTINI
  case VDW_SWITCH_MODE:
#undef VDW_SWITCH_MODE

#define VDW_SWITCH_MODE VDW_SWITCH_MODE_FORCE
  case VDW_SWITCH_MODE:
#undef VDW_SWITCH_MODE

    p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
  exclChecksum += npairi;

#define MODIFIED(X) X
    p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
  exclChecksum += npairi;

#define EXCLUDED(X) X
  exclChecksum += npairx;
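  // With alchemy enabled, nonbondedbase2.h is included again for each of the
  // alchemically separated pairlists (nA1-nA4, then the modified mA1-mA4 and
  // excluded xA1-xA4 lists) with ALCHPAIR defined, and the exclusion checksum
  // is accumulated from each pass.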
#if 0 ALCH(+1)  // nonbondedbase2 for alchemically separated pairlists

#define ALCHPAIR(X) X
#define NOT_ALCHPAIR(X)

    p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
    p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
    p_i_x, p_i_y, p_i_z, p_1, pairlistnA3_save, npairnA3, pairlisti,
    p_i_x, p_i_y, p_i_z, p_1, pairlistnA4_save, npairnA4, pairlisti,

#define MODIFIED(X) X
    p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
  exclChecksum += npairi;
    p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
  exclChecksum += npairi;
    p_i_x, p_i_y, p_i_z, p_1, pairlistmA3_save, npairmA3, pairlisti,
  exclChecksum += npairi;
    p_i_x, p_i_y, p_i_z, p_1, pairlistmA4_save, npairmA4, pairlisti,
  exclChecksum += npairi;

#define EXCLUDED(X) X
    p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
  exclChecksum += npairi;
    p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
  exclChecksum += npairi;
    p_i_x, p_i_y, p_i_z, p_1, pairlistxA3_save, npairxA3, pairlisti,
  exclChecksum += npairi;
    p_i_x, p_i_y, p_i_z, p_1, pairlistxA4_save, npairxA4, pairlisti,
  exclChecksum += npairi;

  exclChecksum += npairxA1 + npairxA2 + npairxA3 + npairxA4;

#define NOT_ALCHPAIR(X) X
#endif  // end nonbondedbase2.h includes for alchemical pairlists

#ifdef NETWORK_PROGRESS
  CkNetworkProgress();

  __dcbt ((void *) &(p_0[i+1]));

  FULL( fullf_net.x += fullf_i_x; )
  FULL( fullf_net.y += fullf_i_y; )
  FULL( fullf_net.z += fullf_i_z; )

  FULL( fullf_0[i].x += fullf_i_x; )
  FULL( fullf_0[i].y += fullf_i_y; )
  FULL( fullf_0[i].z += fullf_i_z; )

  if ( savePairlists || ! usePairlists ) {

#if ( PAIR( 1+ ) 0 )
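  // Accumulate the full-electrostatics virial tensor into the reduction array;
  // the off-diagonal components are symmetrized (YX = XY, ZX = XZ, ZY = YZ).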
#if ( FULL( 1+ ) 0 )
#if ( PAIR( 1+ ) 0 )

  reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
  reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
  reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
  reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
  reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
  reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
  reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
  reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
  reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;

#ifndef MEM_OPT_VERSION