23 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) 24 #include <emmintrin.h> 25 #if defined(__INTEL_COMPILER) 26 #define __align(X) __declspec(align(X) ) 27 #elif defined(__GNUC__) || defined(__PGI) 28 #define __align(X) __attribute__((aligned(X) )) 30 #define __align(X) __declspec(align(X) ) 35 #define UNLIKELY(X) __builtin_expect((X), 0) 37 #define UNLIKELY(X) (X) 40 #ifdef DEFINITION // ( 46 #if NAMD_ComputeNonbonded_SortAtoms != 0 54 #define NAME CLASSNAME(calc) 60 #define CLASS ComputeNonbondedPair 61 #define CLASSNAME(X) ENERGYNAME( X ## _pair ) 70 #define CLASS ComputeNonbondedSelf 71 #define CLASSNAME(X) ENERGYNAME( X ## _self ) 83 #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy ) 87 #define ENERGYNAME(X) SLOWONLYNAME( X ) 98 #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow ) 102 #define SLOWONLYNAME(X) MERGEELECTNAME( X ) 107 #undef MERGEELECTNAME 113 #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge ) 117 #define MERGEELECTNAME(X) FULLELECTNAME( X ) 125 #define FULLELECTNAME(X) FULLDISPNAME( X ## _fullelect ) 129 #define FULLELECTNAME(X) FULLDISPNAME( X ) 139 #define FULLDISPNAME(X) TABENERGYNAME( X ## _fulldisp ) 143 #define FULLDISPNAME(X) TABENERGYNAME( X ) 153 #define TABENERGYNAME(X) FEPNAME( X ## _tabener ) 154 #define TABENERGY(X) X 155 #define NOTABENERGY(X) 157 #define TABENERGYNAME(X) FEPNAME( X ) 159 #define NOTABENERGY(X) X 179 #define FEPNAME(X) LAST( X ) 182 #define NOT_ALCHPAIR(X) X 194 #define FEPNAME(X) LAST( X ## _fep ) 202 #define FEPNAME(X) LAST( X ## _ti ) 210 #define FEPNAME(X) LAST( X ## _les ) 217 #define FEPNAME(X) LAST( X ## _int ) 224 #define FEPNAME(X) LAST( X ## _pprof ) 231 #define FEPNAME(X) LAST( X ## _go ) 247 #define KNL_MAKE_DEPENDS_INCLUDE 249 #undef KNL_MAKE_DEPENDS_INCLUDE 254 #if ( TABENERGY(1+) FEP(1+) TI(1+) INT(1+) LES(1+) GO(1+) PPROF(1+) NOFAST(1+) 0 ) 266 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) 267 #define COMPONENT_DOTPRODUCT(A,B) ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z)) 302 int exclChecksum = 0;
310 BigReal goEnergyNonnative = 0; ) )
347 const float offset_x_f = params->
offset_f.
x;
348 const float offset_y_f = params->
offset_f.
y;
349 const float offset_z_f = params->
offset_f.
z;
351 register const BigReal plcutoff2 = \
353 register const BigReal groupplcutoff2 = \
380 const float scaling_f =
scaling;
389 const float c1_f =
c1;
390 const float c3_f =
c3;
400 const BigReal rcut2inv = 1 / rcut2;
401 const BigReal rcut6inv = rcut2inv * rcut2inv * rcut2inv;
402 const BigReal rcut12inv = rcut6inv * rcut6inv;
404 const BigReal aRc2 = aRc * aRc;
405 const BigReal screen_rc = (1 - (1 + aRc2 + aRc2*aRc2/2)*exp(-aRc2));
413 const BigReal switchfactor = 1./(diff*diff*diff);
438 BigReal lambdaDown = 1 - alchLambda;
469 int pswitchTable[5*5];
476 for (
int ip=0; ip<5; ++ip) {
477 for (
int jp=0; jp<5; ++jp ) {
478 pswitchTable[ip+5*jp] = 0;
479 if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+5*jp] = 1;
480 if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+5*jp] = 2;
481 if ((ip==3 && jp==0) || (ip==0 && jp==3)) pswitchTable[ip+5*jp] = 3;
482 if ((ip==4 && jp==0) || (ip==0 && jp==4)) pswitchTable[ip+5*jp] = 4;
484 if (ip && jp && (abs(ip - jp) != 2)) pswitchTable[ip+5*jp] = 99;
488 if ((ip == 1 && jp == 1) || (ip == 1 && jp == 3) || (ip == 3 && jp == 1)) pswitchTable[ip+5*jp] = 1;
489 if ((ip == 2 && jp == 2) || (ip == 2 && jp == 4) || (ip == 4 && jp == 2)) pswitchTable[ip+5*jp] = 2;
490 if (ip == 3 && jp == 3) pswitchTable[ip+5*jp] = 3;
491 if (ip == 4 && jp == 4) pswitchTable[ip+5*jp] = 4;
499 if (ip == 1 && jp == 1) pswitchTable[ip+5*jp] = 0;
500 if (ip == 2 && jp == 2) pswitchTable[ip+5*jp] = 0;
512 BigReal fullElectEnergy_ti_2 = 0;)
517 const int i_upper = params->
numAtoms[0];
518 register const int j_upper = params->
numAtoms[1];
523 KNL(
const CompAtomFlt *pFlt_0 = params->pFlt[0]; )
524 KNL(
const CompAtomFlt *pFlt_1 = params->pFlt[1]; )
528 char * excl_flags_buff = 0;
529 const int32 * full_excl = 0;
530 const int32 * mod_excl = 0;
532 plint *pairlistn_save;
int npairn;
533 plint *pairlistx_save;
int npairx;
534 plint *pairlistm_save;
int npairm;
539 plint *pairlistnA1_save;
int npairnA1;
540 plint *pairlistxA1_save;
int npairxA1;
541 plint *pairlistmA1_save;
int npairmA1;
542 plint *pairlistnA2_save;
int npairnA2;
543 plint *pairlistxA2_save;
int npairxA2;
544 plint *pairlistmA2_save;
int npairmA2;
545 plint *pairlistnA3_save;
int npairnA3;
546 plint *pairlistxA3_save;
int npairxA3;
547 plint *pairlistmA3_save;
int npairmA3;
548 plint *pairlistnA4_save;
int npairnA4;
549 plint *pairlistxA4_save;
int npairxA4;
550 plint *pairlistmA4_save;
int npairmA4;
555 int arraysize = j_upper+5;
564 union {
double f;
int32 i[2]; } byte_order_test;
565 byte_order_test.f = 1.0;
566 int32 *r2iilist = (
int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
568 if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
579 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) 592 register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
593 register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
595 int p_0_sortValues_len = 0;
596 int p_1_sortValues_len = 0;
597 int p_1_sortValues_fixg_len = 0;
601 BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
603 if (savePairlists || !usePairlists) {
611 register int nbgs = p_1->nonbondedGroupSize;
612 register BigReal p_x = p_1->position.x;
613 register BigReal p_y = p_1->position.y;
614 register BigReal p_z = p_1->position.z;
615 register int index = 0;
617 for (
register int j = nbgs; j < j_upper; j += nbgs) {
621 register const CompAtom* p_j_next = p_1 + j;
638 register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
639 p_1_sortValStorePtr->
index = index;
640 p_1_sortValStorePtr->
sortValue = sortVal;
641 p_1_sortValues_len++;
650 register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
651 p_1_sortValStorePtr->
index = index;
652 p_1_sortValStorePtr->
sortValue = sortVal;
653 p_1_sortValues_len++;
658 #if 0 // Selection Sort 660 #elif 0 // Bubble Sort 663 #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0 672 register int nbgs = p_0->nonbondedGroupSize;
673 register BigReal p_x = p_0->position.x + offset_x;
674 register BigReal p_y = p_0->position.y + offset_y;
675 register BigReal p_z = p_0->position.z + offset_z;
676 register int index = 0;
678 for (
register int i = nbgs; i < i_upper; i += nbgs) {
682 register const CompAtom* p_i_next = p_0 + i;
694 register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
695 *p_0_sortValStorePtr = sortVal;
703 register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
704 *p_0_sortValStorePtr = sortVal;
706 p_0_sortValues_len = i_upper;
711 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) 715 #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) ) 743 if ( savePairlists || ! usePairlists ) {
745 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 748 register int fixg = 0;
749 for (
int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
750 register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
751 register int p_1_index = p_1_sortEntry->
index;
752 if (!pExt_1[p_1_index].groupFixed) {
753 register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
754 p_1_sortEntry_fixg->
index = p_1_sortEntry->
index;
756 p_1_sortValues_fixg_len++;
763 for ( j = 0; j < j_upper; ++j ) {
764 if ( p_1[j].nonbondedGroupSize ) {
769 if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
773 for ( g = 0; g < g_upper; ++g ) {
775 if ( ! pExt_1[j].groupFixed ) {
776 fixglist[fixg++] = j;
782 if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
784 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 792 NAMD_bug(
"pairlist i_upper mismatch!");
802 int pairlistoffset=0;
806 #if ( SHORT( FAST( 1+ ) ) 0 ) 819 #define fullf_1 fullf_0 825 int groupCount = params->
minPart;
827 if ( savePairlists || ! usePairlists ) {
834 PAIR(
for ( ; i < (i_upper);))
SELF(
for ( i=0; i < (i_upper- 1);i++))
837 KNL(
const CompAtomFlt &pFlt_i = pFlt_0[i]; )
840 PAIR(
if (savePairlists || ! usePairlists){)
846 __dcbt((
void *) &(p_0[i]));
851 groupCount = maxPart;
860 register const BigReal p_i_x_f = pFlt_i.position.x + offset_x_f;
861 register const BigReal p_i_y_f = pFlt_i.position.y + offset_y_f;
862 register const BigReal p_i_z_f = pFlt_i.position.z + offset_z_f;
875 if ( savePairlists || ! usePairlists ) {
877 #ifdef MEM_OPT_VERSION 879 const int excl_min = pExt_i.id + exclcheck->
min;
880 const int excl_max = pExt_i.id + exclcheck->
max;
883 const int excl_min = exclcheck->
min;
884 const int excl_max = exclcheck->
max;
886 const char * excl_flags_var;
887 if ( exclcheck->
flags ) excl_flags_var = exclcheck->
flags - excl_min;
892 #ifndef MEM_OPT_VERSION 893 #define REZERO_EXCL_FLAGS_BUFF if ( excl_flags_buff ) { \ 895 nl = full_excl[0] + 1; \ 896 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0; \ 897 nl = mod_excl[0] + 1; \ 898 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0; \ 903 int oldsize = wa.
size();
905 memset( (
void*) &wa[oldsize], 0, wa.
size() - oldsize);
907 excl_flags_buff = &wa[0];
911 nl = full_excl[0] + 1;
912 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] =
EXCHCK_FULL;
914 nl = mod_excl[0] + 1;
915 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] =
EXCHCK_MOD;
916 excl_flags_var = excl_flags_buff;
920 const char *
const excl_flags = excl_flags_var;
926 const int groupfixed = (
fixedAtomsOn && pExt_i.groupFixed );
940 while ( g_lower < g_upper &&
941 grouplist[g_lower] < j_hgroup ) ++g_lower;
942 while ( fixg_lower < fixg_upper &&
943 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
946 for ( j = i + 1; j < j_hgroup; ++j ) {
947 pairlist[pairlistindex++] = j;
952 register int *pli = pairlist + pairlistindex;
957 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 960 register BigReal p_i_sortValue = p_0_sortValues[i];
961 const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
962 register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
965 register int lower = 0;
966 register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
967 while ((upper - lower) > 1) {
968 register int j = ((lower + upper) >> 1);
970 if (jSortVal < p_i_sortValue_plus_windowRadius) {
976 const int gu = (sortValues[lower].
sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
980 register int *gli = goodglist;
981 const int *glist = ( groupfixed ? fixglist : grouplist );
982 SELF(
const int gl = ( groupfixed ? fixg_lower : g_lower ); )
983 const int gu = ( groupfixed ? fixg_upper : g_upper );
992 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) 995 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 996 register SortEntry* sortEntry0 = sortValues + g;
997 register SortEntry* sortEntry1 = sortValues + g + 1;
998 register int jprev0 = sortEntry0->
index;
999 register int jprev1 = sortEntry1->
index;
1001 register int jprev0 = glist[g ];
1002 register int jprev1 = glist[g + 1];
1008 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1009 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1010 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1013 const __m128d P_I_X = _mm_set1_pd(p_i_x);
1014 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
1015 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
1018 for ( ; g < gu - 2; g +=2 ) {
1023 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
1024 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
1025 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
1026 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1027 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
1028 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1030 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1031 sortEntry0 = sortValues + g;
1032 sortEntry1 = sortValues + g + 1;
1033 jprev0 = sortEntry0->
index;
1034 jprev1 = sortEntry1->
index;
1037 jprev1 = glist[g+1];
1040 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1041 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1042 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1044 __align(16)
double r2_01[2];
1045 _mm_store_pd(r2_01, R2_01);
1048 bool test0 = ( r2_01[0] < groupplcutoff2 );
1049 bool test1 = ( r2_01[1] < groupplcutoff2 );
1053 goodglist [ hu ] = j0;
1054 goodglist [ hu + test0 ] = j1;
1056 hu += test0 + test1;
1063 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1064 register SortEntry* sortEntry0 = sortValues + g;
1065 register SortEntry* sortEntry1 = sortValues + g + 1;
1066 register int jprev0 = sortEntry0->
index;
1067 register int jprev1 = sortEntry1->
index;
1069 register int jprev0 = glist[g ];
1070 register int jprev1 = glist[g + 1];
1076 register BigReal pj_x_0, pj_x_1;
1077 register BigReal pj_y_0, pj_y_1;
1078 register BigReal pj_z_0, pj_z_1;
1079 register BigReal t_0, t_1, r2_0, r2_1;
1081 pj_x_0 = p_1[jprev0].position.x;
1082 pj_x_1 = p_1[jprev1].position.x;
1084 pj_y_0 = p_1[jprev0].position.y;
1085 pj_y_1 = p_1[jprev1].position.y;
1087 pj_z_0 = p_1[jprev0].position.z;
1088 pj_z_1 = p_1[jprev1].position.z;
1091 for ( ; g < gu - 2; g +=2 ) {
1096 t_0 = p_i_x - pj_x_0;
1097 t_1 = p_i_x - pj_x_1;
1101 t_0 = p_i_y - pj_y_0;
1102 t_1 = p_i_y - pj_y_1;
1106 t_0 = p_i_z - pj_z_0;
1107 t_1 = p_i_z - pj_z_1;
1111 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1112 sortEntry0 = sortValues + g;
1113 sortEntry1 = sortValues + g + 1;
1114 jprev0 = sortEntry0->
index;
1115 jprev1 = sortEntry1->
index;
1118 jprev1 = glist[g+1];
1121 pj_x_0 = p_1[jprev0].position.x;
1122 pj_x_1 = p_1[jprev1].position.x;
1123 pj_y_0 = p_1[jprev0].position.y;
1124 pj_y_1 = p_1[jprev1].position.y;
1125 pj_z_0 = p_1[jprev0].position.z;
1126 pj_z_1 = p_1[jprev1].position.z;
1128 bool test0 = ( r2_0 < groupplcutoff2 );
1129 bool test1 = ( r2_1 < groupplcutoff2 );
1133 goodglist [ hu ] = j0;
1134 goodglist [ hu + test0 ] = j1;
1136 hu += test0 + test1;
1144 #pragma omp simd simdlen(16) 1145 for (g = gp; g < gu; g++) {
1147 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1148 register SortEntry* sortEntry = sortValues + g;
1149 register int j = sortEntry->
index;
1154 BigReal p_j_x = p_1[j].position.x;
1155 BigReal p_j_y = p_1[j].position.y;
1156 BigReal p_j_z = p_1[j].position.z;
1165 #pragma omp ordered simd monotonic(hu:1) 1166 if ( r2 <= groupplcutoff2 )
1167 goodglist[hu ++] = j;
1170 for (
int h=0; h<hu; ++h ) {
1171 int j = goodglist[h];
1172 int nbgs = p_1[j].nonbondedGroupSize;
1185 pairlistindex = pli - pairlist;
1187 if ( pairlistindex ) {
1188 pairlist[pairlistindex] = pairlist[pairlistindex-1];
1198 else pairlistoffset++;
1201 const int atomfixed = (
fixedAtomsOn && pExt_i.atomFixed );
1203 register int *pli = pairlist2;
1206 register plint *plin = pairlistn;
1213 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1219 if (qmGroup_i > 0) {
1220 if (qmGroup_i == qmGroup_j) {
1230 int npair2_int = pli - pairlist2;
1232 for (
int k=0; k<npair2_int; k++) {
1236 BigReal p_j_x = p_1[j].position.x;
1239 BigReal p_j_y = p_1[j].position.y;
1242 BigReal p_j_z = p_1[j].position.z;
1246 if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1247 int atom2 = pExt_1[j].
id;
1248 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1260 if (ifep_type != 1) {
PAIR(++i;)
continue; }
1261 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1263 const int jfep_type = p_1[j].partition;
1265 if (jfep_type == 1) {
1270 if (ifep_type != 1 && ifep_type != 2) {
PAIR(++i;)
continue; }
1271 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1273 const int jfep_type = p_1[j].partition;
1275 if (ifep_type + jfep_type == 3) {
1280 int npair2_int = pli - pairlist2;
1282 for (
int k=0; k<npair2_int; k++) {
1284 BigReal p_j_x = p_1[j].position.x;
1287 BigReal p_j_y = p_1[j].position.y;
1290 BigReal p_j_z = p_1[j].position.z;
1293 if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1294 int atom2 = pExt_1[j].
id;
1295 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1302 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1304 BigReal p_j_x = p_1[j].position.x;
1307 BigReal p_j_y = p_1[j].position.y;
1310 BigReal p_j_z = p_1[j].position.z;
1313 if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
1314 int atom2 = pExt_1[j].
id;
1315 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1320 int k = pairlistoffset;
1321 int ku = pairlistindex;
1324 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) 1326 register int jprev0 = pairlist [k ];
1327 register int jprev1 = pairlist [k + 1];
1332 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1333 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1334 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1337 const __m128d P_I_X = _mm_set1_pd(p_i_x);
1338 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
1339 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
1341 int atom2_0 = pExt_1[jprev0].
id;
1342 int atom2_1 = pExt_1[jprev1].
id;
1345 for ( ; k < ku - 2; k +=2 ) {
1350 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
1351 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
1352 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
1353 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1354 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
1355 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1357 jprev0 = pairlist[k];
1358 jprev1 = pairlist[k+1];
1360 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1361 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1362 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1364 __align(16)
double r2_01[2];
1365 _mm_store_pd(r2_01, R2_01);
1367 if (r2_01[0] <= plcutoff2) {
1368 if (
UNLIKELY( atom2_0 >= excl_min && atom2_0 <= excl_max ))
1373 atom2_0 = pExt_1[jprev0].
id;
1375 if (r2_01[1] <= plcutoff2) {
1376 if (
UNLIKELY( atom2_1 >= excl_min && atom2_1 <= excl_max ))
1381 atom2_1 = pExt_1[jprev1].
id;
1387 register int jprev0 = pairlist [k];
1388 register int jprev1 = pairlist [k + 1];
1393 register BigReal pj_x_0, pj_x_1;
1394 register BigReal pj_y_0, pj_y_1;
1395 register BigReal pj_z_0, pj_z_1;
1396 register BigReal t_0, t_1, r2_0, r2_1;
1398 pj_x_0 = p_1[jprev0].position.x;
1399 pj_x_1 = p_1[jprev1].position.x;
1401 pj_y_0 = p_1[jprev0].position.y;
1402 pj_y_1 = p_1[jprev1].position.y;
1404 pj_z_0 = p_1[jprev0].position.z;
1405 pj_z_1 = p_1[jprev1].position.z;
1407 int atom2_0 = pExt_1[jprev0].
id;
1408 int atom2_1 = pExt_1[jprev1].
id;
1411 for ( ; k < ku - 2; k +=2 ) {
1416 t_0 = p_i_x - pj_x_0;
1417 t_1 = p_i_x - pj_x_1;
1421 t_0 = p_i_y - pj_y_0;
1422 t_1 = p_i_y - pj_y_1;
1426 t_0 = p_i_z - pj_z_0;
1427 t_1 = p_i_z - pj_z_1;
1431 jprev0 = pairlist[k];
1432 jprev1 = pairlist[k+1];
1434 pj_x_0 = p_1[jprev0].position.x;
1435 pj_x_1 = p_1[jprev1].position.x;
1436 pj_y_0 = p_1[jprev0].position.y;
1437 pj_y_1 = p_1[jprev1].position.y;
1438 pj_z_0 = p_1[jprev0].position.z;
1439 pj_z_1 = p_1[jprev1].position.z;
1441 if (r2_0 <= plcutoff2) {
1442 if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
1447 atom2_0 = pExt_1[jprev0].
id;
1449 if (r2_1 <= plcutoff2) {
1450 if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
1455 atom2_1 = pExt_1[jprev1].
id;
1463 #pragma omp simd simdlen(16) 1464 for (k = kp; k < ku; k++) {
1465 int j = pairlist[k];
1466 int atom2 = pExt_1[j].
id;
1468 BigReal p_j_x = p_1[j].position.x;
1469 BigReal p_j_y = p_1[j].position.y;
1470 BigReal p_j_z = p_1[j].position.z;
1479 #pragma omp ordered simd monotonic(plin:1, pli:1) 1480 if (r2 <= plcutoff2) {
1481 if ( atom2 >= excl_min && atom2 <= excl_max )
1491 if ( plin == pairlistn && pli == pairlist2 ) {
1498 plint *plix = pairlistx;
1499 plint *plim = pairlistm;
1501 plint *plinA1 = pairlistnA1;
1502 plint *plixA1 = pairlistxA1;
1503 plint *plimA1 = pairlistmA1;
1504 plint *plinA2 = pairlistnA2;
1505 plint *plixA2 = pairlistxA2;
1506 plint *plimA2 = pairlistmA2;
1507 plint *plinA3 = pairlistnA3;
1508 plint *plixA3 = pairlistxA3;
1509 plint *plimA3 = pairlistmA3;
1510 plint *plinA4 = pairlistnA4;
1511 plint *plixA4 = pairlistxA4;
1512 plint *plimA4 = pairlistmA4;
1518 int unsortedNpairn = plin - pairlistn;
1520 for ( k=0; k<unsortedNpairn; ++k ) {
1521 int j = pairlistn[k];
1522 switch(pswitchTable[p_i_partition + 5*(p_1[j].
partition)]) {
1523 case 0: *(plin++) = j;
break;
1524 case 1: *(plinA1++) = j;
break;
1525 case 2: *(plinA2++) = j;
break;
1526 case 3: *(plinA3++) = j;
break;
1527 case 4: *(plinA4++) = j;
break;
1532 int npair2 = pli - pairlist2;
1535 for (k=0; k < npair2; ++k ) {
1536 int j = pairlist2[k];
1537 int atom2 = pExt_1[j].
id;
1538 int excl_flag = excl_flags[atom2];
1539 ALCH(
int pswitch = pswitchTable[p_i_partition + 5*(p_1[j].
partition)];)
1540 switch ( excl_flag
ALCH( + 3 * pswitch)) {
1541 case 0: *(plin++) = j;
break;
1542 case 1: *(plix++) = j;
break;
1543 case 2: *(plim++) = j;
break;
1545 case 3: *(plinA1++) = j;
break;
1546 case 6: *(plinA2++) = j;
break;
1547 case 9: *(plinA3++) = j;
break;
1548 case 12: *(plinA4++) = j;
break;
1549 case 5: *(plimA1++) = j;
break;
1550 case 8: *(plimA2++) = j;
break;
1551 case 11: *(plimA3++) = j;
break;
1552 case 14: *(plimA4++) = j;
break;
1553 case 4: *(plixA1++) = j;
break;
1554 case 7: *(plixA2++) = j;
break;
1555 case 10: *(plixA3++) = j;
break;
1556 case 13: *(plixA4++) = j;
break;
1561 npairn = plin - pairlistn;
1562 pairlistn_save = pairlistn;
1563 pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
1564 pairlists.
newsize(npairn + 1);
1566 npairx = plix - pairlistx;
1567 pairlistx_save = pairlists.
newlist();
1568 for ( k=0; k<npairx; ++k ) {
1569 pairlistx_save[k] = pairlistx[k];
1571 pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
1572 pairlists.
newsize(npairx + 1);
1574 npairm = plim - pairlistm;
1575 pairlistm_save = pairlists.
newlist();
1576 for ( k=0; k<npairm; ++k ) {
1577 pairlistm_save[k] = pairlistm[k];
1579 pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
1580 pairlists.
newsize(npairm + 1);
1584 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \ 1586 (NPAIRS) = (PL2) - (PL1); \ 1587 (PLSAVE) = pairlists.newlist(); \ 1588 for ( k=0; k<(NPAIRS); ++k ) { \ 1589 (PLSAVE)[k] = (PL1)[k]; \ 1591 (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \ 1592 pairlists.newsize((NPAIRS) + 1); \ 1595 PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
1596 PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
1597 PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
1598 PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
1599 PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
1600 PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
1601 PAIRLISTFROMARRAY(npairnA3,pairlistnA3,plinA3,pairlistnA3_save);
1602 PAIRLISTFROMARRAY(npairxA3,pairlistxA3,plixA3,pairlistxA3_save);
1603 PAIRLISTFROMARRAY(npairmA3,pairlistmA3,plimA3,pairlistmA3_save);
1604 PAIRLISTFROMARRAY(npairnA4,pairlistnA4,plinA4,pairlistnA4_save);
1605 PAIRLISTFROMARRAY(npairxA4,pairlistxA4,plixA4,pairlistxA4_save);
1606 PAIRLISTFROMARRAY(npairmA4,pairlistmA4,plimA4,pairlistmA4_save);
1607 #undef PAIRLISTFROMARRAY 1611 if ( ! savePairlists ) pairlists.
reset();
1618 pairlists.
nextlist(&pairlistn_save,&npairn); --npairn;
1619 pairlists.
nextlist(&pairlistx_save,&npairx); --npairx;
1620 pairlists.
nextlist(&pairlistm_save,&npairm); --npairm;
1622 pairlists.
nextlist(&pairlistnA1_save,&npairnA1); --npairnA1;
1623 pairlists.
nextlist(&pairlistxA1_save,&npairxA1); --npairxA1;
1624 pairlists.
nextlist(&pairlistmA1_save,&npairmA1); --npairmA1;
1625 pairlists.
nextlist(&pairlistnA2_save,&npairnA2); --npairnA2;
1626 pairlists.
nextlist(&pairlistxA2_save,&npairxA2); --npairxA2;
1627 pairlists.
nextlist(&pairlistmA2_save,&npairmA2); --npairmA2;
1628 pairlists.
nextlist(&pairlistnA3_save,&npairnA3); --npairnA3;
1629 pairlists.
nextlist(&pairlistxA3_save,&npairxA3); --npairxA3;
1630 pairlists.
nextlist(&pairlistmA3_save,&npairmA3); --npairmA3;
1631 pairlists.
nextlist(&pairlistnA4_save,&npairnA4); --npairnA4;
1632 pairlists.
nextlist(&pairlistxA4_save,&npairxA4); --npairxA4;
1633 pairlists.
nextlist(&pairlistmA4_save,&npairmA4); --npairmA4;
1644 ( ( p_i.
partition == 1 ) ? 1. : -1. ) : 0.
1665 const float kq_i_f = kq_i;
1668 p_i_x_f, p_i_y_f, p_i_z_f, pFlt_1, pairlistn_save, npairn, pairlisti,
1669 r2list_f, xlist, ylist, zlist);
1672 p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
1687 KNL(
float drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut); )
1691 for (k = 0; k < npairi; k++) {
1692 NOKNL(
if (r2list[k] > drudeNbtholeCut2) {
continue; } )
1693 KNL(
if (r2list_f[k] > drudeNbtholeCut2) {
continue; } )
1695 const int j = pairlisti[k];
1698 if (
p_j.hydrogenGroupSize < 2 ) {
continue; }
1700 for (kk = 0;kk < NumNbtholePairParams; kk++) {
1702 if (((nbthole_array[kk].ind1 == p_i.
vdwType) &&
1703 (nbthole_array[kk].
ind2 ==
p_j.vdwType)) ||
1705 (nbthole_array[kk].
ind1 ==
p_j.vdwType))) {
1709 if ( kk < NumNbtholePairParams ) {
1712 const BigReal aa = nbthole_array[kk].
tholeij * powf(aprod, -1.f/6);
1713 const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
1714 const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
1715 const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
1716 const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;
1718 Vector raa = (p_0[i].position + params->
offset) - p_1[j].position;
1719 Vector rad = (p_0[i].position + params->
offset) - p_1[j+1].position;
1720 Vector rda = (p_0[i+1].position + params->
offset) - p_1[j].position;
1721 Vector rdd = (p_0[i+1].position + params->
offset) - p_1[j+1].position;
1728 BigReal auaa = aa / raa_invlen;
1729 BigReal auad = aa / rad_invlen;
1730 BigReal auda = aa / rda_invlen;
1731 BigReal audd = aa / rdd_invlen;
1738 BigReal polyauaa = 1 + 0.5*auaa;
1739 BigReal polyauad = 1 + 0.5*auad;
1740 BigReal polyauda = 1 + 0.5*auda;
1741 BigReal polyaudd = 1 + 0.5*audd;
1744 ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
1745 ethole += qqad * rad_invlen * (- polyauad * expauad);
1746 ethole += qqda * rda_invlen * (- polyauda * expauda);
1747 ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);
1749 polyauaa = 1 + auaa*polyauaa;
1750 polyauad = 1 + auad*polyauad;
1751 polyauda = 1 + auda*polyauda;
1752 polyaudd = 1 + audd*polyaudd;
1754 BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
1755 BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
1756 BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
1757 BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;
1759 BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
1760 BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
1761 BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
1762 BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);
1764 Vector faa = -dfaa * raa;
1765 Vector fad = -dfad * rad;
1766 Vector fda = -dfda * rda;
1767 Vector fdd = -dfdd * rdd;
1769 SHORT(f_net)
NOSHORT(fullf_net) += (faa + fad) + (fda + fdd);
1770 params->
ff[0][i] += faa + fad;
1771 params->
ff[0][i+1] += fda + fdd;
1772 params->
ff[1][j] -= faa + fda;
1773 params->
ff[1][j+1] -= fad + fdd;
1787 KNL(
float loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff); )
1801 int atom_i = pExt_0[i].
id;
1803 if (loweAndersenUseCOMvelocity) {
1809 pos += v_0[i+l].
charge * p_0[i+l].position;
1820 for (k = 0; k < npairi; k++) {
1821 NOKNL(
if (r2list[k] > loweAndersenCutoff2) {
continue; } )
1822 KNL(
if (r2list_f[k] > loweAndersenCutoff2) {
continue; } )
1824 const int j = pairlisti[k];
1828 if (!
p_j.hydrogenGroupSize) {
continue; }
1829 if (rand->
uniform() > loweAndersenProb) {
continue; }
1831 Mass mass_j = v_j.charge;
1834 int atom_j = pExt_1[j].
id;
1836 if (loweAndersenUseCOMvelocity) {
1840 for (
int l = 0; l <
p_j.hydrogenGroupSize; l++) {
1842 pos += v_1[j+l].
charge * p_1[j+l].position;
1852 Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
1857 Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;
1862 if (loweAndersenUseCOMvelocity) {
1863 BigReal inv_mass_i = 1.0 / mass_i;
1864 BigReal inv_mass_j = 1.0 / mass_j;
1866 params->
ff[0][i+l] += (v_0[i+l].
charge * inv_mass_i) * force;
1868 for (
int l = 0; l <
p_j.hydrogenGroupSize; l++) {
1869 params->
ff[1][j+l] -= (v_1[j+l].
charge * inv_mass_j) * force;
1872 params->
ff[0][i] += force;
1873 params->
ff[1][j] -= force;
1889 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_ENERGY 1890 case VDW_SWITCH_MODE:
1893 #undef VDW_SWITCH_MODE 1895 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_MARTINI 1896 case VDW_SWITCH_MODE:
1899 #undef VDW_SWITCH_MODE 1901 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_FORCE 1902 case VDW_SWITCH_MODE:
1905 #undef VDW_SWITCH_MODE 1919 p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
1921 exclChecksum += npairi;
1926 #define MODIFIED(X) X 1934 p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
1936 exclChecksum += npairi;
1942 #define EXCLUDED(X) X 1955 exclChecksum += npairx;
1958 #if 0 ALCH(+1) // nonbondedbase2 for alchemical-separeted pairlists 1961 #define ALCHPAIR(X) X 1963 #define NOT_ALCHPAIR(X) 1985 p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
1998 p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
2011 p_i_x, p_i_y, p_i_z, p_1, pairlistnA3_save, npairnA3, pairlisti,
2024 p_i_x, p_i_y, p_i_z, p_1, pairlistnA4_save, npairnA4, pairlisti,
2040 #define MODIFIED(X) X 2047 p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
2049 exclChecksum += npairi;
2061 p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
2063 exclChecksum += npairi;
2075 p_i_x, p_i_y, p_i_z, p_1, pairlistmA3_save, npairmA3, pairlisti,
2077 exclChecksum += npairi;
2089 p_i_x, p_i_y, p_i_z, p_1, pairlistmA4_save, npairmA4, pairlisti,
2091 exclChecksum += npairi;
2109 #define EXCLUDED(X) X 2117 p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
2119 exclChecksum += npairi;
2131 p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
2133 exclChecksum += npairi;
2145 p_i_x, p_i_y, p_i_z, p_1, pairlistxA3_save, npairxA3, pairlisti,
2147 exclChecksum += npairi;
2159 p_i_x, p_i_y, p_i_z, p_1, pairlistxA4_save, npairxA4, pairlisti,
2161 exclChecksum += npairi;
2178 exclChecksum += npairxA1 + npairxA2 + npairxA3 + npairxA4;
2184 #define NOT_ALCHPAIR(X) X 2186 #endif // end nonbondedbase2.h includes for alchemical pairlists 2188 #ifdef NETWORK_PROGRESS 2189 CkNetworkProgress();
2194 __dcbt ((
void *) &(p_0[i+1]));
2201 FULL( fullf_net.
x += fullf_i_x; )
2202 FULL( fullf_net.
y += fullf_i_y; )
2203 FULL( fullf_net.
z += fullf_i_z; )
2207 FULL( fullf_0[i].x += fullf_i_x; )
2208 FULL( fullf_0[i].y += fullf_i_y; )
2209 FULL( fullf_0[i].z += fullf_i_z; )
2211 if ( savePairlists || ! usePairlists ) {
2228 #
if (
PAIR( 1+ ) 0 )
2253 #if ( FULL( 1+ ) 0 ) 2254 #if ( PAIR( 1+ ) 0 ) 2263 reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
2264 reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
2265 reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
2266 reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
2267 reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
2268 reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
2269 reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
2270 reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
2271 reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
2318 #ifndef MEM_OPT_VERSION
register BigReal virial_xy
void sortEntries_mergeSort_v1(SortEntry *&se, SortEntry *&buf, int seLen)
static int pressureProfileSlabs
register BigReal virial_xz
const TableEntry * table_row(unsigned int i) const
register BigReal virial_yz
void sortEntries_selectionSort(SortEntry *const se, const int seLen)
static BigReal dielectric_1
static void partition(int *order, const FullAtom *atoms, int begin, int end)
register BigReal electEnergy
static const Molecule * mol
int pairlist_from_pairlist(BigReal cutoff2, BigReal p_i_x, BigReal p_i_y, BigReal p_i_z, const CompAtom *p_j, const plint *list, int list_size, int *newlist, BigReal r2_delta, BigReal *r2list)
#define NBWORKARRAY(TYPE, NAME, SIZE)
static BigReal pressureProfileThickness
const int32 * get_full_exclusions_for_atom(int anum) const
BigReal GetAtomAlpha(int i) const
const int32 * get_mod_exclusions_for_atom(int anum) const
SimParameters * simParameters
const ExclusionCheck * get_excl_check_for_atom(int anum) const
Molecule stores the structural information for the system.
void sortEntries_bubbleSort(SortEntry *const se, const int seLen)
static int vdw_switch_mode
#define NBWORKARRAYSINIT(ARRAYS)
register BigReal virial_yy
void pp_clamp(int &n, int nslabs)
void nextlist(plint **list, int *list_size)
static Bool pairInteractionSelf
static BigReal * table_noshort
void NAMD_bug(const char *err_msg)
static BigReal pressureProfileMin
register BigReal virial_zz
const Real * get_qmAtomGroup() const
NbtholePairValue * nbthole_array
static Bool vdwForceSwitching
static BigReal * lambda_table
static BigReal * slow_table
#define REZERO_EXCL_FLAGS_BUFF
static Bool pairInteractionOn
static const LJTable * ljTable
register BigReal virial_xx
void newsize(int list_size)
static BigReal * table_short
void setIndexValue(plint i)
#define COMPONENT_DOTPRODUCT(A, B)
static BigReal alchVdwShiftCoeff
NAMD_HOST_DEVICE BigReal rlength(void) const
BigReal * pressureProfileReduction
plint * newlist(int max_size)
NAMD_HOST_DEVICE Vector unit(void) const
void sortEntries_mergeSort_v2(SortEntry *&se, SortEntry *&buf, int seLen)
ComputeNonbondedWorkArrays * workArrays