11 #ifndef COMPUTENONBONDEDINL_H 12 #define COMPUTENONBONDEDINL_H 34 inline int pairlist_from_pairlist_knl(
float cutoff2,
35 float p_i_x,
float p_i_y,
float p_i_z,
36 const CompAtomFlt *
p_j,
37 const plint *list,
int list_size,
int *newlist,
38 float *r2list,
float *xlist,
float *ylist,
float *zlist) {
46 if ( list_size <= 0)
return 0;
50 float p_j_x, p_j_y, p_j_z;
52 #pragma vector aligned 54 for ( g = 0 ; g < list_size; ++g ) {
56 p_j_x =
p_j[ gi ].position.x;
57 p_j_y =
p_j[ gi ].position.y;
58 p_j_z =
p_j[ gi ].position.z;
67 if ( r2 <= cutoff2 ) {
83 const plint *list,
int list_size,
int *newlist,
86 BigReal cutoff2_delta = cutoff2 + r2_delta;
90 if ( list_size <= 0)
return 0;
98 #pragma vector aligned 100 for ( g = 0 ; g < list_size; ++g ) {
102 p_j_x =
p_j[ gi ].position.x;
103 p_j_y =
p_j[ gi ].position.y;
104 p_j_z =
p_j[ gi ].position.z;
107 r2 = x2 * x2 + r2_delta;
113 if ( r2 <= cutoff2_delta ) {
120 #ifndef SIMPLE_PAIRLIST 127 if ( list_size > 16) {
130 int jcur1 = list[g + 1];
131 int jcur2 = list[g + 2];
132 int jcur3 = list[g + 3];
136 vector4double pj_v_0, pj_v_1, pj_v_2, pj_v_3;
137 vector4double v_0, v_1, v_2, v_3;
138 register BigReal r2_0, r2_1, r2_2, r2_3;
140 vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.};
141 vector4double r2_delta_v = {r2_delta};
148 for ( g = 4 ; g < list_size - 4; g += 4 ) {
152 j0 = jcur0; j1 = jcur1;
153 j2 = jcur2; j3 = jcur3;
155 jcur0 = list[g ]; jcur1 = list[g + 1];
156 jcur2 = list[g + 2]; jcur3 = list[g + 3];
158 __dcbt((
void*)(
p_j + jcur0));
160 v_0 = vec_sub (p_i_v, pj_v_0);
161 v_1 = vec_sub (p_i_v, pj_v_1);
162 v_2 = vec_sub (p_i_v, pj_v_2);
163 v_3 = vec_sub (p_i_v, pj_v_3);
165 v_0 = vec_madd (v_0, v_0, r2_delta_v);
166 v_1 = vec_madd (v_1, v_1, r2_delta_v);
167 v_2 = vec_madd (v_2, v_2, r2_delta_v);
168 v_3 = vec_madd (v_3, v_3, r2_delta_v);
175 r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
176 r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
177 r2_2 = vec_extract(v_2, 0) + vec_extract(v_2, 1) + vec_extract(v_2, 2);
178 r2_3 = vec_extract(v_3, 0) + vec_extract(v_3, 1) + vec_extract(v_3, 2);
180 size_t test0, test1, test2, test3;
181 size_t jout0, jout1, jout2, jout3;
183 test0 = ( r2_0 < cutoff2_delta );
184 test1 = ( r2_1 < cutoff2_delta );
185 test2 = ( r2_2 < cutoff2_delta );
186 test3 = ( r2_3 < cutoff2_delta );
189 nli[ jout0 ] = j0; r2i[ jout0 ] = r2_0;
190 jout += test0; jout1 = jout;
192 nli[ jout1 ] = j1; r2i[ jout1 ] = r2_1;
193 jout += test1; jout2 = jout;
195 nli[ jout2 ] = j2; r2i[ jout2 ] = r2_2;
196 jout += test2; jout3 = jout;
198 nli[ jout3 ] = j3; r2i[ jout3 ] = r2_3;
212 if ( list_size > 16) {
215 int jcur1 = list[g + 1];
216 int jcur2 = list[g + 2];
217 int jcur3 = list[g + 3];
221 register BigReal pj_x_0, pj_x_1, pj_x_2, pj_x_3;
222 register BigReal pj_y_0, pj_y_1, pj_y_2, pj_y_3;
223 register BigReal pj_z_0, pj_z_1, pj_z_2, pj_z_3;
225 register BigReal t_0, t_1, t_2, t_3, r2_0, r2_1, r2_2, r2_3;
227 pj_x_0 =
p_j[jcur0].position.x;
228 pj_x_1 =
p_j[jcur1].position.x;
229 pj_x_2 =
p_j[jcur2].position.x;
230 pj_x_3 =
p_j[jcur3].position.x;
231 pj_y_0 =
p_j[jcur0].position.y;
232 pj_y_1 =
p_j[jcur1].position.y;
233 pj_y_2 =
p_j[jcur2].position.y;
234 pj_y_3 =
p_j[jcur3].position.y;
235 pj_z_0 =
p_j[jcur0].position.z;
236 pj_z_1 =
p_j[jcur1].position.z;
237 pj_z_2 =
p_j[jcur2].position.z;
238 pj_z_3 =
p_j[jcur3].position.z;
240 for ( g = 4 ; g < list_size - 4; g += 4 ) {
245 j0 = jcur0; j1 = jcur1;
246 j2 = jcur2; j3 = jcur3;
248 jcur0 = list[g ]; jcur1 = list[g + 1];
249 jcur2 = list[g + 2]; jcur3 = list[g + 3];
252 __dcbt ((
void *) &
p_j[jcur0]);
256 t_0 = p_i_x - pj_x_0; t_1 = p_i_x - pj_x_1;
257 t_2 = p_i_x - pj_x_2; t_3 = p_i_x - pj_x_3;
259 r2_0 = t_0 * t_0 + r2_delta;
260 r2_1 = t_1 * t_1 + r2_delta;
261 r2_2 = t_2 * t_2 + r2_delta;
262 r2_3 = t_3 * t_3 + r2_delta;
265 t_0 = p_i_y - pj_y_0; t_1 = p_i_y - pj_y_1;
266 t_2 = p_i_y - pj_y_2; t_3 = p_i_y - pj_y_3;
267 r2_0 += t_0 * t_0; r2_1 += t_1 * t_1;
268 r2_2 += t_2 * t_2; r2_3 += t_3 * t_3;
271 t_0 = p_i_z - pj_z_0; t_1 = p_i_z - pj_z_1;
272 t_2 = p_i_z - pj_z_2; t_3 = p_i_z - pj_z_3;
273 r2_0 += t_0 * t_0; r2_1 += t_1 * t_1;
274 r2_2 += t_2 * t_2; r2_3 += t_3 * t_3;
276 pj_x_0 =
p_j[jcur0].position.x;
277 pj_x_1 =
p_j[jcur1].position.x;
278 pj_x_2 =
p_j[jcur2].position.x;
279 pj_x_3 =
p_j[jcur3].position.x;
280 pj_y_0 =
p_j[jcur0].position.y;
281 pj_y_1 =
p_j[jcur1].position.y;
282 pj_y_2 =
p_j[jcur2].position.y;
283 pj_y_3 =
p_j[jcur3].position.y;
284 pj_z_0 =
p_j[jcur0].position.z;
285 pj_z_1 =
p_j[jcur1].position.z;
286 pj_z_2 =
p_j[jcur2].position.z;
287 pj_z_3 =
p_j[jcur3].position.z;
289 bool test0, test1, test2, test3;
291 test0 = ( r2_0 < cutoff2_delta );
292 test1 = ( r2_1 < cutoff2_delta );
293 test2 = ( r2_2 < cutoff2_delta );
294 test3 = ( r2_3 < cutoff2_delta );
296 int jout0, jout1, jout2, jout3;
299 nli[ jout0 ] = j0; r2i[ jout0 ] = r2_0;
300 jout += test0; jout1 = jout;
301 nli[ jout1 ] = j1; r2i[ jout1 ] = r2_1;
302 jout += test1; jout2 = jout;
303 nli[ jout2 ] = j2; r2i[ jout2 ] = r2_2;
304 jout += test2; jout3 = jout;
305 nli[ jout3 ] = j3; r2i[ jout3 ] = r2_3;
321 while ( g < list_size ) {
325 BigReal r2 = t2 * t2 + r2_delta;
326 p_j_x =
p_j[j2].position.x;
329 p_j_y =
p_j[j2].position.y;
332 p_j_z =
p_j[j2].position.z;
333 if ( r2 <= cutoff2_delta ) {
341 return nli - newlist;
352 #if NAMD_ComputeNonbonded_SortAtoms != 0 358 for (i = 0; i < seLen; i++) {
363 register int smallestIndex = i;
366 for (j = i + 1; j < seLen; j++) {
368 if (currentValue < smallestValue) {
370 smallestValue = currentValue;
375 if (smallestIndex != i) {
377 register SortEntry* entryB = se + smallestIndex;
378 register unsigned int tmpIndex = entryA->
index;
382 entryB->
index = tmpIndex;
390 register int keepSorting = 0;
399 for (
int i = 1; i < seLen; i++) {
401 register SortEntry* sortEntry0 = sortEntry1;
406 if (sortEntry0_sortValue > sortEntry1_sortValue) {
407 register int sortEntry0_index = sortEntry0->
index;
408 register int sortEntry1_index = sortEntry1->
index;
409 sortEntry0->
index = sortEntry1_index;
410 sortEntry0->
sortValue = sortEntry1_sortValue;
411 sortEntry1->
index = sortEntry0_index;
412 sortEntry1->
sortValue = sortEntry0_sortValue;
417 }
while (keepSorting != 0);
432 register int subListSize = 1;
433 while (subListSize < seLen) {
441 register int firstListOffset = 0;
442 while (firstListOffset < seLen) {
446 register int numElements = std::min(2 * subListSize, seLen - firstListOffset);
447 register int list0len;
448 register int list1len;
449 if (numElements > subListSize) {
450 list0len = subListSize;
451 list1len = numElements - subListSize;
453 list0len = numElements;
457 register SortEntry* list0ptr = srcArray + firstListOffset;
458 register SortEntry* list1ptr = list0ptr + subListSize;
459 register SortEntry* dstptr = dstArray + firstListOffset;
464 while (list0len > 0 && list1len > 0) {
469 if (sortValue0 < sortValue1) {
472 register int index0 = list0ptr->
index;
474 dstptr->
index = index0;
484 register int index1 = list1ptr->
index;
486 dstptr->
index = index1;
500 while (list0len > 0) {
504 register int index0 = list0ptr->
index;
506 dstptr->
index = index0;
516 while (list1len > 0) {
520 register int index1 = list1ptr->
index;
522 dstptr->
index = index1;
532 firstListOffset += (2 * subListSize);
560 #define __TERNARY_ASSIGN(test, val0, val1) ((test * val0) + ((1 - test) * val1)) 567 register int subListSize = 1;
568 while (subListSize < seLen) {
576 register int firstListOffset = 0;
577 while (firstListOffset < seLen) {
583 register int numElements;
585 register int numElements_val0 = 2 * subListSize;
586 register int numElements_val1 = seLen - firstListOffset;
587 register bool numElements_test = (numElements_val0 < numElements_val1);
588 numElements =
__TERNARY_ASSIGN(numElements_test, numElements_val0, numElements_val1);
592 register SortEntry* dstptr = dstArray + firstListOffset;
593 register SortEntry* list0ptr = srcArray + firstListOffset;
594 register SortEntry* list1ptr = list0ptr + subListSize;
598 register bool lenTest = (numElements > subListSize);
599 register int list0len_val0 = subListSize;
600 register int list1len_val0 = numElements - subListSize;
601 register int list0len_val1 = numElements;
602 register int list0len =
__TERNARY_ASSIGN(lenTest, list0len_val0, list0len_val1);
605 if ( ! lenTest ) list1ptr = list0ptr;
606 list0ptr_end = list0ptr + list0len;
607 list1ptr_end = list1ptr + list1len;
613 firstListOffset += (2 * subListSize);
620 register int index0 = list0ptr->
index;
621 register int index1 = list1ptr->
index;
626 while (list0ptr < list0ptr_end && list1ptr < list1ptr_end) {
629 register bool test = (sortValue0 < sortValue1);
644 index0 = list0ptr->
index;
645 index1 = list1ptr->
index;
653 while (list0ptr < list0ptr_end) {
657 dstptr->
index = index0;
663 index0 = list0ptr->
index;
668 while (list1ptr < list1ptr_end) {
672 dstptr->
index = index1;
678 index1 = list1ptr->
index;
699 #undef __TERNARY_ASSIGN 702 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 705 #endif // COMPUTENONBONDEDINL_H void sortEntries_mergeSort_v1(SortEntry *&se, SortEntry *&buf, int seLen)
#define __TERNARY_ASSIGN(test, val0, val1)
void sortEntries_selectionSort(SortEntry *const se, const int seLen)
int pairlist_from_pairlist(BigReal cutoff2, BigReal p_i_x, BigReal p_i_y, BigReal p_i_z, const CompAtom *p_j, const plint *list, int list_size, int *newlist, BigReal r2_delta, BigReal *r2list)
void sortEntries_bubbleSort(SortEntry *const se, const int seLen)
void sortEntries_mergeSort_v2(SortEntry *&se, SortEntry *&buf, int seLen)