86 BigReal cutoff2_delta = cutoff2 + r2_delta;
90 if ( list_size <= 0)
return 0;
98 #pragma vector aligned 100 for ( g = 0 ; g < list_size; ++g ) {
102 p_j_x =
p_j[ gi ].position.x;
103 p_j_y =
p_j[ gi ].position.y;
104 p_j_z =
p_j[ gi ].position.z;
107 r2 = x2 * x2 + r2_delta;
113 if ( r2 <= cutoff2_delta ) {
120 #ifndef SIMPLE_PAIRLIST 127 if ( list_size > 16) {
130 int jcur1 = list[g + 1];
131 int jcur2 = list[g + 2];
132 int jcur3 = list[g + 3];
136 vector4double pj_v_0, pj_v_1, pj_v_2, pj_v_3;
137 vector4double v_0, v_1, v_2, v_3;
138 register BigReal r2_0, r2_1, r2_2, r2_3;
140 vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.};
141 vector4double r2_delta_v = {r2_delta};
148 for ( g = 4 ; g < list_size - 4; g += 4 ) {
152 j0 = jcur0; j1 = jcur1;
153 j2 = jcur2; j3 = jcur3;
155 jcur0 = list[g ]; jcur1 = list[g + 1];
156 jcur2 = list[g + 2]; jcur3 = list[g + 3];
158 __dcbt((
void*)(
p_j + jcur0));
160 v_0 = vec_sub (p_i_v, pj_v_0);
161 v_1 = vec_sub (p_i_v, pj_v_1);
162 v_2 = vec_sub (p_i_v, pj_v_2);
163 v_3 = vec_sub (p_i_v, pj_v_3);
165 v_0 = vec_madd (v_0, v_0, r2_delta_v);
166 v_1 = vec_madd (v_1, v_1, r2_delta_v);
167 v_2 = vec_madd (v_2, v_2, r2_delta_v);
168 v_3 = vec_madd (v_3, v_3, r2_delta_v);
175 r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
176 r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
177 r2_2 = vec_extract(v_2, 0) + vec_extract(v_2, 1) + vec_extract(v_2, 2);
178 r2_3 = vec_extract(v_3, 0) + vec_extract(v_3, 1) + vec_extract(v_3, 2);
180 size_t test0, test1, test2, test3;
181 size_t jout0, jout1, jout2, jout3;
183 test0 = ( r2_0 < cutoff2_delta );
184 test1 = ( r2_1 < cutoff2_delta );
185 test2 = ( r2_2 < cutoff2_delta );
186 test3 = ( r2_3 < cutoff2_delta );
189 nli[ jout0 ] = j0; r2i[ jout0 ] = r2_0;
190 jout += test0; jout1 = jout;
192 nli[ jout1 ] = j1; r2i[ jout1 ] = r2_1;
193 jout += test1; jout2 = jout;
195 nli[ jout2 ] = j2; r2i[ jout2 ] = r2_2;
196 jout += test2; jout3 = jout;
198 nli[ jout3 ] = j3; r2i[ jout3 ] = r2_3;
212 if ( list_size > 16) {
215 int jcur1 = list[g + 1];
216 int jcur2 = list[g + 2];
217 int jcur3 = list[g + 3];
221 register BigReal pj_x_0, pj_x_1, pj_x_2, pj_x_3;
222 register BigReal pj_y_0, pj_y_1, pj_y_2, pj_y_3;
223 register BigReal pj_z_0, pj_z_1, pj_z_2, pj_z_3;
225 register BigReal t_0, t_1, t_2, t_3, r2_0, r2_1, r2_2, r2_3;
227 pj_x_0 =
p_j[jcur0].position.x;
228 pj_x_1 =
p_j[jcur1].position.x;
229 pj_x_2 =
p_j[jcur2].position.x;
230 pj_x_3 =
p_j[jcur3].position.x;
231 pj_y_0 =
p_j[jcur0].position.y;
232 pj_y_1 =
p_j[jcur1].position.y;
233 pj_y_2 =
p_j[jcur2].position.y;
234 pj_y_3 =
p_j[jcur3].position.y;
235 pj_z_0 =
p_j[jcur0].position.z;
236 pj_z_1 =
p_j[jcur1].position.z;
237 pj_z_2 =
p_j[jcur2].position.z;
238 pj_z_3 =
p_j[jcur3].position.z;
240 for ( g = 4 ; g < list_size - 4; g += 4 ) {
245 j0 = jcur0; j1 = jcur1;
246 j2 = jcur2; j3 = jcur3;
248 jcur0 = list[g ]; jcur1 = list[g + 1];
249 jcur2 = list[g + 2]; jcur3 = list[g + 3];
252 __dcbt ((
void *) &
p_j[jcur0]);
256 t_0 = p_i_x - pj_x_0; t_1 = p_i_x - pj_x_1;
257 t_2 = p_i_x - pj_x_2; t_3 = p_i_x - pj_x_3;
259 r2_0 = t_0 * t_0 + r2_delta;
260 r2_1 = t_1 * t_1 + r2_delta;
261 r2_2 = t_2 * t_2 + r2_delta;
262 r2_3 = t_3 * t_3 + r2_delta;
265 t_0 = p_i_y - pj_y_0; t_1 = p_i_y - pj_y_1;
266 t_2 = p_i_y - pj_y_2; t_3 = p_i_y - pj_y_3;
267 r2_0 += t_0 * t_0; r2_1 += t_1 * t_1;
268 r2_2 += t_2 * t_2; r2_3 += t_3 * t_3;
271 t_0 = p_i_z - pj_z_0; t_1 = p_i_z - pj_z_1;
272 t_2 = p_i_z - pj_z_2; t_3 = p_i_z - pj_z_3;
273 r2_0 += t_0 * t_0; r2_1 += t_1 * t_1;
274 r2_2 += t_2 * t_2; r2_3 += t_3 * t_3;
276 pj_x_0 =
p_j[jcur0].position.x;
277 pj_x_1 =
p_j[jcur1].position.x;
278 pj_x_2 =
p_j[jcur2].position.x;
279 pj_x_3 =
p_j[jcur3].position.x;
280 pj_y_0 =
p_j[jcur0].position.y;
281 pj_y_1 =
p_j[jcur1].position.y;
282 pj_y_2 =
p_j[jcur2].position.y;
283 pj_y_3 =
p_j[jcur3].position.y;
284 pj_z_0 =
p_j[jcur0].position.z;
285 pj_z_1 =
p_j[jcur1].position.z;
286 pj_z_2 =
p_j[jcur2].position.z;
287 pj_z_3 =
p_j[jcur3].position.z;
289 bool test0, test1, test2, test3;
291 test0 = ( r2_0 < cutoff2_delta );
292 test1 = ( r2_1 < cutoff2_delta );
293 test2 = ( r2_2 < cutoff2_delta );
294 test3 = ( r2_3 < cutoff2_delta );
296 int jout0, jout1, jout2, jout3;
299 nli[ jout0 ] = j0; r2i[ jout0 ] = r2_0;
300 jout += test0; jout1 = jout;
301 nli[ jout1 ] = j1; r2i[ jout1 ] = r2_1;
302 jout += test1; jout2 = jout;
303 nli[ jout2 ] = j2; r2i[ jout2 ] = r2_2;
304 jout += test2; jout3 = jout;
305 nli[ jout3 ] = j3; r2i[ jout3 ] = r2_3;
321 while ( g < list_size ) {
325 BigReal r2 = t2 * t2 + r2_delta;
326 p_j_x =
p_j[j2].position.x;
329 p_j_y =
p_j[j2].position.y;
332 p_j_z =
p_j[j2].position.z;
333 if ( r2 <= cutoff2_delta ) {
341 return nli - newlist;