NAMD
ComputeNonbondedMICKernelBase2_scalar.h
Go to the documentation of this file.
1 #ifdef NAMD_MIC
2 
3  // For each entry in the pairlist...
4 
5  // Auto-vectorize via pairlist padding
6  #if __MIC_PAD_PLGEN_CTRL != 0
7 
8  // Set the number of elements/lanes per vector unit width for the data type that will be used
9  #if MIC_HANDCODE_FORCE_SINGLE != 0
10  const int _plI_fs_outer_step = 16; // 32-bit
11  #else
12  const int _plI_fs_outer_step = 8; // 64-bit
13  #endif
14 
15  // Create an "outer" loop that iterates over the the entire loop, stepping by the
16  // number of lanes in the vector units
17  #pragma novector
18  for (int _plI_fs_outer = 0; _plI_fs_outer < plSize; _plI_fs_outer += _plI_fs_outer_step) {
19 
20  // Preload i value here (use broadcast)...
21  const int i = (plArray[_plI_fs_outer] >> 16) & 0xFFFF;
22 
23  // Preload x,y,z,q values here
24  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
25  const CALC_TYPE p_i_x = ((CALC_TYPE)p_0[i].x) + ((CALC_TYPE)params.offset.x);
26  const CALC_TYPE p_i_y = ((CALC_TYPE)p_0[i].y) + ((CALC_TYPE)params.offset.y);
27  const CALC_TYPE p_i_z = ((CALC_TYPE)p_0[i].z) + ((CALC_TYPE)params.offset.z);
28  const CALC_TYPE p_i_q = (CALC_TYPE)(p_0[i].charge);
29  const int p_i_vdwType = pExt_0[i].vdw_type;
30  #else
31  const CALC_TYPE p_i_x = ((CALC_TYPE)p_0_x[i]) + ((CALC_TYPE)params.offset.x);
32  const CALC_TYPE p_i_y = ((CALC_TYPE)p_0_y[i]) + ((CALC_TYPE)params.offset.y);
33  const CALC_TYPE p_i_z = ((CALC_TYPE)p_0_z[i]) + ((CALC_TYPE)params.offset.z);
34  const CALC_TYPE p_i_q = (CALC_TYPE)(p_0_q[i]);
35  const int p_i_vdwType = pExt_0_vdwType[i];
36  #endif
37 
38  // Create variables to hold the force contributions for the given "i" atom in the "inner" loop below
39  double tmp_x_i_sum = 0.0;
40  double tmp_y_i_sum = 0.0;
41  double tmp_z_i_sum = 0.0;
42  double tmp_w_i_sum = 0.0;
43  double fulltmp_x_i_sum = 0.0;
44  double fulltmp_y_i_sum = 0.0;
45  double fulltmp_z_i_sum = 0.0;
46 
47  #if MIC_EXCL_CHECKSUM_FULL != 0
48  int exclusionSum = 0;
49  #define EXCL_CHECKSUM_CLAUSE reduction(+ : exclusionSum)
50  #else
51  #define EXCL_CHECKSUM_CLAUSE
52  #endif
53 
54  // Create an "inner" loop with one iteration per vector unit lane
55  #pragma omp simd vectorlength(16) \
56  reduction(+ : tmp_x_i_sum, tmp_y_i_sum, tmp_z_i_sum, tmp_w_i_sum, \
57  fulltmp_x_i_sum, fulltmp_y_i_sum, fulltmp_z_i_sum ) \
58  EXCL_CHECKSUM_CLAUSE
59  for (int _plI_fs_inner = 0; _plI_fs_inner < _plI_fs_outer_step; _plI_fs_inner++) {
60  const int plI = _plI_fs_outer + _plI_fs_inner;
61  if ((plArray[plI] & 0xFFFF) != 0xFFFF) {
62 
63  // Scalar version of the code
64  #else
65 
66  // DMK - NOTE : These loop_count values are loose, lower-bound guesses on my part (TODO : verify & refine)
67  #if (0 PAIR(+1))
68  #pragma loop_count (1000)
69  #elif (0 SELF(+1))
70  #pragma loop_count (10000)
71  #endif
72  for (int plI = 0; plI < plSize; plI++) {
73 
74  #endif
75 
76  // Load the particle indicies
77  const int ij = plArray[plI];
78  #if __MIC_PAD_PLGEN_CTRL != 0
79  // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
80  #else
81  const int i = (ij >> 16) & 0xFFFF;
82  #endif
83  const int j = (ij ) & 0xFFFF;
84 
85  // TODO | FIXME - Spread these out throughout the loop body (if possible) and
86  // change based on AoS versus SoA
87  #if MIC_PREFETCH_DISTANCE > 0
88  const int pfIJ = plArray[plI + MIC_PREFETCH_DISTANCE];
89  const int pfI = (pfIJ >> 16) & 0xFFFF;
90  const int pfJ = (pfIJ ) & 0xFFFF;
91  _mm_prefetch((char*)(p_0_x + pfI), MIC_PREFETCH_HINT);
92  _mm_prefetch((char*)(p_0_y + pfI), MIC_PREFETCH_HINT);
93  _mm_prefetch((char*)(p_0_z + pfI), MIC_PREFETCH_HINT);
94  _mm_prefetch((char*)(p_0_q + pfI), MIC_PREFETCH_HINT);
95  _mm_prefetch((char*)(f_0_x + pfI), MIC_PREFETCH_HINT);
96  _mm_prefetch((char*)(f_0_y + pfI), MIC_PREFETCH_HINT);
97  _mm_prefetch((char*)(f_0_z + pfI), MIC_PREFETCH_HINT);
98  _mm_prefetch((char*)(p_1_x + pfJ), MIC_PREFETCH_HINT);
99  _mm_prefetch((char*)(p_1_y + pfJ), MIC_PREFETCH_HINT);
100  _mm_prefetch((char*)(p_1_z + pfJ), MIC_PREFETCH_HINT);
101  _mm_prefetch((char*)(p_1_q + pfJ), MIC_PREFETCH_HINT);
102  _mm_prefetch((char*)(f_1_x + pfJ), MIC_PREFETCH_HINT);
103  _mm_prefetch((char*)(f_1_y + pfJ), MIC_PREFETCH_HINT);
104  _mm_prefetch((char*)(f_1_z + pfJ), MIC_PREFETCH_HINT);
105  #endif
106 
107  // Load atom information
108  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
109  #if __MIC_PAD_PLGEN_CTRL != 0
110  // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
111  #else
112  const CALC_TYPE p_i_x = ((CALC_TYPE)p_0[i].x) + ((CALC_TYPE)params.offset.x);
113  const CALC_TYPE p_i_y = ((CALC_TYPE)p_0[i].y) + ((CALC_TYPE)params.offset.y);
114  const CALC_TYPE p_i_z = ((CALC_TYPE)p_0[i].z) + ((CALC_TYPE)params.offset.z);
115  #endif
116  const CALC_TYPE p_j_x = (CALC_TYPE)(p_1[j].x); // Neighboring gather to be optimized
117  const CALC_TYPE p_j_y = (CALC_TYPE)(p_1[j].y);
118  const CALC_TYPE p_j_z = (CALC_TYPE)(p_1[j].z);
119  #else
120  #if __MIC_PAD_PLGEN_CTRL != 0
121  // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
122  #else
123  const CALC_TYPE p_i_x = ((CALC_TYPE)p_0_x[i]) + ((CALC_TYPE)params.offset.x);
124  const CALC_TYPE p_i_y = ((CALC_TYPE)p_0_y[i]) + ((CALC_TYPE)params.offset.y);
125  const CALC_TYPE p_i_z = ((CALC_TYPE)p_0_z[i]) + ((CALC_TYPE)params.offset.z);
126  #endif
127  const CALC_TYPE p_j_x = (CALC_TYPE)(p_1_x[j]);
128  const CALC_TYPE p_j_y = (CALC_TYPE)(p_1_y[j]);
129  const CALC_TYPE p_j_z = (CALC_TYPE)(p_1_z[j]);
130  #endif
131 
132  // Load position deltas and r2
133  CALC_TYPE p_ij_x = p_i_x - p_j_x;
134  CALC_TYPE p_ij_y = p_i_y - p_j_y;
135  CALC_TYPE p_ij_z = p_i_z - p_j_z;
136 
137  #if REFINE_PAIRLISTS != 0
138  CALC_TYPE r2 = (CALC_TYPE)(r2Array[plI]);
139  #else
140  CALC_TYPE r2 = (p_ij_x * p_ij_x) + (p_ij_y * p_ij_y) + (p_ij_z * p_ij_z) + r2_delta;
141  if (r2 < cutoff2_delta) {
142  #endif
143 
144  #if (MIC_EXCL_CHECKSUM_FULL != 0) && (0 EXCLUDED(+1) MODIFIED(+1))
145  #if __MIC_PAD_PLGEN_CTRL != 0
146  exclusionSum += 1;
147  #else
148  params.exclusionSum += 1;
149  #endif
150  #endif
151 
152  // Calculate the table_i value (table index)
153  #if MIC_HANDCODE_FORCE_SINGLE != 0
154  const unsigned int table_i = ((int)((__intel_castf32_u32(r2)) >> 17)) + r2_delta_expc;
155  #else
156  const unsigned int table_i = ((int)((__intel_castf64_u64(r2)) >> 46)) + r2_delta_expc;
157  #endif
158 
159  #if MIC_HANDCODE_FORCE_CALCR2TABLE != 0
160  // From ComputeNonbondedUtil.C Simplified:
161  // r2_base = r2_delta * (1 << (i/64)) r2_base = r2_delta * (1 << (i/64))
162  // r2_del = r2_base / 64.0; r2_del = r2_base / 64.0;
163  // r2 = r2_base - r2_delta + r2_del * (i%64) r2_table[i] = r2_base - r2_delta + r2_del * (i%64) + r2_delta;
164  // r2_table[i] = r2 + r2_delta; = r2_base + r2_del * (i%64)
165  // NOTE: For i = 0, r2_table[0] = r2_delta + (r2_delta / 64) * 0 = r2_delta, so there no need
166  // to special case if table_i = 0 then r2_table[0] = r2_delta (see ComputeNonbondedUtil.C:606)
167  CALC_TYPE r2_base = r2_delta * (1 << (table_i >> 6)); // avoid original divide (table_i / 64)
168  CALC_TYPE r2_del = r2_base * ((CALC_TYPE)0.015625f); // avoid original divide (r2_base / 64)
169  CALC_TYPE r2_table_i = r2_base + r2_del * (table_i & 0x3F); //(table_i % 64); // NOTE: removing '+ r2_delta - r2_delta'
170  #else
171  CALC_TYPE r2_table_i = r2_table[table_i];
172  #endif
173  CALC_TYPE diffa = r2 - r2_table_i;
174  const CALC_TYPE * const table_four_ptr = SHORT(table_short) NOSHORT(table_noshort);
175  const int table_four_idx = 16 * table_i;
176 
177  // NOTE : These charge values are already scaled by
178  // 'sqrt(COULOMB * scaling * dielectric_1).' See HomePatch.C.
179  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
180  #if __MIC_PAD_PLGEN_CTRL != 0
181  // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
182  #else
183  const CALC_TYPE p_i_q = (CALC_TYPE)(p_0[i].charge);
184  #endif
185  const CALC_TYPE p_j_q = (CALC_TYPE)(p_1[j].charge);
186  #else
187  #if __MIC_PAD_PLGEN_CTRL != 0
188  // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
189  #else
190  const CALC_TYPE p_i_q = (CALC_TYPE)(p_0_q[i]);
191  #endif
192  const CALC_TYPE p_j_q = (CALC_TYPE)(p_1_q[j]);
193  #endif
194  CALC_TYPE kqq = p_i_q * p_j_q;
195 
196  #if (0 FAST(+1))
197 
198  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
199  #if __MIC_PAD_PLGEN_CTRL != 0
200  // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
201  #else
202  int p_i_vdwType = pExt_0[i].vdw_type;
203  #endif
204  int p_j_vdwType = pExt_1[j].vdw_type;
205  #else
206  #if __MIC_PAD_PLGEN_CTRL != 0
207  // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
208  #else
209  int p_i_vdwType = pExt_0_vdwType[i];
210  #endif
211  int p_j_vdwType = pExt_1_vdwType[j];
212  #endif
213 
214  // Lookup A and B values in the LJ table
215  const int lj_pars_offset = (4 * (p_i_vdwType * lj_table_dim + p_j_vdwType)) MODIFIED(+ 2);
216  CALC_TYPE A = scaling * lj_table_base_ptr[lj_pars_offset ];
217  CALC_TYPE B = scaling * lj_table_base_ptr[lj_pars_offset + 1];
218 
219  // 16x16 AoS table lookup with transpose
220  //CALC_TYPE vdw_d = A * table_four_ptr[table_four_idx + 0] - B * table_four_ptr[table_four_idx + 2];
221  //CALC_TYPE vdw_c = A * table_four_ptr[table_four_idx + 1] - B * table_four_ptr[table_four_idx + 3];
222  //CALC_TYPE vdw_b = A * table_four_ptr[table_four_idx + 4] - B * table_four_ptr[table_four_idx + 6];
223  //CALC_TYPE vdw_a = A * table_four_ptr[table_four_idx + 5] - B * table_four_ptr[table_four_idx + 7];
224  CALC_TYPE vdw_d = A * table_four_ptr[table_four_idx + 0] - B * table_four_ptr[table_four_idx + 4];
225  CALC_TYPE vdw_c = A * table_four_ptr[table_four_idx + 1] - B * table_four_ptr[table_four_idx + 5];
226  CALC_TYPE vdw_b = A * table_four_ptr[table_four_idx + 2] - B * table_four_ptr[table_four_idx + 6];
227  CALC_TYPE vdw_a = A * table_four_ptr[table_four_idx + 3] - B * table_four_ptr[table_four_idx + 7];
228 
229  #if (0 ENERGY(+1))
230  CALC_TYPE vdw_val = ((diffa * vdw_d * (1/6.0) + vdw_c * (1/4.0)) * diffa + vdw_b * (1/2.0)) * diffa + vdw_a;
231  vdwEnergy -= vdw_val;
232  // DMK - TODO | FIXME : Apply vdw_val to FEP(vdwEnergy_s)
233  #endif
234 
235  #if (0 SHORT(+1))
236 
237  #if (0 NORMAL(+1))
238  CALC_TYPE fast_d = kqq * table_four_ptr[table_four_idx + 8];
239  CALC_TYPE fast_c = kqq * table_four_ptr[table_four_idx + 9];
240  CALC_TYPE fast_b = kqq * table_four_ptr[table_four_idx + 10];
241  CALC_TYPE fast_a = kqq * table_four_ptr[table_four_idx + 11];
242  #endif
243  #if (0 MODIFIED(+1))
244  CALC_TYPE modfckqq = (1.0 - modf_mod) * kqq;
245  CALC_TYPE fast_d = modfckqq * table_four_ptr[table_four_idx + 8];
246  CALC_TYPE fast_c = modfckqq * table_four_ptr[table_four_idx + 9];
247  CALC_TYPE fast_b = modfckqq * table_four_ptr[table_four_idx + 10];
248  CALC_TYPE fast_a = modfckqq * table_four_ptr[table_four_idx + 11];
249  #endif
250 
251  #if (0 ENERGY(+1))
252  CALC_TYPE fast_val = ((diffa * fast_d * (1/6.0) + fast_c * (1/4.0)) * diffa + fast_b * (1/2.0)) * diffa + fast_a;
253  #if (0 NOT_ALCHPAIR(+1))
254  electEnergy -= fast_val;
255  // DMK - TODO | FIXME : Apply fast_val to FEP(electEnergy_s)
256  #endif
257  #endif
258 
259  #if (0 NOT_ALCHPAIR(+1))
260  fast_d += vdw_d;
261  fast_c += vdw_c;
262  fast_b += vdw_b;
263  fast_a += vdw_a;
264  #endif
265 
266  CALC_TYPE fast_dir = (fast_d * diffa + fast_c) * diffa + fast_b;
267  CALC_TYPE force_r = fast_dir;
268 
269  CALC_TYPE tmp_x = force_r * p_ij_x;
270  PAIR( virial_xx += tmp_x * p_ij_x; )
271  PAIR( virial_xy += tmp_x * p_ij_y; )
272  PAIR( virial_xz += tmp_x * p_ij_z; )
273  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
274  #if __MIC_PAD_PLGEN_CTRL != 0
275  tmp_x_i_sum += tmp_x;
276  #else
277  f_0[i].x += tmp_x;
278  #endif
279  f_1[j].x -= tmp_x;
280  #else
281  #if __MIC_PAD_PLGEN_CTRL != 0
282  tmp_x_i_sum += tmp_x;
283  #else
284  f_0_x[i] += tmp_x;
285  #endif
286  f_1_x[j] -= tmp_x;
287  #endif
288 
289  CALC_TYPE tmp_y = force_r * p_ij_y;
290  PAIR( virial_yy += tmp_y * p_ij_y; )
291  PAIR( virial_yz += tmp_y * p_ij_z; )
292  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
293  #if __MIC_PAD_PLGEN_CTRL != 0
294  tmp_y_i_sum += tmp_y;
295  #else
296  f_0[i].y += tmp_y;
297  #endif
298  f_1[j].y -= tmp_y;
299  #else
300  #if __MIC_PAD_PLGEN_CTRL != 0
301  tmp_y_i_sum += tmp_y;
302  #else
303  f_0_y[i] += tmp_y;
304  #endif
305  f_1_y[j] -= tmp_y;
306  #endif
307 
308  CALC_TYPE tmp_z = force_r * p_ij_z;
309  PAIR( virial_zz += tmp_z * p_ij_z; )
310  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
311  #if __MIC_PAD_PLGEN_CTRL != 0
312  tmp_z_i_sum += tmp_z;
313  #else
314  f_0[i].z += tmp_z;
315  #endif
316  f_1[j].z -= tmp_z;
317  #else
318  #if __MIC_PAD_PLGEN_CTRL != 0
319  tmp_z_i_sum += tmp_z;
320  #else
321  f_0_z[i] += tmp_z;
322  #endif
323  f_1_z[j] -= tmp_z;
324  #endif
325 
326  #endif // SHORT
327  #endif // FAST
328 
329  #if (0 FULL(+1))
330 
331  CALC_TYPE slow_d = table_four_ptr[table_four_idx + 8 SHORT(+ 4)];
332  CALC_TYPE slow_c = table_four_ptr[table_four_idx + 9 SHORT(+ 4)];
333  CALC_TYPE slow_b = table_four_ptr[table_four_idx + 10 SHORT(+ 4)];
334  CALC_TYPE slow_a = table_four_ptr[table_four_idx + 11 SHORT(+ 4)];
335 
336  #if (0 SHORT( EXCLUDED(+1) MODIFIED(+1) ))
337  const int slow_idx = 4 * table_i;
338  #endif
339  #if (0 EXCLUDED(+1))
340  #if (0 SHORT(+1))
341  //slow_a += 1.0 * slow_table[slow_idx + 0]; // AoS transpose (4 members)
342  //slow_b += 2.0 * slow_table[slow_idx + 1];
343  //slow_c += 4.0 * slow_table[slow_idx + 2];
344  //slow_d += 6.0 * slow_table[slow_idx + 3];
345  slow_a += 1.0 * slow_table[slow_idx + 3]; // AoS transpose (4 members)
346  slow_b += 2.0 * slow_table[slow_idx + 2];
347  slow_c += 4.0 * slow_table[slow_idx + 1];
348  slow_d += 6.0 * slow_table[slow_idx + 0];
349  #endif
350  #if (0 NOSHORT(+1))
351  slow_d -= table_four_ptr[table_four_idx + 12];
352  slow_c -= table_four_ptr[table_four_idx + 13];
353  slow_b -= table_four_ptr[table_four_idx + 14];
354  slow_a -= table_four_ptr[table_four_idx + 15];
355  #endif
356  #endif
357  #if (0 MODIFIED(+1))
358  #if (0 SHORT(+1))
359  //slow_a += 1.0 * modf_mod * slow_table[slow_idx + 0];
360  //slow_b += 2.0 * modf_mod * slow_table[slow_idx + 1];
361  //slow_c += 4.0 * modf_mod * slow_table[slow_idx + 2];
362  //slow_d += 6.0 * modf_mod * slow_table[slow_idx + 3];
363  slow_a += 1.0 * modf_mod * slow_table[slow_idx + 3];
364  slow_b += 2.0 * modf_mod * slow_table[slow_idx + 2];
365  slow_c += 4.0 * modf_mod * slow_table[slow_idx + 1];
366  slow_d += 6.0 * modf_mod * slow_table[slow_idx + 0];
367  #endif
368  #if (0 NOSHORT(+1))
369  slow_d -= modf_mod * table_four_ptr[table_four_idx + 12];
370  slow_c -= modf_mod * table_four_ptr[table_four_idx + 13];
371  slow_b -= modf_mod * table_four_ptr[table_four_idx + 14];
372  slow_a -= modf_mod * table_four_ptr[table_four_idx + 15];
373  #endif
374  #endif
375  slow_d *= kqq;
376  slow_c *= kqq;
377  slow_b *= kqq;
378  slow_a *= kqq;
379 
380  #if (0 ENERGY(+1))
381  CALC_TYPE slow_val = ((diffa * slow_d * (1/6.0) + slow_c * (1/4.0)) * diffa + slow_b * (1/2.0)) * diffa + slow_a;
382  #if (0 NOT_ALCHPAIR(+1))
383  fullElectEnergy -= slow_val;
384  // DMK - TODO | FIXME : Apply slow_val to FEP(fullElectEnergy_s)
385  #endif
386  #endif
387 
388  #if (0 NOT_ALCHPAIR(FAST(NOSHORT(+1))))
389  slow_d += vdw_d;
390  slow_c += vdw_c;
391  slow_b += vdw_b;
392  slow_a += vdw_a;
393  #endif
394 
395  CALC_TYPE slow_dir = (diffa * slow_d + slow_c) * diffa + slow_b;
396  CALC_TYPE fullforce_r = slow_dir;
397 
398  CALC_TYPE fulltmp_x = fullforce_r * p_ij_x;
399  PAIR( fullElectVirial_xx += fulltmp_x * p_ij_x; )
400  PAIR( fullElectVirial_xy += fulltmp_x * p_ij_y; )
401  PAIR( fullElectVirial_xz += fulltmp_x * p_ij_z; )
402  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
403  #if __MIC_PAD_PLGEN_CTRL != 0
404  fulltmp_x_i_sum += fulltmp_x;
405  #else
406  fullf_0[i].x += fulltmp_x;
407  #endif
408  fullf_1[j].x -= fulltmp_x;
409  #else
410  #if __MIC_PAD_PLGEN_CTRL != 0
411  fulltmp_x_i_sum += fulltmp_x;
412  #else
413  fullf_0_x[i] += fulltmp_x;
414  #endif
415  fullf_1_x[j] -= fulltmp_x;
416  #endif
417 
418  CALC_TYPE fulltmp_y = fullforce_r * p_ij_y;
419  PAIR( fullElectVirial_yy += fulltmp_y * p_ij_y; )
420  PAIR( fullElectVirial_yz += fulltmp_y * p_ij_z; )
421  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
422  #if __MIC_PAD_PLGEN_CTRL != 0
423  fulltmp_y_i_sum += fulltmp_y;
424  #else
425  fullf_0[i].y += fulltmp_y;
426  #endif
427  fullf_1[j].y -= fulltmp_y;
428  #else
429  #if __MIC_PAD_PLGEN_CTRL != 0
430  fulltmp_y_i_sum += fulltmp_y;
431  #else
432  fullf_0_y[i] += fulltmp_y;
433  #endif
434  fullf_1_y[j] -= fulltmp_y;
435  #endif
436 
437  CALC_TYPE fulltmp_z = fullforce_r * p_ij_z;
438  PAIR( fullElectVirial_zz += fulltmp_z * p_ij_z; )
439  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
440  #if __MIC_PAD_PLGEN_CTRL != 0
441  fulltmp_z_i_sum += fulltmp_z;
442  #else
443  fullf_0[i].z += fulltmp_z;
444  #endif
445  fullf_1[j].z -= fulltmp_z;
446  #else
447  #if __MIC_PAD_PLGEN_CTRL != 0
448  fulltmp_z_i_sum += fulltmp_z;
449  #else
450  fullf_0_z[i] += fulltmp_z;
451  #endif
452  fullf_1_z[j] -= fulltmp_z;
453  #endif
454 
455  #endif // FULL
456 
457  #if REFINE_PAIRLISTS == 0
458  } // end if (r2 < cutoff2_delta)
459  #endif
460 
461  // End of loops auto-vectorized via pairlist padding
462  #if __MIC_PAD_PLGEN_CTRL != 0
463 
464  } // end if
465  } // end for
466 
467  #if MIC_EXCL_CHECKSUM_FULL != 0
468  params.exclusionSum += exclusionSum;
469  #endif
470  #undef EXCL_CHECKSUM_CLAUSE
471 
472  #if (0 FAST(SHORT(+1)))
473  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
474  f_0[i].x += tmp_x_i_sum;
475  f_0[i].y += tmp_y_i_sum;
476  f_0[i].z += tmp_z_i_sum;
477  #else
478  f_0_x[i] += tmp_x_i_sum;
479  f_0_y[i] += tmp_y_i_sum;
480  f_0_z[i] += tmp_z_i_sum;
481  #endif
482  #endif
483 
484  #if (0 FULL(+1))
485  #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
486  fullf_0[i].x += fulltmp_x_i_sum;
487  fullf_0[i].y += fulltmp_y_i_sum;
488  fullf_0[i].z += fulltmp_z_i_sum;
489  #else
490  fullf_0_x[i] += fulltmp_x_i_sum;
491  fullf_0_y[i] += fulltmp_y_i_sum;
492  fullf_0_z[i] += fulltmp_z_i_sum;
493  #endif
494  #endif
495 
496  } // end for
497 
498 
499  // End of scalar loop
500  #else
501 
502  } // end pairlist-loop
503 
504  #endif
505 
506 #endif // NAMD_MIC
register BigReal virial_xy
register BigReal virial_xz
register BigReal fast_dir
#define PAIR(X)
register BigReal virial_yz
BigReal force_r
const BigReal A
register BigReal electEnergy
#define NOSHORT(X)
BigReal vdw_b
BigReal vdw_c
register const BigReal p_ij_z
register BigReal virial_yy
#define SHORT(X)
gridSize z
register BigReal virial_zz
#define MODIFIED(X)
register const BigReal p_ij_x
BigReal vdw_d
const BigReal B
register BigReal virial_xx
BigReal vdw_a
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
gridSize y
gridSize x
register const BigReal p_ij_y