ComputeNonbondedMICKernelBase2_scalar.h

Go to the documentation of this file.
00001 #ifdef NAMD_MIC
00002 
00003   // For each entry in the pairlist...
00004   // NOTE: names used but not declared in this file (plArray, plSize, p_0*, f_0*, params, ...) are expected to be provided by the code that includes it
00005   // Vectorized version: auto-vectorize via pairlist padding
00006   #if __MIC_PAD_PLGEN_CTRL != 0
00007 
00008     // Set the number of elements/lanes per vector unit width for the data type that will be used
00009     #if MIC_HANDCODE_FORCE_SINGLE != 0
00010       const int _plI_fs_outer_step = 16;  // 32-bit
00011     #else
00012       const int _plI_fs_outer_step = 8;  // 64-bit
00013     #endif
00014 
00015     // Create an "outer" loop that iterates over the entire pairlist, stepping by the
00016     //   number of lanes in the vector units
00017     #pragma novector
00018     for (int _plI_fs_outer = 0; _plI_fs_outer < plSize; _plI_fs_outer += _plI_fs_outer_step) {
00019       // NOTE(review): i is read only from the first entry of each group, so every entry in a group is assumed to share that i - confirm against the pairlist generation code
00020       // Preload i value (upper 16 bits of the pairlist entry) here (use broadcast)...
00021       const int i = (plArray[_plI_fs_outer] >> 16) & 0xFFFF;
00022 
00023       // Preload the i atom's x,y,z (shifted by params.offset), charge, and vdW type here
00024       #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00025         const CALC_TYPE p_i_x = ((CALC_TYPE)p_0[i].x) + ((CALC_TYPE)params.offset.x);
00026         const CALC_TYPE p_i_y = ((CALC_TYPE)p_0[i].y) + ((CALC_TYPE)params.offset.y);
00027         const CALC_TYPE p_i_z = ((CALC_TYPE)p_0[i].z) + ((CALC_TYPE)params.offset.z);
00028         const CALC_TYPE p_i_q = (CALC_TYPE)(p_0[i].charge);
00029         const int p_i_vdwType = pExt_0[i].vdw_type;
00030       #else
00031         const CALC_TYPE p_i_x = ((CALC_TYPE)p_0_x[i]) + ((CALC_TYPE)params.offset.x);
00032         const CALC_TYPE p_i_y = ((CALC_TYPE)p_0_y[i]) + ((CALC_TYPE)params.offset.y);
00033         const CALC_TYPE p_i_z = ((CALC_TYPE)p_0_z[i]) + ((CALC_TYPE)params.offset.z);
00034         const CALC_TYPE p_i_q = (CALC_TYPE)(p_0_q[i]);
00035         const int p_i_vdwType = pExt_0_vdwType[i];
00036       #endif
00037 
00038       // Create variables to hold the force contributions for the given "i" atom in the "inner" loop below (always double, independent of CALC_TYPE)
00039       double tmp_x_i_sum = 0.0;
00040       double tmp_y_i_sum = 0.0;
00041       double tmp_z_i_sum = 0.0;
00042       double tmp_w_i_sum = 0.0;  // NOTE(review): listed in the reduction clause but never accumulated in the code visible in this file
00043       double fulltmp_x_i_sum = 0.0;
00044       double fulltmp_y_i_sum = 0.0;
00045       double fulltmp_z_i_sum = 0.0;
00046 
00047       #if MIC_EXCL_CHECKSUM_FULL != 0
00048         int exclusionSum = 0;
00049         #define EXCL_CHECKSUM_CLAUSE  reduction(+ : exclusionSum)
00050       #else
00051         #define EXCL_CHECKSUM_CLAUSE
00052       #endif
00053       // NOTE(review): vectorlength(16) below is fixed even when _plI_fs_outer_step is 8 (64-bit case) - confirm this is intended
00054       // Create an "inner" loop with one iteration per vector unit lane
00055       #pragma simd vectorlength(16) \
00056                    reduction(+ : tmp_x_i_sum, tmp_y_i_sum, tmp_z_i_sum, tmp_w_i_sum, \
00057                                  fulltmp_x_i_sum, fulltmp_y_i_sum, fulltmp_z_i_sum ) \
00058                    EXCL_CHECKSUM_CLAUSE
00059       for (int _plI_fs_inner = 0; _plI_fs_inner < _plI_fs_outer_step; _plI_fs_inner++) {
00060         const int plI = _plI_fs_outer + _plI_fs_inner;
00061         if ((plArray[plI] & 0xFFFF) != 0xFFFF) {  // skip entries whose j field is 0xFFFF (presumably the padding entries - confirm)
00062 
00063   // Scalar version of the code: one loop iteration per pairlist entry
00064   #else
00065 
00066     // DMK - NOTE : These loop_count values are loose, lower-bound guesses on my part (TODO : verify & refine)
00067     #if (0 PAIR(+1))
00068       #pragma loop_count (1000)
00069     #elif (0 SELF(+1))
00070       #pragma loop_count (10000)
00071     #endif
00072     for (int plI = 0; plI < plSize; plI++) {  
00073 
00074   #endif
00075 
00076     // Load the particle indices (each pairlist entry packs i in its upper 16 bits and j in its lower 16 bits)
00077     const int ij = plArray[plI];
00078     #if __MIC_PAD_PLGEN_CTRL != 0
00079       // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
00080     #else
00081       const int i = (ij >> 16) & 0xFFFF;
00082     #endif
00083     const int j = (ij      ) & 0xFFFF;
00084     // NOTE(review): the prefetch below reads plArray[plI + MIC_PREFETCH_DISTANCE] with no bounds check visible here, and uses the SoA array names regardless of MIC_HANDCODE_FORCE_SOA_VS_AOS - confirm both are safe
00085     // TODO | FIXME - Spread these out throughout the loop body (if possible) and
00086     //   change based on AoS versus SoA
00087     #if MIC_PREFETCH_DISTANCE > 0
00088       const int pfIJ = plArray[plI + MIC_PREFETCH_DISTANCE];
00089       const int pfI = (pfIJ >> 16) & 0xFFFF;
00090       const int pfJ = (pfIJ      ) & 0xFFFF;
00091       _mm_prefetch((char*)(p_0_x + pfI), MIC_PREFETCH_HINT);
00092       _mm_prefetch((char*)(p_0_y + pfI), MIC_PREFETCH_HINT);
00093       _mm_prefetch((char*)(p_0_z + pfI), MIC_PREFETCH_HINT);
00094       _mm_prefetch((char*)(p_0_q + pfI), MIC_PREFETCH_HINT);
00095       _mm_prefetch((char*)(f_0_x + pfI), MIC_PREFETCH_HINT);
00096       _mm_prefetch((char*)(f_0_y + pfI), MIC_PREFETCH_HINT);
00097       _mm_prefetch((char*)(f_0_z + pfI), MIC_PREFETCH_HINT);
00098       _mm_prefetch((char*)(p_1_x + pfJ), MIC_PREFETCH_HINT);
00099       _mm_prefetch((char*)(p_1_y + pfJ), MIC_PREFETCH_HINT);
00100       _mm_prefetch((char*)(p_1_z + pfJ), MIC_PREFETCH_HINT);
00101       _mm_prefetch((char*)(p_1_q + pfJ), MIC_PREFETCH_HINT);
00102       _mm_prefetch((char*)(f_1_x + pfJ), MIC_PREFETCH_HINT);
00103       _mm_prefetch((char*)(f_1_y + pfJ), MIC_PREFETCH_HINT);
00104       _mm_prefetch((char*)(f_1_z + pfJ), MIC_PREFETCH_HINT);
00105     #endif
00106 
00107     // Load atom positions (the i atom's position is shifted by params.offset; the j atom's is used as stored)
00108     #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00109       #if __MIC_PAD_PLGEN_CTRL != 0
00110         // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
00111       #else
00112         const CALC_TYPE p_i_x = ((CALC_TYPE)p_0[i].x) + ((CALC_TYPE)params.offset.x);
00113         const CALC_TYPE p_i_y = ((CALC_TYPE)p_0[i].y) + ((CALC_TYPE)params.offset.y);
00114         const CALC_TYPE p_i_z = ((CALC_TYPE)p_0[i].z) + ((CALC_TYPE)params.offset.z);
00115       #endif
00116       const CALC_TYPE p_j_x = (CALC_TYPE)(p_1[j].x);  // Neighboring gather to be optimized
00117       const CALC_TYPE p_j_y = (CALC_TYPE)(p_1[j].y);
00118       const CALC_TYPE p_j_z = (CALC_TYPE)(p_1[j].z);
00119     #else
00120       #if __MIC_PAD_PLGEN_CTRL != 0
00121         // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
00122       #else
00123         const CALC_TYPE p_i_x = ((CALC_TYPE)p_0_x[i]) + ((CALC_TYPE)params.offset.x);
00124         const CALC_TYPE p_i_y = ((CALC_TYPE)p_0_y[i]) + ((CALC_TYPE)params.offset.y);
00125         const CALC_TYPE p_i_z = ((CALC_TYPE)p_0_z[i]) + ((CALC_TYPE)params.offset.z);
00126       #endif
00127       const CALC_TYPE p_j_x = (CALC_TYPE)(p_1_x[j]);
00128       const CALC_TYPE p_j_y = (CALC_TYPE)(p_1_y[j]);
00129       const CALC_TYPE p_j_z = (CALC_TYPE)(p_1_z[j]);
00130     #endif
00131 
00132     // Compute position deltas (i minus j) and r2
00133     CALC_TYPE p_ij_x = p_i_x - p_j_x;
00134     CALC_TYPE p_ij_y = p_i_y - p_j_y;
00135     CALC_TYPE p_ij_z = p_i_z - p_j_z;
00136     // r2 is either read from r2Array (no cutoff test here) or computed as squared distance plus r2_delta and tested against cutoff2_delta
00137     #if REFINE_PAIRLISTS != 0
00138     CALC_TYPE r2 = (CALC_TYPE)(r2Array[plI]);
00139     #else
00140     CALC_TYPE r2 = (p_ij_x * p_ij_x) + (p_ij_y * p_ij_y) + (p_ij_z * p_ij_z) + r2_delta;
00141     if (r2 < cutoff2_delta) {  // closed under '#if REFINE_PAIRLISTS == 0' at the end of the loop body
00142     #endif
00143 
00144       #if (MIC_EXCL_CHECKSUM_FULL != 0) && (0 EXCLUDED(+1) MODIFIED(+1))
00145         #if __MIC_PAD_PLGEN_CTRL != 0
00146           exclusionSum += 1;  // local counter; added to params.exclusionSum after the inner loop
00147         #else
00148           params.exclusionSum += 1;
00149         #endif
00150       #endif
00151 
00152       // Calculate the table_i value (table index): r2's bits are cast to an integer (presumably a bitwise reinterpretation) and shifted so that, for IEEE-754 layouts, only the exponent and top 6 mantissa bits remain
00153       #if MIC_HANDCODE_FORCE_SINGLE != 0
00154         const unsigned int table_i = ((int)((__intel_castf32_u32(r2)) >> 17)) + r2_delta_expc;
00155       #else
00156         const unsigned int table_i = ((int)((__intel_castf64_u64(r2)) >> 46)) + r2_delta_expc;
00157       #endif
00158       // r2_table_i is either recomputed arithmetically from table_i or read from r2_table
00159       #if MIC_HANDCODE_FORCE_CALCR2TABLE != 0
00160         // From ComputeNonbondedUtil.C                    Simplified:
00161         //   r2_base = r2_delta * (1 << (i/64))             r2_base = r2_delta * (1 << (i/64))
00162         //   r2_del = r2_base / 64.0;                       r2_del = r2_base / 64.0;
00163         //   r2 = r2_base - r2_delta + r2_del * (i%64)      r2_table[i] = r2_base - r2_delta + r2_del * (i%64) + r2_delta;
00164         //   r2_table[i] = r2 + r2_delta;                               = r2_base + r2_del * (i%64)
00165         // NOTE: For i = 0, r2_table[0] = r2_delta + (r2_delta / 64) * 0 = r2_delta, so there is no need
00166         //   to special case if table_i = 0 then r2_table[0] = r2_delta (see ComputeNonbondedUtil.C:606)
00167         CALC_TYPE r2_base = r2_delta * (1 << (table_i >> 6)); // avoid original divide (table_i / 64)
00168         CALC_TYPE r2_del = r2_base * ((CALC_TYPE)0.015625f);  // avoid original divide (r2_base / 64)
00169         CALC_TYPE r2_table_i = r2_base + r2_del * (table_i & 0x3F); //(table_i % 64);  // NOTE: removing '+ r2_delta - r2_delta'
00170       #else
00171         CALC_TYPE r2_table_i = r2_table[table_i];
00172       #endif
00173       CALC_TYPE diffa = r2 - r2_table_i;  // offset of r2 from the table point; the polynomial variable used below
00174       const CALC_TYPE * const table_four_ptr = SHORT(table_short) NOSHORT(table_noshort);  // table chosen by the SHORT/NOSHORT macros (defined outside this file)
00175       const int table_four_idx = 16 * table_i;  // 16 table values per table_i entry
00176 
00177       // NOTE : These charge values are already scaled by
00178       //   'sqrt(COULOMB * scaling * dielectric_1).'  See HomePatch.C.
00179       #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00180         #if __MIC_PAD_PLGEN_CTRL != 0
00181           // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
00182         #else
00183           const CALC_TYPE p_i_q = (CALC_TYPE)(p_0[i].charge);
00184         #endif
00185         const CALC_TYPE p_j_q = (CALC_TYPE)(p_1[j].charge);
00186       #else
00187         #if __MIC_PAD_PLGEN_CTRL != 0
00188           // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
00189         #else
00190           const CALC_TYPE p_i_q = (CALC_TYPE)(p_0_q[i]);
00191         #endif
00192         const CALC_TYPE p_j_q = (CALC_TYPE)(p_1_q[j]);
00193       #endif
00194       CALC_TYPE kqq = p_i_q * p_j_q;  // product of the two (pre-scaled) charges
00195 
00196       #if (0 FAST(+1))
00197         // NOTE: '#if (0 X(+1))' is true only when macro X expands its argument (X is defined outside this file; presumably it expands to either its argument or to nothing - confirm)
00198         #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00199           #if __MIC_PAD_PLGEN_CTRL != 0
00200             // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
00201           #else
00202             int p_i_vdwType = pExt_0[i].vdw_type;
00203           #endif
00204           int p_j_vdwType = pExt_1[j].vdw_type;
00205         #else
00206           #if __MIC_PAD_PLGEN_CTRL != 0
00207             // NOTE: moved before this loop, to the start of the _plI_fs_outer loop's body
00208           #else
00209             int p_i_vdwType = pExt_0_vdwType[i];
00210           #endif
00211           int p_j_vdwType = pExt_1_vdwType[j];
00212         #endif
00213 
00214         // Lookup A and B values in the LJ table (4 values per type pair; MODIFIED adds 2 to the offset; both values are multiplied by 'scaling')
00215         const int lj_pars_offset = (4 * (p_i_vdwType * lj_table_dim + p_j_vdwType)) MODIFIED(+ 2);
00216         CALC_TYPE A = scaling * lj_table_base_ptr[lj_pars_offset    ];
00217         CALC_TYPE B = scaling * lj_table_base_ptr[lj_pars_offset + 1];
00218 
00219         // 16x16 AoS table lookup with transpose (the commented-out lines show an earlier interleaved offset layout)
00220         //CALC_TYPE vdw_d = A * table_four_ptr[table_four_idx + 0] - B * table_four_ptr[table_four_idx + 2];
00221         //CALC_TYPE vdw_c = A * table_four_ptr[table_four_idx + 1] - B * table_four_ptr[table_four_idx + 3];
00222         //CALC_TYPE vdw_b = A * table_four_ptr[table_four_idx + 4] - B * table_four_ptr[table_four_idx + 6];
00223         //CALC_TYPE vdw_a = A * table_four_ptr[table_four_idx + 5] - B * table_four_ptr[table_four_idx + 7];
00224         CALC_TYPE vdw_d = A * table_four_ptr[table_four_idx + 0] - B * table_four_ptr[table_four_idx + 4];
00225         CALC_TYPE vdw_c = A * table_four_ptr[table_four_idx + 1] - B * table_four_ptr[table_four_idx + 5];
00226         CALC_TYPE vdw_b = A * table_four_ptr[table_four_idx + 2] - B * table_four_ptr[table_four_idx + 6];
00227         CALC_TYPE vdw_a = A * table_four_ptr[table_four_idx + 3] - B * table_four_ptr[table_four_idx + 7];
00228         // Energy (only when ENERGY is enabled): cubic polynomial in diffa built from the a/b/c/d coefficients, subtracted from vdwEnergy
00229         #if (0 ENERGY(+1))
00230           CALC_TYPE vdw_val = ((diffa * vdw_d * (1/6.0) + vdw_c * (1/4.0)) * diffa + vdw_b * (1/2.0)) * diffa + vdw_a;
00231           vdwEnergy -= vdw_val;
00232           // DMK - TODO | FIXME : Apply vdw_val to FEP(vdwEnergy_s)
00233         #endif
00234 
00235         #if (0 SHORT(+1))
00236           // Fast electrostatic coefficients: table offsets 8-11 scaled by kqq (NORMAL) or by (1.0 - modf_mod) * kqq (MODIFIED)
00237           #if (0 NORMAL(+1))
00238             CALC_TYPE fast_d = kqq * table_four_ptr[table_four_idx +  8];
00239             CALC_TYPE fast_c = kqq * table_four_ptr[table_four_idx +  9];
00240             CALC_TYPE fast_b = kqq * table_four_ptr[table_four_idx + 10];
00241             CALC_TYPE fast_a = kqq * table_four_ptr[table_four_idx + 11];
00242           #endif
00243           #if (0 MODIFIED(+1))
00244             CALC_TYPE modfckqq = (1.0 - modf_mod) * kqq;
00245             CALC_TYPE fast_d = modfckqq * table_four_ptr[table_four_idx +  8];
00246             CALC_TYPE fast_c = modfckqq * table_four_ptr[table_four_idx +  9];
00247             CALC_TYPE fast_b = modfckqq * table_four_ptr[table_four_idx + 10];
00248             CALC_TYPE fast_a = modfckqq * table_four_ptr[table_four_idx + 11];
00249           #endif
00250 
00251           #if (0 ENERGY(+1))
00252             CALC_TYPE fast_val = ((diffa * fast_d * (1/6.0) + fast_c * (1/4.0)) * diffa + fast_b * (1/2.0)) * diffa + fast_a;
00253             #if (0 NOT_ALCHPAIR(+1))
00254               electEnergy -= fast_val;
00255               // DMK - TODO | FIXME : Apply fast_val to FEP(electEnergy_s)
00256             #endif
00257           #endif
00258           // Fold the vdW coefficients into the fast coefficients so that one polynomial below yields the combined force term
00259           #if (0 NOT_ALCHPAIR(+1))
00260             fast_d += vdw_d;
00261             fast_c += vdw_c;
00262             fast_b += vdw_b;
00263             fast_a += vdw_a;
00264           #endif
00265 
00266           CALC_TYPE fast_dir = (fast_d * diffa + fast_c) * diffa + fast_b;  // quadratic in diffa
00267           CALC_TYPE force_r = fast_dir;
00268           // Apply the force per component: added to atom i (or to its per-i sum in the padded version) and subtracted from atom j; virial terms are only updated inside PAIR(...)
00269           CALC_TYPE tmp_x = force_r * p_ij_x;
00270           PAIR( virial_xx += tmp_x * p_ij_x; )
00271           PAIR( virial_xy += tmp_x * p_ij_y; )
00272           PAIR( virial_xz += tmp_x * p_ij_z; )
00273           #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00274             #if __MIC_PAD_PLGEN_CTRL != 0
00275               tmp_x_i_sum += tmp_x;
00276             #else
00277               f_0[i].x += tmp_x;
00278             #endif
00279             f_1[j].x -= tmp_x;
00280           #else
00281             #if __MIC_PAD_PLGEN_CTRL != 0
00282               tmp_x_i_sum += tmp_x;
00283             #else
00284               f_0_x[i] += tmp_x;
00285             #endif
00286             f_1_x[j] -= tmp_x;
00287           #endif
00288 
00289           CALC_TYPE tmp_y = force_r * p_ij_y;
00290           PAIR( virial_yy += tmp_y * p_ij_y; )
00291           PAIR( virial_yz += tmp_y * p_ij_z; )
00292           #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00293             #if __MIC_PAD_PLGEN_CTRL != 0
00294               tmp_y_i_sum += tmp_y;
00295             #else
00296               f_0[i].y += tmp_y;   
00297             #endif
00298             f_1[j].y -= tmp_y;
00299           #else
00300             #if __MIC_PAD_PLGEN_CTRL != 0
00301               tmp_y_i_sum += tmp_y;
00302             #else
00303               f_0_y[i] += tmp_y;
00304             #endif
00305             f_1_y[j] -= tmp_y;
00306           #endif
00307 
00308           CALC_TYPE tmp_z = force_r * p_ij_z;
00309           PAIR( virial_zz += tmp_z * p_ij_z; )
00310           #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00311             #if __MIC_PAD_PLGEN_CTRL != 0
00312               tmp_z_i_sum += tmp_z;
00313             #else
00314               f_0[i].z += tmp_z;
00315             #endif
00316             f_1[j].z -= tmp_z;
00317           #else
00318             #if __MIC_PAD_PLGEN_CTRL != 0
00319               tmp_z_i_sum += tmp_z;
00320             #else
00321               f_0_z[i] += tmp_z;
00322             #endif
00323             f_1_z[j] -= tmp_z;
00324           #endif
00325 
00326         #endif // SHORT
00327       #endif // FAST
00328 
00329       #if (0 FULL(+1))
00330         // Slow coefficients: read from table offsets 8-11, or from 12-15 when SHORT expands its '+ 4' argument
00331         CALC_TYPE slow_d = table_four_ptr[table_four_idx +  8 SHORT(+ 4)];
00332         CALC_TYPE slow_c = table_four_ptr[table_four_idx +  9 SHORT(+ 4)];
00333         CALC_TYPE slow_b = table_four_ptr[table_four_idx + 10 SHORT(+ 4)];
00334         CALC_TYPE slow_a = table_four_ptr[table_four_idx + 11 SHORT(+ 4)];
00335         // slow_table holds 4 values per table_i entry; slow_idx is only needed for the SHORT + (EXCLUDED or MODIFIED) corrections below
00336         #if (0 SHORT( EXCLUDED(+1) MODIFIED(+1) ))
00337           const int slow_idx = 4 * table_i;
00338         #endif
00339         #if (0 EXCLUDED(+1))
00340           #if (0 SHORT(+1))
00341             //slow_a += 1.0 * slow_table[slow_idx + 0];  // AoS transpose (4 members)
00342             //slow_b += 2.0 * slow_table[slow_idx + 1];
00343             //slow_c += 4.0 * slow_table[slow_idx + 2];
00344             //slow_d += 6.0 * slow_table[slow_idx + 3];
00345             slow_a += 1.0 * slow_table[slow_idx + 3];  // AoS transpose (4 members)
00346             slow_b += 2.0 * slow_table[slow_idx + 2];
00347             slow_c += 4.0 * slow_table[slow_idx + 1];
00348             slow_d += 6.0 * slow_table[slow_idx + 0];
00349           #endif
00350           #if (0 NOSHORT(+1))
00351             slow_d -= table_four_ptr[table_four_idx + 12];
00352             slow_c -= table_four_ptr[table_four_idx + 13];
00353             slow_b -= table_four_ptr[table_four_idx + 14];
00354             slow_a -= table_four_ptr[table_four_idx + 15];
00355           #endif
00356         #endif
00357         #if (0 MODIFIED(+1))
00358           #if (0 SHORT(+1))
00359             //slow_a += 1.0 * modf_mod * slow_table[slow_idx + 0];
00360             //slow_b += 2.0 * modf_mod * slow_table[slow_idx + 1];
00361             //slow_c += 4.0 * modf_mod * slow_table[slow_idx + 2];
00362             //slow_d += 6.0 * modf_mod * slow_table[slow_idx + 3];
00363             slow_a += 1.0 * modf_mod * slow_table[slow_idx + 3];
00364             slow_b += 2.0 * modf_mod * slow_table[slow_idx + 2];
00365             slow_c += 4.0 * modf_mod * slow_table[slow_idx + 1];
00366             slow_d += 6.0 * modf_mod * slow_table[slow_idx + 0];
00367           #endif
00368           #if (0 NOSHORT(+1))
00369             slow_d -= modf_mod * table_four_ptr[table_four_idx + 12];
00370             slow_c -= modf_mod * table_four_ptr[table_four_idx + 13];
00371             slow_b -= modf_mod * table_four_ptr[table_four_idx + 14];
00372             slow_a -= modf_mod * table_four_ptr[table_four_idx + 15];
00373           #endif
00374         #endif
00375         slow_d *= kqq;
00376         slow_c *= kqq;
00377         slow_b *= kqq;
00378         slow_a *= kqq;
00379         // Energy (only when ENERGY is enabled): same cubic polynomial form in diffa as used for the vdW and fast energies
00380         #if (0 ENERGY(+1))
00381           CALC_TYPE slow_val = ((diffa * slow_d * (1/6.0) + slow_c * (1/4.0)) * diffa + slow_b * (1/2.0)) * diffa + slow_a;
00382           #if (0 NOT_ALCHPAIR(+1))
00383             fullElectEnergy -= slow_val;
00384             // DMK - TODO | FIXME : Apply slow_val to FEP(fullElectEnergy_s)
00385           #endif
00386         #endif
00387         // With FAST and NOSHORT both enabled, the vdW coefficients are folded into the slow coefficients here (the SHORT section that would otherwise do it is compiled out)
00388         #if (0 NOT_ALCHPAIR(FAST(NOSHORT(+1))))
00389           slow_d += vdw_d;
00390           slow_c += vdw_c;
00391           slow_b += vdw_b;
00392           slow_a += vdw_a;
00393         #endif
00394 
00395         CALC_TYPE slow_dir = (diffa * slow_d + slow_c) * diffa + slow_b;  // quadratic in diffa
00396         CALC_TYPE fullforce_r = slow_dir;
00397         // Apply the force per component to the fullf_* arrays: added to atom i (or to its per-i sum in the padded version) and subtracted from atom j; virial terms are only updated inside PAIR(...)
00398         CALC_TYPE fulltmp_x = fullforce_r * p_ij_x;
00399         PAIR( fullElectVirial_xx += fulltmp_x * p_ij_x; )
00400         PAIR( fullElectVirial_xy += fulltmp_x * p_ij_y; )
00401         PAIR( fullElectVirial_xz += fulltmp_x * p_ij_z; )
00402         #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00403           #if __MIC_PAD_PLGEN_CTRL != 0
00404             fulltmp_x_i_sum += fulltmp_x;
00405           #else
00406             fullf_0[i].x += fulltmp_x;
00407           #endif
00408           fullf_1[j].x -= fulltmp_x;
00409         #else
00410           #if __MIC_PAD_PLGEN_CTRL != 0
00411             fulltmp_x_i_sum += fulltmp_x;
00412           #else
00413             fullf_0_x[i] += fulltmp_x;
00414           #endif
00415           fullf_1_x[j] -= fulltmp_x;
00416         #endif
00417 
00418         CALC_TYPE fulltmp_y = fullforce_r * p_ij_y;
00419         PAIR( fullElectVirial_yy += fulltmp_y * p_ij_y; )
00420         PAIR( fullElectVirial_yz += fulltmp_y * p_ij_z; )
00421         #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00422           #if __MIC_PAD_PLGEN_CTRL != 0
00423             fulltmp_y_i_sum += fulltmp_y;
00424           #else
00425             fullf_0[i].y += fulltmp_y;
00426           #endif
00427           fullf_1[j].y -= fulltmp_y;
00428         #else
00429           #if __MIC_PAD_PLGEN_CTRL != 0
00430             fulltmp_y_i_sum += fulltmp_y;
00431           #else
00432             fullf_0_y[i] += fulltmp_y;
00433           #endif
00434           fullf_1_y[j] -= fulltmp_y;
00435         #endif
00436 
00437         CALC_TYPE fulltmp_z = fullforce_r * p_ij_z;
00438         PAIR( fullElectVirial_zz += fulltmp_z * p_ij_z; )
00439         #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00440           #if __MIC_PAD_PLGEN_CTRL != 0
00441             fulltmp_z_i_sum += fulltmp_z;
00442           #else
00443             fullf_0[i].z += fulltmp_z;
00444           #endif
00445           fullf_1[j].z -= fulltmp_z;
00446         #else
00447           #if __MIC_PAD_PLGEN_CTRL != 0
00448             fulltmp_z_i_sum += fulltmp_z;
00449           #else
00450             fullf_0_z[i] += fulltmp_z;
00451           #endif
00452           fullf_1_z[j] -= fulltmp_z;
00453         #endif
00454 
00455       #endif // FULL
00456 
00457     #if REFINE_PAIRLISTS == 0
00458     } // end if (r2 < cutoff2_delta)
00459     #endif
00460 
00461   // End of loops auto-vectorized via pairlist padding
00462   #if __MIC_PAD_PLGEN_CTRL != 0
00463 
00464       } // end if (entry's j field != 0xFFFF)
00465     } // end for (_plI_fs_inner)
00466     // Fold the inner loop's local exclusion counter into params.exclusionSum and drop the helper macro
00467     #if MIC_EXCL_CHECKSUM_FULL != 0
00468       params.exclusionSum += exclusionSum;
00469     #endif
00470     #undef EXCL_CHECKSUM_CLAUSE
00471     // Write the per-i force sums accumulated by the inner loop back to the i atom's force arrays
00472     #if (0 FAST(SHORT(+1)))
00473       #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00474         f_0[i].x += tmp_x_i_sum;
00475         f_0[i].y += tmp_y_i_sum;
00476         f_0[i].z += tmp_z_i_sum;
00477       #else
00478         f_0_x[i] += tmp_x_i_sum;
00479         f_0_y[i] += tmp_y_i_sum;
00480         f_0_z[i] += tmp_z_i_sum;
00481       #endif
00482     #endif
00483 
00484     #if (0 FULL(+1))
00485       #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
00486         fullf_0[i].x += fulltmp_x_i_sum;
00487         fullf_0[i].y += fulltmp_y_i_sum;
00488         fullf_0[i].z += fulltmp_z_i_sum;
00489       #else
00490         fullf_0_x[i] += fulltmp_x_i_sum;
00491         fullf_0_y[i] += fulltmp_y_i_sum;
00492         fullf_0_z[i] += fulltmp_z_i_sum;
00493       #endif
00494     #endif
00495 
00496   } // end for (_plI_fs_outer)
00497 
00498 
00499   // End of scalar loop
00500   #else
00501 
00502   } // end pairlist-loop (scalar plI loop)
00503 
00504   #endif
00505 
00506 #endif  // NAMD_MIC

Generated on Sun Nov 19 01:17:12 2017 for NAMD by  doxygen 1.4.7