ComputeNonbondedBase2.h

Go to the documentation of this file.
00001 
00007 EXCLUDED( FAST( foo bar ) )
00008 EXCLUDED( MODIFIED( foo bar ) )
00009 EXCLUDED( NORMAL( foo bar ) )
00010 NORMAL( MODIFIED( foo bar ) )
00011 ALCHPAIR( NOT_ALCHPAIR( foo bar ) )
00012 
00013 ALCHPAIR(
00014   // get alchemical nonbonded scaling parameters (once per pairlist)
00015   myLambda = ALCH1(lambdaUp) ALCH2(lambdaDown);
00016   FEP(myLambda2 = ALCH1(lambda2Up) ALCH2(lambda2Down);)
00017   myElecLambda =  ALCH1(elecLambdaUp) ALCH2(elecLambdaDown); 
00018   FEP(myElecLambda2 = ALCH1(elecLambda2Up) ALCH2(elecLambda2Down);)
00019   myVdwLambda =  ALCH1(vdwLambdaUp) ALCH2(vdwLambdaDown); 
00020   FEP(myVdwLambda2 = ALCH1(vdwLambda2Up) ALCH2(vdwLambda2Down);) 
00021   myVdwShift =  ALCH1(vdwShiftUp) ALCH2(vdwShiftDown); 
00022   FEP(myVdwShift2 =  ALCH1(vdwShift2Up) ALCH2(vdwShift2Down);) 
00023 )
00024 
00025 #ifdef  A2_QPX
00026 #if ( SHORT(1+) 0 )
00027 NORMAL(kq_iv    = vec_splats(kq_i); )
00028 MODIFIED(kq_iv    = vec_splats((1.0-modf_mod) *kq_i); )
00029 #endif
00030 
00031 #if ( FULL( 1+ ) 0 )
00032 EXCLUDED(
00033          SHORT(
00034                full_cnst = (vector4double)(6., 4., 2., 1.);
00035                ) 
00036          NOSHORT(
00037                  full_cnst = (vector4double)(1., 1., 1., 1.);
00038                  )
00039          )
00040 MODIFIED(
00041          SHORT(
00042                full_cnst = (vector4double)(6., 4., 2., 1.);
00043                full_cnst = vec_mul (full_cnst, vec_splats(modf_mod));          
00044                ) 
00045          NOSHORT(
00046                  full_cnst = vec_splats(modf_mod);
00047                  )
00048          )      
00049 #endif
00050 #endif
00051 
00052 #ifdef ARCH_POWERPC
00053      __alignx(64, table_four);
00054      __alignx(32, p_1);
00055 #pragma unroll(1)
00056 #pragma ibm independent_loop
00057 #endif
00058 
00059 #ifndef ARCH_POWERPC
00060 #pragma ivdep
00061 #endif
00062 
00063 
00064   
00065 #if ( FULL( EXCLUDED( SHORT( 1+ ) ) ) 0 ) 
00066 // avoid bug in Intel 15.0 compiler
00067 #pragma novector
00068 #else
00069 #ifdef PRAGMA_SIMD
00070 #ifndef TABENERGYFLAG
00071 #ifndef GOFORCES
00072 #pragma simd assert SHORT(FAST(reduction(+:f_i_x,f_i_y,f_i_z)) ENERGY(FAST(reduction(+:vdwEnergy) SHORT(reduction(+:electEnergy))))) \
00073              FULL(reduction(+:fullf_i_x,fullf_i_y,fullf_i_z) ENERGY(reduction(+:fullElectEnergy)))
00074 #endif
00075 #endif
00076 #pragma loop_count avg=100
00077 #else // PRAGMA_SIMD
00078 #pragma loop_count avg=4
00079 #endif // PRAGMA_SIMD
00080 #endif
00081     for (k=0; k<npairi; ++k) {      
00082       TABENERGY(
00083       const int numtypes = simParams->tableNumTypes;
00084       const float table_spacing = simParams->tableSpacing;
00085       const int npertype = (int) (namdnearbyint(simParams->tableMaxDist / simParams->tableSpacing) + 1);
00086       )
00087 
00088       int table_i = (r2iilist[2*k] >> 14) + r2_delta_expc;  // table_i >= 0 
00089       const int j = pairlisti[k];
00090       //register const CompAtom *p_j = p_1 + j;
00091 #define p_j (p_1+j)
00092 #ifdef  A2_QPX
00093       register double *p_j_d = (double *) p_j;
00094 #endif
00095       // const CompAtomExt *pExt_j = pExt_1 + j;
00096       
00097       BigReal diffa = r2list[k] - r2_table[table_i];
00098       //const BigReal* const table_four_i = table_four + 16*table_i;
00099 #define table_four_i (table_four + 16*table_i)
00100 
00101 #if  ( FAST( 1 + ) TABENERGY( 1 + ) 0 ) // FAST or TABENERGY
00102       //const LJTable::TableEntry * lj_pars = 
00103       //        lj_row + 2 * p_j->vdwType MODIFIED(+ 1);
00104       const int lj_index = 2 * p_j->vdwType MODIFIED(+ 1);
00105 #define lj_pars (lj_row+lj_index)
00106 #ifdef  A2_QPX
00107       double *lj_pars_d = (double *) lj_pars;
00108 #endif
00109 #endif
00110       
00111       TABENERGY(
00112       register const int tabtype = -1 - ( lj_pars->A < 0 ? lj_pars->A : 0 );
00113       )
00114       
00115 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00116       //Force *f_j = f_1 + j;
00117 #define f_j (f_1+j)
00118 #endif
00119         
00120 #if ( FULL( 1+ ) 0 )
00121       //Force *fullf_j = fullf_1 + j;
00122 #define fullf_j (fullf_1+j)
00123 #endif
00124 
00125       //Power PC aliasing and alignment constraints
00126 #ifdef ARCH_POWERPC      
00127 #if ( FULL( 1+ ) 0 )
00128 #pragma disjoint (*table_four, *fullf_1)
00129 #pragma disjoint (*p_1,          *fullf_1)
00130 #ifdef  A2_QPX
00131 #pragma disjoint (*p_j_d,        *fullf_1)
00132 #endif
00133 #pragma disjoint (*r2_table,     *fullf_1)
00134 #pragma disjoint (*r2list,       *fullf_1)
00135 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00136 #pragma disjoint (*f_1    ,      *fullf_1)
00137 #pragma disjoint (*fullf_1,      *f_1)
00138 #endif   //Short + fast
00139 #endif   //Full
00140 
00141 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00142 #pragma disjoint (*table_four, *f_1)
00143 #pragma disjoint (*p_1,          *f_1)
00144 #pragma disjoint (*r2_table,     *f_1)
00145 #pragma disjoint (*r2list,       *f_1)
00146 #pragma disjoint (*lj_row,      *f_1)
00147 #ifdef  A2_QPX
00148 #pragma disjoint (*p_j_d,        *f_1)
00149 #endif
00150 #endif //Short + Fast
00151 
00152       __alignx(64, table_four_i);
00153       FAST (
00154       __alignx(32, lj_pars);
00155       )
00156       __alignx(32, p_j);
00157 #endif   //ARCH_POWERPC
00158 
00159       /*
00160       BigReal modf = 0.0;
00161       int atom2 = p_j->id;
00162       register char excl_flag = ( (atom2 >= excl_min && atom2 <= excl_max) ?
00163                                         excl_flags[atom2-excl_min] : 0 );
00164       if ( excl_flag ) { ++exclChecksum; }
00165       SELF( if ( j < j_hgroup ) { excl_flag = EXCHCK_FULL; } )
00166       if ( excl_flag ) {
00167         if ( excl_flag == EXCHCK_FULL ) {
00168           lj_pars = lj_null_pars;
00169           modf = 1.0;
00170         } else {
00171           ++lj_pars;
00172           modf = modf_mod;
00173         }
00174       }
00175       */
00176 
00177       BigReal kqq = kq_i * p_j->charge;
00178 
00179       
00180 #ifdef  A2_QPX
00181       float * cg = (float *)&p_j->charge;
00182 #if ( FULL( 1+ ) 0 )
00183 #pragma disjoint (*cg, *fullf_1)
00184 #endif   //Full
00185 
00186 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00187 #pragma disjoint (*cg, *f_1)
00188 #endif   //Short + fast
00189 #endif
00190 
00191       LES( BigReal lambda_pair = lambda_table_i[p_j->partition]; )
00192 
00193 #ifndef  A2_QPX
00194       register const BigReal p_ij_x = p_i_x - p_j->position.x;
00195       register const BigReal p_ij_y = p_i_y - p_j->position.y;
00196       register const BigReal p_ij_z = p_i_z - p_j->position.z;
00197 #else
00198       vector4double charge_v = vec_lds(0, cg);
00199       vector4double kqqv = vec_mul(kq_iv, charge_v );
00200       vector4double p_ij_v = vec_sub(p_i_v, vec_ld (0, p_j_d));
00201 #define p_ij_x     vec_extract(p_i_v, 0)
00202 #define p_ij_y     vec_extract(p_i_v, 1)
00203 #define p_ij_z     vec_extract(p_i_v, 2)
00204 #endif  
00205 
00206 #if ( FAST(1+) 0 )
00207       const BigReal A = scaling * lj_pars->A;
00208       const BigReal B = scaling * lj_pars->B;
00209 #ifndef  A2_QPX
00210       BigReal vdw_d = A * table_four_i[0] - B * table_four_i[4];
00211       BigReal vdw_c = A * table_four_i[1] - B * table_four_i[5];
00212       BigReal vdw_b = A * table_four_i[2] - B * table_four_i[6];
00213       BigReal vdw_a = A * table_four_i[3] - B * table_four_i[7];
00214 #else
00215       const vector4double Av = vec_mul(scalingv, vec_lds(0, lj_pars_d));
00216       const vector4double Bv = vec_mul(scalingv, vec_lds(8, lj_pars_d));
00217       vector4double vdw_v = vec_msub( Av, vec_ld(0, (BigReal*)table_four_i), vec_mul(Bv, vec_ld(4*sizeof(BigReal), (BigReal*)table_four_i)) );
00218 #define   vdw_d  vec_extract(vdw_v, 0)
00219 #define   vdw_c  vec_extract(vdw_v, 1)
00220 #define   vdw_b  vec_extract(vdw_v, 2)
00221 #define   vdw_a  vec_extract(vdw_v, 3)
00222 #endif
00223 
00224       ALCHPAIR (
00225         // Alchemical free energy calculation
00226         // Pairlists are separated so that lambda-coupled pairs are handled
00227         // independently from normal nonbonded (inside ALCHPAIR macro).
00228         // The separation-shifted van der Waals potential and a shifted 
00229         // electrostatics potential for decoupling are calculated explicitly.
00230         // Would be faster with lookup tables but because only a small minority
00231         // of nonbonded pairs are lambda-coupled the impact is minimal. 
00232         // Explicit calculation also makes things easier to modify.
00233         
00234         const BigReal r2 = r2list[k] - r2_delta;
00235 
00236         // These are now inline functions (in ComputeNonbondedFep.C) to 
00237         // tidy the code
00238 
00239         FEP(fep_vdw_forceandenergies(A,B,r2,myVdwShift,myVdwShift2,switchdist2,
00240           cutoff2, myVdwLambda, myVdwLambda2, Fep_WCA_repuOn, Fep_WCA_dispOn,
00241           Fep_Wham, WCA_rcut1, WCA_rcut2, WCA_rcut3, switchfactor,
00242           vdwForceSwitching, LJcorrection, &alch_vdw_energy, &alch_vdw_force, 
00243           &alch_vdw_energy_2, &alch_vdw_energy_2_Left);)
00244         TI(ti_vdw_force_energy_dUdl(A,B,r2,myVdwShift,switchdist2,
00245           cutoff2, myVdwLambda, alchVdwShiftCoeff, switchfactor,
00246           vdwForceSwitching, LJcorrection, &alch_vdw_energy, &alch_vdw_force, 
00247           &alch_vdw_dUdl);)
00248       )
00249       
00250         //NOT_ALCHPAIR(
00251         //TABENERGY(
00252 #if (NOT_ALCHPAIR(1+) 0)
00253 #if (TABENERGY(1+) 0)
00254         if (tabtype >= 0) {
00255           register BigReal r1;
00256           r1 = sqrt(p_ij_x*p_ij_x + p_ij_y*p_ij_y + p_ij_z*p_ij_z);
00257 
00258           //CkPrintf("%i %i %f %f %i\n", npertype, tabtype, r1, table_spacing, (int) (namdnearbyint(r1 / table_spacing)));
00259           register int eneraddress;
00260           eneraddress = 2 * ((npertype * tabtype) + ((int) namdnearbyint(r1 / table_spacing)));
00261           //CkPrintf("Using distance bin %i for distance %f\n", eneraddress, r1);
00262 #ifndef A2_QPX
00263           vdw_d = 0.;
00264           vdw_c = 0.;
00265           vdw_b = table_ener[eneraddress + 1] / r1;
00266           vdw_a = (-1/2.) * diffa * vdw_b;
00267 #else
00268           vec_insert(0.,                               vdw_v, 0);
00269           vec_insert(0.,                               vdw_v, 1);
00270           vec_insert(table_ener[eneraddress + 1] / r1, vdw_v, 2);
00271           vec_insert((-1/2.) * diffa * vdw_b,          vdw_v, 3);
00272 #endif
00273           ENERGY(
00274             register BigReal vdw_val = table_ener[eneraddress];
00275             //CkPrintf("Found vdw energy of %f\n", vdw_val);
00276             vdwEnergy += LAM(lambda_pair *) vdw_val;
00277             FEP( vdwEnergy_s += d_lambda_pair * vdw_val; )
00278             FEP( vdwEnergy_s_Left += d_lambda_pair * vdw_val; )
00279           )
00280         }  else {
00281           //)
00282 #endif
00283       ENERGY(
00284         register BigReal vdw_val =
00285           ( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a;
00286 
00287         vdwEnergy -= LAM(lambda_pair *) vdw_val;
00288 
00289         FEP(vdwEnergy_s -= vdw_val;)
00290         FEP(vdwEnergy_s_Left -= vdw_val;)
00291       )
00292         //TABENERGY( } ) /* endif (tabtype >= 0) */
00293 #if (TABENERGY (1+) 0)
00294         }
00295 #endif
00296       //) // NOT_ALCHPAIR
00297 #endif
00298 
00299       ALCHPAIR(
00300         ENERGY(vdwEnergy   += alch_vdw_energy;)
00301         FEP(vdwEnergy_s += alch_vdw_energy_2;)
00302         FEP(vdwEnergy_s_Left += alch_vdw_energy_2_Left;)
00303         TI(ALCH1(vdwEnergy_ti_1) ALCH2(vdwEnergy_ti_2) += alch_vdw_dUdl;)
00304       ) // ALCHPAIR
00305       
00306 #endif // FAST
00307 
00308 #if ( FAST(1+) 0 )
00309      INT( 
00310       register BigReal vdw_dir;
00311       vdw_dir = ( diffa * vdw_d + vdw_c ) * diffa + vdw_b;
00312       //BigReal force_r =  LAM(lambda_pair *) vdw_dir;
00313       reduction[pairVDWForceIndex_X] += force_sign * vdw_dir * p_ij_x;
00314       reduction[pairVDWForceIndex_Y] += force_sign * vdw_dir * p_ij_y;
00315       reduction[pairVDWForceIndex_Z] += force_sign * vdw_dir * p_ij_z;
00316       )
00317 
00318 #if ( SHORT(1+) 0 ) // Short-range electrostatics
00319 
00320 #ifndef  A2_QPX
00321       NORMAL(
00322       BigReal fast_d = kqq * table_four_i[8];
00323       BigReal fast_c = kqq * table_four_i[9];
00324       BigReal fast_b = kqq * table_four_i[10];
00325       BigReal fast_a = kqq * table_four_i[11];
00326       )
00327       MODIFIED(
00328       BigReal modfckqq = (1.0-modf_mod) * kqq;
00329       BigReal fast_d = modfckqq * table_four_i[8];
00330       BigReal fast_c = modfckqq * table_four_i[9];
00331       BigReal fast_b = modfckqq * table_four_i[10];
00332       BigReal fast_a = modfckqq * table_four_i[11];
00333       )
00334 #else
00335       vector4double fastv = vec_mul(kqqv, vec_ld(8 * sizeof(BigReal), (BigReal*)table_four_i));
00336 #define fast_d   vec_extract(fastv, 0)
00337 #define fast_c   vec_extract(fastv, 1)
00338 #define fast_b   vec_extract(fastv, 2)
00339 #define fast_a   vec_extract(fastv, 3)
00340 #endif
00341     
00342       {
00343       ENERGY(
00344       register BigReal fast_val =
00345         ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;
00346       NOT_ALCHPAIR (
00347         electEnergy -=  LAM(lambda_pair *) fast_val;
00348         FEP(electEnergy_s -= fast_val;)
00349       )
00350       ) //ENERGY
00351       ALCHPAIR(
00352         ENERGY(electEnergy   -= myElecLambda * fast_val;)
00353         if( Fep_Wham ) {
00354                 if( Fep_ElecOn ) {
00355             FEP(electEnergy_s -= (myElecLambda * fast_val + fast_val);)
00356                 }
00357                 else {  // repulsive or dispersive, no constribution from charges
00358             FEP(electEnergy_s -= (myElecLambda * fast_val);)
00359                 }
00360         }
00361         else{ // the default (orginal) FEP
00362           FEP(electEnergy_s -= myElecLambda2 * fast_val;)
00363         } 
00364         TI(
00365           NOENERGY(register BigReal fast_val = 
00366             ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;)
00367           ALCH1(electEnergy_ti_1) ALCH2(electEnergy_ti_2)  -= fast_val;
00368         )
00369       )
00370 
00371       INT(
00372       register BigReal fast_dir =
00373       ( diffa * fast_d + fast_c ) * diffa + fast_b;
00374       // force_r -= -1.0 * LAM(lambda_pair *) fast_dir;
00375       reduction[pairElectForceIndex_X] +=  force_sign * fast_dir * p_ij_x;
00376       reduction[pairElectForceIndex_Y] +=  force_sign * fast_dir * p_ij_y;
00377       reduction[pairElectForceIndex_Z] +=  force_sign * fast_dir * p_ij_z;
00378       )
00379       }
00380 
00381 
00382      /*****  JE - Go  *****/
00383      // Now Go energy should appear in VDW place -- put vdw_b back into place
00384 #if    ( NORMAL (1+) 0)
00385 #if    ( GO (1+) 0)
00386 
00387 
00388 // JLai
00389 #ifndef CODE_REDUNDANT
00390 #define CODE_REDUNDANT 0
00391 #endif
00392 #if CODE_REDUNDANT
00393        if (ComputeNonbondedUtil::goGroPair) {
00394          // Explicit goGroPair calculation; only calculates goGroPair if goGroPair is turned on
00395          //
00396          // get_gro_force has an internal checklist that sees if atom_i and atom_j are
00397          // in the explicit pairlist.  This is done because there is no guarantee that a 
00398          // processor will have atom_i and atom_j so we cannot loop over the explict atom pairs.
00399          // We can only loop over all pairs.
00400          //
00401          // NOTE: It does not look like fast_b is not normalized by the r vector.
00402          //
00403          // JLai
00404          BigReal groLJe = 0.0;
00405          BigReal groGausse = 0.0;
00406          const CompAtomExt *pExt_z = pExt_1 + j;
00407              BigReal groForce = mol->get_gro_force2(p_ij_x, p_ij_y, p_ij_z,pExt_i.id,pExt_z->id,&groLJe,&groGausse);
00408              NAMD_die("Failsafe.  This line should never be reached\n");
00409 #ifndef A2_QPX
00410              fast_b += groForce;
00411 #else 
00412              vec_insert(fast_b + groForce, fastv, 2);
00413 #endif
00414          ENERGY(
00415              NOT_ALCHPAIR (
00416                  // JLai
00417                  groLJEnergy += groLJe;
00418                  groGaussEnergy += groGausse;
00419                  )
00420              ) //ENERGY
00421        }
00422 #endif 
00423        BigReal goNative = 0;
00424        BigReal goNonnative = 0;
00425        BigReal goForce = 0;
00426        register const CompAtomExt *pExt_j = pExt_1 + j;
00427        if (ComputeNonbondedUtil::goMethod == 2) {
00428          goForce = mol->get_go_force2(p_ij_x, p_ij_y, p_ij_z, pExt_i.id, pExt_j->id,&goNative,&goNonnative);
00429        } else {
00430          //  Ported by JLai -- JE - added (
00431          const BigReal r2go = square(p_ij_x, p_ij_y, p_ij_z);
00432          const BigReal rgo = sqrt(r2go);
00433        
00434          if (ComputeNonbondedUtil::goMethod == 1) {
00435            goForce = mol->get_go_force(rgo, pExt_i.id, pExt_j->id, &goNative, &goNonnative);
00436          } else if (ComputeNonbondedUtil::goMethod == 3) {  
00437            goForce = mol->get_go_force_new(rgo, pExt_i.id, pExt_j->id, &goNative, &goNonnative);
00438          } else {
00439            NAMD_die("I SHOULDN'T BE HERE.  DYING MELODRAMATICALLY.\n");
00440          }
00441        }
00442        
00443 #ifndef A2_QPX
00444        fast_b += goForce;
00445 #else 
00446        vec_insert(fast_b + goForce, fastv, 2);
00447 #endif
00448        {
00449        ENERGY(
00450          NOT_ALCHPAIR (
00451                        // JLai
00452                        goEnergyNative +=  goNative;
00453                        goEnergyNonnative += goNonnative;
00454          )
00455        ) //ENERGY                                                                                                                                                  
00456          INT(
00457            reduction[pairVDWForceIndex_X] +=  force_sign * goForce * p_ij_x;
00458            reduction[pairVDWForceIndex_Y] +=  force_sign * goForce * p_ij_y;
00459            reduction[pairVDWForceIndex_Z] +=  force_sign * goForce * p_ij_z;
00460          )
00461        }
00462        // End of INT 
00463 
00464        //DebugM(3,"rgo:" << rgo << ", pExt_i.id:" << pExt_i.id << ", pExt_j->id:" << pExt_j->id << \
00465          //      ", goForce:" << goForce << ", fast_b:" << fast_b << std::endl);
00466 #endif       //     ) // End of GO macro 
00467        /*****  JE - End Go  *****/
00468        // End of port JL
00469 #endif      //) // End of Normal MACRO
00470 
00471       // Combined short-range electrostatics and VdW force:
00472 #if    (  NOT_ALCHPAIR(1+) 0)
00473 #ifndef A2_QPX
00474         fast_d += vdw_d;
00475         fast_c += vdw_c;
00476         fast_b += vdw_b;
00477         fast_a += vdw_a;  // not used!
00478 #else
00479         fastv = vec_add(fastv, vdw_v);
00480 #endif
00481 #endif
00482 
00483       register BigReal fast_dir =
00484                   (diffa * fast_d + fast_c) * diffa + fast_b;
00485 
00486       BigReal force_r =  LAM(lambda_pair *) fast_dir;
00487       ALCHPAIR(
00488         force_r *= myElecLambda; 
00489         force_r += alch_vdw_force;
00490         // special ALCH forces already multiplied by relevant lambda
00491       )
00492           
00493 #ifndef NAMD_CUDA
00494 #ifndef  A2_QPX
00495       register BigReal tmp_x = force_r * p_ij_x;
00496       f_i_x += tmp_x;
00497       f_j->x -= tmp_x;
00498 
00499       register BigReal tmp_y = force_r * p_ij_y;
00500       f_i_y += tmp_y;
00501       f_j->y -= tmp_y;
00502       
00503       register BigReal tmp_z = force_r * p_ij_z;
00504       f_i_z += tmp_z;
00505       f_j->z -= tmp_z;
00506 #else
00507       vector4double force_rv = vec_splats (force_r);
00508       vector4double tmp_v = vec_mul(force_rv, p_ij_v);
00509       f_i_v = vec_add(f_i_v, tmp_v);
00510 
00511 #define tmp_x   vec_extract(tmp_v, 0)
00512 #define tmp_y   vec_extract(tmp_v, 1)
00513 #define tmp_z   vec_extract(tmp_v, 2)
00514 
00515       f_j->x -= tmp_x;
00516       f_j->y -= tmp_y;
00517       f_j->z -= tmp_z;
00518 #endif
00519 
00520       PPROF(
00521         const BigReal p_j_z = p_j->position.z;
00522         int n2 = (int)floor((p_j_z-pressureProfileMin)*invThickness);
00523         pp_clamp(n2, pressureProfileSlabs);
00524         int p_j_partition = p_j->partition;
00525 
00526         pp_reduction(pressureProfileSlabs, n1, n2, 
00527                      p_i_partition, p_j_partition, pressureProfileAtomTypes,
00528                      tmp_x*p_ij_x, tmp_y * p_ij_y, tmp_z*p_ij_z,
00529                      pressureProfileReduction);
00530       )
00531 #endif
00532 
00533 #endif // SHORT
00534 #endif // FAST
00535 
00536 #if ( FULL (EXCLUDED( SHORT ( 1+ ) ) ) 0 ) 
00537       //const BigReal* const slow_i = slow_table + 4*table_i;
00538 #define slow_i (slow_table + 4*table_i)
00539 
00540 #ifdef ARCH_POWERPC  //Alignment and aliasing constraints
00541       __alignx (32, slow_table);
00542 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00543 #pragma disjoint (*slow_table, *f_1)
00544 #endif
00545 #pragma disjoint (*slow_table, *fullf_1)
00546 #endif  //ARCH_POWERPC
00547 
00548 #endif //FULL 
00549 
00550 
00551 #if ( FULL (MODIFIED( SHORT ( 1+ ) ) ) 0 ) 
00552       //const BigReal* const slow_i = slow_table + 4*table_i;
00553 #define slow_i (slow_table + 4*table_i)
00554 
00555 #ifdef ARCH_POWERPC //Alignment and aliasing constraints
00556       __alignx (32, slow_table);
00557 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00558 #pragma disjoint (*slow_table, *f_1)
00559 #endif
00560 #pragma disjoint (*slow_table, *fullf_1)
00561 #endif //ARCH_POWERPC
00562 
00563 #endif //FULL
00564       
00565 #if ( FULL( 1+ ) 0 )
00566 #ifndef  A2_QPX
00567       BigReal slow_d = table_four_i[8 SHORT(+ 4)];
00568       BigReal slow_c = table_four_i[9 SHORT(+ 4)];
00569       BigReal slow_b = table_four_i[10 SHORT(+ 4)];
00570       BigReal slow_a = table_four_i[11 SHORT(+ 4)];
00571       EXCLUDED(
00572       SHORT(
00573       slow_a +=    slow_i[3];
00574       slow_b += 2.*slow_i[2];
00575       slow_c += 4.*slow_i[1];
00576       slow_d += 6.*slow_i[0];
00577       )
00578       NOSHORT(
00579       slow_d -= table_four_i[12];
00580       slow_c -= table_four_i[13];
00581       slow_b -= table_four_i[14];
00582       slow_a -= table_four_i[15];
00583       )
00584       )
00585       MODIFIED(
00586       SHORT(
00587       slow_a +=    modf_mod * slow_i[3];
00588       slow_b += 2.*modf_mod * slow_i[2];
00589       slow_c += 4.*modf_mod * slow_i[1];
00590       slow_d += 6.*modf_mod * slow_i[0];
00591       )
00592       NOSHORT(
00593       slow_d -= modf_mod * table_four_i[12];
00594       slow_c -= modf_mod * table_four_i[13];
00595       slow_b -= modf_mod * table_four_i[14];
00596       slow_a -= modf_mod * table_four_i[15];
00597       )
00598       )
00599       slow_d *= kqq;
00600       slow_c *= kqq;
00601       slow_b *= kqq;
00602       slow_a *= kqq;
00603 #else
00604       vector4double slow_v = vec_ld((8 SHORT(+ 4)) * sizeof(BigReal), (BigReal*)table_four_i);
00605       EXCLUDED(
00606                SHORT(
00607                      slow_v = vec_madd(full_cnst, vec_ld(0, (BigReal*)slow_i), slow_v);
00608                      )
00609                NOSHORT(
00610                        slow_v = vec_sub(slow_v, vec_ld(12*sizeof(BigReal), (BigReal*)table_four_i));
00611                        )
00612                );
00613       MODIFIED(
00614                SHORT(
00615                      slow_v = vec_madd(full_cnst,  vec_ld(0, (BigReal*)slow_i), slow_v);
00616                      )
00617                NOSHORT(
00618                        slow_v = vec_nmsub(full_cnst, vec_ld(12*sizeof(BigReal), (BigReal*)table_four_i), slow_v);
00619                        )
00620                );
00621       slow_v = vec_mul (slow_v, vec_splats(kqq));
00622 
00623 #define slow_d   vec_extract(slow_v, 0)
00624 #define slow_c   vec_extract(slow_v, 1)
00625 #define slow_b   vec_extract(slow_v, 2)
00626 #define slow_a   vec_extract(slow_v, 3)
00627 
00628 #endif
00629 
00630       ENERGY(
00631       register BigReal slow_val =
00632         ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;
00633       
00634       NOT_ALCHPAIR (
00635         fullElectEnergy -= LAM(lambda_pair *) slow_val;
00636         FEP(fullElectEnergy_s -= slow_val;) 
00637       )
00638       ) // ENERGY
00639           
00640       ALCHPAIR(
00641         ENERGY(fullElectEnergy   -= myElecLambda * slow_val;)
00642         if( Fep_Wham ) {
00643                 if(Fep_ElecOn)  {
00644                         FEP(fullElectEnergy_s -= (myElecLambda * slow_val + slow_val);)
00645                 }
00646                 else    {
00647             FEP(fullElectEnergy_s -= myElecLambda * slow_val;)
00648                 }
00649         }
00650         else{ // orignal FEP
00651           FEP(fullElectEnergy_s -= myElecLambda2 * slow_val;)
00652         }
00653         TI(
00654           NOENERGY(register BigReal slow_val =
00655                 ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;)
00656           ALCH1(fullElectEnergy_ti_1) ALCH2(fullElectEnergy_ti_2) -= slow_val;
00657         )
00658       )
00659 
00660       INT( {
00661       register BigReal slow_dir =
00662         ( diffa * slow_d + slow_c ) * diffa + slow_b;
00663       reduction[pairElectForceIndex_X] += force_sign * slow_dir * p_ij_x;
00664       reduction[pairElectForceIndex_Y] += force_sign * slow_dir * p_ij_y;
00665       reduction[pairElectForceIndex_Z] += force_sign * slow_dir * p_ij_z;
00666       } )
00667 
00668 
00669 #if     (NOT_ALCHPAIR (1+) 0)
00670 #if     (FAST(1+) 0)
00671 #if     (NOSHORT(1+) 0)
00672 #ifndef A2_QPX
00673         slow_d += vdw_d;
00674         slow_c += vdw_c;
00675         slow_b += vdw_b;
00676         slow_a += vdw_a; // unused!
00677 #else
00678         slow_v = vec_add (slow_v, vdw_v);
00679 #endif
00680 #endif
00681 #endif
00682 #endif
00683 
00684       register BigReal slow_dir = (diffa * slow_d + slow_c) * diffa + slow_b;
00685       BigReal fullforce_r = slow_dir LAM(* lambda_pair);
00686       ALCHPAIR (
00687         fullforce_r *= myElecLambda;
00688         FAST( NOSHORT(
00689           fullforce_r += alch_vdw_force;
00690         ))
00691       )
00692           
00693 #ifndef NAMD_CUDA
00694       {
00695 #ifndef  A2_QPX
00696       register BigReal ftmp_x = fullforce_r * p_ij_x;
00697       fullf_i_x += ftmp_x;
00698       fullf_j->x -= ftmp_x;
00699       register BigReal ftmp_y = fullforce_r * p_ij_y;
00700       fullf_i_y += ftmp_y;
00701       fullf_j->y -= ftmp_y;
00702       register BigReal ftmp_z = fullforce_r * p_ij_z;
00703       fullf_i_z += ftmp_z;
00704       fullf_j->z -= ftmp_z;
00705 #else
00706       vector4double fforce_rv = vec_splats (fullforce_r);
00707       vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v);
00708       fullf_i_v = vec_add(fullf_i_v, ftmp_v);
00709       
00710 #define ftmp_x  vec_extract(ftmp_v, 0)
00711 #define ftmp_y  vec_extract(ftmp_v, 1)
00712 #define ftmp_z  vec_extract(ftmp_v, 2)
00713 
00714       fullf_j->x -= ftmp_x;
00715       fullf_j->y -= ftmp_y;
00716       fullf_j->z -= ftmp_z;
00717 
00718 #endif
00719 
00720       PPROF(
00721         const BigReal p_j_z = p_j->position.z;
00722         int n2 = (int)floor((p_j_z-pressureProfileMin)*invThickness);
00723         pp_clamp(n2, pressureProfileSlabs);
00724         int p_j_partition = p_j->partition;
00725 
00726         pp_reduction(pressureProfileSlabs, n1, n2, 
00727                      p_i_partition, p_j_partition, pressureProfileAtomTypes,
00728                      ftmp_x*p_ij_x, ftmp_y * p_ij_y, ftmp_z*p_ij_z,
00729                      pressureProfileReduction);
00730 
00731       )
00732       }
00733 #endif
00734 #endif //FULL
00735 
00736    } // for pairlist
00737 
00738 #undef p_j
00739 #undef lj_pars
00740 #undef table_four_i
00741 #undef slow_i
00742 #undef f_j
00743 #undef fullf_j
00744 

Generated on Tue Nov 21 01:17:11 2017 for NAMD by  doxygen 1.4.7