Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

ComputeNonbondedBase2.h

Go to the documentation of this file.
00001 
00007 EXCLUDED( FAST( foo bar ) )
00008 EXCLUDED( MODIFIED( foo bar ) )
00009 EXCLUDED( NORMAL( foo bar ) )
00010 NORMAL( MODIFIED( foo bar ) )
00011 ALCHPAIR( NOT_ALCHPAIR( foo bar ) )
00012 
00013 ALCHPAIR(
00014   // get alchemical nonbonded scaling parameters (once per pairlist)
00015   myLambda = ALCH1(lambdaUp) ALCH2(lambdaDown);
00016   FEP(myLambda2 = ALCH1(lambda2Up) ALCH2(lambda2Down);)
00017   myElecLambda =  ALCH1(elecLambdaUp) ALCH2(elecLambdaDown); 
00018   FEP(myElecLambda2 = ALCH1(elecLambda2Up) ALCH2(elecLambda2Down);)
00019   myVdwLambda =  ALCH1(vdwLambdaUp) ALCH2(vdwLambdaDown); 
00020   FEP(myVdwLambda2 = ALCH1(vdwLambda2Up) ALCH2(vdwLambda2Down);) 
00021   myVdwShift =  ALCH1(vdwShiftUp) ALCH2(vdwShiftDown); 
00022   FEP(myVdwShift2 =  ALCH1(vdwShift2Up) ALCH2(vdwShift2Down);) 
00023 )
00024 
00025 #ifdef  A2_QPX
00026 #if ( SHORT(1+) 0 )
00027 NORMAL(kq_iv    = vec_splats(kq_i); )
00028 MODIFIED(kq_iv    = vec_splats((1.0-modf_mod) *kq_i); )
00029 #endif
00030 
00031 #if ( FULL( 1+ ) 0 )
00032 EXCLUDED(
00033          SHORT(
00034                full_cnst = (vector4double)(6., 4., 2., 1.);
00035                ) 
00036          NOSHORT(
00037                  full_cnst = (vector4double)(1., 1., 1., 1.);
00038                  )
00039          )
00040 MODIFIED(
00041          SHORT(
00042                full_cnst = (vector4double)(6., 4., 2., 1.);
00043                full_cnst = vec_mul (full_cnst, vec_splats(modf_mod));          
00044                ) 
00045          NOSHORT(
00046                  full_cnst = vec_splats(modf_mod);
00047                  )
00048          )      
00049 #endif
00050 #endif
00051 
00052 #ifdef ARCH_POWERPC
00053      __alignx(64, table_four);
00054      __alignx(32, p_1);
00055 #pragma unroll(1)
00056 #pragma ibm independent_loop
00057 #endif
00058 
00059 #ifndef ARCH_POWERPC
00060 #pragma ivdep
00061 #endif
00062 
00063 
00064   
00065     for (k=0; k<npairi; ++k) {      
00066       TABENERGY(
00067       const int numtypes = simParams->tableNumTypes;
00068       const float table_spacing = simParams->tableSpacing;
00069       const int npertype = (int) (mynearbyint(simParams->tableMaxDist / simParams->tableSpacing) + 1);
00070       )
00071 
00072       int table_i = (r2iilist[2*k] >> 14) + r2_delta_expc;  // table_i >= 0 
00073       const int j = pairlisti[k];
00074       register const CompAtom *p_j = p_1 + j;
00075 #ifdef  A2_QPX
00076       register double *p_j_d = (double *) p_j;
00077 #endif
00078       // const CompAtomExt *pExt_j = pExt_1 + j;
00079       
00080       BigReal diffa = r2list[k] - r2_table[table_i];
00081 #ifdef  A2_QPX
00082       BigReal* table_four_i = (BigReal *)(table_four + 16*table_i);
00083 #else
00084       const BigReal* const table_four_i = table_four + 16*table_i;
00085 #endif
00086 
00087 #if  ( FAST( 1 + ) TABENERGY( 1 + ) 0 ) // FAST or TABENERGY
00088       const LJTable::TableEntry * lj_pars = 
00089               lj_row + 2 * p_j->vdwType MODIFIED(+ 1);
00090 #ifdef  A2_QPX
00091       double *lj_pars_d = (double *) lj_pars;
00092 #endif
00093 #endif
00094       
00095       TABENERGY(
00096       register const int tabtype = -1 - ( lj_pars->A < 0 ? lj_pars->A : 0 );
00097       )
00098       
00099 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00100       Force *f_j = f_1 + j;
00101 #endif
00102         
00103 #if ( FULL( 1+ ) 0 )
00104       Force *fullf_j = fullf_1 + j;
00105 #endif
00106 
00107       //Power PC aliasing and alignment constraints
00108 #ifdef ARCH_POWERPC      
00109 #if ( FULL( 1+ ) 0 )
00110 #pragma disjoint (*table_four_i, *fullf_j)
00111 #pragma disjoint (*p_j,          *fullf_j)
00112 #ifdef  A2_QPX
00113 #pragma disjoint (*p_j_d,        *fullf_j)
00114 #endif
00115 #pragma disjoint (*r2_table,     *fullf_j)
00116 #pragma disjoint (*r2list,       *fullf_j)
00117 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00118 #pragma disjoint (*f_j    ,      *fullf_j)
00119 #pragma disjoint (*fullf_j,      *f_j)
00120 #endif   //Short + fast
00121 #endif   //Full
00122 
00123 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00124 #pragma disjoint (*table_four_i, *f_j)
00125 #pragma disjoint (*p_j,          *f_j)
00126 #pragma disjoint (*r2_table,     *f_j)
00127 #pragma disjoint (*r2list,       *f_j)
00128 #pragma disjoint (*lj_pars,      *f_j)
00129 #ifdef  A2_QPX
00130 #pragma disjoint (*p_j_d,        *f_j)
00131 #endif
00132 #endif //Short + Fast
00133 
00134       __alignx(64, table_four_i);
00135       FAST (
00136       __alignx(32, lj_pars);
00137       )
00138       __alignx(32, p_j);
00139 #endif   //ARCH_POWERPC
00140 
00141       /*
00142       BigReal modf = 0.0;
00143       int atom2 = p_j->id;
00144       register char excl_flag = ( (atom2 >= excl_min && atom2 <= excl_max) ?
00145                                         excl_flags[atom2-excl_min] : 0 );
00146       if ( excl_flag ) { ++exclChecksum; }
00147       SELF( if ( j < j_hgroup ) { excl_flag = EXCHCK_FULL; } )
00148       if ( excl_flag ) {
00149         if ( excl_flag == EXCHCK_FULL ) {
00150           lj_pars = lj_null_pars;
00151           modf = 1.0;
00152         } else {
00153           ++lj_pars;
00154           modf = modf_mod;
00155         }
00156       }
00157       */
00158 
00159       BigReal kqq = kq_i * p_j->charge;
00160 
00161 #ifdef  A2_QPX
00162       float * cg = (float *)&p_j->charge;
00163 #if ( FULL( 1+ ) 0 )
00164 #pragma disjoint (*cg, *fullf_j)
00165 #endif   //Full
00166 
00167 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00168 #pragma disjoint (*cg, *f_j)
00169 #endif   //Short + fast
00170 #endif
00171 
00172       LES( BigReal lambda_pair = lambda_table_i[p_j->partition]; )
00173 
00174 #ifndef  A2_QPX
00175       register const BigReal p_ij_x = p_i_x - p_j->position.x;
00176       register const BigReal p_ij_y = p_i_y - p_j->position.y;
00177       register const BigReal p_ij_z = p_i_z - p_j->position.z;
00178 #else
00179       vector4double charge_v = vec_lds(0, cg);
00180       vector4double kqqv = vec_mul(kq_iv, charge_v );
00181       vector4double p_ij_v = vec_sub(p_i_v, vec_ld (0, p_j_d));
00182 #define p_ij_x     vec_extract(p_i_v, 0)
00183 #define p_ij_y     vec_extract(p_i_v, 1)
00184 #define p_ij_z     vec_extract(p_i_v, 2)
00185 #endif  
00186 
00187 #if ( FAST(1+) 0 )
00188       const BigReal A = scaling * lj_pars->A;
00189       const BigReal B = scaling * lj_pars->B;
00190 #ifndef  A2_QPX
00191       BigReal vdw_d = A * table_four_i[0] - B * table_four_i[4];
00192       BigReal vdw_c = A * table_four_i[1] - B * table_four_i[5];
00193       BigReal vdw_b = A * table_four_i[2] - B * table_four_i[6];
00194       BigReal vdw_a = A * table_four_i[3] - B * table_four_i[7];
00195 #else
00196       const vector4double Av = vec_mul(scalingv, vec_lds(0, lj_pars_d));
00197       const vector4double Bv = vec_mul(scalingv, vec_lds(8, lj_pars_d));
00198       vector4double vdw_v = vec_msub( Av, vec_ld(0, table_four_i), vec_mul(Bv, vec_ld(4*sizeof(BigReal), table_four_i)) );
00199 #define   vdw_d  vec_extract(vdw_v, 0)
00200 #define   vdw_c  vec_extract(vdw_v, 1)
00201 #define   vdw_b  vec_extract(vdw_v, 2)
00202 #define   vdw_a  vec_extract(vdw_v, 3)
00203 #endif
00204 
00205       ALCHPAIR (
00206         // Alchemical free energy calculation
00207         // Pairlists are separated so that lambda-coupled pairs are handled
00208         // independently from normal nonbonded (inside ALCHPAIR macro).
00209         // The separation-shifted van der Waals potential and a shifted 
00210         // electrostatics potential for decoupling are calculated explicitly.
00211         // Would be faster with lookup tables but because only a small minority
00212         // of nonbonded pairs are lambda-coupled the impact is minimal. 
00213         // Explicit calculation also makes things easier to modify.
00214         
00215         const BigReal r2 = r2list[k] - r2_delta;
00216 
00217         // These are now inline functions (in ComputeNonbondedFep.C) to 
00218         // tidy the code
00219 
00220         FEP(fep_vdw_forceandenergies(A,B,r2,myVdwShift,myVdwShift2,switchdist2,
00221           cutoff2, myVdwLambda, myVdwLambda2, Fep_WCA_repuOn, Fep_WCA_dispOn, Fep_Wham,
00222           WCA_rcut1, WCA_rcut2, WCA_rcut3, switchfactor, &alch_vdw_energy, 
00223           &alch_vdw_force,&alch_vdw_energy_2,&alch_vdw_energy_2_Left);)
00224         TI(ti_vdw_force_energy_dUdl(A,B,r2,myVdwShift,switchdist2,
00225           cutoff2, myVdwLambda, alchVdwShiftCoeff, switchfactor, 
00226           &alch_vdw_energy, &alch_vdw_force, &alch_vdw_dUdl);)
00227       )
00228       
00229         //NOT_ALCHPAIR(
00230         //TABENERGY(
00231 #if (NOT_ALCHPAIR(1+) 0)
00232 #if (TABENERGY(1+) 0)
00233         if (tabtype >= 0) {
00234           register BigReal r1;
00235           r1 = sqrt(p_ij_x*p_ij_x + p_ij_y*p_ij_y + p_ij_z*p_ij_z);
00236 
00237           //CkPrintf("%i %i %f %f %i\n", npertype, tabtype, r1, table_spacing, (int) (mynearbyint(r1 / table_spacing)));
00238           register int eneraddress;
00239           eneraddress = 2 * ((npertype * tabtype) + ((int) mynearbyint(r1 / table_spacing)));
00240           //CkPrintf("Using distance bin %i for distance %f\n", eneraddress, r1);
00241 #ifndef A2_QPX
00242           vdw_d = 0.;
00243           vdw_c = 0.;
00244           vdw_b = table_ener[eneraddress + 1] / r1;
00245           vdw_a = (-1/2.) * diffa * vdw_b;
00246 #else
00247           vec_insert(0.,                               vdw_v, 0);
00248           vec_insert(0.,                               vdw_v, 1);
00249           vec_insert(table_ener[eneraddress + 1] / r1, vdw_v, 2);
00250           vec_insert((-1/2.) * diffa * vdw_b,          vdw_v, 3);
00251 #endif
00252           ENERGY(
00253             register BigReal vdw_val = table_ener[eneraddress];
00254             //CkPrintf("Found vdw energy of %f\n", vdw_val);
00255             vdwEnergy += LAM(lambda_pair *) vdw_val;
00256             FEP( vdwEnergy_s += d_lambda_pair * vdw_val; )
00257             FEP( vdwEnergy_s_Left += d_lambda_pair * vdw_val; )
00258           )
00259         }  else {
00260           //)
00261 #endif
00262       ENERGY(
00263         register BigReal vdw_val =
00264           ( ( diffa * vdw_d * (1/6.)+ vdw_c * (1/4.)) * diffa + vdw_b *(1/2.)) * diffa + vdw_a;
00265 
00266         vdwEnergy -= LAM(lambda_pair *) vdw_val;
00267 
00268         FEP(vdwEnergy_s -= vdw_val;)
00269         FEP(vdwEnergy_s_Left -= vdw_val;)
00270       )
00271         //TABENERGY( } ) /* endif (tabtype >= 0) */
00272 #if (TABENERGY (1+) 0)
00273         }
00274 #endif
00275       //) // NOT_ALCHPAIR
00276 #endif
00277 
00278       ALCHPAIR(
00279         ENERGY(vdwEnergy   += alch_vdw_energy;)
00280         FEP(vdwEnergy_s += alch_vdw_energy_2;)
00281         FEP(vdwEnergy_s_Left += alch_vdw_energy_2_Left;)
00282         TI(ALCH1(vdwEnergy_ti_1) ALCH2(vdwEnergy_ti_2) += alch_vdw_dUdl;)
00283       ) // ALCHPAIR
00284       
00285 #endif // FAST
00286 
00287 #if ( FAST(1+) 0 )
00288      INT( 
00289       register BigReal vdw_dir;
00290       vdw_dir = ( diffa * vdw_d + vdw_c ) * diffa + vdw_b;
00291       //BigReal force_r =  LAM(lambda_pair *) vdw_dir;
00292       reduction[pairVDWForceIndex_X] += force_sign * vdw_dir * p_ij_x;
00293       reduction[pairVDWForceIndex_Y] += force_sign * vdw_dir * p_ij_y;
00294       reduction[pairVDWForceIndex_Z] += force_sign * vdw_dir * p_ij_z;
00295       )
00296 
00297 #if ( SHORT(1+) 0 ) // Short-range electrostatics
00298 
00299 #ifndef  A2_QPX
00300       NORMAL(
00301       BigReal fast_d = kqq * table_four_i[8];
00302       BigReal fast_c = kqq * table_four_i[9];
00303       BigReal fast_b = kqq * table_four_i[10];
00304       BigReal fast_a = kqq * table_four_i[11];
00305       )
00306       MODIFIED(
00307       BigReal modfckqq = (1.0-modf_mod) * kqq;
00308       BigReal fast_d = modfckqq * table_four_i[8];
00309       BigReal fast_c = modfckqq * table_four_i[9];
00310       BigReal fast_b = modfckqq * table_four_i[10];
00311       BigReal fast_a = modfckqq * table_four_i[11];
00312       )
00313 #else
00314       vector4double fastv = vec_mul(kqqv, vec_ld(8 * sizeof(BigReal), table_four_i));
00315 #define fast_d   vec_extract(fastv, 0)
00316 #define fast_c   vec_extract(fastv, 1)
00317 #define fast_b   vec_extract(fastv, 2)
00318 #define fast_a   vec_extract(fastv, 3)
00319 #endif
00320     
00321       {
00322       ENERGY(
00323       register BigReal fast_val =
00324         ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;
00325       NOT_ALCHPAIR (
00326         electEnergy -=  LAM(lambda_pair *) fast_val;
00327         FEP(electEnergy_s -= fast_val;)
00328       )
00329       ) //ENERGY
00330       ALCHPAIR(
00331         ENERGY(electEnergy   -= myElecLambda * fast_val;)
00332         if( Fep_Wham ) {
00333                 if( Fep_ElecOn ) {
00334             FEP(electEnergy_s -= (myElecLambda * fast_val + fast_val);)
00335                 }
00336                 else {  // repulsive or dispersive, no constribution from charges
00337             FEP(electEnergy_s -= (myElecLambda * fast_val);)
00338                 }
00339         }
00340         else{ // the default (orginal) FEP
00341           FEP(electEnergy_s -= myElecLambda2 * fast_val;)
00342         } 
00343         TI(
00344           NOENERGY(register BigReal fast_val = 
00345             ( ( diffa * fast_d * (1/6.)+ fast_c * (1/4.)) * diffa + fast_b *(1/2.)) * diffa + fast_a;)
00346           ALCH1(electEnergy_ti_1) ALCH2(electEnergy_ti_2)  -= fast_val;
00347         )
00348       )
00349 
00350       INT(
00351       register BigReal fast_dir =
00352       ( diffa * fast_d + fast_c ) * diffa + fast_b;
00353       // force_r -= -1.0 * LAM(lambda_pair *) fast_dir;
00354       reduction[pairElectForceIndex_X] +=  force_sign * fast_dir * p_ij_x;
00355       reduction[pairElectForceIndex_Y] +=  force_sign * fast_dir * p_ij_y;
00356       reduction[pairElectForceIndex_Z] +=  force_sign * fast_dir * p_ij_z;
00357       )
00358       }
00359 
00360 
00361      /*****  JE - Go  *****/
00362      // Now Go energy should appear in VDW place -- put vdw_b back into place
00363 #if    ( NORMAL (1+) 0)
00364 #if    ( GO (1+) 0)
00365 
00366        if (ComputeNonbondedUtil::goGroPair) {
00367          // Explicit goGroPair calculation; only calculates goGroPair if goGroPair is turned on
00368          //
00369          // get_gro_force has an internal checklist that sees if atom_i and atom_j are
00370          // in the explicit pairlist.  This is done because there is no guarantee that a 
00371          // processor will have atom_i and atom_j so we cannot loop over the explict atom pairs.
00372          // We can only loop over all pairs.
00373          //
00374          // NOTE: It does not look like fast_b is not normalized by the r vector.
00375          //
00376          // JLai
00377          BigReal groLJe = 0.0;
00378          BigReal groGausse = 0.0;
00379          const CompAtomExt *pExt_z = pExt_1 + j;
00380              BigReal groForce = mol->get_gro_force2(p_ij_x, p_ij_y, p_ij_z,pExt_i.id,pExt_z->id,&groLJe,&groGausse);
00381 #ifndef A2_QPX
00382              fast_b += groForce;
00383 #else 
00384              vec_insert(fast_b + groForce, fastv, 2);
00385 #endif
00386          ENERGY(
00387              NOT_ALCHPAIR (
00388                  // JLai
00389                  groLJEnergy += groLJe;
00390                  groGaussEnergy += groGausse;
00391                  )
00392              ) //ENERGY
00393        }
00394        BigReal goNative = 0;
00395        BigReal goNonnative = 0;
00396        BigReal goForce = 0;
00397        register const CompAtomExt *pExt_j = pExt_1 + j;
00398        if (ComputeNonbondedUtil::goMethod == 2) {
00399          goForce = mol->get_go_force2(p_ij_x, p_ij_y, p_ij_z, pExt_i.id, pExt_j->id,&goNative,&goNonnative);
00400        } else {
00401          //  Ported by JLai -- JE - added (
00402          const BigReal r2go = square(p_ij_x, p_ij_y, p_ij_z);
00403          const BigReal rgo = sqrt(r2go);
00404        
00405          if (ComputeNonbondedUtil::goMethod == 1) {
00406            goForce = mol->get_go_force(rgo, pExt_i.id, pExt_j->id, &goNative, &goNonnative);
00407          } else if (ComputeNonbondedUtil::goMethod == 3) {  
00408            goForce = mol->get_go_force_new(rgo, pExt_i.id, pExt_j->id, &goNative, &goNonnative);
00409          } else {
00410            NAMD_die("I SHOULDN'T BE HERE.  DYING MELODRAMATICALLY.\n");
00411          }
00412        }
00413        
00414 #ifndef A2_QPX
00415        fast_b += goForce;
00416 #else 
00417        vec_insert(fast_b + goForce, fastv, 2);
00418 #endif
00419        {
00420        ENERGY(
00421          NOT_ALCHPAIR (
00422                        // JLai
00423                        goEnergyNative +=  goNative;
00424                        goEnergyNonnative += goNonnative;
00425          )
00426        ) //ENERGY                                                                                                                                                  
00427          INT(
00428            reduction[pairVDWForceIndex_X] +=  force_sign * goForce * p_ij_x;
00429            reduction[pairVDWForceIndex_Y] +=  force_sign * goForce * p_ij_y;
00430            reduction[pairVDWForceIndex_Z] +=  force_sign * goForce * p_ij_z;
00431          )
00432        }
00433        // End of INT 
00434 
00435        //DebugM(3,"rgo:" << rgo << ", pExt_i.id:" << pExt_i.id << ", pExt_j->id:" << pExt_j->id << \
00436          //      ", goForce:" << goForce << ", fast_b:" << fast_b << std::endl);
00437 #endif       //     ) // End of GO macro 
00438        /*****  JE - End Go  *****/
00439        // End of port JL
00440 #endif      //) // End of Normal MACRO
00441 
00442       // Combined short-range electrostatics and VdW force:
00443 #if    (  NOT_ALCHPAIR(1+) 0)
00444 #ifndef A2_QPX
00445         fast_d += vdw_d;
00446         fast_c += vdw_c;
00447         fast_b += vdw_b;
00448         fast_a += vdw_a;  // not used!
00449 #else
00450         fastv = vec_add(fastv, vdw_v);
00451 #endif
00452 #endif
00453 
00454       register BigReal fast_dir =
00455                   (diffa * fast_d + fast_c) * diffa + fast_b;
00456 
00457       BigReal force_r =  LAM(lambda_pair *) fast_dir;
00458       ALCHPAIR(
00459         force_r *= myElecLambda; 
00460         force_r += alch_vdw_force;
00461         // special ALCH forces already multiplied by relevant lambda
00462       )
00463           
00464 #ifndef NAMD_CUDA
00465 #ifndef  A2_QPX
00466       register BigReal tmp_x = force_r * p_ij_x;
00467       PAIR( virial_xx += tmp_x * p_ij_x; )
00468       PAIR( virial_xy += tmp_x * p_ij_y; )
00469       PAIR( virial_xz += tmp_x * p_ij_z; )
00470 
00471       f_i_x += tmp_x;
00472       f_j->x -= tmp_x;
00473 
00474       register BigReal tmp_y = force_r * p_ij_y;
00475       PAIR( virial_yy += tmp_y * p_ij_y; )
00476       PAIR( virial_yz += tmp_y * p_ij_z; )
00477       f_i_y += tmp_y;
00478       f_j->y -= tmp_y;
00479       
00480       register BigReal tmp_z = force_r * p_ij_z;
00481       PAIR( virial_zz += tmp_z * p_ij_z; )
00482       f_i_z += tmp_z;
00483       f_j->z -= tmp_z;
00484 #else
00485       vector4double force_rv = vec_splats (force_r);
00486       vector4double tmp_v = vec_mul(force_rv, p_ij_v);
00487       f_i_v = vec_add(f_i_v, tmp_v);
00488 
00489       PAIR (
00490             vector4double tmp_xv = vec_splat(tmp_v, 0);
00491             vector4double tmp_yv = vec_splat(tmp_v, 1);
00492             virial_v0 = vec_madd(tmp_xv, p_ij_v, virial_v0);
00493             virial_v1 = vec_madd(tmp_yv, p_ij_v, virial_v1);
00494             virial_v2 = vec_madd(tmp_v,  p_ij_v, virial_v2);
00495             )
00496 
00497 #define tmp_x   vec_extract(tmp_v, 0)
00498 #define tmp_y   vec_extract(tmp_v, 1)
00499 #define tmp_z   vec_extract(tmp_v, 2)
00500 
00501       f_j->x -= tmp_x;
00502       f_j->y -= tmp_y;
00503       f_j->z -= tmp_z;
00504 #endif
00505 
00506       PPROF(
00507         const BigReal p_j_z = p_j->position.z;
00508         int n2 = (int)floor((p_j_z-pressureProfileMin)*invThickness);
00509         pp_clamp(n2, pressureProfileSlabs);
00510         int p_j_partition = p_j->partition;
00511 
00512         pp_reduction(pressureProfileSlabs, n1, n2, 
00513                      p_i_partition, p_j_partition, pressureProfileAtomTypes,
00514                      tmp_x*p_ij_x, tmp_y * p_ij_y, tmp_z*p_ij_z,
00515                      pressureProfileReduction);
00516       )
00517 #endif
00518 
00519 #endif // SHORT
00520 #endif // FAST
00521 
00522 #if ( FULL (EXCLUDED( SHORT ( 1+ ) ) ) 0 ) 
00523 #ifndef A2_QPX
00524       const BigReal* const slow_i = slow_table + 4*table_i;
00525 #else
00526       BigReal* slow_i = (BigReal *)(slow_table + 4*table_i);
00527 #endif
00528 
00529 #ifdef ARCH_POWERPC  //Alignment and aliasing constraints
00530       __alignx (32, slow_i);
00531 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00532 #pragma disjoint (*slow_i, *f_j)
00533 #endif
00534 #pragma disjoint (*slow_i, *fullf_j)
00535 #endif  //ARCH_POWERPC
00536 
00537 #endif //FULL 
00538 
00539 
00540 #if ( FULL (MODIFIED( SHORT ( 1+ ) ) ) 0 ) 
00541 #ifndef A2_QPX
00542       const BigReal* const slow_i = slow_table + 4*table_i;
00543 #else
00544       BigReal* slow_i = (BigReal *)(slow_table + 4*table_i);
00545 #endif
00546 
00547 #ifdef ARCH_POWERPC //Alignment and aliasing constraints
00548       __alignx (32, slow_i);
00549 #if ( SHORT( FAST( 1+ ) ) 0 ) 
00550 #pragma disjoint (*slow_i, *f_j)
00551 #endif
00552 #pragma disjoint (*slow_i, *fullf_j)
00553 #endif //ARCH_POWERPC
00554 
00555 #endif //FULL
00556       
00557 #if ( FULL( 1+ ) 0 )
00558 #ifndef  A2_QPX
00559       BigReal slow_d = table_four_i[8 SHORT(+ 4)];
00560       BigReal slow_c = table_four_i[9 SHORT(+ 4)];
00561       BigReal slow_b = table_four_i[10 SHORT(+ 4)];
00562       BigReal slow_a = table_four_i[11 SHORT(+ 4)];
00563       EXCLUDED(
00564       SHORT(
00565       slow_a +=    slow_i[3];
00566       slow_b += 2.*slow_i[2];
00567       slow_c += 4.*slow_i[1];
00568       slow_d += 6.*slow_i[0];
00569       )
00570       NOSHORT(
00571       slow_d -= table_four_i[12];
00572       slow_c -= table_four_i[13];
00573       slow_b -= table_four_i[14];
00574       slow_a -= table_four_i[15];
00575       )
00576       )
00577       MODIFIED(
00578       SHORT(
00579       slow_a +=    modf_mod * slow_i[3];
00580       slow_b += 2.*modf_mod * slow_i[2];
00581       slow_c += 4.*modf_mod * slow_i[1];
00582       slow_d += 6.*modf_mod * slow_i[0];
00583       )
00584       NOSHORT(
00585       slow_d -= modf_mod * table_four_i[12];
00586       slow_c -= modf_mod * table_four_i[13];
00587       slow_b -= modf_mod * table_four_i[14];
00588       slow_a -= modf_mod * table_four_i[15];
00589       )
00590       )
00591       slow_d *= kqq;
00592       slow_c *= kqq;
00593       slow_b *= kqq;
00594       slow_a *= kqq;
00595 #else
00596       vector4double slow_v = vec_ld((8 SHORT(+ 4)) * sizeof(BigReal), table_four_i);
00597       EXCLUDED(
00598                SHORT(
00599                      slow_v = vec_madd(full_cnst, vec_ld(0, slow_i), slow_v);
00600                      )
00601                NOSHORT(
00602                        slow_v = vec_sub(slow_v, vec_ld(12*sizeof(BigReal), table_four_i));
00603                        )
00604                );
00605       MODIFIED(
00606                SHORT(
00607                      slow_v = vec_madd(full_cnst,  vec_ld(0, slow_i), slow_v);
00608                      )
00609                NOSHORT(
00610                        slow_v = vec_nmsub(full_cnst, vec_ld(12*sizeof(BigReal), table_four_i), slow_v);
00611                        )
00612                );
00613       slow_v = vec_mul (slow_v, vec_splats(kqq));
00614 
00615 #define slow_d   vec_extract(slow_v, 0)
00616 #define slow_c   vec_extract(slow_v, 1)
00617 #define slow_b   vec_extract(slow_v, 2)
00618 #define slow_a   vec_extract(slow_v, 3)
00619 
00620 #endif
00621 
00622       ENERGY(
00623       register BigReal slow_val =
00624         ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;
00625       
00626       NOT_ALCHPAIR (
00627         fullElectEnergy -= LAM(lambda_pair *) slow_val;
00628         FEP(fullElectEnergy_s -= slow_val;) 
00629       )
00630       ) // ENERGY
00631           
00632       ALCHPAIR(
00633         ENERGY(fullElectEnergy   -= myElecLambda * slow_val;)
00634         if( Fep_Wham ) {
00635                 if(Fep_ElecOn)  {
00636                         FEP(fullElectEnergy_s -= (myElecLambda * slow_val + slow_val);)
00637                 }
00638                 else    {
00639             FEP(fullElectEnergy_s -= myElecLambda * slow_val;)
00640                 }
00641         }
00642         else{ // orignal FEP
00643           FEP(fullElectEnergy_s -= myElecLambda2 * slow_val;)
00644         }
00645         TI(
00646           NOENERGY(register BigReal slow_val =
00647                 ( ( diffa * slow_d *(1/6.)+ slow_c * (1/4.)) * diffa + slow_b *(1/2.)) * diffa + slow_a;)
00648           ALCH1(fullElectEnergy_ti_1) ALCH2(fullElectEnergy_ti_2) -= slow_val;
00649         )
00650       )
00651 
00652       INT( {
00653       register BigReal slow_dir =
00654         ( diffa * slow_d + slow_c ) * diffa + slow_b;
00655       reduction[pairElectForceIndex_X] += force_sign * slow_dir * p_ij_x;
00656       reduction[pairElectForceIndex_Y] += force_sign * slow_dir * p_ij_y;
00657       reduction[pairElectForceIndex_Z] += force_sign * slow_dir * p_ij_z;
00658       } )
00659 
00660 
00661 #if     (NOT_ALCHPAIR (1+) 0)
00662 #if     (FAST(1+) 0)
00663 #if     (NOSHORT(1+) 0)
00664 #ifndef A2_QPX
00665         slow_d += vdw_d;
00666         slow_c += vdw_c;
00667         slow_b += vdw_b;
00668         slow_a += vdw_a; // unused!
00669 #else
00670         slow_v = vec_add (slow_v, vdw_v);
00671 #endif
00672 #endif
00673 #endif
00674 #endif
00675 
00676       register BigReal slow_dir = (diffa * slow_d + slow_c) * diffa + slow_b;
00677       BigReal fullforce_r = slow_dir LAM(* lambda_pair);
00678       ALCHPAIR (
00679         fullforce_r *= myElecLambda;
00680         FAST( NOSHORT(
00681           fullforce_r += alch_vdw_force;
00682         ))
00683       )
00684           
00685 #ifndef NAMD_CUDA
00686       {
00687 #ifndef  A2_QPX
00688       register BigReal ftmp_x = fullforce_r * p_ij_x;
00689       PAIR( fullElectVirial_xx += ftmp_x * p_ij_x; )
00690       PAIR( fullElectVirial_xy += ftmp_x * p_ij_y; )
00691       PAIR( fullElectVirial_xz += ftmp_x * p_ij_z; )
00692       fullf_i_x += ftmp_x;
00693       fullf_j->x -= ftmp_x;
00694       register BigReal ftmp_y = fullforce_r * p_ij_y;
00695       PAIR( fullElectVirial_yy += ftmp_y * p_ij_y; )
00696       PAIR( fullElectVirial_yz += ftmp_y * p_ij_z; )
00697       fullf_i_y += ftmp_y;
00698       fullf_j->y -= ftmp_y;
00699       register BigReal ftmp_z = fullforce_r * p_ij_z;
00700       PAIR( fullElectVirial_zz += ftmp_z * p_ij_z; )
00701       fullf_i_z += ftmp_z;
00702       fullf_j->z -= ftmp_z;
00703 #else
00704       vector4double fforce_rv = vec_splats (fullforce_r);
00705       vector4double ftmp_v = vec_mul(fforce_rv, p_ij_v);
00706       fullf_i_v = vec_add(fullf_i_v, ftmp_v);
00707       
00708       PAIR (
00709             vector4double ftmp_xv = vec_splat(ftmp_v, 0);
00710             vector4double ftmp_yv = vec_splat(ftmp_v, 1);
00711             fullvirial_v0 = vec_madd(ftmp_xv, p_ij_v, fullvirial_v0);
00712             fullvirial_v1 = vec_madd(ftmp_yv, p_ij_v, fullvirial_v1);
00713             fullvirial_v2 = vec_madd(ftmp_v,  p_ij_v, fullvirial_v2);
00714             )
00715         
00716 #define ftmp_x  vec_extract(ftmp_v, 0)
00717 #define ftmp_y  vec_extract(ftmp_v, 1)
00718 #define ftmp_z  vec_extract(ftmp_v, 2)
00719 
00720       fullf_j->x -= ftmp_x;
00721       fullf_j->y -= ftmp_y;
00722       fullf_j->z -= ftmp_z;
00723 
00724 #endif
00725 
00726       PPROF(
00727         const BigReal p_j_z = p_j->position.z;
00728         int n2 = (int)floor((p_j_z-pressureProfileMin)*invThickness);
00729         pp_clamp(n2, pressureProfileSlabs);
00730         int p_j_partition = p_j->partition;
00731 
00732         pp_reduction(pressureProfileSlabs, n1, n2, 
00733                      p_i_partition, p_j_partition, pressureProfileAtomTypes,
00734                      ftmp_x*p_ij_x, ftmp_y * p_ij_y, ftmp_z*p_ij_z,
00735                      pressureProfileReduction);
00736 
00737       )
00738       }
00739 #endif
00740 #endif //FULL
00741 
00742    } // for pairlist
00743 

Generated on Tue May 21 04:07:14 2013 for NAMD by  doxygen 1.3.9.1