#include "ComputeNonbondedBase2.h"#include "PatchMap.h"Go to the source code of this file.
Defines | |
| #define | NAME CLASSNAME(calc) |
| #define | PAIR(X) X |
| #define | CLASS ComputeNonbondedPair |
| #define | CLASSNAME(X) ENERGYNAME( X ## _pair ) |
| #define | SELF(X) X |
| #define | CLASS ComputeNonbondedSelf |
| #define | CLASSNAME(X) ENERGYNAME( X ## _self ) |
| #define | ENERGY(X) |
| #define | NOENERGY(X) X |
| #define | ENERGYNAME(X) SLOWONLYNAME( X ) |
| #define | FAST(X) X |
| #define | SLOWONLYNAME(X) MERGEELECTNAME( X ) |
| #define | SHORT(X) X |
| #define | NOSHORT(X) |
| #define | MERGEELECTNAME(X) FULLELECTNAME( X ) |
| #define | FULLELECTNAME(X) FEPNAME( X ) |
| #define | FULL(X) |
| #define | NOFULL(X) X |
| #define | FEPNAME(X) LAST( X ) |
| #define | FEP(X) |
| #define | ALCHPAIR(X) |
| #define | NOT_ALCHPAIR(X) X |
| #define | LES(X) |
| #define | INT(X) |
| #define | PPROF(X) |
| #define | LAM(X) |
| #define | ALCH(X) |
| #define | TI(X) |
| #define | LAST(X) X |
| #define | NORMAL(X) X |
| #define | EXCLUDED(X) |
| #define | MODIFIED(X) |
| #define | NORMAL(X) |
| #define | EXCLUDED(X) |
| #define | MODIFIED(X) X |
Functions | |
| SELF (PAIR(foo bar)) LES(FEP(foo bar)) LES(INT(foo bar)) FEP(INT(foo bar)) LAM(INT(foo bar)) FEP(NOENERGY(foo bar)) ENERGY(NOENERGY(foo bar)) void ComputeNonbondedUtil | |
|
|
Definition at line 131 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 125 of file ComputeNonbondedBase.h. |
|
|
Definition at line 52 of file ComputeNonbondedBase.h. |
|
|
Definition at line 52 of file ComputeNonbondedBase.h. |
|
|
Definition at line 53 of file ComputeNonbondedBase.h. |
|
|
Definition at line 53 of file ComputeNonbondedBase.h. |
|
|
Definition at line 66 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 68 of file ComputeNonbondedBase.h. |
|
|
|
|
|
|
|
|
Definition at line 77 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 124 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 123 of file ComputeNonbondedBase.h. |
|
|
Definition at line 103 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 102 of file ComputeNonbondedBase.h. |
|
|
Definition at line 128 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 130 of file ComputeNonbondedBase.h. |
|
|
Definition at line 172 of file ComputeNonbondedBase.h. |
|
|
Definition at line 127 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 91 of file ComputeNonbondedBase.h. |
|
|
|
|
|
|
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 38 of file ComputeNonbondedBase.h. |
|
|
Definition at line 67 of file ComputeNonbondedBase.h. |
|
|
Definition at line 104 of file ComputeNonbondedBase.h. |
|
|
|
|
|
|
|
|
Definition at line 90 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 126 of file ComputeNonbondedBase.h. |
|
|
Definition at line 42 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 129 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 51 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 89 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 78 of file ComputeNonbondedBase.h. |
|
|
Definition at line 132 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 175 of file ComputeNonbondedBase.h. References LJTable::TableEntry::A, ALCH, Atom, CompAtom::atomFixed, Molecule::atomvdwtype(), LJTable::TableEntry::B, BigReal, CompAtom::charge, COLOUMB, ENERGY, CompAtomExt::exclId, FAST, FEP, ExclusionCheck::flags, Force, FULL, Molecule::get_excl_check_for_atom(), Molecule::get_full_exclusions_for_atom(), Molecule::get_mod_exclusions_for_atom(), Molecule::getAtoms(), CompAtom::groupFixed, CompAtom::hydrogenGroupSize, CompAtom::id, __sort_entry::index, INT, int32, LES, ExclusionCheck::max, ExclusionCheck::min, NAMD_bug(), NAMD_ComputeNonbonded_SortAtoms, NAMD_die(), NBWORKARRAY, NBWORKARRAYSINIT, Pairlists::newlist(), Pairlists::newsize(), Pairlists::nextlist(), CompAtom::nonbondedGroupIsAtom, NOSHORT, Molecule::numAtoms, PAIR, pairlist_from_pairlist(), CompAtom::partition, plint, CompAtom::position, pp_clamp(), PPROF, Pairlists::reset(), SELF, SHORT, sortEntries_bubbleSort(), sortEntries_mergeSort_v1(), sortEntries_mergeSort_v2(), sortEntries_selectionSort(), SortEntry, __sort_entry::sortValue, LJTable::table_row(), TI, atom_constants::vdw_type, CompAtomExt::vdwType, Vector::x, Vector::y, and Vector::z. 00195 {
00196 // int NAME; // to track errors via unused variable warnings
00197
00198 if ( ComputeNonbondedUtil::commOnly ) return;
00199
00200 // speedup variables
00201 BigReal *reduction = params->reduction;
00202
00203 PPROF(
00204 BigReal *pressureProfileReduction = params->pressureProfileReduction;
00205 const BigReal invThickness = 1.0 / pressureProfileThickness;
00206 )
00207
00208 Pairlists &pairlists = *(params->pairlists);
00209 int savePairlists = params->savePairlists;
00210 int usePairlists = params->usePairlists;
00211 pairlists.reset();
00212 // PAIR(iout << "--------\n" << endi;)
00213
00214 // local variables
00215 int exclChecksum = 0;
00216 FAST
00217 (
00218 ENERGY( BigReal vdwEnergy = 0; )
00219 SHORT
00220 (
00221 ENERGY( BigReal electEnergy = 0; )
00222 )
00223
00224 FEP
00225 (
00226 ENERGY( BigReal vdwEnergy_s = 0; )
00227 SHORT
00228 (
00229 ENERGY( BigReal electEnergy_s = 0; )
00230 )
00231 )
00232
00233 SHORT
00234 (
00235 BigReal virial_xx = 0;
00236 BigReal virial_xy = 0;
00237 BigReal virial_xz = 0;
00238 BigReal virial_yy = 0;
00239 BigReal virial_yz = 0;
00240 BigReal virial_zz = 0;
00241 )
00242 )
00243 FULL
00244 (
00245 ENERGY( BigReal fullElectEnergy = 0; )
00246 FEP
00247 (
00248 ENERGY( BigReal fullElectEnergy_s = 0; )
00249 )
00250 BigReal fullElectVirial_xx = 0;
00251 BigReal fullElectVirial_xy = 0;
00252 BigReal fullElectVirial_xz = 0;
00253 BigReal fullElectVirial_yy = 0;
00254 BigReal fullElectVirial_yz = 0;
00255 BigReal fullElectVirial_zz = 0;
00256 )
00257
00258 // Bringing stuff into local namespace for speed.
00259
00260 const BigReal offset_x = params->offset.x;
00261 const BigReal offset_y = params->offset.y;
00262 const BigReal offset_z = params->offset.z;
00263
00264 register const BigReal plcutoff2 = \
00265 params->plcutoff * params->plcutoff;
00266 register const BigReal groupplcutoff2 = \
00267 params->groupplcutoff * params->groupplcutoff;
00268 const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00269 const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00270 LJTable::TableEntry ljNull; ljNull.A = 0; ljNull.B = 0;
00271 const LJTable::TableEntry* const lj_null_pars = &ljNull;
00272 const Molecule* const mol = ComputeNonbondedUtil:: mol;
00273 SHORT
00274 (
00275 const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00276 )
00277 FULL
00278 (
00279 SHORT
00280 (
00281 const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00282 )
00283 NOSHORT
00284 (
00285 //#if 1 ALCH(-1)
00286 const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00287 //#else // have to switch this for ALCH
00288 // BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00289 //#endif
00290 )
00291 )
00292 const BigReal scaling = ComputeNonbondedUtil:: scaling;
00293 const BigReal modf_mod = 1.0 - scale14;
00294 FAST
00295 (
00296 const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00297 const BigReal c1 = ComputeNonbondedUtil:: c1;
00298 const BigReal c3 = ComputeNonbondedUtil:: c3;
00299 )
00300 const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00301 const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00302 // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00303 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00304
00305 ALCH(
00306 const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00307 const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00308 const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00309 const BigReal fepElecLambdaStart = ComputeNonbondedUtil::fepElecLambdaStart;
00310 const BigReal fepVdwLambdaEnd = ComputeNonbondedUtil::fepVdwLambdaEnd;
00311 const BigReal fepVdwShiftCoeff = ComputeNonbondedUtil::fepVdwShiftCoeff;
00312
00313 /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00314 BigReal lambdaUp = ComputeNonbondedUtil::lambda;
00315 BigReal elecLambdaUp = (lambdaUp <= fepElecLambdaStart)? 0. : \
00316 (lambdaUp - fepElecLambdaStart) / (1. - fepElecLambdaStart);
00317 BigReal vdwLambdaUp =
00318 (lambdaUp >= fepVdwLambdaEnd)? 1. : lambdaUp / fepVdwLambdaEnd;
00319 BigReal vdwShiftUp = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambdaUp);
00320 FEP(BigReal lambda2Up = ComputeNonbondedUtil::lambda2;)
00321 FEP(BigReal elecLambda2Up = (lambda2Up <= fepElecLambdaStart)? 0. : \
00322 (lambda2Up - fepElecLambdaStart) / (1. - fepElecLambdaStart);)
00323 FEP(BigReal vdwLambda2Up =
00324 (lambda2Up >= fepVdwLambdaEnd)? 1. : lambda2Up / fepVdwLambdaEnd;)
00325 FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambda2Up);)
00326
00327
00328 /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00329 BigReal lambdaDown = 1 - ComputeNonbondedUtil::lambda;
00330 BigReal elecLambdaDown = (lambdaDown <= fepElecLambdaStart)? 0. : \
00331 (lambdaDown - fepElecLambdaStart) / (1. - fepElecLambdaStart);
00332 BigReal vdwLambdaDown =
00333 (lambdaDown >= fepVdwLambdaEnd)? 1. : lambdaDown / fepVdwLambdaEnd;
00334 BigReal vdwShiftDown = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambdaDown);
00335 FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::lambda2;)
00336 FEP(BigReal elecLambda2Down = (lambda2Down <= fepElecLambdaStart)? 0. : \
00337 (lambda2Down - fepElecLambdaStart) / (1. - fepElecLambdaStart); )
00338 FEP(BigReal vdwLambda2Down =
00339 (lambda2Down >= fepVdwLambdaEnd)? 1. : lambda2Down / fepVdwLambdaEnd; )
00340 FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::fepVdwShiftCoeff * (1-vdwLambda2Down);)
00341
00342
00343 // Thermodynamic Integration Notes (F. Buelens):
00344 // Separation of atom pairs into different pairlist according to lambda
00345 // coupling, for alchemical free energy calculations. Normal pairlists
00346 // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00347 // (pairlist[nxm][12]_save are created for atoms switched up or down with
00348 // lambda respectively.
00349 // This makes TI coding far easier and more readable, since it's necessary
00350 // to store dU/dlambda in different variables depending on whether atoms are
00351 // being switched up or down. Further, it allows Jordi's explicit coding of
00352 // the separation-shifted vdW potential to stay in place without a big
00353 // performance hit, since it's no longer necessary to calculate vdW potentials
00354 // explicity for the bulk (non-alchemical) interactions - so that part of the
00355 // free energy code stays readable and easy to modify. Finally there should
00356 // be some performance gain over the old FEP implementation as we no
00357 // longer have to check the partitions of each atom pair and switch
00358 // parameters accordingly for every single NonbondedBase2.h loop - this is
00359 // done at the pairlist level
00360
00361 int pswitchTable[3*3];
00362 // determines which pairlist alchemical pairings get sent to
00363 // 0: non-alchemical pairs, partition 0 <-> partition 0
00364 // 1: atoms scaled up as lambda increases, p0<->p1
00365 // 2: atoms scaled down as lambda increases, p0<->p2
00366 // all p1<->p2 interactions to be dropped (99)
00367 // in general, 'up' refers to 1, 'down' refers to 2
00368 for (int ip=0; ip<3; ++ip) {
00369 for (int jp=0; jp<3; ++jp ) {
00370 pswitchTable[ip+3*jp] = 0;
00371 if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00372 if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00373 if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00374
00375 if (! ComputeNonbondedUtil::decouple) {
00376 // no decoupling: interactions within a partition are treated like
00377 // normal alchemical pairs
00378 if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00379 if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00380 }
00381 if (ComputeNonbondedUtil::decouple) {
00382 // decoupling: PME calculates extra grids so that while PME
00383 // interaction with the full system is switched off, a new PME grid
00384 // containing only alchemical atoms is switched on. Full interactions
00385 // between alchemical atoms are maintained; potentials within one
00386 // partition need not be scaled here.
00387 if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00388 if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00389 }
00390 }
00391 }
00392
00393 // dU/dlambda variables for thermodynamic integration
00394 TI(
00395 BigReal vdwEnergy_ti_1 = 0;
00396 BigReal vdwEnergy_ti_2 = 0;
00397 SHORT(BigReal electEnergy_ti_1 = 0;
00398 BigReal electEnergy_ti_2 = 0;)
00399 FULL(BigReal fullElectEnergy_ti_1 = 0;
00400 BigReal fullElectEnergy_ti_2 = 0;)
00401 )
00402 )
00403
00404
00405 const int i_upper = params->numAtoms[0];
00406 register const int j_upper = params->numAtoms[1];
00407 register int j;
00408 register int i;
00409 const CompAtom *p_0 = params->p[0];
00410 const CompAtom *p_1 = params->p[1];
00411 #ifdef MEM_OPT_VERSION
00412 const CompAtomExt *pExt_0 = params->pExt[0];
00413 const CompAtomExt *pExt_1 = params->pExt[1];
00414 #endif
00415
00416 char * excl_flags_buff = 0;
00417 const int32 * full_excl = 0;
00418 const int32 * mod_excl = 0;
00419
00420 plint *pairlistn_save; int npairn;
00421 plint *pairlistx_save; int npairx;
00422 plint *pairlistm_save; int npairm;
00423
00424 ALCH(
00425 plint *pairlistnA1_save; int npairnA1;
00426 plint *pairlistxA1_save; int npairxA1;
00427 plint *pairlistmA1_save; int npairmA1;
00428 plint *pairlistnA2_save; int npairnA2;
00429 plint *pairlistxA2_save; int npairxA2;
00430 plint *pairlistmA2_save; int npairmA2;
00431 )
00432
00433 NBWORKARRAYSINIT(params->workArrays);
00434
00435 int arraysize = j_upper+5;
00436
00437 NBWORKARRAY(plint,pairlisti,arraysize)
00438 NBWORKARRAY(BigReal,r2list,arraysize)
00439
00440 union { double f; int32 i[2]; } byte_order_test;
00441 byte_order_test.f = 1.0; // should occupy high-order bits only
00442 int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00443
00444 if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00445
00446
00447 // DMK - Atom Sort
00448 //
00449 // Basic Idea: Determine the line between the center of masses of
00450 // the two patches. Project and then sort the lists of atoms
00451 // along this line. Then, as the pairlists are being generated
00452 // for the atoms in the first atom list, use the sorted
00453 // list to only add atoms from the second list that are between
00454 // +/- ~cutoff from the atoms position on the line.
00455 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00456
00457 // NOTE: For the first atom list, just the sort values themselves need to be
00458 // calculated (a BigReal vs. SortEntry data structure). However, a second
00459 // SortEntry array is needed to perform the merge sort on the second list of
00460 // atoms. Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00461 // list of atoms and then use the left over array to make a version of the
00462 // list that only includes non-fixed groups (fixg).
00463
00464 NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00465 NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00466 NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00467
00468 register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00469 register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00470
00471 int p_0_sortValues_len = 0;
00472 int p_1_sortValues_len = 0;
00473 int p_1_sortValues_fixg_len = 0;
00474
00475 // Calculate the distance between to projected points on the line that
00476 // represents the cutoff distance.
00477 BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00478
00479 if (savePairlists || !usePairlists) {
00480
00481 register const BigReal projLineVec_x = params->projLineVec.x;
00482 register const BigReal projLineVec_y = params->projLineVec.y;
00483 register const BigReal projLineVec_z = params->projLineVec.z;
00484
00485 // Calculate the sort values for the atoms in patch 1
00486 {
00487 register int j = 0;
00488 register unsigned int ngia = p_1->nonbondedGroupIsAtom;
00489 register unsigned int hgs = p_1->hydrogenGroupSize;
00490 register BigReal p_x = p_1->position.x;
00491 register BigReal p_y = p_1->position.y;
00492 register BigReal p_z = p_1->position.z;
00493 register int index = j;
00494
00495 while (j < j_upper) {
00496
00497 // Advance j... NOTE: ngia is either 0 or 1, so if ngia is set '1'
00498 // is added to 'j'. Otherwise, the value of 'hgs' is added to j.
00499 j += ((ngia) + ((1 - ngia) * hgs));
00500
00501 // Set p_j_next to point to the atom for the next iteration and begin
00502 // loading the 'ngia' and 'hgs' values for that atom.
00503 register const CompAtom* p_j_next = p_1 + j;
00504 ngia = p_j_next->nonbondedGroupIsAtom;
00505 hgs = p_j_next->hydrogenGroupSize;
00506
00507 // Calculate the distance along the projection vector
00508 // NOTE: If the vector from the origin to the point is 'A' and the vector
00509 // between the patches is 'B' then to project 'A' onto 'B' we take the dot
00510 // product of the two vectors 'A dot B' divided by the length of 'B'.
00511 // So... projection of A onto B = (A dot B) / length(B), however, note
00512 // that length(B) == 1, therefore the sort value is simply (A dot B)
00513 register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00514
00515 // Start loading the next iteration's atom's position
00516 p_x = p_j_next->position.x;
00517 p_y = p_j_next->position.y;
00518 p_z = p_j_next->position.z;
00519
00520 // Store the caclulated sort value into the array of sort values
00521 register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00522 p_1_sortValStorePtr->index = index;
00523 p_1_sortValStorePtr->sortValue = sortVal;
00524 p_1_sortValues_len++;
00525
00526 // Update index for the next iteration
00527 index = j;
00528
00529 } // end while (j < j_upper)
00530 }
00531
00532 // NOTE: This list and another version of it with only non-fixed
00533 // atoms will be used in place of grouplist and fixglist.
00534 #if 0 // Selection Sort
00535 sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00536 #elif 0 // Bubble Sort
00537 sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00538 #else // Merge Sort
00539 #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00540 sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00541 #else
00542 sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00543 #endif
00544 #endif
00545
00546 // Calculate the sort values for the atoms in patch 0
00547 {
00548 register int i = 0;
00549 register unsigned int ngia = p_0->nonbondedGroupIsAtom;
00550 register unsigned int hgs = p_0->hydrogenGroupSize;
00551 register BigReal p_x = p_0->position.x;
00552 register BigReal p_y = p_0->position.y;
00553 register BigReal p_z = p_0->position.z;
00554 register int index = i;
00555
00556 while (i < i_upper) {
00557
00558 // Advance i... NOTE: ngia is either 0 or 1, so if ngia is set '1'
00559 // is added to 'i'. Otherwise, the value of 'hgs' is added to i.
00560 i += ((ngia) + ((1 - ngia) * hgs));
00561
00562 // Set p_i_next to point to the atom for the next iteration and begin
00563 // loading the 'ngia' and 'hgs' values for that atom.
00564 register const CompAtom* p_i_next = p_0 + i;
00565 ngia = p_i_next->nonbondedGroupIsAtom;
00566 hgs = p_i_next->hydrogenGroupSize;
00567
00568 // Calculate the distance along the projection vector
00569 register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00570
00571 // Start loading the next iteration's atom's position
00572 p_x = p_i_next->position.x;
00573 p_y = p_i_next->position.y;
00574 p_z = p_i_next->position.z;
00575
00576 // Store the calculated sort value into the array of sort values
00577 register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00578 *p_0_sortValStorePtr = sortVal;
00579
00580 // Update index for the next iteration
00581 index = i;
00582 }
00583
00584 p_0_sortValues_len = i_upper;
00585 }
00586
00587 } // end if (savePairlists || !usePairlists)
00588
00589 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00590
00591 // Atom Sort : The grouplist and fixglist arrays are not needed when the
00592 // the atom sorting code is in use.
00593 #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00594 NBWORKARRAY(plint,grouplist,arraysize);
00595 NBWORKARRAY(plint,fixglist,arraysize);
00596 #endif
00597
00598 NBWORKARRAY(plint,goodglist,arraysize);
00599 NBWORKARRAY(plint,pairlistx,arraysize);
00600 NBWORKARRAY(plint,pairlistm,arraysize);
00601 NBWORKARRAY(plint,pairlist,arraysize);
00602 NBWORKARRAY(plint,pairlist2,arraysize);
00603 ALCH(
00604 NBWORKARRAY(plint,pairlistnAlch,arraysize);
00605 NBWORKARRAY(plint,pairlistnA0,arraysize);
00606 NBWORKARRAY(plint,pairlistnA1,arraysize);
00607 NBWORKARRAY(plint,pairlistxA1,arraysize);
00608 NBWORKARRAY(plint,pairlistmA1,arraysize);
00609 NBWORKARRAY(plint,pairlistnA2,arraysize);
00610 NBWORKARRAY(plint,pairlistxA2,arraysize);
00611 NBWORKARRAY(plint,pairlistmA2,arraysize);
00612 )
00613
00614 NBWORKARRAY(short,vdwtype_array,j_upper+5);
00615
00616
00617 #ifdef MEM_OPT_VERSION
00618 for (j = 0; j < j_upper; ++j){
00619 vdwtype_array[j] = pExt_1[j].vdwType;
00620 }
00621 #else
00622 const Atom *atomlist = mol->getAtoms();
00623 #ifdef ARCH_POWERPC
00624 #pragma disjoint (*atomlist, *vdwtype_array)
00625 #pragma disjoint (*p_1, *vdwtype_array)
00626 #pragma unroll(4)
00627 #endif
00628 for (j = 0; j < j_upper; ++j) {
00629 int id = p_1[j].id;
00630 vdwtype_array [j] = atomlist[id].vdw_type;
00631 }
00632 #endif
00633
00634 int fixg_upper = 0;
00635 int g_upper = 0;
00636
00637 if ( savePairlists || ! usePairlists ) {
00638
00639 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00640
00641 // Create a sorted list of non-fixed groups
00642 register int fixg = 0;
00643 for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00644 register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00645 register int p_1_index = p_1_sortEntry->index;
00646 if (!p_1[p_1_index].groupFixed) {
00647 register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00648 p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00649 p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00650 p_1_sortValues_fixg_len++;
00651 }
00652 }
00653
00654 #else
00655
00656 register int g = 0;
00657 for ( j = 0; j < j_upper; ++j ) {
00658 if ( p_1[j].hydrogenGroupSize || p_1[j].nonbondedGroupIsAtom ) {
00659 grouplist[g++] = j;
00660 }
00661 }
00662 g_upper = g;
00663 if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00664 int fixg = 0;
00665
00666 if ( fixedAtomsOn ) {
00667 for ( g = 0; g < g_upper; ++g ) {
00668 j = grouplist[g];
00669 if ( ! p_1[j].groupFixed ) {
00670 fixglist[fixg++] = j;
00671 }
00672 }
00673 }
00674
00675 fixg_upper = fixg;
00676 if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00677
00678 #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00679
00680 *(pairlists.newlist(1)) = i_upper;
00681 pairlists.newsize(1);
00682
00683 } else { // if ( savePairlists || ! usePairlists )
00684
00685 plint *i_upper_check;
00686 int i_upper_check_count;
00687 pairlists.nextlist(&i_upper_check,&i_upper_check_count);
00688 if ( i_upper_check[0] != i_upper )
00689 NAMD_bug("pairlist i_upper mismatch!");
00690
00691 } // if ( savePairlists || ! usePairlists )
00692
00693 SELF(
00694 int j_hgroup = 0;
00695 int g_lower = 0;
00696 int fixg_lower = 0;
00697 )
00698 int pairlistindex=0;
00699 int pairlistoffset=0;
00700
00701
00702
00703 #if ( SHORT( FAST( 1+ ) ) 0 )
00704 #if ( PAIR( 1+ ) 0 )
00705 Force *f_0 = params->ff[0];
00706 Force *f_1 = params->ff[1];
00707 #else
00708 #define f_1 f_0
00709 NBWORKARRAY(Force,f_0,i_upper)
00710 memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00711 #endif
00712 #endif
00713 #if ( FULL( 1+ ) 0 )
00714 #if ( PAIR( 1+ ) 0 )
00715 Force *fullf_0 = params->fullf[0];
00716 Force *fullf_1 = params->fullf[1];
00717 #else
00718 #define fullf_1 fullf_0
00719 NBWORKARRAY(Force,fullf_0,i_upper);
00720 memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00721 #endif
00722 #endif
00723
00724
00725 int numParts = params->numParts;
00726 int myPart = params->minPart;
00727 int groupCount = 0;
00728
00729 for ( i = 0; i < (i_upper SELF(- 1)); ++i )
00730 {
00731 const CompAtom &p_i = p_0[i];
00732 #ifdef MEM_OPT_VERSION
00733 const CompAtomExt &pExt_i = pExt_0[i];
00734 #endif
00735 if ( p_i.hydrogenGroupSize ) {
00736 //save current group count
00737 int curgrpcount = groupCount;
00738 //increment group count
00739 groupCount ++;
00740
00741 if (groupCount >= numParts)
00742 groupCount = 0;
00743
00744 if ( curgrpcount != myPart ) {
00745 i += p_i.hydrogenGroupSize - 1;
00746
00747 //Power PC alignment constraint
00748 #ifdef ARCH_POWERPC
00749 __dcbt((void *) &(p_0[i+1]));
00750 #endif
00751 continue;
00752 }
00753 }
00754
00755 register const BigReal p_i_x = p_i.position.x + offset_x;
00756 register const BigReal p_i_y = p_i.position.y + offset_y;
00757 register const BigReal p_i_z = p_i.position.z + offset_z;
00758
00759 ALCH(const int p_i_partition = p_i.partition;)
00760
00761 PPROF(
00762 const int p_i_partition = p_i.partition;
00763 int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00764 pp_clamp(n1, pressureProfileSlabs);
00765 )
00766
00767 SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00768
00769 if ( savePairlists || ! usePairlists ) {
00770
00771 if ( ! savePairlists ) pairlists.reset(); // limit space usage
00772
00773 #ifdef MEM_OPT_VERSION
00774 const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);
00775 const int excl_min = p_i.id + exclcheck->min;
00776 const int excl_max = p_i.id + exclcheck->max;
00777 #else
00778 const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(p_i.id);
00779 const int excl_min = exclcheck->min;
00780 const int excl_max = exclcheck->max;
00781 #endif
00782 const char * excl_flags_var;
00783 if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00784 else { // need to build list on the fly
00785
00786 //TODO: Should change later!!!!!!!!!! --Chao Mei
00787 //Now just for the sake of passing compilation
00788 #ifndef MEM_OPT_VERSION
00789 if ( excl_flags_buff ) {
00790 int nl,l;
00791 nl = full_excl[0] + 1;
00792 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00793 nl = mod_excl[0] + 1;
00794 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00795 } else {
00796 excl_flags_buff = new char[mol->numAtoms];
00797 memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00798 }
00799 int nl,l;
00800 full_excl = mol->get_full_exclusions_for_atom(p_i.id);
00801 nl = full_excl[0] + 1;
00802 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00803 mod_excl = mol->get_mod_exclusions_for_atom(p_i.id);
00804 nl = mod_excl[0] + 1;
00805 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00806 excl_flags_var = excl_flags_buff;
00807 #endif
00808
00809 }
00810 const char * const excl_flags = excl_flags_var;
00811
00812 if (p_i.hydrogenGroupSize || p_i.nonbondedGroupIsAtom) {
00813
00814 pairlistindex = 0; // initialize with 0 elements
00815 pairlistoffset=0;
00816 const int groupfixed = ( fixedAtomsOn && p_i.groupFixed );
00817
00818 // If patch divisions are not made by hydrogen groups, then
00819 // hydrogenGroupSize is set to 1 for all atoms. Thus we can
00820 // carry on as if we did have groups - only less efficiently.
00821 // An optimization in this case is to not rebuild the temporary
00822 // pairlist but to include every atom in it. This should be a
00823 // a very minor expense.
00824
00825 SELF
00826 (
00827 if ( p_i.hydrogenGroupSize ) {
00828 // exclude child hydrogens of i
00829 // j_hgroup = i + p_i.hydrogenGroupSize; (moved above)
00830 while ( g_lower < g_upper &&
00831 grouplist[g_lower] < j_hgroup ) ++g_lower;
00832 while ( fixg_lower < fixg_upper &&
00833 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00834 }
00835 // add all child or sister hydrogens of i
00836 for ( j = i + 1; j < j_hgroup; ++j ) {
00837 pairlist[pairlistindex++] = j;
00838 }
00839 )
00840
00841 // add remaining atoms to pairlist via hydrogen groups
00842 register plint *pli = pairlist + pairlistindex;
00843
00844 {
00845 // Atom Sort : Modify the values of g and gu based on the added information
00846 // of the linear projections (sort values) information.
00847 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00848
00849 register int g = 0;
00850 register BigReal p_i_sortValue = p_0_sortValues[i];
00851 const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00852 register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00853
00854 // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
00855 register int lower = 0;
00856 register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00857 while ((upper - lower) > 1) {
00858 register int j = ((lower + upper) >> 1);
00859 register BigReal jSortVal = sortValues[j].sortValue;
00860 if (jSortVal < p_i_sortValue_plus_windowRadius) {
00861 lower = j;
00862 } else {
00863 upper = j;
00864 }
00865 }
00866 const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00867
00868 #else
00869
00870 register plint *gli = goodglist;
00871 const plint *glist = ( groupfixed ? fixglist : grouplist );
00872 SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00873 const int gu = ( groupfixed ? fixg_upper : g_upper );
00874 register int g = PAIR(0) SELF(gl);
00875
00876 #endif
00877
00878
00879 if ( g < gu ) {
00880 int hu = 0;
00881 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00882 if ( gu - g > 6 ) {
00883
00884 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00885 register SortEntry* sortEntry0 = sortValues + g;
00886 register SortEntry* sortEntry1 = sortValues + g + 1;
00887 register int jprev0 = sortEntry0->index;
00888 register int jprev1 = sortEntry1->index;
00889 #else
00890 register int jprev0 = glist[g ];
00891 register int jprev1 = glist[g + 1];
00892 #endif
00893
00894 register int j0;
00895 register int j1;
00896
00897 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00898 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00899 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00900
00901 // these don't change here, so we could move them into outer scope
00902 const __m128d P_I_X = _mm_set1_pd(p_i_x);
00903 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00904 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00905
00906 g += 2;
00907 for ( ; g < gu - 2; g +=2 ) {
00908 // compute 1d distance, 2-way parallel
00909 j0 = jprev0;
00910 j1 = jprev1;
00911
00912 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00913 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00914 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00915 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00916 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00917 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00918
00919 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00920 sortEntry0 = sortValues + g;
00921 sortEntry1 = sortValues + g + 1;
00922 jprev0 = sortEntry0->index;
00923 jprev1 = sortEntry1->index;
00924 #else
00925 jprev0 = glist[g ];
00926 jprev1 = glist[g+1];
00927 #endif
00928
00929 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00930 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00931 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00932
00933 __align(16) double r2_01[2];
00934 _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
00935
00936 // XXX these could possibly benefit from SSE-based conditionals
00937 bool test0 = ( r2_01[0] < groupplcutoff2 );
00938 bool test1 = ( r2_01[1] < groupplcutoff2 );
00939
00940 //removing ifs benefits on many architectures
00941 //as the extra stores will only warm the cache up
00942 goodglist [ hu ] = j0;
00943 goodglist [ hu + test0 ] = j1;
00944
00945 hu += test0 + test1;
00946 }
00947 g-=2;
00948 }
00949 #else
00950 if ( gu - g > 6 ) {
00951
00952 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00953 register SortEntry* sortEntry0 = sortValues + g;
00954 register SortEntry* sortEntry1 = sortValues + g + 1;
00955 register int jprev0 = sortEntry0->index;
00956 register int jprev1 = sortEntry1->index;
00957 #else
00958 register int jprev0 = glist[g ];
00959 register int jprev1 = glist[g + 1];
00960 #endif
00961
00962 register int j0;
00963 register int j1;
00964
00965 register BigReal pj_x_0, pj_x_1;
00966 register BigReal pj_y_0, pj_y_1;
00967 register BigReal pj_z_0, pj_z_1;
00968 register BigReal t_0, t_1, r2_0, r2_1;
00969
00970 pj_x_0 = p_1[jprev0].position.x;
00971 pj_x_1 = p_1[jprev1].position.x;
00972
00973 pj_y_0 = p_1[jprev0].position.y;
00974 pj_y_1 = p_1[jprev1].position.y;
00975
00976 pj_z_0 = p_1[jprev0].position.z;
00977 pj_z_1 = p_1[jprev1].position.z;
00978
00979 g += 2;
00980 for ( ; g < gu - 2; g +=2 ) {
00981 // compute 1d distance, 2-way parallel
00982 j0 = jprev0;
00983 j1 = jprev1;
00984
00985 t_0 = p_i_x - pj_x_0;
00986 t_1 = p_i_x - pj_x_1;
00987 r2_0 = t_0 * t_0;
00988 r2_1 = t_1 * t_1;
00989
00990 t_0 = p_i_y - pj_y_0;
00991 t_1 = p_i_y - pj_y_1;
00992 r2_0 += t_0 * t_0;
00993 r2_1 += t_1 * t_1;
00994
00995 t_0 = p_i_z - pj_z_0;
00996 t_1 = p_i_z - pj_z_1;
00997 r2_0 += t_0 * t_0;
00998 r2_1 += t_1 * t_1;
00999
01000 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01001 sortEntry0 = sortValues + g;
01002 sortEntry1 = sortValues + g + 1;
01003 jprev0 = sortEntry0->index;
01004 jprev1 = sortEntry1->index;
01005 #else
01006 jprev0 = glist[g ];
01007 jprev1 = glist[g+1];
01008 #endif
01009
01010 pj_x_0 = p_1[jprev0].position.x;
01011 pj_x_1 = p_1[jprev1].position.x;
01012 pj_y_0 = p_1[jprev0].position.y;
01013 pj_y_1 = p_1[jprev1].position.y;
01014 pj_z_0 = p_1[jprev0].position.z;
01015 pj_z_1 = p_1[jprev1].position.z;
01016
01017 bool test0 = ( r2_0 < groupplcutoff2 );
01018 bool test1 = ( r2_1 < groupplcutoff2 );
01019
01020 //removing ifs benefits on many architectures
01021 //as the extra stores will only warm the cache up
01022 goodglist [ hu ] = j0;
01023 goodglist [ hu + test0 ] = j1;
01024
01025 hu += test0 + test1;
01026 }
01027 g-=2;
01028 }
01029 #endif
01030
01031 for (; g < gu; g++) {
01032
01033 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01034 register SortEntry* sortEntry = sortValues + g;
01035 register int j = sortEntry->index;
01036 #else
01037 int j = glist[g];
01038 #endif
01039
01040 BigReal p_j_x = p_1[j].position.x;
01041 BigReal p_j_y = p_1[j].position.y;
01042 BigReal p_j_z = p_1[j].position.z;
01043
01044 BigReal r2 = p_i_x - p_j_x;
01045 r2 *= r2;
01046 BigReal t2 = p_i_y - p_j_y;
01047 r2 += t2 * t2;
01048 t2 = p_i_z - p_j_z;
01049 r2 += t2 * t2;
01050
01051 if ( r2 <= groupplcutoff2 )
01052 goodglist[hu ++] = j;
01053 }
01054
01055 for ( int h=0; h<hu; ++h ) {
01056 int j = goodglist[h];
01057 int hgs = ( p_1[j].nonbondedGroupIsAtom ? 1 :
01058 p_1[j].hydrogenGroupSize );
01059 pli[0] = j; // copy over the next four in any case
01060 pli[1] = j+1;
01061 pli[2] = j+2;
01062 pli[3] = j+3; // assume hgs <= 4
01063 pli += hgs;
01064 }
01065 }
01066 }
01067
01068 pairlistindex = pli - pairlist;
01069 // make sure padded element on pairlist points to real data
01070 if ( pairlistindex ) {
01071 pairlist[pairlistindex] = pairlist[pairlistindex-1];
01072 } /* PAIR( else { // skip empty loops if no pairs were found
01073 int hgs = ( p_i.nonbondedGroupIsAtom ? 1 : p_i.hydrogenGroupSize );
01074 i += hgs - 1;
01075 continue;
01076 } ) */
01077 } // if i is hydrogen group parent
01078 SELF
01079 (
01080 // self-comparisions require list to be incremented
01081 // pair-comparisions use entire list (pairlistoffset is 0)
01082 else pairlistoffset++;
01083 )
01084
01085 const int atomfixed = ( fixedAtomsOn && p_i.atomFixed );
01086
01087 register plint *pli = pairlist2;
01088 #if 1 ALCH(-1)
01089 plint *pairlistn = pairlists.newlist(j_upper + 5 + 1 + 5) SELF(+ 1);
01090 #else
01091 plint *pairlistn = pairlistnAlch;
01092 #endif
01093 SELF( plint &pairlistn_skip = *(pairlistn-1); )
01094 register plint *plin = pairlistn;
01095
01096
01097 INT(
01098 if ( pairInteractionOn ) {
01099 const int ifep_type = p_i.partition;
01100 if (pairInteractionSelf) {
01101 if (ifep_type != 1) continue;
01102 for (int k=pairlistoffset; k<pairlistindex; k++) {
01103 j = pairlist[k];
01104 const int jfep_type = p_1[j].partition;
01105 // for pair-self, both atoms must be in group 1.
01106 if (jfep_type == 1) {
01107 *(pli++) = j;
01108 }
01109 }
01110 } else {
01111 if (ifep_type != 1 && ifep_type != 2) continue;
01112 for (int k=pairlistoffset; k<pairlistindex; k++) {
01113 j = pairlist[k];
01114 const int jfep_type = p_1[j].partition;
01115 // for pair, must have one from each group.
01116 if (ifep_type + jfep_type == 3) {
01117 *(pli++) = j;
01118 }
01119 }
01120 }
01121 int npair2_int = pli - pairlist2;
01122 pli = pairlist2;
01123 for (int k=0; k<npair2_int; k++) {
01124 j = pairlist2[k];
01125 BigReal p_j_x = p_1[j].position.x;
01126 BigReal r2 = p_i_x - p_j_x;
01127 r2 *= r2;
01128 BigReal p_j_y = p_1[j].position.y;
01129 BigReal t2 = p_i_y - p_j_y;
01130 r2 += t2 * t2;
01131 BigReal p_j_z = p_1[j].position.z;
01132 t2 = p_i_z - p_j_z;
01133 r2 += t2 * t2;
01134 if ( ( ! (atomfixed && p_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01135 int atom2 = p_1[j].id;
01136 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01137 else *(plin++) = j;
01138 }
01139 }
01140 } else
01141 )
01142 if ( atomfixed ) {
01143 for (int k=pairlistoffset; k<pairlistindex; k++) {
01144 j = pairlist[k];
01145 BigReal p_j_x = p_1[j].position.x;
01146 BigReal r2 = p_i_x - p_j_x;
01147 r2 *= r2;
01148 BigReal p_j_y = p_1[j].position.y;
01149 BigReal t2 = p_i_y - p_j_y;
01150 r2 += t2 * t2;
01151 BigReal p_j_z = p_1[j].position.z;
01152 t2 = p_i_z - p_j_z;
01153 r2 += t2 * t2;
01154 if ( (! p_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01155 int atom2 = p_1[j].id;
01156 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01157 else *(plin++) = j;
01158 }
01159 }
01160 } else {
01161 int k = pairlistoffset;
01162 int ku = pairlistindex;
01163 if ( k < ku ) {
01164 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01165 if ( ku - k > 6 ) {
01166 register int jprev0 = pairlist [k ];
01167 register int jprev1 = pairlist [k + 1];
01168
01169 register int j0;
01170 register int j1;
01171
01172 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01173 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01174 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01175
01176 // these don't change here, so we could move them into outer scope
01177 const __m128d P_I_X = _mm_set1_pd(p_i_x);
01178 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01179 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01180
01181 int atom2_0 = p_1[jprev0].id;
01182 int atom2_1 = p_1[jprev1].id;
01183
01184 k += 2;
01185 for ( ; k < ku - 2; k +=2 ) {
01186 // compute 1d distance, 2-way parallel
01187 j0 = jprev0;
01188 j1 = jprev1;
01189
01190 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01191 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01192 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01193 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01194 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01195 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01196
01197 jprev0 = pairlist[k];
01198 jprev1 = pairlist[k+1];
01199
01200 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01201 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01202 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01203
01204 __align(16) double r2_01[2];
01205 _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01206
01207 if (r2_01[0] <= plcutoff2) {
01208 if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
01209 *(pli++) = j0;
01210 else
01211 *(plin++) = j0;
01212 }
01213 ato |