#include "ComputeNonbondedBase2.h"#include "PatchMap.h"Go to the source code of this file.
Defines | |
| #define | NAME CLASSNAME(calc) |
| #define | PAIR(X) X |
| #define | CLASS ComputeNonbondedPair |
| #define | CLASSNAME(X) ENERGYNAME( X ## _pair ) |
| #define | SELF(X) X |
| #define | CLASS ComputeNonbondedSelf |
| #define | CLASSNAME(X) ENERGYNAME( X ## _self ) |
| #define | ENERGY(X) |
| #define | NOENERGY(X) X |
| #define | ENERGYNAME(X) SLOWONLYNAME( X ) |
| #define | FAST(X) X |
| #define | SLOWONLYNAME(X) MERGEELECTNAME( X ) |
| #define | SHORT(X) X |
| #define | NOSHORT(X) |
| #define | MERGEELECTNAME(X) FULLELECTNAME( X ) |
| #define | FULLELECTNAME(X) TABENERGYNAME( X ) |
| #define | FULL(X) |
| #define | NOFULL(X) X |
| #define | TABENERGYNAME(X) FEPNAME( X ) |
| #define | TABENERGY(X) |
| #define | NOTABENERGY(X) X |
| #define | FEPNAME(X) LAST( X ) |
| #define | FEP(X) |
| #define | ALCHPAIR(X) |
| #define | NOT_ALCHPAIR(X) X |
| #define | LES(X) |
| #define | INT(X) |
| #define | PPROF(X) |
| #define | LAM(X) |
| #define | CUDA(X) |
| #define | ALCH(X) |
| #define | TI(X) |
| #define | CUDA(X) X |
| #define | LAST(X) X |
| #define | NORMAL(X) X |
| #define | EXCLUDED(X) |
| #define | MODIFIED(X) |
| #define | NORMAL(X) |
| #define | EXCLUDED(X) |
| #define | MODIFIED(X) X |
Functions | |
| SELF (PAIR(foo bar)) LES(FEP(foo bar)) LES(INT(foo bar)) FEP(INT(foo bar)) LAM(INT(foo bar)) FEP(NOENERGY(foo bar)) ENERGY(NOENERGY(foo bar)) TABENERGY(NOTABENERGY(foo bar)) void ComputeNonbondedUtil | |
|
|
Definition at line 148 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 141 of file ComputeNonbondedBase.h. |
|
|
Definition at line 54 of file ComputeNonbondedBase.h. |
|
|
Definition at line 54 of file ComputeNonbondedBase.h. |
|
|
Definition at line 55 of file ComputeNonbondedBase.h. |
|
|
Definition at line 55 of file ComputeNonbondedBase.h. |
|
|
Definition at line 190 of file ComputeNonbondedBase.h. |
|
|
Definition at line 190 of file ComputeNonbondedBase.h. |
|
|
Definition at line 68 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 70 of file ComputeNonbondedBase.h. |
|
|
|
|
|
|
|
|
Definition at line 79 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 140 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 139 of file ComputeNonbondedBase.h. |
|
|
Definition at line 105 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 104 of file ComputeNonbondedBase.h. |
|
|
Definition at line 144 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 146 of file ComputeNonbondedBase.h. |
|
|
Definition at line 193 of file ComputeNonbondedBase.h. |
|
|
Definition at line 143 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 93 of file ComputeNonbondedBase.h. |
|
|
|
|
|
|
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 40 of file ComputeNonbondedBase.h. |
|
|
Definition at line 69 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 106 of file ComputeNonbondedBase.h. |
|
|
|
|
|
|
|
|
Definition at line 92 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 142 of file ComputeNonbondedBase.h. |
|
|
Definition at line 119 of file ComputeNonbondedBase.h. |
|
|
Definition at line 44 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 145 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 53 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 91 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 80 of file ComputeNonbondedBase.h. |
|
|
Definition at line 118 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Definition at line 117 of file ComputeNonbondedBase.h. |
|
|
Definition at line 149 of file ComputeNonbondedBase.h. Referenced by SELF(). |
|
|
Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Definition at line 196 of file ComputeNonbondedBase.h. References LJTable::TableEntry::A, ALCH, CompAtomExt::atomFixed, LJTable::TableEntry::B, BigReal, CompAtom::charge, COLOUMB, ENERGY, FAST, FEP, ExclusionCheck::flags, Force, FULL, Molecule::get_excl_check_for_atom(), Molecule::get_full_exclusions_for_atom(), Molecule::get_mod_exclusions_for_atom(), CompAtomExt::groupFixed, CompAtom::hydrogenGroupSize, CompAtomExt::id, __sort_entry::index, INT, int32, j, LES, ExclusionCheck::max, ExclusionCheck::min, NAMD_bug(), NAMD_ComputeNonbonded_SortAtoms, NAMD_die(), NBWORKARRAY, NBWORKARRAYSINIT, Pairlists::newlist(), Pairlists::newsize(), Pairlists::nextlist(), NOENERGY, CompAtom::nonbondedGroupSize, NOSHORT, Molecule::numAtoms, Node::Object(), PAIR, pairlist_from_pairlist(), CompAtom::partition, plint, CompAtom::position, pp_clamp(), PPROF, Pairlists::reset(), SELF, SHORT, Node::simParameters, simParams, sortEntries_bubbleSort(), sortEntries_mergeSort_v1(), sortEntries_mergeSort_v2(), sortEntries_selectionSort(), SortEntry, __sort_entry::sortValue, TABENERGY, LJTable::table_row(), LJTable::TableEntry::tabletype, TI, SimParameters::usePairlists, CompAtom::vdwType, Vector::x, Vector::y, and Vector::z. 00216 {
00217 // int NAME; // to track errors via unused variable warnings
00218
00219 if ( ComputeNonbondedUtil::commOnly ) return;
00220
00221 #ifdef NAMD_CUDA
00222 NOENERGY(return;)
00223 #endif
00224
00225 // speedup variables
00226 BigReal *reduction = params->reduction;
00227 SimParameters *simParams = Node::Object()->simParameters;
00228
00229 PPROF(
00230 BigReal *pressureProfileReduction = params->pressureProfileReduction;
00231 const BigReal invThickness = 1.0 / pressureProfileThickness;
00232 )
00233
00234 Pairlists &pairlists = *(params->pairlists);
00235 #ifdef NAMD_CUDA
00236 int savePairlists = 0;
00237 int usePairlists = 0;
00238 #else
00239 int savePairlists = params->savePairlists;
00240 int usePairlists = params->usePairlists;
00241 #endif
00242 pairlists.reset();
00243 // PAIR(iout << "--------\n" << endi;)
00244
00245 // local variables
00246 int exclChecksum = 0;
00247 FAST
00248 (
00249 ENERGY( BigReal vdwEnergy = 0; )
00250 SHORT
00251 (
00252 ENERGY( BigReal electEnergy = 0; )
00253 )
00254
00255 FEP
00256 (
00257 ENERGY( BigReal vdwEnergy_s = 0; )
00258 SHORT
00259 (
00260 ENERGY( BigReal electEnergy_s = 0; )
00261 )
00262 )
00263
00264 SHORT
00265 (
00266 BigReal virial_xx = 0;
00267 BigReal virial_xy = 0;
00268 BigReal virial_xz = 0;
00269 BigReal virial_yy = 0;
00270 BigReal virial_yz = 0;
00271 BigReal virial_zz = 0;
00272 )
00273 )
00274 FULL
00275 (
00276 ENERGY( BigReal fullElectEnergy = 0; )
00277 FEP
00278 (
00279 ENERGY( BigReal fullElectEnergy_s = 0; )
00280 )
00281 BigReal fullElectVirial_xx = 0;
00282 BigReal fullElectVirial_xy = 0;
00283 BigReal fullElectVirial_xz = 0;
00284 BigReal fullElectVirial_yy = 0;
00285 BigReal fullElectVirial_yz = 0;
00286 BigReal fullElectVirial_zz = 0;
00287 )
00288
00289 // Bringing stuff into local namespace for speed.
00290
00291 const BigReal offset_x = params->offset.x;
00292 const BigReal offset_y = params->offset.y;
00293 const BigReal offset_z = params->offset.z;
00294
00295 register const BigReal plcutoff2 = \
00296 params->plcutoff * params->plcutoff;
00297 register const BigReal groupplcutoff2 = \
00298 params->groupplcutoff * params->groupplcutoff;
00299 const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00300 const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00301 LJTable::TableEntry ljNull; ljNull.A = 0; ljNull.B = 0; TABENERGY(ljNull.tabletype=-1;)
00302 const LJTable::TableEntry* const lj_null_pars = &ljNull;
00303 const Molecule* const mol = ComputeNonbondedUtil:: mol;
00304 SHORT
00305 (
00306 const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00307 )
00308 FULL
00309 (
00310 SHORT
00311 (
00312 const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00313 )
00314 NOSHORT
00315 (
00316 //#if 1 ALCH(-1)
00317 const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00318 //#else // have to switch this for ALCH
00319 // BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
00320 //#endif
00321 )
00322 )
00323 const BigReal scaling = ComputeNonbondedUtil:: scaling;
00324 const BigReal modf_mod = 1.0 - scale14;
00325 FAST
00326 (
00327 const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00328 const BigReal c1 = ComputeNonbondedUtil:: c1;
00329 const BigReal c3 = ComputeNonbondedUtil:: c3;
00330 )
00331 const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00332 const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00333 // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
00334 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00335
00336 ALCH(
00337 const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00338 const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00339 const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00340 const BigReal alchElecLambdaStart = ComputeNonbondedUtil::alchElecLambdaStart;
00341 const BigReal alchVdwLambdaEnd = ComputeNonbondedUtil::alchVdwLambdaEnd;
00342 const BigReal alchVdwShiftCoeff = ComputeNonbondedUtil::alchVdwShiftCoeff;
00343
00344 /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
00345 BigReal lambdaUp = ComputeNonbondedUtil::alchLambda;
00346 BigReal elecLambdaUp = (lambdaUp <= alchElecLambdaStart)? 0. : \
00347 (lambdaUp - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00348 BigReal vdwLambdaUp =
00349 (lambdaUp >= alchVdwLambdaEnd)? 1. : lambdaUp / alchVdwLambdaEnd;
00350 BigReal vdwShiftUp = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaUp);
00351 FEP(BigReal lambda2Up = ComputeNonbondedUtil::alchLambda2;)
00352 FEP(BigReal elecLambda2Up = (lambda2Up <= alchElecLambdaStart)? 0. : \
00353 (lambda2Up - alchElecLambdaStart) / (1. - alchElecLambdaStart);)
00354 FEP(BigReal vdwLambda2Up =
00355 (lambda2Up >= alchVdwLambdaEnd)? 1. : lambda2Up / alchVdwLambdaEnd;)
00356 FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Up);)
00357
00358
00359 /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
00360 BigReal lambdaDown = 1 - ComputeNonbondedUtil::alchLambda;
00361 BigReal elecLambdaDown = (lambdaDown <= alchElecLambdaStart)? 0. : \
00362 (lambdaDown - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00363 BigReal vdwLambdaDown =
00364 (lambdaDown >= alchVdwLambdaEnd)? 1. : lambdaDown / alchVdwLambdaEnd;
00365 BigReal vdwShiftDown = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaDown);
00366 FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::alchLambda2;)
00367 FEP(BigReal elecLambda2Down = (lambda2Down <= alchElecLambdaStart)? 0. : \
00368 (lambda2Down - alchElecLambdaStart) / (1. - alchElecLambdaStart); )
00369 FEP(BigReal vdwLambda2Down =
00370 (lambda2Down >= alchVdwLambdaEnd)? 1. : lambda2Down / alchVdwLambdaEnd; )
00371 FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Down);)
00372
00373
00374 // Thermodynamic Integration Notes:
00375 // Separation of atom pairs into different pairlist according to lambda
00376 // coupling, for alchemical free energy calculations. Normal pairlists
00377 // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
00378 // (pairlist[nxm][12]_save are created for atoms switched up or down with
00379 // lambda respectively.
00380 // This makes TI coding far easier and more readable, since it's necessary
00381 // to store dU/dlambda in different variables depending on whether atoms are
00382 // being switched up or down. Further, it allows the prior explicit coding of
00383 // the separation-shifted vdW potential to stay in place without a big
00384 // performance hit, since it's no longer necessary to calculate vdW potentials
00385 // explicity for the bulk (non-alchemical) interactions - so that part of the
00386 // free energy code stays readable and easy to modify. Finally there should
00387 // be some performance gain over the old FEP implementation as we no
00388 // longer have to check the partitions of each atom pair and switch
00389 // parameters accordingly for every single NonbondedBase2.h loop - this is
00390 // done at the pairlist level
00391
00392 int pswitchTable[3*3];
00393 // determines which pairlist alchemical pairings get sent to
00394 // 0: non-alchemical pairs, partition 0 <-> partition 0
00395 // 1: atoms scaled up as lambda increases, p0<->p1
00396 // 2: atoms scaled down as lambda increases, p0<->p2
00397 // all p1<->p2 interactions to be dropped (99)
00398 // in general, 'up' refers to 1, 'down' refers to 2
00399 for (int ip=0; ip<3; ++ip) {
00400 for (int jp=0; jp<3; ++jp ) {
00401 pswitchTable[ip+3*jp] = 0;
00402 if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00403 if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00404 if (ip + jp == 3) pswitchTable[ip+3*jp] = 99; // no interaction
00405
00406 if (! ComputeNonbondedUtil::alchDecouple) {
00407 // no decoupling: interactions within a partition are treated like
00408 // normal alchemical pairs
00409 if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00410 if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00411 }
00412 if (ComputeNonbondedUtil::alchDecouple) {
00413 // decoupling: PME calculates extra grids so that while PME
00414 // interaction with the full system is switched off, a new PME grid
00415 // containing only alchemical atoms is switched on. Full interactions
00416 // between alchemical atoms are maintained; potentials within one
00417 // partition need not be scaled here.
00418 if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00419 if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00420 }
00421 }
00422 }
00423
00424 // dU/dlambda variables for thermodynamic integration
00425 TI(
00426 BigReal vdwEnergy_ti_1 = 0;
00427 BigReal vdwEnergy_ti_2 = 0;
00428 SHORT(BigReal electEnergy_ti_1 = 0;
00429 BigReal electEnergy_ti_2 = 0;)
00430 FULL(BigReal fullElectEnergy_ti_1 = 0;
00431 BigReal fullElectEnergy_ti_2 = 0;)
00432 )
00433 )
00434
00435
00436 const int i_upper = params->numAtoms[0];
00437 register const int j_upper = params->numAtoms[1];
00438 register int j;
00439 register int i;
00440 const CompAtom *p_0 = params->p[0];
00441 const CompAtom *p_1 = params->p[1];
00442 const CompAtomExt *pExt_0 = params->pExt[0];
00443 const CompAtomExt *pExt_1 = params->pExt[1];
00444
00445 char * excl_flags_buff = 0;
00446 const int32 * full_excl = 0;
00447 const int32 * mod_excl = 0;
00448
00449 plint *pairlistn_save; int npairn;
00450 plint *pairlistx_save; int npairx;
00451 plint *pairlistm_save; int npairm;
00452
00453 ALCH(
00454 plint *pairlistnA1_save; int npairnA1;
00455 plint *pairlistxA1_save; int npairxA1;
00456 plint *pairlistmA1_save; int npairmA1;
00457 plint *pairlistnA2_save; int npairnA2;
00458 plint *pairlistxA2_save; int npairxA2;
00459 plint *pairlistmA2_save; int npairmA2;
00460 )
00461
00462 NBWORKARRAYSINIT(params->workArrays);
00463
00464 int arraysize = j_upper+5;
00465
00466 NBWORKARRAY(plint,pairlisti,arraysize)
00467 NBWORKARRAY(BigReal,r2list,arraysize)
00468
00469 union { double f; int32 i[2]; } byte_order_test;
00470 byte_order_test.f = 1.0; // should occupy high-order bits only
00471 int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00472
00473 if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00474
00475
00476 // DMK - Atom Sort
00477 //
00478 // Basic Idea: Determine the line between the center of masses of
00479 // the two patches. Project and then sort the lists of atoms
00480 // along this line. Then, as the pairlists are being generated
00481 // for the atoms in the first atom list, use the sorted
00482 // list to only add atoms from the second list that are between
00483 // +/- ~cutoff from the atoms position on the line.
00484 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00485
00486 // NOTE: For the first atom list, just the sort values themselves need to be
00487 // calculated (a BigReal vs. SortEntry data structure). However, a second
00488 // SortEntry array is needed to perform the merge sort on the second list of
00489 // atoms. Use the atomSort_[0/1]_sortValues__ arrays to sort the second
00490 // list of atoms and then use the left over array to make a version of the
00491 // list that only includes non-fixed groups (fixg).
00492
00493 NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00494 NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00495 NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00496
00497 register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00498 register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00499
00500 int p_0_sortValues_len = 0;
00501 int p_1_sortValues_len = 0;
00502 int p_1_sortValues_fixg_len = 0;
00503
00504 // Calculate the distance between to projected points on the line that
00505 // represents the cutoff distance.
00506 BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00507
00508 if (savePairlists || !usePairlists) {
00509
00510 register const BigReal projLineVec_x = params->projLineVec.x;
00511 register const BigReal projLineVec_y = params->projLineVec.y;
00512 register const BigReal projLineVec_z = params->projLineVec.z;
00513
00514 // Calculate the sort values for the atoms in patch 1
00515 {
00516 register int j = 0;
00517 register int nbgs = p_1->nonbondedGroupSize;
00518 register BigReal p_x = p_1->position.x;
00519 register BigReal p_y = p_1->position.y;
00520 register BigReal p_z = p_1->position.z;
00521 register int index = j;
00522
00523 while (j < j_upper) {
00524
00525 j += nbgs;
00526
00527 // Set p_j_next to point to the atom for the next iteration and begin
00528 // loading the 'nbgs' value for that atom.
00529 register const CompAtom* p_j_next = p_1 + j;
00530 nbgs = p_j_next->nonbondedGroupSize;
00531
00532 // Calculate the distance along the projection vector
00533 // NOTE: If the vector from the origin to the point is 'A' and the vector
00534 // between the patches is 'B' then to project 'A' onto 'B' we take the dot
00535 // product of the two vectors 'A dot B' divided by the length of 'B'.
00536 // So... projection of A onto B = (A dot B) / length(B), however, note
00537 // that length(B) == 1, therefore the sort value is simply (A dot B)
00538 register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00539
00540 // Start loading the next iteration's atom's position
00541 p_x = p_j_next->position.x;
00542 p_y = p_j_next->position.y;
00543 p_z = p_j_next->position.z;
00544
00545 // Store the caclulated sort value into the array of sort values
00546 register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00547 p_1_sortValStorePtr->index = index;
00548 p_1_sortValStorePtr->sortValue = sortVal;
00549 p_1_sortValues_len++;
00550
00551 // Update index for the next iteration
00552 index = j;
00553
00554 } // end while (j < j_upper)
00555 }
00556
00557 // NOTE: This list and another version of it with only non-fixed
00558 // atoms will be used in place of grouplist and fixglist.
00559 #if 0 // Selection Sort
00560 sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00561 #elif 0 // Bubble Sort
00562 sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00563 #else // Merge Sort
00564 #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00565 sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00566 #else
00567 sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00568 #endif
00569 #endif
00570
00571 // Calculate the sort values for the atoms in patch 0
00572 {
00573 register int i = 0;
00574 register int nbgs = p_0->nonbondedGroupSize;
00575 register BigReal p_x = p_0->position.x + offset_x;
00576 register BigReal p_y = p_0->position.y + offset_y;
00577 register BigReal p_z = p_0->position.z + offset_z;
00578 register int index = i;
00579
00580 while (i < i_upper) {
00581
00582 i += nbgs;
00583
00584 // Set p_i_next to point to the atom for the next iteration and begin
00585 // loading the 'nbgs' value for that atom.
00586 register const CompAtom* p_i_next = p_0 + i;
00587 nbgs = p_i_next->nonbondedGroupSize;
00588
00589 // Calculate the distance along the projection vector
00590 register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00591
00592 // Start loading the next iteration's atom's position
00593 p_x = p_i_next->position.x + offset_x;
00594 p_y = p_i_next->position.y + offset_y;
00595 p_z = p_i_next->position.z + offset_z;
00596
00597 // Store the calculated sort value into the array of sort values
00598 register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00599 *p_0_sortValStorePtr = sortVal;
00600
00601 // Update index for the next iteration
00602 index = i;
00603 }
00604
00605 p_0_sortValues_len = i_upper;
00606 }
00607
00608 } // end if (savePairlists || !usePairlists)
00609
00610 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00611
00612 // Atom Sort : The grouplist and fixglist arrays are not needed when the
00613 // the atom sorting code is in use.
00614 #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00615 NBWORKARRAY(plint,grouplist,arraysize);
00616 NBWORKARRAY(plint,fixglist,arraysize);
00617 #endif
00618
00619 NBWORKARRAY(plint,goodglist,arraysize);
00620 NBWORKARRAY(plint,pairlistx,arraysize);
00621 NBWORKARRAY(plint,pairlistm,arraysize);
00622 NBWORKARRAY(plint,pairlist,arraysize);
00623 NBWORKARRAY(plint,pairlist2,arraysize);
00624 ALCH(
00625 NBWORKARRAY(plint,pairlistnAlch,arraysize);
00626 NBWORKARRAY(plint,pairlistnA0,arraysize);
00627 NBWORKARRAY(plint,pairlistnA1,arraysize);
00628 NBWORKARRAY(plint,pairlistxA1,arraysize);
00629 NBWORKARRAY(plint,pairlistmA1,arraysize);
00630 NBWORKARRAY(plint,pairlistnA2,arraysize);
00631 NBWORKARRAY(plint,pairlistxA2,arraysize);
00632 NBWORKARRAY(plint,pairlistmA2,arraysize);
00633 )
00634
00635 NBWORKARRAY(short,vdwtype_array,j_upper+5);
00636 for (j = 0; j < j_upper; ++j)
00637 vdwtype_array [j] = p_1[j].vdwType;
00638
00639 int fixg_upper = 0;
00640 int g_upper = 0;
00641
00642 if ( savePairlists || ! usePairlists ) {
00643
00644 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00645
00646 // Create a sorted list of non-fixed groups
00647 register int fixg = 0;
00648 for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00649 register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00650 register int p_1_index = p_1_sortEntry->index;
00651 if (!pExt_1[p_1_index].groupFixed) {
00652 register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00653 p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00654 p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00655 p_1_sortValues_fixg_len++;
00656 }
00657 }
00658
00659 #else
00660
00661 register int g = 0;
00662 for ( j = 0; j < j_upper; ++j ) {
00663 if ( p_1[j].nonbondedGroupSize ) {
00664 grouplist[g++] = j;
00665 }
00666 }
00667 g_upper = g;
00668 if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00669 int fixg = 0;
00670
00671 if ( fixedAtomsOn ) {
00672 for ( g = 0; g < g_upper; ++g ) {
00673 j = grouplist[g];
00674 if ( ! pExt_1[j].groupFixed ) {
00675 fixglist[fixg++] = j;
00676 }
00677 }
00678 }
00679
00680 fixg_upper = fixg;
00681 if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00682
00683 #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00684
00685 *(pairlists.newlist(1)) = i_upper;
00686 pairlists.newsize(1);
00687
00688 } else { // if ( savePairlists || ! usePairlists )
00689
00690 plint *i_upper_check;
00691 int i_upper_check_count;
00692 pairlists.nextlist(&i_upper_check,&i_upper_check_count);
00693 if ( i_upper_check[0] != i_upper )
00694 NAMD_bug("pairlist i_upper mismatch!");
00695
00696 } // if ( savePairlists || ! usePairlists )
00697
00698 SELF(
00699 int j_hgroup = 0;
00700 int g_lower = 0;
00701 int fixg_lower = 0;
00702 )
00703 int pairlistindex=0;
00704 int pairlistoffset=0;
00705
00706
00707
00708 #if ( SHORT( FAST( 1+ ) ) 0 )
00709 #if ( PAIR( 1+ ) 0 )
00710 Force *f_0 = params->ff[0];
00711 Force *f_1 = params->ff[1];
00712 #else
00713 #define f_1 f_0
00714 NBWORKARRAY(Force,f_0,i_upper)
00715 memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00716 #endif
00717 #endif
00718 #if ( FULL( 1+ ) 0 )
00719 #if ( PAIR( 1+ ) 0 )
00720 Force *fullf_0 = params->fullf[0];
00721 Force *fullf_1 = params->fullf[1];
00722 #else
00723 #define fullf_1 fullf_0
00724 NBWORKARRAY(Force,fullf_0,i_upper);
00725 memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00726 #endif
00727 #endif
00728
00729
00730 int numParts = params->numParts;
00731 int myPart = params->minPart;
00732 int groupCount = 0;
00733
00734 for ( i = 0; i < (i_upper SELF(- 1)); ++i )
00735 {
00736 const CompAtom &p_i = p_0[i];
00737 const CompAtomExt &pExt_i = pExt_0[i];
00738 if ( p_i.hydrogenGroupSize ) {
00739 //save current group count
00740 int curgrpcount = groupCount;
00741 //increment group count
00742 groupCount ++;
00743
00744 if (groupCount >= numParts)
00745 groupCount = 0;
00746
00747 if ( curgrpcount != myPart ) {
00748 i += p_i.hydrogenGroupSize - 1;
00749
00750 //Power PC alignment constraint
00751 #ifdef ARCH_POWERPC
00752 __dcbt((void *) &(p_0[i+1]));
00753 #endif
00754 continue;
00755 }
00756 }
00757
00758 register const BigReal p_i_x = p_i.position.x + offset_x;
00759 register const BigReal p_i_y = p_i.position.y + offset_y;
00760 register const BigReal p_i_z = p_i.position.z + offset_z;
00761
00762 ALCH(const int p_i_partition = p_i.partition;)
00763
00764 PPROF(
00765 const int p_i_partition = p_i.partition;
00766 int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00767 pp_clamp(n1, pressureProfileSlabs);
00768 )
00769
00770 SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00771
00772 if ( savePairlists || ! usePairlists ) {
00773
00774 if ( ! savePairlists ) pairlists.reset(); // limit space usage
00775
00776 #ifdef MEM_OPT_VERSION
00777 const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);
00778 const int excl_min = pExt_i.id + exclcheck->min;
00779 const int excl_max = pExt_i.id + exclcheck->max;
00780 #else
00781 const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
00782 const int excl_min = exclcheck->min;
00783 const int excl_max = exclcheck->max;
00784 #endif
00785 const char * excl_flags_var;
00786 if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00787 else { // need to build list on the fly
00788
00789 //TODO: Should change later!!!!!!!!!! --Chao Mei
00790 //Now just for the sake of passing compilation
00791 #ifndef MEM_OPT_VERSION
00792 if ( excl_flags_buff ) {
00793 int nl,l;
00794 nl = full_excl[0] + 1;
00795 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00796 nl = mod_excl[0] + 1;
00797 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00798 } else {
00799 excl_flags_buff = new char[mol->numAtoms];
00800 memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00801 }
00802 int nl,l;
00803 full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
00804 nl = full_excl[0] + 1;
00805 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00806 mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
00807 nl = mod_excl[0] + 1;
00808 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00809 excl_flags_var = excl_flags_buff;
00810 #endif
00811
00812 }
00813 const char * const excl_flags = excl_flags_var;
00814
00815 if ( p_i.nonbondedGroupSize ) {
00816
00817 pairlistindex = 0; // initialize with 0 elements
00818 pairlistoffset=0;
00819 const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
00820
00821 // If patch divisions are not made by hydrogen groups, then
00822 // hydrogenGroupSize is set to 1 for all atoms. Thus we can
00823 // carry on as if we did have groups - only less efficiently.
00824 // An optimization in this case is to not rebuild the temporary
00825 // pairlist but to include every atom in it. This should be a
00826 // a very minor expense.
00827
00828 SELF
00829 (
00830 if ( p_i.hydrogenGroupSize ) {
00831 // exclude child hydrogens of i
00832 // j_hgroup = i + p_i.hydrogenGroupSize; (moved above)
00833 while ( g_lower < g_upper &&
00834 grouplist[g_lower] < j_hgroup ) ++g_lower;
00835 while ( fixg_lower < fixg_upper &&
00836 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00837 }
00838 // add all child or sister hydrogens of i
00839 for ( j = i + 1; j < j_hgroup; ++j ) {
00840 pairlist[pairlistindex++] = j;
00841 }
00842 )
00843
00844 // add remaining atoms to pairlist via hydrogen groups
00845 register plint *pli = pairlist + pairlistindex;
00846
00847 {
00848 // Atom Sort : Modify the values of g and gu based on the added information
00849 // of the linear projections (sort values) information.
00850 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00851
00852 register int g = 0;
00853 register BigReal p_i_sortValue = p_0_sortValues[i];
00854 const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00855 register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00856
00857 // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
00858 register int lower = 0;
00859 register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00860 while ((upper - lower) > 1) {
00861 register int j = ((lower + upper) >> 1);
00862 register BigReal jSortVal = sortValues[j].sortValue;
00863 if (jSortVal < p_i_sortValue_plus_windowRadius) {
00864 lower = j;
00865 } else {
00866 upper = j;
00867 }
00868 }
00869 const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00870
00871 #else
00872
00873 register plint *gli = goodglist;
00874 const plint *glist = ( groupfixed ? fixglist : grouplist );
00875 SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00876 const int gu = ( groupfixed ? fixg_upper : g_upper );
00877 register int g = PAIR(0) SELF(gl);
00878
00879 #endif
00880
00881
00882 if ( g < gu ) {
00883 int hu = 0;
00884 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00885 if ( gu - g > 6 ) {
00886
00887 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00888 register SortEntry* sortEntry0 = sortValues + g;
00889 register SortEntry* sortEntry1 = sortValues + g + 1;
00890 register int jprev0 = sortEntry0->index;
00891 register int jprev1 = sortEntry1->index;
00892 #else
00893 register int jprev0 = glist[g ];
00894 register int jprev1 = glist[g + 1];
00895 #endif
00896
00897 register int j0;
00898 register int j1;
00899
00900 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00901 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00902 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00903
00904 // these don't change here, so we could move them into outer scope
00905 const __m128d P_I_X = _mm_set1_pd(p_i_x);
00906 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00907 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00908
00909 g += 2;
00910 for ( ; g < gu - 2; g +=2 ) {
00911 // compute 1d distance, 2-way parallel
00912 j0 = jprev0;
00913 j1 = jprev1;
00914
00915 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00916 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00917 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00918 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00919 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00920 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00921
00922 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00923 sortEntry0 = sortValues + g;
00924 sortEntry1 = sortValues + g + 1;
00925 jprev0 = sortEntry0->index;
00926 jprev1 = sortEntry1->index;
00927 #else
00928 jprev0 = glist[g ];
00929 jprev1 = glist[g+1];
00930 #endif
00931
00932 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00933 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00934 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00935
00936 __align(16) double r2_01[2];
00937 _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
00938
00939 // XXX these could possibly benefit from SSE-based conditionals
00940 bool test0 = ( r2_01[0] < groupplcutoff2 );
00941 bool test1 = ( r2_01[1] < groupplcutoff2 );
00942
00943 //removing ifs benefits on many architectures
00944 //as the extra stores will only warm the cache up
00945 goodglist [ hu ] = j0;
00946 goodglist [ hu + test0 ] = j1;
00947
00948 hu += test0 + test1;
00949 }
00950 g-=2;
00951 }
00952 #else
00953 if ( gu - g > 6 ) {
00954
00955 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00956 register SortEntry* sortEntry0 = sortValues + g;
00957 register SortEntry* sortEntry1 = sortValues + g + 1;
00958 register int jprev0 = sortEntry0->index;
00959 register int jprev1 = sortEntry1->index;
00960 #else
00961 register int jprev0 = glist[g ];
00962 register int jprev1 = glist[g + 1];
00963 #endif
00964
00965 register int j0;
00966 register int j1;
00967
00968 register BigReal pj_x_0, pj_x_1;
00969 register BigReal pj_y_0, pj_y_1;
00970 register BigReal pj_z_0, pj_z_1;
00971 register BigReal t_0, t_1, r2_0, r2_1;
00972
00973 pj_x_0 = p_1[jprev0].position.x;
00974 pj_x_1 = p_1[jprev1].position.x;
00975
00976 pj_y_0 = p_1[jprev0].position.y;
00977 pj_y_1 = p_1[jprev1].position.y;
00978
00979 pj_z_0 = p_1[jprev0].position.z;
00980 pj_z_1 = p_1[jprev1].position.z;
00981
00982 g += 2;
00983 for ( ; g < gu - 2; g +=2 ) {
00984 // compute 1d distance, 2-way parallel
00985 j0 = jprev0;
00986 j1 = jprev1;
00987
00988 t_0 = p_i_x - pj_x_0;
00989 t_1 = p_i_x - pj_x_1;
00990 r2_0 = t_0 * t_0;
00991 r2_1 = t_1 * t_1;
00992
00993 t_0 = p_i_y - pj_y_0;
00994 t_1 = p_i_y - pj_y_1;
00995 r2_0 += t_0 * t_0;
00996 r2_1 += t_1 * t_1;
00997
00998 t_0 = p_i_z - pj_z_0;
00999 t_1 = p_i_z - pj_z_1;
01000 r2_0 += t_0 * t_0;
01001 r2_1 += t_1 * t_1;
01002
01003 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01004 sortEntry0 = sortValues + g;
01005 sortEntry1 = sortValues + g + 1;
01006 jprev0 = sortEntry0->index;
01007 jprev1 = sortEntry1->index;
01008 #else
01009 jprev0 = glist[g ];
01010 jprev1 = glist[g+1];
01011 #endif
01012
01013 pj_x_0 = p_1[jprev0].position.x;
01014 pj_x_1 = p_1[jprev1].position.x;
01015 pj_y_0 = p_1[jprev0].position.y;
01016 pj_y_1 = p_1[jprev1].position.y;
01017 pj_z_0 = p_1[jprev0].position.z;
01018 pj_z_1 = p_1[jprev1].position.z;
01019
01020 bool test0 = ( r2_0 < groupplcutoff2 );
01021 bool test1 = ( r2_1 < groupplcutoff2 );
01022
01023 //removing ifs benefits on many architectures
01024 //as the extra stores will only warm the cache up
01025 goodglist [ hu ] = j0;
01026 goodglist [ hu + test0 ] = j1;
01027
01028 hu += test0 + test1;
01029 }
01030 g-=2;
01031 }
01032 #endif
01033
01034 for (; g < gu; g++) {
01035
01036 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01037 register SortEntry* sortEntry = sortValues + g;
01038 register int j = sortEntry->index;
01039 #else
01040 int j = glist[g];
01041 #endif
01042
01043 BigReal p_j_x = p_1[j].position.x;
01044 BigReal p_j_y = p_1[j].position.y;
01045 BigReal p_j_z = p_1[j].position.z;
01046
01047 BigReal r2 = p_i_x - p_j_x;
01048 r2 *= r2;
01049 BigReal t2 = p_i_y - p_j_y;
01050 r2 += t2 * t2;
01051 t2 = p_i_z - p_j_z;
01052 r2 += t2 * t2;
01053
01054 if ( r2 <= groupplcutoff2 )
01055 goodglist[hu ++] = j;
01056 }
01057
01058 for ( int h=0; h<hu; ++h ) {
01059 int j = goodglist[h];
01060 int nbgs = p_1[j].nonbondedGroupSize;
01061 pli[0] = j; // copy over the next four in any case
01062 pli[1] = j+1;
01063 pli[2] = j+2;
01064 pli[3] = j+3; // assume nbgs <= 4
01065 pli += nbgs;
01066 }
01067 }
01068 }
01069
01070 pairlistindex = pli - pairlist;
01071 // make sure padded element on pairlist points to real data
01072 if ( pairlistindex ) {
01073 pairlist[pairlistindex] = pairlist[pairlistindex-1];
01074 } /* PAIR( else { // skip empty loops if no pairs were found
01075 int nbgs = p_i.nonbondedGroupSize;
01076 i += nbgs - 1;
01077 continue;
01078 } ) */
01079 } // if i is hydrogen group parent
01080 SELF
01081 (
01082 // self-comparisions require list to be incremented
01083 // pair-comparisions use entire list (pairlistoffset is 0)
01084 else pairlistoffset++;
01085 )
01086
01087 const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
01088
01089 register plint *pli = pairlist2;
01090 #if 1 ALCH(-1)
01091 plint *pairlistn = pairlists.newlist(j_upper + 5 + 1 + 5) SELF(+ 1);
01092 #else
01093 plint *pairlistn = pairlistnAlch;
01094 #endif
01095 SELF( plint &pairlistn_skip = *(pairlistn-1); )
01096 register plint *plin = pairlistn;
01097
01098
01099 INT(
01100 if ( pairInteractionOn ) {
01101 const int ifep_type = p_i.partition;
01102 if (pairInteractionSelf) {
01103 if (ifep_type != 1) continue;
01104 for (int k=pairlistoffset; k<pairlistindex; k++) {
01105 j = pairlist[k];
01106 const int jfep_type = p_1[j].partition;
01107 // for pair-self, both atoms must be in group 1.
01108 if (jfep_type == 1) {
01109 *(pli++) = j;
01110 }
01111 }
01112 } else {
01113 if (ifep_type != 1 && ifep_type != 2) continue;
01114 for (int k=pairlistoffset; k<pairlistindex; k++) {
01115 j = pairlist[k];
01116 const int jfep_type = p_1[j].partition;
01117 // for pair, must have one from each group.
01118 if (ifep_type + jfep_type == 3) {
01119 *(pli++) = j;
01120 }
01121 }
01122 }
01123 int npair2_int = pli - pairlist2;
01124 pli = pairlist2;
01125 for (int k=0; k<npair2_int; k++) {
01126 j = pairlist2[k];
01127 BigReal p_j_x = p_1[j].position.x;
01128 BigReal r2 = p_i_x - p_j_x;
01129 r2 *= r2;
01130 BigReal p_j_y = p_1[j].position.y;
01131 BigReal t2 = p_i_y - p_j_y;
01132 r2 += t2 * t2;
01133 BigReal p_j_z = p_1[j].position.z;
01134 t2 = p_i_z - p_j_z;
01135 r2 += t2 * t2;
01136 if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01137 int atom2 = pExt_1[j].id;
01138 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01139 else *(plin++) = j;
01140 }
01141 }
01142 } else
01143 )
01144 if ( atomfixed ) {
01145 for (int k=pairlistoffset; k<pairlistindex; k++) {
01146 j = pairlist[k];
01147 BigReal p_j_x = p_1[j].position.x;
01148 BigReal r2 = p_i_x - p_j_x;
01149 r2 *= r2;
01150 BigReal p_j_y = p_1[j].position.y;
01151 BigReal t2 = p_i_y - p_j_y;
01152 r2 += t2 * t2;
01153 BigReal p_j_z = p_1[j].position.z;
01154 t2 = p_i_z - p_j_z;
01155 r2 += t2 * t2;
01156 if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01157 int atom2 = pExt_1[j].id;
01158 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01159 else *(plin++) = j;
01160 }
01161 }
01162 } else {
01163 int k = pairlistoffset;
01164 int ku = pairlistindex;
01165 if ( k < ku ) {
01166 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01167 if ( ku - k > 6 ) {
01168 register int jprev0 = pairlist [k ];
01169 register int jprev1 = pairlist [k + 1];
01170
01171 register int j0;
01172 register int j1;
01173
01174 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01175 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01176 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01177
01178 // these don't change here, so we could move them into outer scope
01179 const __m128d P_I_X = _mm_set1_pd(p_i_x);
01180 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01181 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01182
01183 int atom2_0 = pExt_1[jprev0].id;
01184 int atom2_1 = pExt_1[jprev1].id;
01185
01186 k += 2;
01187 for ( ; k < ku - 2; k +=2 ) {
01188 // compute 1d distance, 2-way parallel
01189 j0 = jprev0;
01190 j1 = jprev1;
01191
01192 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01193 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01194 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01195 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01196 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01197 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01198
01199 jprev0 = pairlist[k];
01200 jprev1 = pairlist[k+1];
01201
01202 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01203 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01204 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01205
01206 __align(16) double r2_01[2];
01207 _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
01208
01209 if (r2_01[0] <= plcutoff2) {
01210 if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
01211 *(pli++) = j0;
01212 else
01213 *(plin++) = j0;
01214 }
01215 atom2_0 = pExt_1[jprev0].id;
01216
01217 if (r2_01[1] <= plcutoff2) {
01218 if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
01219 *(pli++) = j1;
01220 else
01221 *(plin++) = j1;
01222 }
01223 atom2_1 = pExt_1[jprev1].id;
01224 }
01225 k-=2;
01226 }
01227 #else
01228 if ( ku - k > 6 ) {
01229 register int jprev0 = pairlist [k];
01230 register int jprev1 = pairlist [k + 1];
01231
01232 register int j0;
01233 register int j1;
01234
01235 register BigReal pj_x_0, pj_x_1;
01236 register BigReal pj_y_0, pj_y_1;
01237 register BigReal pj_z_0, pj_z_1;
01238 register BigReal t_0, t_1, r2_0, r2_1;
01239
01240 pj_x_0 = p_1[jprev0].position.x;
01241 pj_x_1 = p_1[jprev1].position.x;
01242
01243 pj_y_0 = p_1[jprev0].position.y;
01244 pj_y_1 = p_1[jprev1].position.y;
01245
01246 pj_z_0 = p_1[jprev0].position.z;
01247 pj_z_1 = p_1[jprev1].position.z;
01248
01249 int atom2_0 = pExt_1[jprev0].id;
01250 int atom2_1 = pExt_1[jprev1].id;
01251
01252 k += 2;
01253 for ( ; k < ku - 2; k +=2 ) {
01254 // compute 1d distance, 2-way parallel
01255 j0 = jprev0;
01256 j1 = jprev1;
01257
01258 t_0 = p_i_x - pj_x_0;
01259 t_1 = p_i_x - pj_x_1;
01260 r2_0 = t_0 * t_0;
01261 r2_1 = t_1 * t_1;
01262
01263 t_0 = p_i_y - pj_y_0;
01264 t_1 = p_i_y - pj_y_1;
01265 r2_0 += t_0 * t_0;
01266 r2_1 += t_1 * t_1;
01267
01268 t_0 = p_i_z - pj_z_0;
01269 t_1 = p_i_z - pj_z_1;
01270 r2_0 += t_0 * t_0;
01271 r2_1 += t_1 * t_1;
01272
01273 jprev0 = pairlist[k];
01274 jprev1 = pairlist[k+1];
01275
01276 pj_x_0 = p_1[jprev0].position.x;
01277 pj_x_1 = p_1[jprev1].position.x;
01278 pj_y_0 = p_1[jprev0].position.y;
01279 pj_y_1 = p_1[jprev1].position.y;
01280 pj_z_0 = p_1[jprev0].position.z;
01281 pj_z_1 = p_1[jprev1].position.z;
01282
01283 if (r2_0 <= plcutoff2) {
01284 if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
01285 *(pli++) = j0;
01286 else
01287 *(plin++) = j0;
01288 }
01289 atom2_0 = pExt_1[jprev0].id;
01290
01291 if (r2_1 <= plcutoff2) {
01292 if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
01293 *(pli++) = j1;
01294 else
01295 *(plin++) = j1;
01296 }
01297 atom2_1 = pExt_1[jprev1].id;
01298 }
01299 k-=2;
01300 }
01301 #endif
01302
01303 for (; k < ku; k++) {
01304 int j = pairlist[k];
01305 int atom2 = pExt_1[j].id;
01306
01307 BigReal p_j_x = p_1[j].position.x;
01308 BigReal p_j_y = p_1[j].position.y;
01309 BigReal p_j_z = p_1[j].position.z;
01310
01311 BigReal r2 = p_i_x - p_j_x;
01312 r2 *= r2;
01313 BigReal t2 = p_i_y - p_j_y;
01314 r2 += t2 * t2;
01315 t2 = p_i_z - p_j_z;
01316 r2 += t2 * t2;
01317
01318 if (r2 <= plcutoff2) {
01319 if ( atom2 >= excl_min && atom2 <= excl_max )
01320 *(pli++) = j;
01321 else
01322 *(plin++) = j;
01323 }
01324 }
01325 }
01326 }
01327 int npair2 = pli - pairlist2;
01328 if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
01329
01330 plint *plix = pairlistx;
01331 plint *plim = pairlistm;
01332 plint *pln = pairlistn;
01333 ALCH(
01334 plint *plinA1 = pairlistnA1;
01335 plint *plixA1 = pairlistxA1;
01336 plint *plimA1 = pairlistmA1;
01337 plint *plinA2 = pairlistnA2;
01338 plint *plixA2 = pairlistxA2;
01339 plint *plimA2 = pairlistmA2;
01340 )
01341 int k=0;
01342 #if 1 ALCH(-1)
01343 SELF(
01344 for (; pln < plin && *pln < j_hgroup; ++pln) {
01345 *(plix++) = *pln; // --exclChecksum;
01346 }
01347 pairlistn_skip = pln - pairlistn;
01348 for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01349 *(plix++) = pairlist2[k]; // --exclChecksum;
01350 }
01351 )
01352 #endif
01353 ALCH(
01354 SELF(
01355 for (; pln < plin && *pln < j_hgroup; ++pln) {
01356 switch (pswitchTable[4*p_i_partition]) { //p_i_partition + 3*p_1[i].partition
01357 case 0: *(plix++) = *pln; break;
01358 case 1: *(plixA1++) = *pln; break;
01359 case 2: *(plixA2++) = *pln; break;
01360 default: NAMD_die("Alchemical pairlist error"); break;
01361 }
01362 }
01363 for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01364 switch (pswitchTable[4*p_i_partition]) {
01365 case 0: *(plix++) = pairlist2[k]; break;
01366 case 1: *(plixA1++) = pairlist2[k]; break;
01367 case 2: *(plixA2++) = pairlist2[k]; break;
01368 default: NAMD_die("Alchemical pairlist error"); break;
01369 }
01370 }
01371 )
01372 )
01373 for (; k < npair2; ++k ) {
01374 int j = pairlist2[k];
01375 int atom2 = pExt_1[j].id;
01376 int excl_flag = excl_flags[atom2];
01377 ALCH(int pswitch = pswitchTable[p_i_partition + 3*(p_1[j].partition)];)
01378 switch ( excl_flag ALCH( + 3 * pswitch)) {
01379 case 0: *(plin++) = j; break;
01380 case 1: *(plix++) = j; break;
01381 case 2: *(plim++) = j; break;
01382 ALCH(
01383 case 3: *(plinA1++) = j; break;
01384 case 6: *(plinA2++) = j; break;
01385 case 5: *(plimA1++) = j; break;
01386 case 8: *(plimA2++) = j; break;
01387 case 4: *(plixA1++) = j; break;
01388 case 7: *(plixA2++) = j; break;
01389 )
01390 }
01391 }
01392 // exclChecksum += (plix - pairlistx);
01393 // exclChecksum += (plim - pairlistm);
01394
01395
01396 #if 1 ALCH(-1)
01397 npairn = plin - pln;
01398 pairlistn_save = pln;
01399 pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
01400 pairlists.newsize(plin - pairlistn SELF(+ 1) + 1);
01401 #else
01402 plint *plinA0 = pairlistnA0;
01403 int unsortedNpairn = plin - pln;
01404 for ( k=0; k<unsortedNpairn; ++k ) {
01405 int j = pln[k];
01406 switch(pswitchTable[p_i_partition + 3*(p_1[j].partition)]) {
01407 case 0: *(plinA0++) = j; break;
01408 case 1: *(plinA1++) = j; break;
01409 case 2: *(plinA2++) = j; break;
01410 }
01411 }
01412
01413 npairn = plinA0 - pairlistnA0;
01414 // FB preallocation (incl extra for overhead) seems to be necessary
01415 pairlistn_save = pairlists.newlist(j_upper + 30);
01416 for ( k=0; k<npairn; ++k ) {
01417 pairlistn_save[k] = pairlistnA0[k];
01418 }
01419 pairlistn_save[k] = k ? pairlistn_save[k-1] : -1;
01420 pairlists.newsize(npairn + 1);
01421
01422 #endif
01423
01424 npairx = plix - pairlistx;
01425 pairlistx_save = pairlists.newlist(npairx + 1);
01426 for ( k=0; k<npairx; ++k ) {
01427 pairlistx_save[k] = pairlistx[k];
01428 }
01429 pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
01430 pairlists.newsize(npairx + 1);
01431
01432 npairm = plim - pairlistm;
01433 pairlistm_save = pairlists.newlist(npairm + 1);
01434 for ( k=0; k<npairm; ++k ) {
01435 pairlistm_save[k] = pairlistm[k];
01436 }
01437 pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
01438 pairlists.newsize(npairm + 1);
01439
01440
01441 #if 0 ALCH(+1)
01442 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
01443 { \
01444 (NPAIRS) = (PL2) - (PL1); \
01445 (PLSAVE) = pairlists.newlist((NPAIRS) + 1); \
01446 for ( k=0; k<(NPAIRS); ++k ) { \
01447 (PLSAVE)[k] = (PL1)[k]; \
01448 } \
01449 (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
01450 pairlists.newsize((NPAIRS) + 1); \
01451 }
01452
01453 PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
01454 PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
01455 PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
01456 PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
01457 PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
01458 PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
01459 #undef PAIRLISTFROMARRAY
01460
01461 #endif
01462
01463
01464 // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
01465 } else { // if ( savePairlists || ! usePairlists )
01466 // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
01467
01468 pairlists.nextlist(&pairlistn_save,&npairn); --npairn;
01469 //if ( npairn > 1000 )
01470 // iout << i << " " << i_upper << " " << npairn << " n\n" << endi;
01471 #if 1 ALCH(-1)
01472 SELF(
01473 int pairlistn_skip = *pairlistn_save;
01474 pairlistn_save += (pairlistn_skip + 1);
01475 npairn -= (pairlistn_skip + 1);
01476 )
01477 #endif
01478
01479 pairlists.nextlist(&pairlistx_save,&npairx); --npairx;
01480
01481 //if ( npairx > 1000 )
01482 // iout << i << " " << i_upper << " " << npairx << " x\n" << endi;
01483 // exclChecksum += npairx;
01484 pairlists.nextlist(&pairlistm_save,&npairm); --npairm;
01485 ALCH(
01486 pairlists.nextlist(&pairlistnA1_save,&npairnA1); --npairnA1;
01487 pairlists.nextlist(&pairlistxA1_save,&npairxA1); --npairxA1;
01488 pairlists.nextlist(&pairlistmA1_save,&npairmA1); --npairmA1;
01489 pairlists.nextlist(&pairlistnA2_save,&npairnA2); --npairnA2;
01490 pairlists.nextlist(&pairlistxA2_save,&npairxA2); --npairxA2;
01491 pairlists.nextlist(&pairlistmA2_save,&npairmA2); --npairmA2;
01492 )
01493 //if ( npairm > 1000 )
01494 // iout << i << " " << i_upper << " " << npairm << " m\n" << endi;
01495 // exclChecksum += npairm;
01496
01497 } // if ( savePairlists || ! usePairlists )
01498
01499 LES( BigReal *lambda_table_i =
01500 lambda_table + (lesFactor+1) * p_i.partition; )
01501
01502 INT(
01503 const BigReal force_sign = (
01504 ( pairInteractionOn && ! pairInteractionSelf ) ?
01505 ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
01506 );
01507
01508 )
01509
01510 const BigReal kq_i = COLOUMB * p_i.charge * scaling * dielectric_1;
01511 const LJTable::TableEntry * const lj_row =
01512 ljTable->table_row(p_i.vdwType);
01513
01514 SHORT( FAST( BigReal f_i_x = 0.; ) )
01515 SHORT( FAST( BigReal f_i_y = 0.; ) )
01516 SHORT( FAST( BigReal f_i_z = 0.; ) )
01517 FULL( BigReal fullf_i_x = 0.; )
01518 FULL( BigReal fullf_i_y = 0.; )
01519 FULL( BigReal fullf_i_z = 0.; )
01520
01521 int npairi;
01522 int k;
01523
01524 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01525 p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
01526 r2_delta, r2list);
01527
01528 #define NORMAL(X) X
01529 #define EXCLUDED(X)
01530 #define MODIFIED(X)
01531 #include "ComputeNonbondedBase2.h"
01532 #undef NORMAL
01533 #undef EXCLUDED
01534 #undef MODIFIED
01535
01536 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01537 p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
01538 r2_delta, r2list);
01539 exclChecksum += npairi;
01540
01541 #define NORMAL(X)
01542 #define EXCLUDED(X)
01543 #define MODIFIED(X) X
01544 #include "ComputeNonbondedBase2.h"
01545 #undef NORMAL
01546 #undef EXCLUDED
01547 #undef MODIFIED
01548
01549 #ifdef FULLELECT
01550 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01551 p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
01552 r2_delta, r2list);
01553 exclChecksum += npairi;
01554 #ifndef NAMD_CUDA
01555 SELF(
01556 for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01557 )
01558 #endif
01559
01560 #undef FAST
01561 #define FAST(X)
01562 #define NORMAL(X)
01563 #define EXCLUDED(X) X
01564 #define MODIFIED(X)
01565 #include "ComputeNonbondedBase2.h"
01566 #undef FAST
01567 #ifdef SLOWONLY
01568 #define FAST(X)
01569 #else
01570 #define FAST(X) X
01571 #endif
01572 #undef NORMAL
01573 #undef EXCLUDED
01574 #undef MODIFIED
01575 #else
01576 exclChecksum += npairx;
01577 #ifndef NAMD_CUDA
01578 SELF(
01579 for (k=0; k<npairx && pairlistx_save[k] < j_hgroup; ++k) --exclChecksum;
01580 )
01581 #endif
01582 #endif
01583
01584 #if 0 ALCH(+1) // nonbondedbase2 for alchemical-separeted pairlists
01585
01586 #undef ALCHPAIR
01587 #define ALCHPAIR(X) X
01588 #undef NOT_ALCHPAIR
01589 #define NOT_ALCHPAIR(X)
01590 BigReal myLambda; FEP(BigReal myLambda2;)
01591 BigReal myElecLambda; FEP(BigReal myElecLambda2;)
01592 BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
01593 BigReal myVdwShift; FEP(BigReal myVdwShift2;)
01594 BigReal alch_vdw_energy; BigReal alch_vdw_force;
01595 FEP(BigReal alch_vdw_energy_2;) TI(BigReal alch_vdw_dUdl;)
01596 BigReal shiftedElec; BigReal shiftedElecForce;
01597
01598 /********************************************************************/
01599 /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
01600 /********************************************************************/
01601 #define NORMAL(X) X
01602 #define EXCLUDED(X)
01603 #define MODIFIED(X)
01604
01605 #define ALCH1(X) X
01606 #define ALCH2(X)
01607 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01608 p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
01609 r2_delta, r2list);
01610 #include "ComputeNonbondedBase2.h" // normal, direction 'up'
01611 #undef ALCH1
01612 #undef ALCH2
01613
01614 #define ALCH1(X)
01615 #define ALCH2(X) X
01616 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01617 p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
01618 r2_delta, r2list);
01619 #include "ComputeNonbondedBase2.h" // normal, direction 'down'
01620 #undef ALCH1
01621 #undef ALCH2
01622
01623 #undef NORMAL
01624 #undef EXCLUDED
01625 #undef MODIFIED
01626
01627 /********************************************************************/
01628 /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
01629 /********************************************************************/
01630 #define NORMAL(X)
01631 #define EXCLUDED(X)
01632 #define MODIFIED(X) X
01633
01634 #define ALCH1(X) X
01635 #define ALCH2(X)
01636 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01637 p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
01638 r2_delta, r2list);
01639 exclChecksum += npairi;
01640 #include "ComputeNonbondedBase2.h" // modified, direction 'up'
01641 #undef ALCH1
01642 #undef ALCH2
01643
01644 #define ALCH1(X)
01645 #define ALCH2(X) X
01646 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01647 p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
01648 r2_delta, r2list);
01649 exclChecksum += npairi;
01650 #include "ComputeNonbondedBase2.h" // modified, direction 'down'
01651 #undef ALCH1
01652 #undef ALCH2
01653
01654 #undef NORMAL
01655 #undef EXCLUDED
01656 #undef MODIFIED
01657
01658 /********************************************************************/
01659 /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
01660 /********************************************************************/
01661 #ifdef FULLELECT
01662 #undef FAST
01663 #define FAST(X)
01664 #define NORMAL(X)
01665 #define EXCLUDED(X) X
01666 #define MODIFIED(X)
01667
01668 #define ALCH1(X) X
01669 #define ALCH2(X)
01670 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01671 p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
01672 r2_delta, r2list);
01673 exclChecksum += npairi;
01674 SELF(
01675 for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01676 )
01677 #include "ComputeNonbondedBase2.h" //excluded, direction 'up'
01678 #undef ALCH1
01679 #undef ALCH2
01680
01681 #define ALCH1(X)
01682 #define ALCH2(X) X
01683 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01684 p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
01685 r2_delta, r2list);
01686 exclChecksum += npairi;
01687 SELF(
01688 for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01689 )
01690 #include "ComputeNonbondedBase2.h" //excluded, direction 'down'
01691 #undef ALCH1
01692 #undef ALCH2
01693
01694 #undef FAST
01695 #ifdef SLOWONLY
01696 #define FAST(X)
01697 #else
01698 #define FAST(X) X
01699 #endif
01700 #undef NORMAL
01701 #undef EXCLUDED
01702 #undef MODIFIED
01703 #else
01704 exclChecksum += npairxA1 + npairxA2;
01705 SELF(
01706 for (k=0; k<npairxA1 && pairlistxA1_save[k] < j_hgroup; ++k) --exclChecksum;
01707 for (k=0; k<npairxA2 && pairlistxA2_save[k] < j_hgroup; ++k) --exclChecksum;
01708 )
01709 #endif
01710
01711 #undef ALCHPAIR
01712 #define ALCHPAIR(X)
01713 #undef NOT_ALCHPAIR
01714 #define NOT_ALCHPAIR(X) X
01715
01716 #endif // end nonbondedbase2.h includes for alchemical pairlists
01717
01718 #ifdef NETWORK_PROGRESS
01719 CkNetworkProgress();
01720 #endif
01721
01722 #ifdef ARCH_POWERPC
01723 //data cache block touch the position structure
01724 __dcbt ((void *) &(p_0[i+1]));
01725 __prefetch_by_load ((void *)&(groupCount));
01726 #endif
01727
01728 #ifndef NAMD_CUDA
01729 SHORT( FAST( f_0[i].x += f_i_x; ) )
01730 SHORT( FAST( f_0[i].y += f_i_y; ) )
01731 SHORT( FAST( f_0[i].z += f_i_z; ) )
01732 FULL( fullf_0[i].x += fullf_i_x; )
01733 FULL( fullf_0[i].y += fullf_i_y; )
01734 FULL( fullf_0[i].z += fullf_i_z; )
01735 #endif
01736
01737 // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
01738 } // for i
01739
01740 // PAIR(iout << "++++++++\n" << endi;)
01741
01742 #ifdef f_1
01743 #undef f_1
01744 #endif
01745 #if ( SHORT( FAST( 1+ ) ) 0 )
01746 #if ( SELF( 1+ ) 0 )
01747 {
01748 Force *patch_f_0 = params->ff[0];
01749
01750 #ifndef NAMD_CUDA
01751 #ifndef ARCH_POWERPC
01752 #pragma ivdep
01753 #endif
01754 for ( int i = 0; i < i_upper; ++i ) {
01755 patch_f_0[i].x += f_0[i].x;
01756 patch_f_0[i].y += f_0[i].y;
01757 patch_f_0[i].z += f_0[i].z;
01758 virial_xx += f_0[i].x * p_0[i].position.x;
01759 virial_xy += f_0[i].x * p_0[i].position.y;
01760 virial_xz += f_0[i].x * p_0[i].position.z;
01761 virial_yy += f_0[i].y * p_0[i].position.y;
01762 virial_yz += f_0[i].y * p_0[i].position.z;
01763 virial_zz += f_0[i].z * p_0[i].position.z;
01764 }
01765 #endif
01766 }
01767 #endif
01768 #endif
01769 #ifdef fullf_1
01770 #undef fullf_1
01771 #endif
01772 #if ( FULL( 1+ ) 0 )
01773 #if ( SELF( 1+ ) 0 )
01774 {
01775 Force *patch_fullf_0 = params->fullf[0];
01776 #ifndef NAMD_CUDA
01777 #ifndef ARCH_POWERPC
01778 #pragma ivdep
01779 #endif
01780 for ( int i = 0; i < i_upper; ++i ) {
01781 patch_fullf_0[i].x += fullf_0[i].x;
01782 patch_fullf_0[i].y += fullf_0[i].y;
01783 patch_fullf_0[i].z += fullf_0[i].z;
01784 fullElectVirial_xx += fullf_0[i].x * p_0[i].position.x;
01785 fullElectVirial_xy += fullf_0[i].x * p_0[i].position.y;
01786 fullElectVirial_xz += fullf_0[i].x * p_0[i].position.z;
01787 fullElectVirial_yy += fullf_0[i].y * p_0[i].position.y;
01788 fullElectVirial_yz += fullf_0[i].y * p_0[i].position.z;
01789 fullElectVirial_zz += fullf_0[i].z * p_0[i].position.z;
01790 }
01791 #endif
01792 }
01793 #endif
01794 #endif
01795
01796 #ifndef NAMD_CUDA
01797 reduction[exclChecksumIndex] += exclChecksum;
01798 #endif
01799 FAST
01800 (
01801 ENERGY( reduction[vdwEnergyIndex] += vdwEnergy; )
01802 SHORT
01803 (
01804 ENERGY( reduction[electEnergyIndex] += electEnergy; )
01805 )
01806 ALCH
01807 (
01808 TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;)
01809 TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;)
01810 FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
01811 SHORT
01812 (
01813 FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
01814 TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;)
01815 TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;)
01816 )
01817 )
01818 SHORT
01819 (
01820 reduction[virialIndex_XX] += virial_xx;
01821 reduction[virialIndex_XY] += virial_xy;
01822 reduction[virialIndex_XZ] += virial_xz;
01823 reduction[virialIndex_YX] += virial_xy;
01824 reduction[virialIndex_YY] += virial_yy;
01825 reduction[virialIndex_YZ] += virial_yz;
01826 reduction[virialIndex_ZX] += virial_xz;
01827 reduction[virialIndex_ZY] += virial_yz;
01828 reduction[virialIndex_ZZ] += virial_zz;
01829 )
01830 )
01831 FULL
01832 (
01833 ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
01834 ALCH
01835 (
01836 FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
01837 TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;)
01838 TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;)
01839 )
01840 reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
01841 reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
01842 reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
01843 reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
01844 reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
01845 reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
01846 reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
01847 reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
01848 reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
01849 )
01850
01851 delete [] excl_flags_buff;
01852
01853 }
|
1.3.9.1