00001
00007
00008
00009
00010
00011 #ifdef ARCH_POWERPC
00012 #include <builtins.h>
00013 #endif
00014
00015 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00016 #include <emmintrin.h>
00017 #if defined(__INTEL_COMPILER)
00018 #define __align(X) __declspec(align(X) )
00019 #elif defined(__GNUC__) || defined(__PGI)
00020 #define __align(X) __attribute__((aligned(X) ))
00021 #else
00022 #define __align(X) __declspec(align(X) )
00023 #endif
00024 #endif
00025
00026 #ifdef DEFINITION // (
00027 #include "LJTable.h"
00028 #include "Molecule.h"
00029 #include "ComputeNonbondedUtil.h"
00030 #endif // )
00031
00032 #if NAMD_ComputeNonbonded_SortAtoms != 0
00033 #include "PatchMap.h"
00034 #endif
00035
00036
00037 #undef NAME
00038 #undef CLASS
00039 #undef CLASSNAME
00040 #define NAME CLASSNAME(calc)
00041
00042 #undef PAIR
00043 #if NBTYPE == NBPAIR
00044 #define PAIR(X) X
00045 #define CLASS ComputeNonbondedPair
00046 #define CLASSNAME(X) ENERGYNAME( X ## _pair )
00047 #else
00048 #define PAIR(X)
00049 #endif
00050
00051 #undef SELF
00052 #if NBTYPE == NBSELF
00053 #define SELF(X) X
00054 #define CLASS ComputeNonbondedSelf
00055 #define CLASSNAME(X) ENERGYNAME( X ## _self )
00056 #else
00057 #define SELF(X)
00058 #endif
00059
00060 #undef ENERGYNAME
00061 #undef ENERGY
00062 #undef NOENERGY
00063 #ifdef CALCENERGY
00064 #define ENERGY(X) X
00065 #define NOENERGY(X)
00066 #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
00067 #else
00068 #define ENERGY(X)
00069 #define NOENERGY(X) X
00070 #define ENERGYNAME(X) SLOWONLYNAME( X )
00071 #endif
00072
00073 #undef SLOWONLYNAME
00074 #undef FAST
00075 #ifdef SLOWONLY
00076 #define FAST(X)
00077 #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
00078 #else
00079 #define FAST(X) X
00080 #define SLOWONLYNAME(X) MERGEELECTNAME( X )
00081 #endif
00082
00083 #undef MERGEELECTNAME
00084 #undef SHORT
00085 #undef NOSHORT
00086 #ifdef MERGEELECT
00087 #define SHORT(X)
00088 #define NOSHORT(X) X
00089 #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
00090 #else
00091 #define SHORT(X) X
00092 #define NOSHORT(X)
00093 #define MERGEELECTNAME(X) FULLELECTNAME( X )
00094 #endif
00095
00096 #undef FULLELECTNAME
00097 #undef FULL
00098 #undef NOFULL
00099 #ifdef FULLELECT
00100 #define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect )
00101 #define FULL(X) X
00102 #define NOFULL(X)
00103 #else
00104 #define FULLELECTNAME(X) TABENERGYNAME( X )
00105 #define FULL(X)
00106 #define NOFULL(X) X
00107 #endif
00108
00109 #undef TABENERGYNAME
00110 #undef TABENERGY
00111 #undef NOTABENERGY
00112 #ifdef TABENERGYFLAG
00113 #define TABENERGYNAME(X) FEPNAME( X ## _tabener )
00114 #define TABENERGY(X) X
00115 #define NOTABENERGY(X)
00116 #else
00117 #define TABENERGYNAME(X) FEPNAME( X )
00118 #define TABENERGY(X)
00119 #define NOTABENERGY(X) X
00120 #endif
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130 #undef FEPNAME
00131 #undef FEP
00132 #undef LES
00133 #undef INT
00134 #undef PPROF
00135 #undef LAM
00136 #undef CUDA
00137 #undef ALCH
00138 #undef TI
00139 #define FEPNAME(X) LAST( X )
00140 #define FEP(X)
00141 #define ALCHPAIR(X)
00142 #define NOT_ALCHPAIR(X) X
00143 #define LES(X)
00144 #define INT(X)
00145 #define PPROF(X)
00146 #define LAM(X)
00147 #define CUDA(X)
00148 #define ALCH(X)
00149 #define TI(X)
00150 #ifdef FEPFLAG
00151 #undef FEPNAME
00152 #undef FEP
00153 #undef ALCH
00154 #define FEPNAME(X) LAST( X ## _fep )
00155 #define FEP(X) X
00156 #define ALCH(X) X
00157 #endif
00158 #ifdef TIFLAG
00159 #undef FEPNAME
00160 #undef TI
00161 #undef ALCH
00162 #define FEPNAME(X) LAST( X ## _ti )
00163 #define TI(X) X
00164 #define ALCH(X) X
00165 #endif
00166 #ifdef LESFLAG
00167 #undef FEPNAME
00168 #undef LES
00169 #undef LAM
00170 #define FEPNAME(X) LAST( X ## _les )
00171 #define LES(X) X
00172 #define LAM(X) X
00173 #endif
00174 #ifdef INTFLAG
00175 #undef FEPNAME
00176 #undef INT
00177 #define FEPNAME(X) LAST( X ## _int )
00178 #define INT(X) X
00179 #endif
00180 #ifdef PPROFFLAG
00181 #undef FEPNAME
00182 #undef INT
00183 #undef PPROF
00184 #define FEPNAME(X) LAST( X ## _pprof )
00185 #define INT(X) X
00186 #define PPROF(X) X
00187 #endif
00188 #ifdef NAMD_CUDA
00189 #undef CUDA
00190 #define CUDA(X) X
00191 #endif
00192
00193 #define LAST(X) X
00194
00195
00196 SELF( PAIR( foo bar ) )
00197 LES( FEP( foo bar ) )
00198 LES( INT( foo bar ) )
00199 FEP( INT( foo bar ) )
00200 LAM( INT( foo bar ) )
00201 FEP( NOENERGY( foo bar ) )
00202 ENERGY( NOENERGY( foo bar ) )
00203 TABENERGY(NOTABENERGY( foo bar ) )
00204
00205 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00206 #define COMPONENT_DOTPRODUCT(A,B) ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))
00207 #endif
00208
00209
00210
00211
00212 void ComputeNonbondedUtil :: NAME
00213 ( nonbonded *params )
00214
00215
00216 {
00217
00218
00219 if ( ComputeNonbondedUtil::commOnly ) return;
00220
00221 #ifdef NAMD_CUDA
00222 NOENERGY(return;)
00223 #endif
00224
00225
00226 BigReal *reduction = params->reduction;
00227 SimParameters *simParams = Node::Object()->simParameters;
00228
00229 PPROF(
00230 BigReal *pressureProfileReduction = params->pressureProfileReduction;
00231 const BigReal invThickness = 1.0 / pressureProfileThickness;
00232 )
00233
00234 Pairlists &pairlists = *(params->pairlists);
00235 #ifdef NAMD_CUDA
00236 int savePairlists = 0;
00237 int usePairlists = 0;
00238 #else
00239 int savePairlists = params->savePairlists;
00240 int usePairlists = params->usePairlists;
00241 #endif
00242 pairlists.reset();
00243
00244
00245
00246 int exclChecksum = 0;
00247 FAST
00248 (
00249 ENERGY( BigReal vdwEnergy = 0; )
00250 SHORT
00251 (
00252 ENERGY( BigReal electEnergy = 0; )
00253 )
00254
00255 FEP
00256 (
00257 ENERGY( BigReal vdwEnergy_s = 0; )
00258 SHORT
00259 (
00260 ENERGY( BigReal electEnergy_s = 0; )
00261 )
00262 )
00263
00264 SHORT
00265 (
00266 BigReal virial_xx = 0;
00267 BigReal virial_xy = 0;
00268 BigReal virial_xz = 0;
00269 BigReal virial_yy = 0;
00270 BigReal virial_yz = 0;
00271 BigReal virial_zz = 0;
00272 )
00273 )
00274 FULL
00275 (
00276 ENERGY( BigReal fullElectEnergy = 0; )
00277 FEP
00278 (
00279 ENERGY( BigReal fullElectEnergy_s = 0; )
00280 )
00281 BigReal fullElectVirial_xx = 0;
00282 BigReal fullElectVirial_xy = 0;
00283 BigReal fullElectVirial_xz = 0;
00284 BigReal fullElectVirial_yy = 0;
00285 BigReal fullElectVirial_yz = 0;
00286 BigReal fullElectVirial_zz = 0;
00287 )
00288
00289
00290
00291 const BigReal offset_x = params->offset.x;
00292 const BigReal offset_y = params->offset.y;
00293 const BigReal offset_z = params->offset.z;
00294
00295 register const BigReal plcutoff2 = \
00296 params->plcutoff * params->plcutoff;
00297 register const BigReal groupplcutoff2 = \
00298 params->groupplcutoff * params->groupplcutoff;
00299 const BigReal dielectric_1 = ComputeNonbondedUtil:: dielectric_1;
00300 const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
00301 LJTable::TableEntry ljNull; ljNull.A = 0; ljNull.B = 0; TABENERGY(ljNull.tabletype=-1;)
00302 const LJTable::TableEntry* const lj_null_pars = &ljNull;
00303 const Molecule* const mol = ComputeNonbondedUtil:: mol;
00304 SHORT
00305 (
00306 const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
00307 )
00308 FULL
00309 (
00310 SHORT
00311 (
00312 const BigReal* const slow_table = ComputeNonbondedUtil:: slow_table;
00313 )
00314 NOSHORT
00315 (
00316
00317 const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
00318
00319
00320
00321 )
00322 )
00323 const BigReal scaling = ComputeNonbondedUtil:: scaling;
00324 const BigReal modf_mod = 1.0 - scale14;
00325 FAST
00326 (
00327 const BigReal switchOn2 = ComputeNonbondedUtil:: switchOn2;
00328 const BigReal c1 = ComputeNonbondedUtil:: c1;
00329 const BigReal c3 = ComputeNonbondedUtil:: c3;
00330 )
00331 const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
00332 const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
00333
00334 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
00335
00336 ALCH(
00337 const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
00338 const BigReal cutoff2 = ComputeNonbondedUtil::cutoff2;
00339 const BigReal switchfactor = 1./((cutoff2 - switchdist2)*(cutoff2 - switchdist2)*(cutoff2 - switchdist2));
00340 const BigReal alchElecLambdaStart = ComputeNonbondedUtil::alchElecLambdaStart;
00341 const BigReal alchVdwLambdaEnd = ComputeNonbondedUtil::alchVdwLambdaEnd;
00342 const BigReal alchVdwShiftCoeff = ComputeNonbondedUtil::alchVdwShiftCoeff;
00343
00344
00345 BigReal lambdaUp = ComputeNonbondedUtil::alchLambda;
00346 BigReal elecLambdaUp = (lambdaUp <= alchElecLambdaStart)? 0. : \
00347 (lambdaUp - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00348 BigReal vdwLambdaUp =
00349 (lambdaUp >= alchVdwLambdaEnd)? 1. : lambdaUp / alchVdwLambdaEnd;
00350 BigReal vdwShiftUp = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaUp);
00351 FEP(BigReal lambda2Up = ComputeNonbondedUtil::alchLambda2;)
00352 FEP(BigReal elecLambda2Up = (lambda2Up <= alchElecLambdaStart)? 0. : \
00353 (lambda2Up - alchElecLambdaStart) / (1. - alchElecLambdaStart);)
00354 FEP(BigReal vdwLambda2Up =
00355 (lambda2Up >= alchVdwLambdaEnd)? 1. : lambda2Up / alchVdwLambdaEnd;)
00356 FEP(BigReal vdwShift2Up = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Up);)
00357
00358
00359
00360 BigReal lambdaDown = 1 - ComputeNonbondedUtil::alchLambda;
00361 BigReal elecLambdaDown = (lambdaDown <= alchElecLambdaStart)? 0. : \
00362 (lambdaDown - alchElecLambdaStart) / (1. - alchElecLambdaStart);
00363 BigReal vdwLambdaDown =
00364 (lambdaDown >= alchVdwLambdaEnd)? 1. : lambdaDown / alchVdwLambdaEnd;
00365 BigReal vdwShiftDown = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambdaDown);
00366 FEP(BigReal lambda2Down = 1 - ComputeNonbondedUtil::alchLambda2;)
00367 FEP(BigReal elecLambda2Down = (lambda2Down <= alchElecLambdaStart)? 0. : \
00368 (lambda2Down - alchElecLambdaStart) / (1. - alchElecLambdaStart); )
00369 FEP(BigReal vdwLambda2Down =
00370 (lambda2Down >= alchVdwLambdaEnd)? 1. : lambda2Down / alchVdwLambdaEnd; )
00371 FEP(BigReal vdwShift2Down = ComputeNonbondedUtil::alchVdwShiftCoeff * (1-vdwLambda2Down);)
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392 int pswitchTable[3*3];
00393
00394
00395
00396
00397
00398
00399 for (int ip=0; ip<3; ++ip) {
00400 for (int jp=0; jp<3; ++jp ) {
00401 pswitchTable[ip+3*jp] = 0;
00402 if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+3*jp] = 1;
00403 if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+3*jp] = 2;
00404 if (ip + jp == 3) pswitchTable[ip+3*jp] = 99;
00405
00406 if (! ComputeNonbondedUtil::alchDecouple) {
00407
00408
00409 if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 1;
00410 if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 2;
00411 }
00412 if (ComputeNonbondedUtil::alchDecouple) {
00413
00414
00415
00416
00417
00418 if (ip == 1 && jp == 1) pswitchTable[ip+3*jp] = 0;
00419 if (ip == 2 && jp == 2) pswitchTable[ip+3*jp] = 0;
00420 }
00421 }
00422 }
00423
00424
00425 TI(
00426 BigReal vdwEnergy_ti_1 = 0;
00427 BigReal vdwEnergy_ti_2 = 0;
00428 SHORT(BigReal electEnergy_ti_1 = 0;
00429 BigReal electEnergy_ti_2 = 0;)
00430 FULL(BigReal fullElectEnergy_ti_1 = 0;
00431 BigReal fullElectEnergy_ti_2 = 0;)
00432 )
00433 )
00434
00435
00436 const int i_upper = params->numAtoms[0];
00437 register const int j_upper = params->numAtoms[1];
00438 register int j;
00439 register int i;
00440 const CompAtom *p_0 = params->p[0];
00441 const CompAtom *p_1 = params->p[1];
00442 const CompAtomExt *pExt_0 = params->pExt[0];
00443 const CompAtomExt *pExt_1 = params->pExt[1];
00444
00445 char * excl_flags_buff = 0;
00446 const int32 * full_excl = 0;
00447 const int32 * mod_excl = 0;
00448
00449 plint *pairlistn_save; int npairn;
00450 plint *pairlistx_save; int npairx;
00451 plint *pairlistm_save; int npairm;
00452
00453 ALCH(
00454 plint *pairlistnA1_save; int npairnA1;
00455 plint *pairlistxA1_save; int npairxA1;
00456 plint *pairlistmA1_save; int npairmA1;
00457 plint *pairlistnA2_save; int npairnA2;
00458 plint *pairlistxA2_save; int npairxA2;
00459 plint *pairlistmA2_save; int npairmA2;
00460 )
00461
00462 NBWORKARRAYSINIT(params->workArrays);
00463
00464 int arraysize = j_upper+5;
00465
00466 NBWORKARRAY(plint,pairlisti,arraysize)
00467 NBWORKARRAY(BigReal,r2list,arraysize)
00468
00469 union { double f; int32 i[2]; } byte_order_test;
00470 byte_order_test.f = 1.0;
00471 int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
00472
00473 if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00485
00486
00487
00488
00489
00490
00491
00492
00493 NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
00494 NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
00495 NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
00496
00497 register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
00498 register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
00499
00500 int p_0_sortValues_len = 0;
00501 int p_1_sortValues_len = 0;
00502 int p_1_sortValues_fixg_len = 0;
00503
00504
00505
00506 BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
00507
00508 if (savePairlists || !usePairlists) {
00509
00510 register const BigReal projLineVec_x = params->projLineVec.x;
00511 register const BigReal projLineVec_y = params->projLineVec.y;
00512 register const BigReal projLineVec_z = params->projLineVec.z;
00513
00514
00515 {
00516 register int j = 0;
00517 register int nbgs = p_1->nonbondedGroupSize;
00518 register BigReal p_x = p_1->position.x;
00519 register BigReal p_y = p_1->position.y;
00520 register BigReal p_z = p_1->position.z;
00521 register int index = j;
00522
00523 while (j < j_upper) {
00524
00525 j += nbgs;
00526
00527
00528
00529 register const CompAtom* p_j_next = p_1 + j;
00530 nbgs = p_j_next->nonbondedGroupSize;
00531
00532
00533
00534
00535
00536
00537
00538 register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00539
00540
00541 p_x = p_j_next->position.x;
00542 p_y = p_j_next->position.y;
00543 p_z = p_j_next->position.z;
00544
00545
00546 register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
00547 p_1_sortValStorePtr->index = index;
00548 p_1_sortValStorePtr->sortValue = sortVal;
00549 p_1_sortValues_len++;
00550
00551
00552 index = j;
00553
00554 }
00555 }
00556
00557
00558
00559 #if 0 // Selection Sort
00560 sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
00561 #elif 0 // Bubble Sort
00562 sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
00563 #else // Merge Sort
00564 #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
00565 sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00566 #else
00567 sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
00568 #endif
00569 #endif
00570
00571
00572 {
00573 register int i = 0;
00574 register int nbgs = p_0->nonbondedGroupSize;
00575 register BigReal p_x = p_0->position.x + offset_x;
00576 register BigReal p_y = p_0->position.y + offset_y;
00577 register BigReal p_z = p_0->position.z + offset_z;
00578 register int index = i;
00579
00580 while (i < i_upper) {
00581
00582 i += nbgs;
00583
00584
00585
00586 register const CompAtom* p_i_next = p_0 + i;
00587 nbgs = p_i_next->nonbondedGroupSize;
00588
00589
00590 register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
00591
00592
00593 p_x = p_i_next->position.x + offset_x;
00594 p_y = p_i_next->position.y + offset_y;
00595 p_z = p_i_next->position.z + offset_z;
00596
00597
00598 register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
00599 *p_0_sortValStorePtr = sortVal;
00600
00601
00602 index = i;
00603 }
00604
00605 p_0_sortValues_len = i_upper;
00606 }
00607
00608 }
00609
00610 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
00611
00612
00613
00614 #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
00615 NBWORKARRAY(plint,grouplist,arraysize);
00616 NBWORKARRAY(plint,fixglist,arraysize);
00617 #endif
00618
00619 NBWORKARRAY(plint,goodglist,arraysize);
00620 NBWORKARRAY(plint,pairlistx,arraysize);
00621 NBWORKARRAY(plint,pairlistm,arraysize);
00622 NBWORKARRAY(plint,pairlist,arraysize);
00623 NBWORKARRAY(plint,pairlist2,arraysize);
00624 ALCH(
00625 NBWORKARRAY(plint,pairlistnAlch,arraysize);
00626 NBWORKARRAY(plint,pairlistnA0,arraysize);
00627 NBWORKARRAY(plint,pairlistnA1,arraysize);
00628 NBWORKARRAY(plint,pairlistxA1,arraysize);
00629 NBWORKARRAY(plint,pairlistmA1,arraysize);
00630 NBWORKARRAY(plint,pairlistnA2,arraysize);
00631 NBWORKARRAY(plint,pairlistxA2,arraysize);
00632 NBWORKARRAY(plint,pairlistmA2,arraysize);
00633 )
00634
00635 NBWORKARRAY(short,vdwtype_array,j_upper+5);
00636 for (j = 0; j < j_upper; ++j)
00637 vdwtype_array [j] = p_1[j].vdwType;
00638
00639 int fixg_upper = 0;
00640 int g_upper = 0;
00641
00642 if ( savePairlists || ! usePairlists ) {
00643
00644 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00645
00646
00647 register int fixg = 0;
00648 for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
00649 register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
00650 register int p_1_index = p_1_sortEntry->index;
00651 if (!pExt_1[p_1_index].groupFixed) {
00652 register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
00653 p_1_sortEntry_fixg->index = p_1_sortEntry->index;
00654 p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
00655 p_1_sortValues_fixg_len++;
00656 }
00657 }
00658
00659 #else
00660
00661 register int g = 0;
00662 for ( j = 0; j < j_upper; ++j ) {
00663 if ( p_1[j].nonbondedGroupSize ) {
00664 grouplist[g++] = j;
00665 }
00666 }
00667 g_upper = g;
00668 if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
00669 int fixg = 0;
00670
00671 if ( fixedAtomsOn ) {
00672 for ( g = 0; g < g_upper; ++g ) {
00673 j = grouplist[g];
00674 if ( ! pExt_1[j].groupFixed ) {
00675 fixglist[fixg++] = j;
00676 }
00677 }
00678 }
00679
00680 fixg_upper = fixg;
00681 if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
00682
00683 #endif // NAMD_ComputeNonbonded_SortAtoms != 0
00684
00685 *(pairlists.newlist(1)) = i_upper;
00686 pairlists.newsize(1);
00687
00688 } else {
00689
00690 plint *i_upper_check;
00691 int i_upper_check_count;
00692 pairlists.nextlist(&i_upper_check,&i_upper_check_count);
00693 if ( i_upper_check[0] != i_upper )
00694 NAMD_bug("pairlist i_upper mismatch!");
00695
00696 }
00697
00698 SELF(
00699 int j_hgroup = 0;
00700 int g_lower = 0;
00701 int fixg_lower = 0;
00702 )
00703 int pairlistindex=0;
00704 int pairlistoffset=0;
00705
00706
00707
00708 #if ( SHORT( FAST( 1+ ) ) 0 )
00709 #if ( PAIR( 1+ ) 0 )
00710 Force *f_0 = params->ff[0];
00711 Force *f_1 = params->ff[1];
00712 #else
00713 #define f_1 f_0
00714 NBWORKARRAY(Force,f_0,i_upper)
00715 memset( (void*) f_0, 0, i_upper * sizeof(Force) );
00716 #endif
00717 #endif
00718 #if ( FULL( 1+ ) 0 )
00719 #if ( PAIR( 1+ ) 0 )
00720 Force *fullf_0 = params->fullf[0];
00721 Force *fullf_1 = params->fullf[1];
00722 #else
00723 #define fullf_1 fullf_0
00724 NBWORKARRAY(Force,fullf_0,i_upper);
00725 memset( (void*) fullf_0, 0, i_upper * sizeof(Force) );
00726 #endif
00727 #endif
00728
00729
00730 int numParts = params->numParts;
00731 int myPart = params->minPart;
00732 int groupCount = 0;
00733
00734 for ( i = 0; i < (i_upper SELF(- 1)); ++i )
00735 {
00736 const CompAtom &p_i = p_0[i];
00737 const CompAtomExt &pExt_i = pExt_0[i];
00738 if ( p_i.hydrogenGroupSize ) {
00739
00740 int curgrpcount = groupCount;
00741
00742 groupCount ++;
00743
00744 if (groupCount >= numParts)
00745 groupCount = 0;
00746
00747 if ( curgrpcount != myPart ) {
00748 i += p_i.hydrogenGroupSize - 1;
00749
00750
00751 #ifdef ARCH_POWERPC
00752 __dcbt((void *) &(p_0[i+1]));
00753 #endif
00754 continue;
00755 }
00756 }
00757
00758 register const BigReal p_i_x = p_i.position.x + offset_x;
00759 register const BigReal p_i_y = p_i.position.y + offset_y;
00760 register const BigReal p_i_z = p_i.position.z + offset_z;
00761
00762 ALCH(const int p_i_partition = p_i.partition;)
00763
00764 PPROF(
00765 const int p_i_partition = p_i.partition;
00766 int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
00767 pp_clamp(n1, pressureProfileSlabs);
00768 )
00769
00770 SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
00771
00772 if ( savePairlists || ! usePairlists ) {
00773
00774 if ( ! savePairlists ) pairlists.reset();
00775
00776 #ifdef MEM_OPT_VERSION
00777 const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);
00778 const int excl_min = pExt_i.id + exclcheck->min;
00779 const int excl_max = pExt_i.id + exclcheck->max;
00780 #else
00781 const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
00782 const int excl_min = exclcheck->min;
00783 const int excl_max = exclcheck->max;
00784 #endif
00785 const char * excl_flags_var;
00786 if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
00787 else {
00788
00789
00790
00791 #ifndef MEM_OPT_VERSION
00792 if ( excl_flags_buff ) {
00793 int nl,l;
00794 nl = full_excl[0] + 1;
00795 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
00796 nl = mod_excl[0] + 1;
00797 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
00798 } else {
00799 excl_flags_buff = new char[mol->numAtoms];
00800 memset( (void*) excl_flags_buff, 0, mol->numAtoms);
00801 }
00802 int nl,l;
00803 full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
00804 nl = full_excl[0] + 1;
00805 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
00806 mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
00807 nl = mod_excl[0] + 1;
00808 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
00809 excl_flags_var = excl_flags_buff;
00810 #endif
00811
00812 }
00813 const char * const excl_flags = excl_flags_var;
00814
00815 if ( p_i.nonbondedGroupSize ) {
00816
00817 pairlistindex = 0;
00818 pairlistoffset=0;
00819 const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
00820
00821
00822
00823
00824
00825
00826
00827
00828 SELF
00829 (
00830 if ( p_i.hydrogenGroupSize ) {
00831
00832
00833 while ( g_lower < g_upper &&
00834 grouplist[g_lower] < j_hgroup ) ++g_lower;
00835 while ( fixg_lower < fixg_upper &&
00836 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
00837 }
00838
00839 for ( j = i + 1; j < j_hgroup; ++j ) {
00840 pairlist[pairlistindex++] = j;
00841 }
00842 )
00843
00844
00845 register plint *pli = pairlist + pairlistindex;
00846
00847 {
00848
00849
00850 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00851
00852 register int g = 0;
00853 register BigReal p_i_sortValue = p_0_sortValues[i];
00854 const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
00855 register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
00856
00857
00858 register int lower = 0;
00859 register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
00860 while ((upper - lower) > 1) {
00861 register int j = ((lower + upper) >> 1);
00862 register BigReal jSortVal = sortValues[j].sortValue;
00863 if (jSortVal < p_i_sortValue_plus_windowRadius) {
00864 lower = j;
00865 } else {
00866 upper = j;
00867 }
00868 }
00869 const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
00870
00871 #else
00872
00873 register plint *gli = goodglist;
00874 const plint *glist = ( groupfixed ? fixglist : grouplist );
00875 SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
00876 const int gu = ( groupfixed ? fixg_upper : g_upper );
00877 register int g = PAIR(0) SELF(gl);
00878
00879 #endif
00880
00881
00882 if ( g < gu ) {
00883 int hu = 0;
00884 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
00885 if ( gu - g > 6 ) {
00886
00887 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00888 register SortEntry* sortEntry0 = sortValues + g;
00889 register SortEntry* sortEntry1 = sortValues + g + 1;
00890 register int jprev0 = sortEntry0->index;
00891 register int jprev1 = sortEntry1->index;
00892 #else
00893 register int jprev0 = glist[g ];
00894 register int jprev1 = glist[g + 1];
00895 #endif
00896
00897 register int j0;
00898 register int j1;
00899
00900 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00901 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00902 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00903
00904
00905 const __m128d P_I_X = _mm_set1_pd(p_i_x);
00906 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
00907 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
00908
00909 g += 2;
00910 for ( ; g < gu - 2; g +=2 ) {
00911
00912 j0 = jprev0;
00913 j1 = jprev1;
00914
00915 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
00916 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
00917 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
00918 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00919 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
00920 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
00921
00922 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00923 sortEntry0 = sortValues + g;
00924 sortEntry1 = sortValues + g + 1;
00925 jprev0 = sortEntry0->index;
00926 jprev1 = sortEntry1->index;
00927 #else
00928 jprev0 = glist[g ];
00929 jprev1 = glist[g+1];
00930 #endif
00931
00932 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
00933 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
00934 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
00935
00936 __align(16) double r2_01[2];
00937 _mm_store_pd(r2_01, R2_01);
00938
00939
00940 bool test0 = ( r2_01[0] < groupplcutoff2 );
00941 bool test1 = ( r2_01[1] < groupplcutoff2 );
00942
00943
00944
00945 goodglist [ hu ] = j0;
00946 goodglist [ hu + test0 ] = j1;
00947
00948 hu += test0 + test1;
00949 }
00950 g-=2;
00951 }
00952 #else
00953 if ( gu - g > 6 ) {
00954
00955 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
00956 register SortEntry* sortEntry0 = sortValues + g;
00957 register SortEntry* sortEntry1 = sortValues + g + 1;
00958 register int jprev0 = sortEntry0->index;
00959 register int jprev1 = sortEntry1->index;
00960 #else
00961 register int jprev0 = glist[g ];
00962 register int jprev1 = glist[g + 1];
00963 #endif
00964
00965 register int j0;
00966 register int j1;
00967
00968 register BigReal pj_x_0, pj_x_1;
00969 register BigReal pj_y_0, pj_y_1;
00970 register BigReal pj_z_0, pj_z_1;
00971 register BigReal t_0, t_1, r2_0, r2_1;
00972
00973 pj_x_0 = p_1[jprev0].position.x;
00974 pj_x_1 = p_1[jprev1].position.x;
00975
00976 pj_y_0 = p_1[jprev0].position.y;
00977 pj_y_1 = p_1[jprev1].position.y;
00978
00979 pj_z_0 = p_1[jprev0].position.z;
00980 pj_z_1 = p_1[jprev1].position.z;
00981
00982 g += 2;
00983 for ( ; g < gu - 2; g +=2 ) {
00984
00985 j0 = jprev0;
00986 j1 = jprev1;
00987
00988 t_0 = p_i_x - pj_x_0;
00989 t_1 = p_i_x - pj_x_1;
00990 r2_0 = t_0 * t_0;
00991 r2_1 = t_1 * t_1;
00992
00993 t_0 = p_i_y - pj_y_0;
00994 t_1 = p_i_y - pj_y_1;
00995 r2_0 += t_0 * t_0;
00996 r2_1 += t_1 * t_1;
00997
00998 t_0 = p_i_z - pj_z_0;
00999 t_1 = p_i_z - pj_z_1;
01000 r2_0 += t_0 * t_0;
01001 r2_1 += t_1 * t_1;
01002
01003 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01004 sortEntry0 = sortValues + g;
01005 sortEntry1 = sortValues + g + 1;
01006 jprev0 = sortEntry0->index;
01007 jprev1 = sortEntry1->index;
01008 #else
01009 jprev0 = glist[g ];
01010 jprev1 = glist[g+1];
01011 #endif
01012
01013 pj_x_0 = p_1[jprev0].position.x;
01014 pj_x_1 = p_1[jprev1].position.x;
01015 pj_y_0 = p_1[jprev0].position.y;
01016 pj_y_1 = p_1[jprev1].position.y;
01017 pj_z_0 = p_1[jprev0].position.z;
01018 pj_z_1 = p_1[jprev1].position.z;
01019
01020 bool test0 = ( r2_0 < groupplcutoff2 );
01021 bool test1 = ( r2_1 < groupplcutoff2 );
01022
01023
01024
01025 goodglist [ hu ] = j0;
01026 goodglist [ hu + test0 ] = j1;
01027
01028 hu += test0 + test1;
01029 }
01030 g-=2;
01031 }
01032 #endif
01033
01034 for (; g < gu; g++) {
01035
01036 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
01037 register SortEntry* sortEntry = sortValues + g;
01038 register int j = sortEntry->index;
01039 #else
01040 int j = glist[g];
01041 #endif
01042
01043 BigReal p_j_x = p_1[j].position.x;
01044 BigReal p_j_y = p_1[j].position.y;
01045 BigReal p_j_z = p_1[j].position.z;
01046
01047 BigReal r2 = p_i_x - p_j_x;
01048 r2 *= r2;
01049 BigReal t2 = p_i_y - p_j_y;
01050 r2 += t2 * t2;
01051 t2 = p_i_z - p_j_z;
01052 r2 += t2 * t2;
01053
01054 if ( r2 <= groupplcutoff2 )
01055 goodglist[hu ++] = j;
01056 }
01057
01058 for ( int h=0; h<hu; ++h ) {
01059 int j = goodglist[h];
01060 int nbgs = p_1[j].nonbondedGroupSize;
01061 pli[0] = j;
01062 pli[1] = j+1;
01063 pli[2] = j+2;
01064 pli[3] = j+3;
01065 pli += nbgs;
01066 }
01067 }
01068 }
01069
01070 pairlistindex = pli - pairlist;
01071
01072 if ( pairlistindex ) {
01073 pairlist[pairlistindex] = pairlist[pairlistindex-1];
01074 }
01075
01076
01077
01078
01079 }
01080 SELF
01081 (
01082
01083
01084 else pairlistoffset++;
01085 )
01086
01087 const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
01088
01089 register plint *pli = pairlist2;
01090 #if 1 ALCH(-1)
01091 plint *pairlistn = pairlists.newlist(j_upper + 5 + 1 + 5) SELF(+ 1);
01092 #else
01093 plint *pairlistn = pairlistnAlch;
01094 #endif
01095 SELF( plint &pairlistn_skip = *(pairlistn-1); )
01096 register plint *plin = pairlistn;
01097
01098
01099 INT(
01100 if ( pairInteractionOn ) {
01101 const int ifep_type = p_i.partition;
01102 if (pairInteractionSelf) {
01103 if (ifep_type != 1) continue;
01104 for (int k=pairlistoffset; k<pairlistindex; k++) {
01105 j = pairlist[k];
01106 const int jfep_type = p_1[j].partition;
01107
01108 if (jfep_type == 1) {
01109 *(pli++) = j;
01110 }
01111 }
01112 } else {
01113 if (ifep_type != 1 && ifep_type != 2) continue;
01114 for (int k=pairlistoffset; k<pairlistindex; k++) {
01115 j = pairlist[k];
01116 const int jfep_type = p_1[j].partition;
01117
01118 if (ifep_type + jfep_type == 3) {
01119 *(pli++) = j;
01120 }
01121 }
01122 }
01123 int npair2_int = pli - pairlist2;
01124 pli = pairlist2;
01125 for (int k=0; k<npair2_int; k++) {
01126 j = pairlist2[k];
01127 BigReal p_j_x = p_1[j].position.x;
01128 BigReal r2 = p_i_x - p_j_x;
01129 r2 *= r2;
01130 BigReal p_j_y = p_1[j].position.y;
01131 BigReal t2 = p_i_y - p_j_y;
01132 r2 += t2 * t2;
01133 BigReal p_j_z = p_1[j].position.z;
01134 t2 = p_i_z - p_j_z;
01135 r2 += t2 * t2;
01136 if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
01137 int atom2 = pExt_1[j].id;
01138 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01139 else *(plin++) = j;
01140 }
01141 }
01142 } else
01143 )
01144 if ( atomfixed ) {
01145 for (int k=pairlistoffset; k<pairlistindex; k++) {
01146 j = pairlist[k];
01147 BigReal p_j_x = p_1[j].position.x;
01148 BigReal r2 = p_i_x - p_j_x;
01149 r2 *= r2;
01150 BigReal p_j_y = p_1[j].position.y;
01151 BigReal t2 = p_i_y - p_j_y;
01152 r2 += t2 * t2;
01153 BigReal p_j_z = p_1[j].position.z;
01154 t2 = p_i_z - p_j_z;
01155 r2 += t2 * t2;
01156 if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
01157 int atom2 = pExt_1[j].id;
01158 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
01159 else *(plin++) = j;
01160 }
01161 }
01162 } else {
01163 int k = pairlistoffset;
01164 int ku = pairlistindex;
01165 if ( k < ku ) {
01166 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
01167 if ( ku - k > 6 ) {
01168 register int jprev0 = pairlist [k ];
01169 register int jprev1 = pairlist [k + 1];
01170
01171 register int j0;
01172 register int j1;
01173
01174 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01175 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01176 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01177
01178
01179 const __m128d P_I_X = _mm_set1_pd(p_i_x);
01180 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
01181 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
01182
01183 int atom2_0 = pExt_1[jprev0].id;
01184 int atom2_1 = pExt_1[jprev1].id;
01185
01186 k += 2;
01187 for ( ; k < ku - 2; k +=2 ) {
01188
01189 j0 = jprev0;
01190 j1 = jprev1;
01191
01192 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
01193 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
01194 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
01195 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01196 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
01197 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
01198
01199 jprev0 = pairlist[k];
01200 jprev1 = pairlist[k+1];
01201
01202 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
01203 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
01204 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
01205
01206 __align(16) double r2_01[2];
01207 _mm_store_pd(r2_01, R2_01);
01208
01209 if (r2_01[0] <= plcutoff2) {
01210 if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
01211 *(pli++) = j0;
01212 else
01213 *(plin++) = j0;
01214 }
01215 atom2_0 = pExt_1[jprev0].id;
01216
01217 if (r2_01[1] <= plcutoff2) {
01218 if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
01219 *(pli++) = j1;
01220 else
01221 *(plin++) = j1;
01222 }
01223 atom2_1 = pExt_1[jprev1].id;
01224 }
01225 k-=2;
01226 }
01227 #else
01228 if ( ku - k > 6 ) {
01229 register int jprev0 = pairlist [k];
01230 register int jprev1 = pairlist [k + 1];
01231
01232 register int j0;
01233 register int j1;
01234
01235 register BigReal pj_x_0, pj_x_1;
01236 register BigReal pj_y_0, pj_y_1;
01237 register BigReal pj_z_0, pj_z_1;
01238 register BigReal t_0, t_1, r2_0, r2_1;
01239
01240 pj_x_0 = p_1[jprev0].position.x;
01241 pj_x_1 = p_1[jprev1].position.x;
01242
01243 pj_y_0 = p_1[jprev0].position.y;
01244 pj_y_1 = p_1[jprev1].position.y;
01245
01246 pj_z_0 = p_1[jprev0].position.z;
01247 pj_z_1 = p_1[jprev1].position.z;
01248
01249 int atom2_0 = pExt_1[jprev0].id;
01250 int atom2_1 = pExt_1[jprev1].id;
01251
01252 k += 2;
01253 for ( ; k < ku - 2; k +=2 ) {
01254
01255 j0 = jprev0;
01256 j1 = jprev1;
01257
01258 t_0 = p_i_x - pj_x_0;
01259 t_1 = p_i_x - pj_x_1;
01260 r2_0 = t_0 * t_0;
01261 r2_1 = t_1 * t_1;
01262
01263 t_0 = p_i_y - pj_y_0;
01264 t_1 = p_i_y - pj_y_1;
01265 r2_0 += t_0 * t_0;
01266 r2_1 += t_1 * t_1;
01267
01268 t_0 = p_i_z - pj_z_0;
01269 t_1 = p_i_z - pj_z_1;
01270 r2_0 += t_0 * t_0;
01271 r2_1 += t_1 * t_1;
01272
01273 jprev0 = pairlist[k];
01274 jprev1 = pairlist[k+1];
01275
01276 pj_x_0 = p_1[jprev0].position.x;
01277 pj_x_1 = p_1[jprev1].position.x;
01278 pj_y_0 = p_1[jprev0].position.y;
01279 pj_y_1 = p_1[jprev1].position.y;
01280 pj_z_0 = p_1[jprev0].position.z;
01281 pj_z_1 = p_1[jprev1].position.z;
01282
01283 if (r2_0 <= plcutoff2) {
01284 if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
01285 *(pli++) = j0;
01286 else
01287 *(plin++) = j0;
01288 }
01289 atom2_0 = pExt_1[jprev0].id;
01290
01291 if (r2_1 <= plcutoff2) {
01292 if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
01293 *(pli++) = j1;
01294 else
01295 *(plin++) = j1;
01296 }
01297 atom2_1 = pExt_1[jprev1].id;
01298 }
01299 k-=2;
01300 }
01301 #endif
01302
01303 for (; k < ku; k++) {
01304 int j = pairlist[k];
01305 int atom2 = pExt_1[j].id;
01306
01307 BigReal p_j_x = p_1[j].position.x;
01308 BigReal p_j_y = p_1[j].position.y;
01309 BigReal p_j_z = p_1[j].position.z;
01310
01311 BigReal r2 = p_i_x - p_j_x;
01312 r2 *= r2;
01313 BigReal t2 = p_i_y - p_j_y;
01314 r2 += t2 * t2;
01315 t2 = p_i_z - p_j_z;
01316 r2 += t2 * t2;
01317
01318 if (r2 <= plcutoff2) {
01319 if ( atom2 >= excl_min && atom2 <= excl_max )
01320 *(pli++) = j;
01321 else
01322 *(plin++) = j;
01323 }
01324 }
01325 }
01326 }
01327 int npair2 = pli - pairlist2;
01328 if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
01329
01330 plint *plix = pairlistx;
01331 plint *plim = pairlistm;
01332 plint *pln = pairlistn;
01333 ALCH(
01334 plint *plinA1 = pairlistnA1;
01335 plint *plixA1 = pairlistxA1;
01336 plint *plimA1 = pairlistmA1;
01337 plint *plinA2 = pairlistnA2;
01338 plint *plixA2 = pairlistxA2;
01339 plint *plimA2 = pairlistmA2;
01340 )
01341 int k=0;
01342 #if 1 ALCH(-1)
01343 SELF(
01344 for (; pln < plin && *pln < j_hgroup; ++pln) {
01345 *(plix++) = *pln;
01346 }
01347 pairlistn_skip = pln - pairlistn;
01348 for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01349 *(plix++) = pairlist2[k];
01350 }
01351 )
01352 #endif
01353 ALCH(
01354 SELF(
01355 for (; pln < plin && *pln < j_hgroup; ++pln) {
01356 switch (pswitchTable[4*p_i_partition]) {
01357 case 0: *(plix++) = *pln; break;
01358 case 1: *(plixA1++) = *pln; break;
01359 case 2: *(plixA2++) = *pln; break;
01360 default: NAMD_die("Alchemical pairlist error"); break;
01361 }
01362 }
01363 for (; k < npair2 && pairlist2[k] < j_hgroup; ++k) {
01364 switch (pswitchTable[4*p_i_partition]) {
01365 case 0: *(plix++) = pairlist2[k]; break;
01366 case 1: *(plixA1++) = pairlist2[k]; break;
01367 case 2: *(plixA2++) = pairlist2[k]; break;
01368 default: NAMD_die("Alchemical pairlist error"); break;
01369 }
01370 }
01371 )
01372 )
01373 for (; k < npair2; ++k ) {
01374 int j = pairlist2[k];
01375 int atom2 = pExt_1[j].id;
01376 int excl_flag = excl_flags[atom2];
01377 ALCH(int pswitch = pswitchTable[p_i_partition + 3*(p_1[j].partition)];)
01378 switch ( excl_flag ALCH( + 3 * pswitch)) {
01379 case 0: *(plin++) = j; break;
01380 case 1: *(plix++) = j; break;
01381 case 2: *(plim++) = j; break;
01382 ALCH(
01383 case 3: *(plinA1++) = j; break;
01384 case 6: *(plinA2++) = j; break;
01385 case 5: *(plimA1++) = j; break;
01386 case 8: *(plimA2++) = j; break;
01387 case 4: *(plixA1++) = j; break;
01388 case 7: *(plixA2++) = j; break;
01389 )
01390 }
01391 }
01392
01393
01394
01395
01396 #if 1 ALCH(-1)
01397 npairn = plin - pln;
01398 pairlistn_save = pln;
01399 pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
01400 pairlists.newsize(plin - pairlistn SELF(+ 1) + 1);
01401 #else
01402 plint *plinA0 = pairlistnA0;
01403 int unsortedNpairn = plin - pln;
01404 for ( k=0; k<unsortedNpairn; ++k ) {
01405 int j = pln[k];
01406 switch(pswitchTable[p_i_partition + 3*(p_1[j].partition)]) {
01407 case 0: *(plinA0++) = j; break;
01408 case 1: *(plinA1++) = j; break;
01409 case 2: *(plinA2++) = j; break;
01410 }
01411 }
01412
01413 npairn = plinA0 - pairlistnA0;
01414
01415 pairlistn_save = pairlists.newlist(j_upper + 30);
01416 for ( k=0; k<npairn; ++k ) {
01417 pairlistn_save[k] = pairlistnA0[k];
01418 }
01419 pairlistn_save[k] = k ? pairlistn_save[k-1] : -1;
01420 pairlists.newsize(npairn + 1);
01421
01422 #endif
01423
01424 npairx = plix - pairlistx;
01425 pairlistx_save = pairlists.newlist(npairx + 1);
01426 for ( k=0; k<npairx; ++k ) {
01427 pairlistx_save[k] = pairlistx[k];
01428 }
01429 pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
01430 pairlists.newsize(npairx + 1);
01431
01432 npairm = plim - pairlistm;
01433 pairlistm_save = pairlists.newlist(npairm + 1);
01434 for ( k=0; k<npairm; ++k ) {
01435 pairlistm_save[k] = pairlistm[k];
01436 }
01437 pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
01438 pairlists.newsize(npairm + 1);
01439
01440
01441 #if 0 ALCH(+1)
01442 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
01443 { \
01444 (NPAIRS) = (PL2) - (PL1); \
01445 (PLSAVE) = pairlists.newlist((NPAIRS) + 1); \
01446 for ( k=0; k<(NPAIRS); ++k ) { \
01447 (PLSAVE)[k] = (PL1)[k]; \
01448 } \
01449 (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
01450 pairlists.newsize((NPAIRS) + 1); \
01451 }
01452
01453 PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
01454 PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
01455 PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
01456 PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
01457 PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
01458 PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
01459 #undef PAIRLISTFROMARRAY
01460
01461 #endif
01462
01463
01464
01465 } else {
01466
01467
01468 pairlists.nextlist(&pairlistn_save,&npairn); --npairn;
01469
01470
01471 #if 1 ALCH(-1)
01472 SELF(
01473 int pairlistn_skip = *pairlistn_save;
01474 pairlistn_save += (pairlistn_skip + 1);
01475 npairn -= (pairlistn_skip + 1);
01476 )
01477 #endif
01478
01479 pairlists.nextlist(&pairlistx_save,&npairx); --npairx;
01480
01481
01482
01483
01484 pairlists.nextlist(&pairlistm_save,&npairm); --npairm;
01485 ALCH(
01486 pairlists.nextlist(&pairlistnA1_save,&npairnA1); --npairnA1;
01487 pairlists.nextlist(&pairlistxA1_save,&npairxA1); --npairxA1;
01488 pairlists.nextlist(&pairlistmA1_save,&npairmA1); --npairmA1;
01489 pairlists.nextlist(&pairlistnA2_save,&npairnA2); --npairnA2;
01490 pairlists.nextlist(&pairlistxA2_save,&npairxA2); --npairxA2;
01491 pairlists.nextlist(&pairlistmA2_save,&npairmA2); --npairmA2;
01492 )
01493
01494
01495
01496
01497 }
01498
01499 LES( BigReal *lambda_table_i =
01500 lambda_table + (lesFactor+1) * p_i.partition; )
01501
01502 INT(
01503 const BigReal force_sign = (
01504 ( pairInteractionOn && ! pairInteractionSelf ) ?
01505 ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
01506 );
01507
01508 )
01509
01510 const BigReal kq_i = COLOUMB * p_i.charge * scaling * dielectric_1;
01511 const LJTable::TableEntry * const lj_row =
01512 ljTable->table_row(p_i.vdwType);
01513
01514 SHORT( FAST( BigReal f_i_x = 0.; ) )
01515 SHORT( FAST( BigReal f_i_y = 0.; ) )
01516 SHORT( FAST( BigReal f_i_z = 0.; ) )
01517 FULL( BigReal fullf_i_x = 0.; )
01518 FULL( BigReal fullf_i_y = 0.; )
01519 FULL( BigReal fullf_i_z = 0.; )
01520
01521 int npairi;
01522 int k;
01523
01524 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01525 p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
01526 r2_delta, r2list);
01527
01528 #define NORMAL(X) X
01529 #define EXCLUDED(X)
01530 #define MODIFIED(X)
01531 #include "ComputeNonbondedBase2.h"
01532 #undef NORMAL
01533 #undef EXCLUDED
01534 #undef MODIFIED
01535
01536 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01537 p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
01538 r2_delta, r2list);
01539 exclChecksum += npairi;
01540
01541 #define NORMAL(X)
01542 #define EXCLUDED(X)
01543 #define MODIFIED(X) X
01544 #include "ComputeNonbondedBase2.h"
01545 #undef NORMAL
01546 #undef EXCLUDED
01547 #undef MODIFIED
01548
01549 #ifdef FULLELECT
01550 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01551 p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
01552 r2_delta, r2list);
01553 exclChecksum += npairi;
01554 #ifndef NAMD_CUDA
01555 SELF(
01556 for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01557 )
01558 #endif
01559
01560 #undef FAST
01561 #define FAST(X)
01562 #define NORMAL(X)
01563 #define EXCLUDED(X) X
01564 #define MODIFIED(X)
01565 #include "ComputeNonbondedBase2.h"
01566 #undef FAST
01567 #ifdef SLOWONLY
01568 #define FAST(X)
01569 #else
01570 #define FAST(X) X
01571 #endif
01572 #undef NORMAL
01573 #undef EXCLUDED
01574 #undef MODIFIED
01575 #else
01576 exclChecksum += npairx;
01577 #ifndef NAMD_CUDA
01578 SELF(
01579 for (k=0; k<npairx && pairlistx_save[k] < j_hgroup; ++k) --exclChecksum;
01580 )
01581 #endif
01582 #endif
01583
01584 #if 0 ALCH(+1) // nonbondedbase2 for alchemical-separeted pairlists
01585
01586 #undef ALCHPAIR
01587 #define ALCHPAIR(X) X
01588 #undef NOT_ALCHPAIR
01589 #define NOT_ALCHPAIR(X)
01590 BigReal myLambda; FEP(BigReal myLambda2;)
01591 BigReal myElecLambda; FEP(BigReal myElecLambda2;)
01592 BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
01593 BigReal myVdwShift; FEP(BigReal myVdwShift2;)
01594 BigReal alch_vdw_energy; BigReal alch_vdw_force;
01595 FEP(BigReal alch_vdw_energy_2;) TI(BigReal alch_vdw_dUdl;)
01596 BigReal shiftedElec; BigReal shiftedElecForce;
01597
01598
01599
01600
01601 #define NORMAL(X) X
01602 #define EXCLUDED(X)
01603 #define MODIFIED(X)
01604
01605 #define ALCH1(X) X
01606 #define ALCH2(X)
01607 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01608 p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
01609 r2_delta, r2list);
01610 #include "ComputeNonbondedBase2.h"
01611 #undef ALCH1
01612 #undef ALCH2
01613
01614 #define ALCH1(X)
01615 #define ALCH2(X) X
01616 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01617 p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
01618 r2_delta, r2list);
01619 #include "ComputeNonbondedBase2.h"
01620 #undef ALCH1
01621 #undef ALCH2
01622
01623 #undef NORMAL
01624 #undef EXCLUDED
01625 #undef MODIFIED
01626
01627
01628
01629
01630 #define NORMAL(X)
01631 #define EXCLUDED(X)
01632 #define MODIFIED(X) X
01633
01634 #define ALCH1(X) X
01635 #define ALCH2(X)
01636 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01637 p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
01638 r2_delta, r2list);
01639 exclChecksum += npairi;
01640 #include "ComputeNonbondedBase2.h"
01641 #undef ALCH1
01642 #undef ALCH2
01643
01644 #define ALCH1(X)
01645 #define ALCH2(X) X
01646 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01647 p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
01648 r2_delta, r2list);
01649 exclChecksum += npairi;
01650 #include "ComputeNonbondedBase2.h"
01651 #undef ALCH1
01652 #undef ALCH2
01653
01654 #undef NORMAL
01655 #undef EXCLUDED
01656 #undef MODIFIED
01657
01658
01659
01660
01661 #ifdef FULLELECT
01662 #undef FAST
01663 #define FAST(X)
01664 #define NORMAL(X)
01665 #define EXCLUDED(X) X
01666 #define MODIFIED(X)
01667
01668 #define ALCH1(X) X
01669 #define ALCH2(X)
01670 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01671 p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
01672 r2_delta, r2list);
01673 exclChecksum += npairi;
01674 SELF(
01675 for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01676 )
01677 #include "ComputeNonbondedBase2.h"
01678 #undef ALCH1
01679 #undef ALCH2
01680
01681 #define ALCH1(X)
01682 #define ALCH2(X) X
01683 npairi = pairlist_from_pairlist(ComputeNonbondedUtil::cutoff2,
01684 p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
01685 r2_delta, r2list);
01686 exclChecksum += npairi;
01687 SELF(
01688 for (k=0; k<npairi && pairlisti[k] < j_hgroup; ++k) --exclChecksum;
01689 )
01690 #include "ComputeNonbondedBase2.h"
01691 #undef ALCH1
01692 #undef ALCH2
01693
01694 #undef FAST
01695 #ifdef SLOWONLY
01696 #define FAST(X)
01697 #else
01698 #define FAST(X) X
01699 #endif
01700 #undef NORMAL
01701 #undef EXCLUDED
01702 #undef MODIFIED
01703 #else
01704 exclChecksum += npairxA1 + npairxA2;
01705 SELF(
01706 for (k=0; k<npairxA1 && pairlistxA1_save[k] < j_hgroup; ++k) --exclChecksum;
01707 for (k=0; k<npairxA2 && pairlistxA2_save[k] < j_hgroup; ++k) --exclChecksum;
01708 )
01709 #endif
01710
01711 #undef ALCHPAIR
01712 #define ALCHPAIR(X)
01713 #undef NOT_ALCHPAIR
01714 #define NOT_ALCHPAIR(X) X
01715
01716 #endif // end nonbondedbase2.h includes for alchemical pairlists
01717
01718 #ifdef NETWORK_PROGRESS
01719 CkNetworkProgress();
01720 #endif
01721
01722 #ifdef ARCH_POWERPC
01723
01724 __dcbt ((void *) &(p_0[i+1]));
01725 __prefetch_by_load ((void *)&(groupCount));
01726 #endif
01727
01728 #ifndef NAMD_CUDA
01729 SHORT( FAST( f_0[i].x += f_i_x; ) )
01730 SHORT( FAST( f_0[i].y += f_i_y; ) )
01731 SHORT( FAST( f_0[i].z += f_i_z; ) )
01732 FULL( fullf_0[i].x += fullf_i_x; )
01733 FULL( fullf_0[i].y += fullf_i_y; )
01734 FULL( fullf_0[i].z += fullf_i_z; )
01735 #endif
01736
01737
01738 }
01739
01740
01741
01742 #ifdef f_1
01743 #undef f_1
01744 #endif
01745 #if ( SHORT( FAST( 1+ ) ) 0 )
01746 #if ( SELF( 1+ ) 0 )
01747 {
01748 Force *patch_f_0 = params->ff[0];
01749
01750 #ifndef NAMD_CUDA
01751 #ifndef ARCH_POWERPC
01752 #pragma ivdep
01753 #endif
01754 for ( int i = 0; i < i_upper; ++i ) {
01755 patch_f_0[i].x += f_0[i].x;
01756 patch_f_0[i].y += f_0[i].y;
01757 patch_f_0[i].z += f_0[i].z;
01758 virial_xx += f_0[i].x * p_0[i].position.x;
01759 virial_xy += f_0[i].x * p_0[i].position.y;
01760 virial_xz += f_0[i].x * p_0[i].position.z;
01761 virial_yy += f_0[i].y * p_0[i].position.y;
01762 virial_yz += f_0[i].y * p_0[i].position.z;
01763 virial_zz += f_0[i].z * p_0[i].position.z;
01764 }
01765 #endif
01766 }
01767 #endif
01768 #endif
01769 #ifdef fullf_1
01770 #undef fullf_1
01771 #endif
01772 #if ( FULL( 1+ ) 0 )
01773 #if ( SELF( 1+ ) 0 )
01774 {
01775 Force *patch_fullf_0 = params->fullf[0];
01776 #ifndef NAMD_CUDA
01777 #ifndef ARCH_POWERPC
01778 #pragma ivdep
01779 #endif
01780 for ( int i = 0; i < i_upper; ++i ) {
01781 patch_fullf_0[i].x += fullf_0[i].x;
01782 patch_fullf_0[i].y += fullf_0[i].y;
01783 patch_fullf_0[i].z += fullf_0[i].z;
01784 fullElectVirial_xx += fullf_0[i].x * p_0[i].position.x;
01785 fullElectVirial_xy += fullf_0[i].x * p_0[i].position.y;
01786 fullElectVirial_xz += fullf_0[i].x * p_0[i].position.z;
01787 fullElectVirial_yy += fullf_0[i].y * p_0[i].position.y;
01788 fullElectVirial_yz += fullf_0[i].y * p_0[i].position.z;
01789 fullElectVirial_zz += fullf_0[i].z * p_0[i].position.z;
01790 }
01791 #endif
01792 }
01793 #endif
01794 #endif
01795
01796 #ifndef NAMD_CUDA
01797 reduction[exclChecksumIndex] += exclChecksum;
01798 #endif
01799 FAST
01800 (
01801 ENERGY( reduction[vdwEnergyIndex] += vdwEnergy; )
01802 SHORT
01803 (
01804 ENERGY( reduction[electEnergyIndex] += electEnergy; )
01805 )
01806 ALCH
01807 (
01808 TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;)
01809 TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;)
01810 FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
01811 SHORT
01812 (
01813 FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
01814 TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;)
01815 TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;)
01816 )
01817 )
01818 SHORT
01819 (
01820 reduction[virialIndex_XX] += virial_xx;
01821 reduction[virialIndex_XY] += virial_xy;
01822 reduction[virialIndex_XZ] += virial_xz;
01823 reduction[virialIndex_YX] += virial_xy;
01824 reduction[virialIndex_YY] += virial_yy;
01825 reduction[virialIndex_YZ] += virial_yz;
01826 reduction[virialIndex_ZX] += virial_xz;
01827 reduction[virialIndex_ZY] += virial_yz;
01828 reduction[virialIndex_ZZ] += virial_zz;
01829 )
01830 )
01831 FULL
01832 (
01833 ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
01834 ALCH
01835 (
01836 FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
01837 TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;)
01838 TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;)
01839 )
01840 reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
01841 reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
01842 reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
01843 reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
01844 reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
01845 reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
01846 reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
01847 reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
01848 reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
01849 )
01850
01851 delete [] excl_flags_buff;
01852
01853 }
01854