ComputeNonbondedInl.h

Go to the documentation of this file.
00001 
00007 /*
00008    Common operations for ComputeNonbonded classes
00009 */
00010 
00011 #ifndef COMPUTENONBONDEDINL_H
00012 #define COMPUTENONBONDEDINL_H
00013 
00014 #ifndef NAMD_RESTRICT
00015 #define restrict
00016 #endif
00017 
00018 #include "ComputeNonbondedUtil.h"
00019 #include "SimParameters.h"
00020 #include "Molecule.h"
00021 #include "LJTable.h"
00022 #include "ReductionMgr.h"
00023 #include "ReserveArray.h"
00024 
00025 #include "PressureProfile.h"
00026 
00027 // BEGIN LA
00028 #include "Random.h"
00029 // END LA
00030 
00031 #include <algorithm>
00032 
00033 #ifdef NAMD_KNL
00034 inline int pairlist_from_pairlist_knl(float cutoff2,
00035                                   float p_i_x, float p_i_y, float p_i_z,
00036                                   const CompAtomFlt *p_j,
00037                                   const plint *list, int list_size, int *newlist,
00038                                   float *r2list, float *xlist, float *ylist, float *zlist) {
00039 
00040   int *nli = newlist;
00041   float *r2i = r2list;
00042   float *xli = xlist;
00043   float *yli = ylist;
00044   float *zli = zlist;
00045 
00046   if ( list_size <= 0) return 0;
00047 
00048   int g = 0;
00049 
00050   float p_j_x, p_j_y, p_j_z;
00051   float x2, y2, z2, r2;
00052 #pragma vector aligned
00053 #pragma ivdep
00054   for ( g = 0 ; g < list_size; ++g ) {
00055     int gi=list[g];
00056     p_j_x = p_j[ gi ].position.x;
00057     p_j_y = p_j[ gi ].position.y;
00058     p_j_z = p_j[ gi ].position.z;
00059 
00060     x2 = p_i_x - p_j_x;
00061     r2 = x2 * x2;
00062     y2 = p_i_y - p_j_y;
00063     r2 += y2 * y2;
00064     z2 = p_i_z - p_j_z;
00065     r2 += z2 * z2;
00066 
00067     if ( r2 <= cutoff2 ) {
00068       *nli = gi;  ++nli;
00069       *r2i = r2;  ++r2i;
00070       *xli = x2;  ++xli;
00071       *yli = y2;  ++yli;
00072       *zli = z2;  ++zli;
00073     }
00074   }
00075 
00076   return nli - newlist;
00077 }
00078 #endif// NAMD_KNL
00079 
00080 inline int pairlist_from_pairlist(BigReal cutoff2,
00081                                   BigReal p_i_x, BigReal p_i_y, BigReal p_i_z,
00082                                   const CompAtom *p_j,
00083                                   const plint *list, int list_size, int *newlist,
00084                                   BigReal r2_delta, BigReal *r2list) {
00085   
00086   BigReal cutoff2_delta = cutoff2 + r2_delta;
00087   int *nli = newlist;
00088   BigReal *r2i = r2list;
00089 
00090   if ( list_size <= 0) return 0;
00091 
00092   int g = 0;
00093 
00094 #ifdef NAMD_KNL
00095 
00096   BigReal p_j_x, p_j_y, p_j_z;
00097   BigReal x2, y2, z2, r2;
00098 #pragma vector aligned
00099 #pragma ivdep
00100   for ( g = 0 ; g < list_size; ++g ) {
00101     int gi=list[g];
00102     p_j_x = p_j[ gi ].position.x;
00103     p_j_y = p_j[ gi ].position.y;
00104     p_j_z = p_j[ gi ].position.z;
00105 
00106     x2 = p_i_x - p_j_x;
00107     r2 = x2 * x2 + r2_delta;
00108     y2 = p_i_y - p_j_y;
00109     r2 += y2 * y2;
00110     z2 = p_i_z - p_j_z;
00111     r2 += z2 * z2;
00112 
00113     if ( r2 <= cutoff2_delta ) {
00114       *nli = gi;  ++nli;
00115       *r2i = r2;  ++r2i;
00116     }
00117   }
00118 
00119 #else // NAMD_KNL
00120 #ifndef SIMPLE_PAIRLIST
00121 #ifdef  A2_QPX
00122   //***************************************************************
00123   //* 4-way unrolled and software-pipelined and optiized via QPX
00124   //***************************************************************  
00125 
00126   int jout = 0;
00127   if ( list_size > 16) {
00128     // prefetch
00129     int jcur0 = list[g];
00130     int jcur1 = list[g + 1];
00131     int jcur2 = list[g + 2];
00132     int jcur3 = list[g + 3];
00133     
00134     int j0, j1, j2, j3;
00135     
00136     vector4double    pj_v_0, pj_v_1, pj_v_2, pj_v_3; 
00137     vector4double    v_0, v_1, v_2, v_3;
00138     register BigReal r2_0, r2_1, r2_2, r2_3;
00139     
00140     vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.};
00141     vector4double r2_delta_v = {r2_delta};
00142 
00143     pj_v_0 = vec_ld(jcur0 * sizeof(CompAtom), (BigReal *)p_j);
00144     pj_v_1 = vec_ld(jcur1 * sizeof(CompAtom), (BigReal *)p_j);  
00145     pj_v_2 = vec_ld(jcur2 * sizeof(CompAtom), (BigReal *)p_j);  
00146     pj_v_3 = vec_ld(jcur3 * sizeof(CompAtom), (BigReal *)p_j);  
00147     
00148     for ( g = 4 ; g < list_size - 4; g += 4 ) {
00149       // compute 1d distance, 4-way parallel      
00150       //Save the previous iterations values, gives more flexibility 
00151       //to the compiler to schedule the loads and the computation
00152       j0   =   jcur0;           j1   =   jcur1;
00153       j2   =   jcur2;           j3   =   jcur3;
00154       
00155       jcur0  =  list[g    ];    jcur1  =  list[g + 1];
00156       jcur2  =  list[g + 2];    jcur3  =  list[g + 3];
00157 
00158       __dcbt((void*)(p_j + jcur0));
00159 
00160       v_0 = vec_sub (p_i_v, pj_v_0);
00161       v_1 = vec_sub (p_i_v, pj_v_1);
00162       v_2 = vec_sub (p_i_v, pj_v_2);
00163       v_3 = vec_sub (p_i_v, pj_v_3);
00164       
00165       v_0 = vec_madd (v_0, v_0, r2_delta_v);
00166       v_1 = vec_madd (v_1, v_1, r2_delta_v);
00167       v_2 = vec_madd (v_2, v_2, r2_delta_v);
00168       v_3 = vec_madd (v_3, v_3, r2_delta_v);
00169 
00170       pj_v_0 = vec_ld(jcur0 * sizeof(CompAtom), (BigReal *)p_j);
00171       pj_v_1 = vec_ld(jcur1 * sizeof(CompAtom), (BigReal *)p_j);  
00172       pj_v_2 = vec_ld(jcur2 * sizeof(CompAtom), (BigReal *)p_j);  
00173       pj_v_3 = vec_ld(jcur3 * sizeof(CompAtom), (BigReal *)p_j);  
00174 
00175       r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
00176       r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
00177       r2_2 = vec_extract(v_2, 0) + vec_extract(v_2, 1) + vec_extract(v_2, 2);
00178       r2_3 = vec_extract(v_3, 0) + vec_extract(v_3, 1) + vec_extract(v_3, 2);
00179       
00180       size_t test0, test1, test2, test3;
00181       size_t jout0, jout1, jout2, jout3;
00182       
00183       test0 = ( r2_0   <   cutoff2_delta );
00184       test1 = ( r2_1   <   cutoff2_delta );
00185       test2 = ( r2_2   <   cutoff2_delta );
00186       test3 = ( r2_3   <   cutoff2_delta );
00187       
00188       jout0 = jout;
00189       nli[ jout0 ]  = j0;         r2i[ jout0 ] = r2_0;
00190       jout += test0;              jout1 = jout;
00191 
00192       nli[ jout1 ]  = j1;         r2i[ jout1 ] = r2_1;
00193       jout += test1;              jout2 = jout;
00194 
00195       nli[ jout2 ]  = j2;         r2i[ jout2 ] = r2_2;
00196       jout += test2;              jout3 = jout;
00197 
00198       nli[ jout3 ]  = j3;         r2i[ jout3 ] = r2_3;
00199       jout += test3;
00200     }
00201     g -= 4;
00202   }
00203 
00204   nli += jout;
00205   r2i += jout;  
00206 #else
00207   //***************************************************************
00208   //* 4-way unrolled and software-pipelined 
00209   //***************************************************************
00210 
00211   int jout = 0;
00212   if ( list_size > 16) {
00213     // prefetch
00214     int jcur0 = list[g];
00215     int jcur1 = list[g + 1];
00216     int jcur2 = list[g + 2];
00217     int jcur3 = list[g + 3];
00218     
00219     int j0, j1, j2, j3;
00220     
00221     register  BigReal pj_x_0, pj_x_1, pj_x_2, pj_x_3; 
00222     register  BigReal pj_y_0, pj_y_1, pj_y_2, pj_y_3; 
00223     register  BigReal pj_z_0, pj_z_1, pj_z_2, pj_z_3; 
00224     
00225     register BigReal t_0, t_1, t_2, t_3, r2_0, r2_1, r2_2, r2_3;
00226     
00227     pj_x_0 = p_j[jcur0].position.x;
00228     pj_x_1 = p_j[jcur1].position.x;  
00229     pj_x_2 = p_j[jcur2].position.x;  
00230     pj_x_3 = p_j[jcur3].position.x;  
00231     pj_y_0 = p_j[jcur0].position.y; 
00232     pj_y_1 = p_j[jcur1].position.y;  
00233     pj_y_2 = p_j[jcur2].position.y;  
00234     pj_y_3 = p_j[jcur3].position.y;  
00235     pj_z_0 = p_j[jcur0].position.z; 
00236     pj_z_1 = p_j[jcur1].position.z;
00237     pj_z_2 = p_j[jcur2].position.z; 
00238     pj_z_3 = p_j[jcur3].position.z;
00239     
00240     for ( g = 4 ; g < list_size - 4; g += 4 ) {
00241       // compute 1d distance, 4-way parallel
00242       
00243       //Save the previous iterations values, gives more flexibility 
00244       //to the compiler to schedule the loads and the computation
00245       j0   =   jcur0;           j1   =   jcur1;
00246       j2   =   jcur2;           j3   =   jcur3;
00247       
00248       jcur0  =  list[g    ];    jcur1  =  list[g + 1];
00249       jcur2  =  list[g + 2];    jcur3  =  list[g + 3];
00250 
00251 #ifdef ARCH_POWERPC
00252       __dcbt ((void *) &p_j[jcur0]);
00253 #endif      
00254 
00255       //Compute X distance
00256       t_0   =  p_i_x - pj_x_0;   t_1   =  p_i_x - pj_x_1;
00257       t_2   =  p_i_x - pj_x_2;   t_3   =  p_i_x - pj_x_3;
00258       
00259       r2_0  =  t_0 * t_0 + r2_delta; 
00260       r2_1  =  t_1 * t_1 + r2_delta;
00261       r2_2  =  t_2 * t_2 + r2_delta;
00262       r2_3  =  t_3 * t_3 + r2_delta;
00263       
00264       //Compute y distance
00265       t_0    =  p_i_y - pj_y_0;   t_1    =  p_i_y - pj_y_1;
00266       t_2    =  p_i_y - pj_y_2;   t_3    =  p_i_y - pj_y_3;
00267       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
00268       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
00269       
00270       //compute z distance
00271       t_0    =  p_i_z - pj_z_0;   t_1    =  p_i_z - pj_z_1;
00272       t_2    =  p_i_z - pj_z_2;   t_3    =  p_i_z - pj_z_3;
00273       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
00274       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
00275       
00276       pj_x_0 = p_j[jcur0].position.x;
00277       pj_x_1 = p_j[jcur1].position.x;  
00278       pj_x_2 = p_j[jcur2].position.x;  
00279       pj_x_3 = p_j[jcur3].position.x;  
00280       pj_y_0 = p_j[jcur0].position.y; 
00281       pj_y_1 = p_j[jcur1].position.y;  
00282       pj_y_2 = p_j[jcur2].position.y;  
00283       pj_y_3 = p_j[jcur3].position.y;  
00284       pj_z_0 = p_j[jcur0].position.z; 
00285       pj_z_1 = p_j[jcur1].position.z;
00286       pj_z_2 = p_j[jcur2].position.z; 
00287       pj_z_3 = p_j[jcur3].position.z;
00288       
00289       bool test0, test1, test2, test3;
00290       
00291       test0 = ( r2_0   <   cutoff2_delta );
00292       test1 = ( r2_1   <   cutoff2_delta );
00293       test2 = ( r2_2   <   cutoff2_delta );
00294       test3 = ( r2_3   <   cutoff2_delta );
00295       
00296       int jout0, jout1, jout2, jout3;
00297 
00298       jout0 = jout;
00299       nli[ jout0 ]  = j0;         r2i[ jout0 ] = r2_0;
00300       jout += test0;              jout1 = jout;
00301       nli[ jout1 ]  = j1;         r2i[ jout1 ] = r2_1;
00302       jout += test1;              jout2 = jout;
00303       nli[ jout2 ]  = j2;         r2i[ jout2 ] = r2_2;
00304       jout += test2;              jout3 = jout;
00305       nli[ jout3 ]  = j3;         r2i[ jout3 ] = r2_3;
00306 
00307       jout += test3;
00308     }
00309     g -= 4;
00310   }
00311 
00312   nli += jout;
00313   r2i += jout;  
00314 #endif
00315 #endif
00316 
00317   int j2 = list[g];
00318   BigReal p_j_x = p_j[j2].position.x;
00319   BigReal p_j_y = p_j[j2].position.y;
00320   BigReal p_j_z = p_j[j2].position.z;
00321   while ( g < list_size ) {
00322     int j = j2;
00323     j2 = list[++g];
00324     BigReal t2 = p_i_x - p_j_x;
00325     BigReal r2 = t2 * t2 + r2_delta;
00326     p_j_x = p_j[j2].position.x;
00327     t2 = p_i_y - p_j_y;
00328     r2 += t2 * t2;
00329     p_j_y = p_j[j2].position.y;
00330     t2 = p_i_z - p_j_z;
00331     r2 += t2 * t2;
00332     p_j_z = p_j[j2].position.z;
00333     if ( r2 <= cutoff2_delta ) {
00334       *nli= j; ++nli;
00335       *r2i = r2; ++r2i;
00336     }
00337   }
00338 
00339 #endif // NAMD_KNL
00340 
00341   return nli - newlist;
00342 }
00343 
00344 // clear all
00345 // define interaction type (pair or self)
00346 #define NBPAIR  1
00347 #define NBSELF  2
00348 
00349 
00350 
00351 // Various atom sorting functions
00352 #if NAMD_ComputeNonbonded_SortAtoms != 0
00353 
00354 inline void sortEntries_selectionSort(SortEntry * const se, const int seLen) {
00355 
00356   register int i;
00357 
00358   for (i = 0; i < seLen; i++) {
00359 
00360     // Search through the remaining elements, finding the lowest
00361     //   value, and then swap it with the first remaining element.
00362     //   Start by assuming the first element is the smallest.
00363     register int smallestIndex = i;
00364     register BigReal smallestValue = se[i].sortValue;
00365     register int j;
00366     for (j = i + 1; j < seLen; j++) {
00367       register BigReal currentValue = se[j].sortValue;
00368       if (currentValue < smallestValue) {
00369         smallestIndex = j;
00370         smallestValue = currentValue;
00371       }
00372     }
00373 
00374     // Swap the first remaining element with the smallest element
00375     if (smallestIndex != i) {
00376       register SortEntry* entryA = se + i;
00377       register SortEntry* entryB = se + smallestIndex;
00378       register unsigned int tmpIndex = entryA->index;
00379       register BigReal tmpSortValue = entryA->sortValue;
00380       entryA->index = entryB->index;
00381       entryA->sortValue = entryB->sortValue;
00382       entryB->index = tmpIndex;
00383       entryB->sortValue = tmpSortValue;
00384     }
00385   }
00386 }
00387 
00388 inline void sortEntries_bubbleSort(SortEntry * const se, const int seLen) {
00389 
00390   register int keepSorting = 0;
00391 
00392   do {
00393 
00394     // Reset the keepSorting flag (assume no swaps will occur)
00395     keepSorting = 0;
00396 
00397     // Loop through the pairs and swap if needed
00398     register SortEntry* sortEntry1 = se;
00399     for (int i = 1; i < seLen; i++) {
00400 
00401       register SortEntry* sortEntry0 = sortEntry1;
00402       sortEntry1 = se + i;
00403       register BigReal sortEntry0_sortValue = sortEntry0->sortValue;
00404       register BigReal sortEntry1_sortValue = sortEntry1->sortValue;
00405 
00406       if (sortEntry0_sortValue > sortEntry1_sortValue) {
00407         register int sortEntry0_index = sortEntry0->index;
00408         register int sortEntry1_index = sortEntry1->index;
00409         sortEntry0->index = sortEntry1_index;
00410         sortEntry0->sortValue = sortEntry1_sortValue;
00411         sortEntry1->index = sortEntry0_index;
00412         sortEntry1->sortValue = sortEntry0_sortValue;
00413         keepSorting = 1;
00414       }
00415     }
00416 
00417   } while (keepSorting != 0);  // Loop again if at least one set of
00418                                //   elements was swapped.
00419 }
00420 
00421 // NOTE: The buf parameter should point to a buffer that this function
00422 //   can use as a temp storage location (scratch pad).
00423 // NOTE: This function may swap the values of se and buf, but will not
00424 //   return any other value (nor will it set either to NULL).
00425 inline void sortEntries_mergeSort_v1(SortEntry * &se, SortEntry * &buf, int seLen) {
00426 
00427   register SortEntry* srcArray = se;
00428   register SortEntry* dstArray = buf;
00429 
00430   // Start with each element being a separate list.  Start
00431   //   merging the "lists" into larger lists.
00432   register int subListSize = 1;
00433   while (subListSize < seLen) {
00434 
00435     // NOTE: This iteration consumes sublists of length
00436     //   subListSize and produces sublists of length
00437     //   (2*subListSize).  So, keep looping while the length of a
00438     //   single sorted sublist is not the size of the entire array.
00439 
00440     // Iterate through the lists, merging each consecutive pair of lists.
00441     register int firstListOffset = 0;
00442     while (firstListOffset < seLen) {
00443 
00445 
00446       register int numElements = std::min(2 * subListSize, seLen - firstListOffset);
00447       register int list0len;
00448       register int list1len;
00449       if (numElements > subListSize) {
00450         list0len = subListSize;                // First list full
00451         list1len = numElements - subListSize;  // 1+ elements in second list
00452       } else {
00453         list0len = numElements;                // 1+ elements in first list
00454         list1len = 0;                          // Zero elements in second list
00455       }
00456 
00457       register SortEntry* list0ptr = srcArray + firstListOffset;
00458       register SortEntry* list1ptr = list0ptr + subListSize;
00459       register SortEntry* dstptr = dstArray + firstListOffset;
00460 
00462 
00463       // While there are elements in both lists, pick from one
00464       while (list0len > 0 && list1len > 0) {
00465 
00466         register BigReal sortValue0 = list0ptr->sortValue;
00467         register BigReal sortValue1 = list1ptr->sortValue;
00468 
00469         if (sortValue0 < sortValue1) {  // choose first list (list0)
00470 
00471           // Copy the value from srcArray to dstArray
00472           register int index0 = list0ptr->index;
00473           dstptr->sortValue = sortValue0;
00474           dstptr->index = index0;
00475 
00476           // Move the pointers forward for the sublists
00477           dstptr++;
00478           list0ptr++;
00479           list0len--;
00480 
00481         } else {                        // choose second list (list1)
00482 
00483           // Copy the value from srcArray to dstArray
00484           register int index1 = list1ptr->index;
00485           dstptr->sortValue = sortValue1;
00486           dstptr->index = index1;
00487 
00488           // Move the pointers forward for the sublists
00489           dstptr++;
00490           list1ptr++;
00491           list1len--;
00492         }
00493 
00494       } // end while (list0len > 0 && list1len > 0)
00495 
00496       // NOTE: Either list0len or list1len is zero at this point
00497       //   so only one of the following loops should execute.
00498 
00499       // Drain remaining elements from the first list (list0)
00500       while (list0len > 0) {
00501 
00502         // Copy the value from srcArray to dstArray
00503         register BigReal sortValue0 = list0ptr->sortValue;
00504         register int index0 = list0ptr->index;
00505         dstptr->sortValue = sortValue0;
00506         dstptr->index = index0;
00507 
00508         // Move the pointers forward for the sublists
00509         dstptr++;
00510         list0ptr++;
00511         list0len--;
00512 
00513       } // end while (list0len > 0)
00514 
00515       // Drain remaining elements from the first list (list1)
00516       while (list1len > 0) {
00517 
00518         // Copy the value from srcArray to dstArray
00519         register BigReal sortValue1 = list1ptr->sortValue;
00520         register int index1 = list1ptr->index;
00521         dstptr->sortValue = sortValue1;
00522         dstptr->index = index1;
00523 
00524         // Move the pointers forward for the sublists
00525         dstptr++;
00526         list1ptr++;
00527         list1len--;
00528 
00529       } // end while (list1len > 0)
00530 
00531       // Move forward to the next pair of sub-lists
00532       firstListOffset += (2 * subListSize);
00533 
00534     } // end while (firstListOffset < seLen) {
00535 
00536     // Swap the dstArray and srcArray pointers
00537     register SortEntry* tmpPtr = dstArray;
00538     dstArray = srcArray;
00539     srcArray = tmpPtr;
00540 
00541     // Double the subListSize
00542     subListSize <<= 1;
00543 
00544   }  // end while (subListSize < seLen)
00545 
00546   // Set the sort values pointers (NOTE: srcArray and dstArray are
00547   //   swapped at the end of each iteration of the merge sort outer-loop).
00548   buf = dstArray;
00549   se = srcArray;
00550 }
00551 
00552 // NOTE: The buf parameter should point to a buffer that this function
00553 //   can use as a temp storage location (scratch pad).
00554 // NOTE: This function may swap the values of se and buf, but will not
00555 //   return any other value (nor will it set either to NULL).
00556 inline void sortEntries_mergeSort_v2(SortEntry * &se, SortEntry * &buf, int seLen) {
00557 
00558   // NOTE: This macro "returns" either val0 (if test == 0) or val1 (if
00559   // test == 1).  It expects test to be either 0 or 1 (no other values).
00560   #define __TERNARY_ASSIGN(test, val0, val1)   ((test * val0) + ((1 - test) * val1))
00561 
00562   register SortEntry* srcArray = se;
00563   register SortEntry* dstArray = buf;
00564 
00565   // Start with each element being a separate list.  Start
00566   //   merging the "lists" into larger lists.
00567   register int subListSize = 1;
00568   while (subListSize < seLen) {
00569 
00570     // NOTE: This iteration consumes sublists of length
00571     //   subListSize and produces sublists of length
00572     //   (2*subListSize).  So, keep looping while the length of a
00573     //   single sorted sublist is not the size of the entire array.
00574 
00575     // Iterate through the lists, merging each consecutive pair of lists.
00576     register int firstListOffset = 0;
00577     while (firstListOffset < seLen) {
00578 
00580 
00581       // Calculate the number of elements for both sublists...
00582       //   min(2 * subListSize, seLen - firstListOffset);
00583       register int numElements;
00584       {
00585         register int numElements_val0 = 2 * subListSize;
00586         register int numElements_val1 = seLen - firstListOffset;
00587         register bool numElements_test = (numElements_val0 < numElements_val1);
00588         numElements = __TERNARY_ASSIGN(numElements_test, numElements_val0, numElements_val1);
00589       }
00590 
00591       // Setup the pointers for the source and destination arrays
00592       register SortEntry* dstptr = dstArray + firstListOffset;    // destination array pointer
00593       register SortEntry* list0ptr = srcArray + firstListOffset;  // source list 0 pointer
00594       register SortEntry* list1ptr = list0ptr + subListSize;      // source list 1 pointer
00595       register SortEntry* list0ptr_end;  // pointer to end of source list0's elements (element after last)
00596       register SortEntry* list1ptr_end;  // pointer to end of source list1's elements (element after last)
00597       {
00598         register bool lenTest = (numElements > subListSize);
00599         register int list0len_val0 = subListSize;
00600         register int list1len_val0 = numElements - subListSize;
00601         register int list0len_val1 = numElements;  // NOTE: list1len_val1 = 0
00602         register int list0len = __TERNARY_ASSIGN(lenTest, list0len_val0, list0len_val1);
00603         register int list1len = __TERNARY_ASSIGN(lenTest, list1len_val0, 0);
00604         // avoid pre-load of sortValue1 from past end of array
00605         if ( ! lenTest ) list1ptr = list0ptr;
00606         list0ptr_end = list0ptr + list0len;
00607         list1ptr_end = list1ptr + list1len;
00608       }
00609 
00610       // The firstListOffset variable won't be used again until the next
00611       //   iteration, so go ahead and update it now...
00612       //   Move forward to the next pair of sub-lists
00613       firstListOffset += (2 * subListSize);
00614 
00616 
00617       // Pre-load values from both source arrays
00618       register BigReal sortValue0 = list0ptr->sortValue;
00619       register BigReal sortValue1 = list1ptr->sortValue;
00620       register int index0 = list0ptr->index;
00621       register int index1 = list1ptr->index;
00622 
00623       // While both lists have at least one element in them, compare the
00624       //   heads of each list and place the smaller of the two in the
00625       //   destination array.
00626       while (list0ptr < list0ptr_end && list1ptr < list1ptr_end) {
00627 
00628         // Compare the values
00629         register bool test = (sortValue0 < sortValue1);
00630 
00631         // Place the "winner" in the destination array
00632         dstptr->sortValue = __TERNARY_ASSIGN(test, sortValue0, sortValue1);
00633         dstptr->index = __TERNARY_ASSIGN(test, index0, index1);
00634         dstptr++;
00635 
00636         // Update the pointers
00637         list0ptr += __TERNARY_ASSIGN(test, 1, 0);
00638         list1ptr += __TERNARY_ASSIGN(test, 0, 1);
00639 
00640         // Refill the sortValue and index register
00641         // NOTE: These memory locations are likely to be in cache
00642         sortValue0 = list0ptr->sortValue;
00643         sortValue1 = list1ptr->sortValue;
00644         index0 = list0ptr->index;
00645         index1 = list1ptr->index;
00646 
00647       } // end while (list0ptr < list0ptr_end && list1ptr < list1ptr_end)
00648 
00649       // NOTE: At this point, at least one of the lists is empty so no
00650       //   more than one of the loops will be executed.
00651 
00652       // Drain the remaining elements from list0
00653       while (list0ptr < list0ptr_end) {
00654 
00655         // Place the value into the destination array
00656         dstptr->sortValue = sortValue0;
00657         dstptr->index = index0;
00658         dstptr++;
00659 
00660         // Load the next entry in list0
00661         list0ptr++;
00662         sortValue0 = list0ptr->sortValue;
00663         index0 = list0ptr->index;
00664 
00665       } // end while (list0ptr < list0ptr_end)
00666 
00667       // Drain the remaining elements from list1
00668       while (list1ptr < list1ptr_end) {
00669 
00670         // Place the value into the destination array
00671         dstptr->sortValue = sortValue1;
00672         dstptr->index = index1;
00673         dstptr++;
00674 
00675         // Load the next entry in list1
00676         list1ptr++;
00677         sortValue1 = list1ptr->sortValue;
00678         index1 = list1ptr->index;
00679 
00680       } // end while (list1ptr < list1ptr_end)
00681 
00682     } // end while (firstListOffset < seLen) {
00683 
00684     // Swap the dstArray and srcArray pointers
00685     register SortEntry* tmpPtr = dstArray;
00686     dstArray = srcArray;
00687     srcArray = tmpPtr;
00688 
00689     // Double the subListSize
00690     subListSize <<= 1;
00691 
00692   }  // end while (subListSize < seLen)
00693 
00694   // Set the sort values pointers (NOTE: srcArray and dstArray are
00695   //   swapped at the end of each iteration of the merge sort outer-loop).
00696   buf = dstArray;
00697   se = srcArray;
00698 
00699   #undef __TERNARY_ASSIGN
00700 }
00701 
00702 #endif  // NAMD_ComputeNonbonded_SortAtoms != 0
00703 
00704 
00705 #endif // COMPUTENONBONDEDINL_H
00706 

Generated on Tue Nov 21 01:17:12 2017 for NAMD by  doxygen 1.4.7