Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedInl.h

Go to the documentation of this file.
00001 
00007 /*
00008    Common operations for ComputeNonbonded classes
00009 */
00010 
00011 #ifndef COMPUTENONBONDEDINL_H
00012 #define COMPUTENONBONDEDINL_H
00013 
00014 #ifndef NAMD_RESTRICT
00015 #define restrict
00016 #endif
00017 
00018 #include "ComputeNonbondedUtil.h"
00019 #include "SimParameters.h"
00020 #include "Node.h"
00021 #include "Molecule.h"
00022 #include "LJTable.h"
00023 #include "ReductionMgr.h"
00024 #include "ReserveArray.h"
00025 
00026 #include "PressureProfile.h"
00027 
00028 // BEGIN LA
00029 #include "Random.h"
00030 // END LA
00031 
00032 inline int pairlist_from_pairlist(BigReal cutoff2,
00033                                   BigReal p_i_x, BigReal p_i_y, BigReal p_i_z,
00034                                   const CompAtom *p_j,
00035                                   const plint *list, int list_size, plint *newlist,
00036                                   BigReal r2_delta, BigReal *r2list) {
00037   
00038   BigReal cutoff2_delta = cutoff2 + r2_delta;
00039   plint *nli = newlist;
00040   BigReal *r2i = r2list;
00041 
00042   if ( list_size <= 0) return 0;
00043 
00044   int g = 0;
00045 
00046 #ifndef SIMPLE_PAIRLIST
00047   //***************************************************************
00048   //* 4-way unrolled and software-pipelined 
00049   //***************************************************************
00050 
00051   int jout = 0;
00052   if ( list_size > 16) {
00053     // prefetch
00054     int jcur0 = list[g];
00055     int jcur1 = list[g + 1];
00056     int jcur2 = list[g + 2];
00057     int jcur3 = list[g + 3];
00058     
00059     int j0, j1, j2, j3;
00060     
00061     register  BigReal pj_x_0, pj_x_1, pj_x_2, pj_x_3; 
00062     register  BigReal pj_y_0, pj_y_1, pj_y_2, pj_y_3; 
00063     register  BigReal pj_z_0, pj_z_1, pj_z_2, pj_z_3; 
00064     
00065     register BigReal t_0, t_1, t_2, t_3, r2_0, r2_1, r2_2, r2_3;
00066     
00067     pj_x_0 = p_j[jcur0].position.x;
00068     pj_x_1 = p_j[jcur1].position.x;  
00069     pj_x_2 = p_j[jcur2].position.x;  
00070     pj_x_3 = p_j[jcur3].position.x;  
00071     pj_y_0 = p_j[jcur0].position.y; 
00072     pj_y_1 = p_j[jcur1].position.y;  
00073     pj_y_2 = p_j[jcur2].position.y;  
00074     pj_y_3 = p_j[jcur3].position.y;  
00075     pj_z_0 = p_j[jcur0].position.z; 
00076     pj_z_1 = p_j[jcur1].position.z;
00077     pj_z_2 = p_j[jcur2].position.z; 
00078     pj_z_3 = p_j[jcur3].position.z;
00079     
00080     for ( g = 4 ; g < list_size - 4; g += 4 ) {
00081       // compute 1d distance, 4-way parallel
00082       
00083       //Save the previous iterations values, gives more flexibility 
00084       //to the compiler to schedule the loads and the computation
00085       j0   =   jcur0;           j1   =   jcur1;
00086       j2   =   jcur2;           j3   =   jcur3;
00087       
00088       jcur0  =  list[g    ];    jcur1  =  list[g + 1];
00089       jcur2  =  list[g + 2];    jcur3  =  list[g + 3];
00090 
00091 #ifdef ARCH_POWERPC
00092       __dcbt ((void *) &p_j[jcur0]);
00093 #endif      
00094 
00095       //Compute X distance
00096       t_0   =  p_i_x - pj_x_0;   t_1   =  p_i_x - pj_x_1;
00097       t_2   =  p_i_x - pj_x_2;   t_3   =  p_i_x - pj_x_3;
00098       
00099       r2_0  =  t_0 * t_0 + r2_delta; 
00100       r2_1  =  t_1 * t_1 + r2_delta;
00101       r2_2  =  t_2 * t_2 + r2_delta;
00102       r2_3  =  t_3 * t_3 + r2_delta;
00103       
00104       //Compute y distance
00105       t_0    =  p_i_y - pj_y_0;   t_1    =  p_i_y - pj_y_1;
00106       t_2    =  p_i_y - pj_y_2;   t_3    =  p_i_y - pj_y_3;
00107       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
00108       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
00109       
00110       //compute z distance
00111       t_0    =  p_i_z - pj_z_0;   t_1    =  p_i_z - pj_z_1;
00112       t_2    =  p_i_z - pj_z_2;   t_3    =  p_i_z - pj_z_3;
00113       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
00114       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
00115       
00116       pj_x_0 = p_j[jcur0].position.x;
00117       pj_x_1 = p_j[jcur1].position.x;  
00118       pj_x_2 = p_j[jcur2].position.x;  
00119       pj_x_3 = p_j[jcur3].position.x;  
00120       pj_y_0 = p_j[jcur0].position.y; 
00121       pj_y_1 = p_j[jcur1].position.y;  
00122       pj_y_2 = p_j[jcur2].position.y;  
00123       pj_y_3 = p_j[jcur3].position.y;  
00124       pj_z_0 = p_j[jcur0].position.z; 
00125       pj_z_1 = p_j[jcur1].position.z;
00126       pj_z_2 = p_j[jcur2].position.z; 
00127       pj_z_3 = p_j[jcur3].position.z;
00128       
00129       bool test0, test1, test2, test3;
00130       
00131       test0 = ( r2_0   <   cutoff2_delta );
00132       test1 = ( r2_1   <   cutoff2_delta );
00133       test2 = ( r2_2   <   cutoff2_delta );
00134       test3 = ( r2_3   <   cutoff2_delta );
00135       
00136       int jout0, jout1, jout2, jout3;
00137 
00138       jout0 = jout;
00139       nli[ jout0 ]  = j0;         r2i[ jout0 ] = r2_0;
00140       jout += test0;              jout1 = jout;
00141       nli[ jout1 ]  = j1;         r2i[ jout1 ] = r2_1;
00142       jout += test1;              jout2 = jout;
00143       nli[ jout2 ]  = j2;         r2i[ jout2 ] = r2_2;
00144       jout += test2;              jout3 = jout;
00145       nli[ jout3 ]  = j3;         r2i[ jout3 ] = r2_3;
00146 
00147       jout += test3;
00148     }
00149     g -= 4;
00150   }
00151 
00152   nli += jout;
00153   r2i += jout;  
00154 #endif
00155 
00156   int j2 = list[g];
00157   BigReal p_j_x = p_j[j2].position.x;
00158   BigReal p_j_y = p_j[j2].position.y;
00159   BigReal p_j_z = p_j[j2].position.z;
00160   while ( g < list_size ) {
00161     int j = j2;
00162     j2 = list[++g];
00163     BigReal t2 = p_i_x - p_j_x;
00164     BigReal r2 = t2 * t2 + r2_delta;
00165     p_j_x = p_j[j2].position.x;
00166     t2 = p_i_y - p_j_y;
00167     r2 += t2 * t2;
00168     p_j_y = p_j[j2].position.y;
00169     t2 = p_i_z - p_j_z;
00170     r2 += t2 * t2;
00171     p_j_z = p_j[j2].position.z;
00172     if ( r2 <= cutoff2_delta ) {
00173       *nli= j; ++nli;
00174       *r2i = r2; ++r2i;
00175     }
00176   }
00177 
00178   return nli - newlist;
00179 }
00180 
00181 // clear all
00182 // define interaction type (pair or self)
00183 #define NBPAIR  1
00184 #define NBSELF  2
00185 
00186 
00187 
00188 // Various atom sorting functions
00189 #if NAMD_ComputeNonbonded_SortAtoms != 0
00190 
00191 inline void sortEntries_selectionSort(SortEntry * const se, const int seLen) {
00192 
00193   register int i;
00194 
00195   for (i = 0; i < seLen; i++) {
00196 
00197     // Search through the remaining elements, finding the lowest
00198     //   value, and then swap it with the first remaining element.
00199     //   Start by assuming the first element is the smallest.
00200     register int smallestIndex = i;
00201     register BigReal smallestValue = se[i].sortValue;
00202     register int j;
00203     for (j = i + 1; j < seLen; j++) {
00204       register BigReal currentValue = se[j].sortValue;
00205       if (currentValue < smallestValue) {
00206         smallestIndex = j;
00207         smallestValue = currentValue;
00208       }
00209     }
00210 
00211     // Swap the first remaining element with the smallest element
00212     if (smallestIndex != i) {
00213       register SortEntry* entryA = se + i;
00214       register SortEntry* entryB = se + smallestIndex;
00215       register unsigned int tmpIndex = entryA->index;
00216       register BigReal tmpSortValue = entryA->sortValue;
00217       entryA->index = entryB->index;
00218       entryA->sortValue = entryB->sortValue;
00219       entryB->index = tmpIndex;
00220       entryB->sortValue = tmpSortValue;
00221     }
00222   }
00223 }
00224 
00225 inline void sortEntries_bubbleSort(SortEntry * const se, const int seLen) {
00226 
00227   register int keepSorting = 0;
00228 
00229   do {
00230 
00231     // Reset the keepSorting flag (assume no swaps will occur)
00232     keepSorting = 0;
00233 
00234     // Loop through the pairs and swap if needed
00235     register SortEntry* sortEntry1 = se;
00236     for (int i = 1; i < seLen; i++) {
00237 
00238       register SortEntry* sortEntry0 = sortEntry1;
00239       sortEntry1 = se + i;
00240       register BigReal sortEntry0_sortValue = sortEntry0->sortValue;
00241       register BigReal sortEntry1_sortValue = sortEntry1->sortValue;
00242 
00243       if (sortEntry0_sortValue > sortEntry1_sortValue) {
00244         register int sortEntry0_index = sortEntry0->index;
00245         register int sortEntry1_index = sortEntry1->index;
00246         sortEntry0->index = sortEntry1_index;
00247         sortEntry0->sortValue = sortEntry1_sortValue;
00248         sortEntry1->index = sortEntry0_index;
00249         sortEntry1->sortValue = sortEntry0_sortValue;
00250         keepSorting = 1;
00251       }
00252     }
00253 
00254   } while (keepSorting != 0);  // Loop again if at least one set of
00255                                //   elements was swapped.
00256 }
00257 
00258 // NOTE: The buf parameter should point to a buffer that this function
00259 //   can use as a temp storage location (scratch pad).
00260 // NOTE: This function may swap the values of se and buf, but will not
00261 //   return any other value (nor will it set either to NULL).
00262 inline void sortEntries_mergeSort_v1(SortEntry * &se, SortEntry * &buf, int seLen) {
00263 
00264   register SortEntry* srcArray = se;
00265   register SortEntry* dstArray = buf;
00266 
00267   // Start with each element being a separate list.  Start
00268   //   merging the "lists" into larger lists.
00269   register int subListSize = 1;
00270   while (subListSize < seLen) {
00271 
00272     // NOTE: This iteration consumes sublists of length
00273     //   subListSize and produces sublists of length
00274     //   (2*subListSize).  So, keep looping while the length of a
00275     //   single sorted sublist is not the size of the entire array.
00276 
00277     // Iterate through the lists, merging each consecutive pair of lists.
00278     register int firstListOffset = 0;
00279     while (firstListOffset < seLen) {
00280 
00282 
00283       register int numElements = std::min(2 * subListSize, seLen - firstListOffset);
00284       register int list0len;
00285       register int list1len;
00286       if (numElements > subListSize) {
00287         list0len = subListSize;                // First list full
00288         list1len = numElements - subListSize;  // 1+ elements in second list
00289       } else {
00290         list0len = numElements;                // 1+ elements in first list
00291         list1len = 0;                          // Zero elements in second list
00292       }
00293 
00294       register SortEntry* list0ptr = srcArray + firstListOffset;
00295       register SortEntry* list1ptr = list0ptr + subListSize;
00296       register SortEntry* dstptr = dstArray + firstListOffset;
00297 
00299 
00300       // While there are elements in both lists, pick from one
00301       while (list0len > 0 && list1len > 0) {
00302 
00303         register BigReal sortValue0 = list0ptr->sortValue;
00304         register BigReal sortValue1 = list1ptr->sortValue;
00305 
00306         if (sortValue0 < sortValue1) {  // choose first list (list0)
00307 
00308           // Copy the value from srcArray to dstArray
00309           register int index0 = list0ptr->index;
00310           dstptr->sortValue = sortValue0;
00311           dstptr->index = index0;
00312 
00313           // Move the pointers forward for the sublists
00314           dstptr++;
00315           list0ptr++;
00316           list0len--;
00317 
00318         } else {                        // choose second list (list1)
00319 
00320           // Copy the value from srcArray to dstArray
00321           register int index1 = list1ptr->index;
00322           dstptr->sortValue = sortValue1;
00323           dstptr->index = index1;
00324 
00325           // Move the pointers forward for the sublists
00326           dstptr++;
00327           list1ptr++;
00328           list1len--;
00329         }
00330 
00331       } // end while (list0len > 0 && list1len > 0)
00332 
00333       // NOTE: Either list0len or list1len is zero at this point
00334       //   so only one of the following loops should execute.
00335 
00336       // Drain remaining elements from the first list (list0)
00337       while (list0len > 0) {
00338 
00339         // Copy the value from srcArray to dstArray
00340         register BigReal sortValue0 = list0ptr->sortValue;
00341         register int index0 = list0ptr->index;
00342         dstptr->sortValue = sortValue0;
00343         dstptr->index = index0;
00344 
00345         // Move the pointers forward for the sublists
00346         dstptr++;
00347         list0ptr++;
00348         list0len--;
00349 
00350       } // end while (list0len > 0)
00351 
00352       // Drain remaining elements from the first list (list1)
00353       while (list1len > 0) {
00354 
00355         // Copy the value from srcArray to dstArray
00356         register BigReal sortValue1 = list1ptr->sortValue;
00357         register int index1 = list1ptr->index;
00358         dstptr->sortValue = sortValue1;
00359         dstptr->index = index1;
00360 
00361         // Move the pointers forward for the sublists
00362         dstptr++;
00363         list1ptr++;
00364         list1len--;
00365 
00366       } // end while (list1len > 0)
00367 
00368       // Move forward to the next pair of sub-lists
00369       firstListOffset += (2 * subListSize);
00370 
00371     } // end while (firstListOffset < seLen) {
00372 
00373     // Swap the dstArray and srcArray pointers
00374     register SortEntry* tmpPtr = dstArray;
00375     dstArray = srcArray;
00376     srcArray = tmpPtr;
00377 
00378     // Double the subListSize
00379     subListSize <<= 1;
00380 
00381   }  // end while (subListSize < seLen)
00382 
00383   // Set the sort values pointers (NOTE: srcArray and dstArray are
00384   //   swapped at the end of each iteration of the merge sort outer-loop).
00385   buf = dstArray;
00386   se = srcArray;
00387 }
00388 
00389 // NOTE: The buf parameter should point to a buffer that this function
00390 //   can use as a temp storage location (scratch pad).
00391 // NOTE: This function may swap the values of se and buf, but will not
00392 //   return any other value (nor will it set either to NULL).
00393 inline void sortEntries_mergeSort_v2(SortEntry * &se, SortEntry * &buf, int seLen) {
00394 
00395   // NOTE: This macro "returns" either val0 (if test == 0) or val1 (if
00396   // test == 1).  It expects test to be either 0 or 1 (no other values).
00397   #define __TERNARY_ASSIGN(test, val0, val1)   ((test * val0) + ((1 - test) * val1))
00398 
00399   register SortEntry* srcArray = se;
00400   register SortEntry* dstArray = buf;
00401 
00402   // Start with each element being a separate list.  Start
00403   //   merging the "lists" into larger lists.
00404   register int subListSize = 1;
00405   while (subListSize < seLen) {
00406 
00407     // NOTE: This iteration consumes sublists of length
00408     //   subListSize and produces sublists of length
00409     //   (2*subListSize).  So, keep looping while the length of a
00410     //   single sorted sublist is not the size of the entire array.
00411 
00412     // Iterate through the lists, merging each consecutive pair of lists.
00413     register int firstListOffset = 0;
00414     while (firstListOffset < seLen) {
00415 
00417 
00418       // Calculate the number of elements for both sublists...
00419       //   min(2 * subListSize, seLen - firstListOffset);
00420       register int numElements;
00421       {
00422         register int numElements_val0 = 2 * subListSize;
00423         register int numElements_val1 = seLen - firstListOffset;
00424         register bool numElements_test = (numElements_val0 < numElements_val1);
00425         numElements = __TERNARY_ASSIGN(numElements_test, numElements_val0, numElements_val1);
00426       }
00427 
00428       // Setup the pointers for the source and destination arrays
00429       register SortEntry* dstptr = dstArray + firstListOffset;    // destination array pointer
00430       register SortEntry* list0ptr = srcArray + firstListOffset;  // source list 0 pointer
00431       register SortEntry* list1ptr = list0ptr + subListSize;      // source list 1 pointer
00432       register SortEntry* list0ptr_end;  // pointer to end of source list0's elements (element after last)
00433       register SortEntry* list1ptr_end;  // pointer to end of source list1's elements (element after last)
00434       {
00435         register bool lenTest = (numElements > subListSize);
00436         register int list0len_val0 = subListSize;
00437         register int list1len_val0 = numElements - subListSize;
00438         register int list0len_val1 = numElements;  // NOTE: list1len_val1 = 0
00439         register int list0len = __TERNARY_ASSIGN(lenTest, list0len_val0, list0len_val1);
00440         register int list1len = __TERNARY_ASSIGN(lenTest, list1len_val0, 0);
00441         // avoid pre-load of sortValue1 from past end of array
00442         if ( ! lenTest ) list1ptr = list0ptr;
00443         list0ptr_end = list0ptr + list0len;
00444         list1ptr_end = list1ptr + list1len;
00445       }
00446 
00447       // The firstListOffset variable won't be used again until the next
00448       //   iteration, so go ahead and update it now...
00449       //   Move forward to the next pair of sub-lists
00450       firstListOffset += (2 * subListSize);
00451 
00453 
00454       // Pre-load values from both source arrays
00455       register BigReal sortValue0 = list0ptr->sortValue;
00456       register BigReal sortValue1 = list1ptr->sortValue;
00457       register int index0 = list0ptr->index;
00458       register int index1 = list1ptr->index;
00459 
00460       // While both lists have at least one element in them, compare the
00461       //   heads of each list and place the smaller of the two in the
00462       //   destination array.
00463       while (list0ptr < list0ptr_end && list1ptr < list1ptr_end) {
00464 
00465         // Compare the values
00466         register bool test = (sortValue0 < sortValue1);
00467 
00468         // Place the "winner" in the destination array
00469         dstptr->sortValue = __TERNARY_ASSIGN(test, sortValue0, sortValue1);
00470         dstptr->index = __TERNARY_ASSIGN(test, index0, index1);
00471         dstptr++;
00472 
00473         // Update the pointers
00474         list0ptr += __TERNARY_ASSIGN(test, 1, 0);
00475         list1ptr += __TERNARY_ASSIGN(test, 0, 1);
00476 
00477         // Refill the sortValue and index register
00478         // NOTE: These memory locations are likely to be in cache
00479         sortValue0 = list0ptr->sortValue;
00480         sortValue1 = list1ptr->sortValue;
00481         index0 = list0ptr->index;
00482         index1 = list1ptr->index;
00483 
00484       } // end while (list0ptr < list0ptr_end && list1ptr < list1ptr_end)
00485 
00486       // NOTE: At this point, at least one of the lists is empty so no
00487       //   more than one of the loops will be executed.
00488 
00489       // Drain the remaining elements from list0
00490       while (list0ptr < list0ptr_end) {
00491 
00492         // Place the value into the destination array
00493         dstptr->sortValue = sortValue0;
00494         dstptr->index = index0;
00495         dstptr++;
00496 
00497         // Load the next entry in list0
00498         list0ptr++;
00499         sortValue0 = list0ptr->sortValue;
00500         index0 = list0ptr->index;
00501 
00502       } // end while (list0ptr < list0ptr_end)
00503 
00504       // Drain the remaining elements from list1
00505       while (list1ptr < list1ptr_end) {
00506 
00507         // Place the value into the destination array
00508         dstptr->sortValue = sortValue1;
00509         dstptr->index = index1;
00510         dstptr++;
00511 
00512         // Load the next entry in list1
00513         list1ptr++;
00514         sortValue1 = list1ptr->sortValue;
00515         index1 = list1ptr->index;
00516 
00517       } // end while (list1ptr < list1ptr_end)
00518 
00519     } // end while (firstListOffset < seLen) {
00520 
00521     // Swap the dstArray and srcArray pointers
00522     register SortEntry* tmpPtr = dstArray;
00523     dstArray = srcArray;
00524     srcArray = tmpPtr;
00525 
00526     // Double the subListSize
00527     subListSize <<= 1;
00528 
00529   }  // end while (subListSize < seLen)
00530 
00531   // Set the sort values pointers (NOTE: srcArray and dstArray are
00532   //   swapped at the end of each iteration of the merge sort outer-loop).
00533   buf = dstArray;
00534   se = srcArray;
00535 
00536   #undef __TERNARY_ASSIGN
00537 }
00538 
00539 #endif  // NAMD_ComputeNonbonded_SortAtoms != 0
00540 
00541 
00542 #endif // COMPUTENONBONDEDINL_H
00543 

Generated on Fri May 25 04:07:14 2012 for NAMD by  doxygen 1.3.9.1