Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members

ComputeNonbondedInl.h

Go to the documentation of this file.
00001 
00007 /*
00008    Common operations for ComputeNonbonded classes
00009 */
00010 
00011 #ifndef COMPUTENONBONDEDINL_H
00012 #define COMPUTENONBONDEDINL_H
00013 
00014 #ifndef NAMD_RESTRICT
00015 #define restrict
00016 #endif
00017 
00018 #include "ComputeNonbondedUtil.h"
00019 #include "SimParameters.h"
00020 #include "Node.h"
00021 #include "Molecule.h"
00022 #include "LJTable.h"
00023 #include "ReductionMgr.h"
00024 #include "ReserveArray.h"
00025 
00026 #include "PressureProfile.h"
00027 
00028 inline int pairlist_from_pairlist(BigReal cutoff2,
00029                                   BigReal p_i_x, BigReal p_i_y, BigReal p_i_z,
00030                                   const CompAtom *p_j,
00031                                   const plint *list, int list_size, plint *newlist,
00032                                   BigReal r2_delta, BigReal *r2list) {
00033   
00034   BigReal cutoff2_delta = cutoff2 + r2_delta;
00035   plint *nli = newlist;
00036   BigReal *r2i = r2list;
00037 
00038   if ( list_size <= 0) return 0;
00039 
00040   int g = 0;
00041 
00042 #ifndef SIMPLE_PAIRLIST
00043   //***************************************************************
00044   //* 4-way unrolled and software-pipelined 
00045   //***************************************************************
00046 
00047   int jout = 0;
00048   if ( list_size > 16) {
00049     // prefetch
00050     int jcur0 = list[g];
00051     int jcur1 = list[g + 1];
00052     int jcur2 = list[g + 2];
00053     int jcur3 = list[g + 3];
00054     
00055     int j0, j1, j2, j3;
00056     
00057     register  BigReal pj_x_0, pj_x_1, pj_x_2, pj_x_3; 
00058     register  BigReal pj_y_0, pj_y_1, pj_y_2, pj_y_3; 
00059     register  BigReal pj_z_0, pj_z_1, pj_z_2, pj_z_3; 
00060     
00061     register BigReal t_0, t_1, t_2, t_3, r2_0, r2_1, r2_2, r2_3;
00062     
00063     pj_x_0 = p_j[jcur0].position.x;
00064     pj_x_1 = p_j[jcur1].position.x;  
00065     pj_x_2 = p_j[jcur2].position.x;  
00066     pj_x_3 = p_j[jcur3].position.x;  
00067     pj_y_0 = p_j[jcur0].position.y; 
00068     pj_y_1 = p_j[jcur1].position.y;  
00069     pj_y_2 = p_j[jcur2].position.y;  
00070     pj_y_3 = p_j[jcur3].position.y;  
00071     pj_z_0 = p_j[jcur0].position.z; 
00072     pj_z_1 = p_j[jcur1].position.z;
00073     pj_z_2 = p_j[jcur2].position.z; 
00074     pj_z_3 = p_j[jcur3].position.z;
00075     
00076     for ( g = 4 ; g < list_size - 4; g += 4 ) {
00077       // compute 1d distance, 4-way parallel
00078       
00079       //Save the previous iterations values, gives more flexibility 
00080       //to the compiler to schedule the loads and the computation
00081       j0   =   jcur0;           j1   =   jcur1;
00082       j2   =   jcur2;           j3   =   jcur3;
00083       
00084       jcur0  =  list[g    ];    jcur1  =  list[g + 1];
00085       jcur2  =  list[g + 2];    jcur3  =  list[g + 3];
00086 
00087 #ifdef ARCH_POWERPC
00088       __dcbt ((void *) &p_j[jcur0]);
00089 #endif      
00090 
00091       //Compute X distance
00092       t_0   =  p_i_x - pj_x_0;   t_1   =  p_i_x - pj_x_1;
00093       t_2   =  p_i_x - pj_x_2;   t_3   =  p_i_x - pj_x_3;
00094       
00095       r2_0  =  t_0 * t_0 + r2_delta; 
00096       r2_1  =  t_1 * t_1 + r2_delta;
00097       r2_2  =  t_2 * t_2 + r2_delta;
00098       r2_3  =  t_3 * t_3 + r2_delta;
00099       
00100       //Compute y distance
00101       t_0    =  p_i_y - pj_y_0;   t_1    =  p_i_y - pj_y_1;
00102       t_2    =  p_i_y - pj_y_2;   t_3    =  p_i_y - pj_y_3;
00103       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
00104       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
00105       
00106       //compute z distance
00107       t_0    =  p_i_z - pj_z_0;   t_1    =  p_i_z - pj_z_1;
00108       t_2    =  p_i_z - pj_z_2;   t_3    =  p_i_z - pj_z_3;
00109       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
00110       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
00111       
00112       pj_x_0 = p_j[jcur0].position.x;
00113       pj_x_1 = p_j[jcur1].position.x;  
00114       pj_x_2 = p_j[jcur2].position.x;  
00115       pj_x_3 = p_j[jcur3].position.x;  
00116       pj_y_0 = p_j[jcur0].position.y; 
00117       pj_y_1 = p_j[jcur1].position.y;  
00118       pj_y_2 = p_j[jcur2].position.y;  
00119       pj_y_3 = p_j[jcur3].position.y;  
00120       pj_z_0 = p_j[jcur0].position.z; 
00121       pj_z_1 = p_j[jcur1].position.z;
00122       pj_z_2 = p_j[jcur2].position.z; 
00123       pj_z_3 = p_j[jcur3].position.z;
00124       
00125       bool test0, test1, test2, test3;
00126       
00127       test0 = ( r2_0   <   cutoff2_delta );
00128       test1 = ( r2_1   <   cutoff2_delta );
00129       test2 = ( r2_2   <   cutoff2_delta );
00130       test3 = ( r2_3   <   cutoff2_delta );
00131       
00132       int jout0, jout1, jout2, jout3;
00133 
00134       jout0 = jout;
00135       nli[ jout0 ]  = j0;         r2i[ jout0 ] = r2_0;
00136       jout += test0;              jout1 = jout;
00137       nli[ jout1 ]  = j1;         r2i[ jout1 ] = r2_1;
00138       jout += test1;              jout2 = jout;
00139       nli[ jout2 ]  = j2;         r2i[ jout2 ] = r2_2;
00140       jout += test2;              jout3 = jout;
00141       nli[ jout3 ]  = j3;         r2i[ jout3 ] = r2_3;
00142 
00143       jout += test3;
00144     }
00145     g -= 4;
00146   }
00147 
00148   nli += jout;
00149   r2i += jout;  
00150 #endif
00151 
00152   int j2 = list[g];
00153   BigReal p_j_x = p_j[j2].position.x;
00154   BigReal p_j_y = p_j[j2].position.y;
00155   BigReal p_j_z = p_j[j2].position.z;
00156   while ( g < list_size ) {
00157     int j = j2;
00158     j2 = list[++g];
00159     BigReal t2 = p_i_x - p_j_x;
00160     BigReal r2 = t2 * t2 + r2_delta;
00161     p_j_x = p_j[j2].position.x;
00162     t2 = p_i_y - p_j_y;
00163     r2 += t2 * t2;
00164     p_j_y = p_j[j2].position.y;
00165     t2 = p_i_z - p_j_z;
00166     r2 += t2 * t2;
00167     p_j_z = p_j[j2].position.z;
00168     if ( r2 <= cutoff2_delta ) {
00169       *nli= j; ++nli;
00170       *r2i = r2; ++r2i;
00171     }
00172   }
00173 
00174   return nli - newlist;
00175 }
00176 
00177 // clear all
00178 // define interaction type (pair or self)
00179 #define NBPAIR  1
00180 #define NBSELF  2
00181 
00182 
00183 
00184 // Various atom sorting functions
00185 #if NAMD_ComputeNonbonded_SortAtoms != 0
00186 
00187 inline void sortEntries_selectionSort(SortEntry * const se, const int seLen) {
00188 
00189   register int i;
00190 
00191   for (i = 0; i < seLen; i++) {
00192 
00193     // Search through the remaining elements, finding the lowest
00194     //   value, and then swap it with the first remaining element.
00195     //   Start by assuming the first element is the smallest.
00196     register int smallestIndex = i;
00197     register BigReal smallestValue = se[i].sortValue;
00198     register int j;
00199     for (j = i + 1; j < seLen; j++) {
00200       register BigReal currentValue = se[j].sortValue;
00201       if (currentValue < smallestValue) {
00202         smallestIndex = j;
00203         smallestValue = currentValue;
00204       }
00205     }
00206 
00207     // Swap the first remaining element with the smallest element
00208     if (smallestIndex != i) {
00209       register SortEntry* entryA = se + i;
00210       register SortEntry* entryB = se + smallestIndex;
00211       register unsigned int tmpIndex = entryA->index;
00212       register BigReal tmpSortValue = entryA->sortValue;
00213       entryA->index = entryB->index;
00214       entryA->sortValue = entryB->sortValue;
00215       entryB->index = tmpIndex;
00216       entryB->sortValue = tmpSortValue;
00217     }
00218   }
00219 }
00220 
00221 inline void sortEntries_bubbleSort(SortEntry * const se, const int seLen) {
00222 
00223   register int keepSorting = 0;
00224 
00225   do {
00226 
00227     // Reset the keepSorting flag (assume no swaps will occur)
00228     keepSorting = 0;
00229 
00230     // Loop through the pairs and swap if needed
00231     register SortEntry* sortEntry1 = se;
00232     for (int i = 1; i < seLen; i++) {
00233 
00234       register SortEntry* sortEntry0 = sortEntry1;
00235       sortEntry1 = se + i;
00236       register BigReal sortEntry0_sortValue = sortEntry0->sortValue;
00237       register BigReal sortEntry1_sortValue = sortEntry1->sortValue;
00238 
00239       if (sortEntry0_sortValue > sortEntry1_sortValue) {
00240         register int sortEntry0_index = sortEntry0->index;
00241         register int sortEntry1_index = sortEntry1->index;
00242         sortEntry0->index = sortEntry1_index;
00243         sortEntry0->sortValue = sortEntry1_sortValue;
00244         sortEntry1->index = sortEntry0_index;
00245         sortEntry1->sortValue = sortEntry0_sortValue;
00246         keepSorting = 1;
00247       }
00248     }
00249 
00250   } while (keepSorting != 0);  // Loop again if at least one set of
00251                                //   elements was swapped.
00252 }
00253 
00254 // NOTE: The buf parameter should point to a buffer that this function
00255 //   can use as a temp storage location (scratch pad).
00256 // NOTE: This function may swap the values of se and buf, but will not
00257 //   return any other value (nor will it set either to NULL).
00258 inline void sortEntries_mergeSort_v1(SortEntry * &se, SortEntry * &buf, int seLen) {
00259 
00260   register SortEntry* srcArray = se;
00261   register SortEntry* dstArray = buf;
00262 
00263   // Start with each element being a separate list.  Start
00264   //   merging the "lists" into larger lists.
00265   register int subListSize = 1;
00266   while (subListSize < seLen) {
00267 
00268     // NOTE: This iteration consumes sublists of length
00269     //   subListSize and produces sublists of length
00270     //   (2*subListSize).  So, keep looping while the length of a
00271     //   single sorted sublist is not the size of the entire array.
00272 
00273     // Iterate through the lists, merging each consecutive pair of lists.
00274     register int firstListOffset = 0;
00275     while (firstListOffset < seLen) {
00276 
00278 
00279       register int numElements = min(2 * subListSize, seLen - firstListOffset);
00280       register int list0len;
00281       register int list1len;
00282       if (numElements > subListSize) {
00283         list0len = subListSize;                // First list full
00284         list1len = numElements - subListSize;  // 1+ elements in second list
00285       } else {
00286         list0len = numElements;                // 1+ elements in first list
00287         list1len = 0;                          // Zero elements in second list
00288       }
00289 
00290       register SortEntry* list0ptr = srcArray + firstListOffset;
00291       register SortEntry* list1ptr = list0ptr + subListSize;
00292       register SortEntry* dstptr = dstArray + firstListOffset;
00293 
00295 
00296       // While there are elements in both lists, pick from one
00297       while (list0len > 0 && list1len > 0) {
00298 
00299         register BigReal sortValue0 = list0ptr->sortValue;
00300         register BigReal sortValue1 = list1ptr->sortValue;
00301 
00302         if (sortValue0 < sortValue1) {  // choose first list (list0)
00303 
00304           // Copy the value from srcArray to dstArray
00305           register int index0 = list0ptr->index;
00306           dstptr->sortValue = sortValue0;
00307           dstptr->index = index0;
00308 
00309           // Move the pointers forward for the sublists
00310           dstptr++;
00311           list0ptr++;
00312           list0len--;
00313 
00314         } else {                        // choose second list (list1)
00315 
00316           // Copy the value from srcArray to dstArray
00317           register int index1 = list1ptr->index;
00318           dstptr->sortValue = sortValue1;
00319           dstptr->index = index1;
00320 
00321           // Move the pointers forward for the sublists
00322           dstptr++;
00323           list1ptr++;
00324           list1len--;
00325         }
00326 
00327       } // end while (list0len > 0 && list1len > 0)
00328 
00329       // NOTE: Either list0len or list1len is zero at this point
00330       //   so only one of the following loops should execute.
00331 
00332       // Drain remaining elements from the first list (list0)
00333       while (list0len > 0) {
00334 
00335         // Copy the value from srcArray to dstArray
00336         register BigReal sortValue0 = list0ptr->sortValue;
00337         register int index0 = list0ptr->index;
00338         dstptr->sortValue = sortValue0;
00339         dstptr->index = index0;
00340 
00341         // Move the pointers forward for the sublists
00342         dstptr++;
00343         list0ptr++;
00344         list0len--;
00345 
00346       } // end while (list0len > 0)
00347 
00348       // Drain remaining elements from the first list (list1)
00349       while (list1len > 0) {
00350 
00351         // Copy the value from srcArray to dstArray
00352         register BigReal sortValue1 = list1ptr->sortValue;
00353         register int index1 = list1ptr->index;
00354         dstptr->sortValue = sortValue1;
00355         dstptr->index = index1;
00356 
00357         // Move the pointers forward for the sublists
00358         dstptr++;
00359         list1ptr++;
00360         list1len--;
00361 
00362       } // end while (list1len > 0)
00363 
00364       // Move forward to the next pair of sub-lists
00365       firstListOffset += (2 * subListSize);
00366 
00367     } // end while (firstListOffset < seLen) {
00368 
00369     // Swap the dstArray and srcArray pointers
00370     register SortEntry* tmpPtr = dstArray;
00371     dstArray = srcArray;
00372     srcArray = tmpPtr;
00373 
00374     // Double the subListSize
00375     subListSize <<= 1;
00376 
00377   }  // end while (subListSize < seLen)
00378 
00379   // Set the sort values pointers (NOTE: srcArray and dstArray are
00380   //   swapped at the end of each iteration of the merge sort outer-loop).
00381   buf = dstArray;
00382   se = srcArray;
00383 }
00384 
00385 // NOTE: The buf parameter should point to a buffer that this function
00386 //   can use as a temp storage location (scratch pad).
00387 // NOTE: This function may swap the values of se and buf, but will not
00388 //   return any other value (nor will it set either to NULL).
00389 inline void sortEntries_mergeSort_v2(SortEntry * &se, SortEntry * &buf, int seLen) {
00390 
00391   // NOTE: This macro "returns" either val0 (if test == 0) or val1 (if
00392   // test == 1).  It expects test to be either 0 or 1 (no other values).
00393   #define __TERNARY_ASSIGN(test, val0, val1)   ((test * val0) + ((1 - test) * val1))
00394 
00395   register SortEntry* srcArray = se;
00396   register SortEntry* dstArray = buf;
00397 
00398   // Start with each element being a separate list.  Start
00399   //   merging the "lists" into larger lists.
00400   register int subListSize = 1;
00401   while (subListSize < seLen) {
00402 
00403     // NOTE: This iteration consumes sublists of length
00404     //   subListSize and produces sublists of length
00405     //   (2*subListSize).  So, keep looping while the length of a
00406     //   single sorted sublist is not the size of the entire array.
00407 
00408     // Iterate through the lists, merging each consecutive pair of lists.
00409     register int firstListOffset = 0;
00410     while (firstListOffset < seLen) {
00411 
00413 
00414       // Calculate the number of elements for both sublists...
00415       //   min(2 * subListSize, seLen - firstListOffset);
00416       register int numElements;
00417       {
00418         register int numElements_val0 = 2 * subListSize;
00419         register int numElements_val1 = seLen - firstListOffset;
00420         register bool numElements_test = (numElements_val0 < numElements_val1);
00421         numElements = __TERNARY_ASSIGN(numElements_test, numElements_val0, numElements_val1);
00422       }
00423 
00424       // Setup the pointers for the source and destination arrays
00425       register SortEntry* dstptr = dstArray + firstListOffset;    // destination array pointer
00426       register SortEntry* list0ptr = srcArray + firstListOffset;  // source list 0 pointer
00427       register SortEntry* list1ptr = list0ptr + subListSize;      // source list 1 pointer
00428       register SortEntry* list0ptr_end;  // pointer to end of source list0's elements (element after last)
00429       register SortEntry* list1ptr_end;  // pointer to end of source list1's elements (element after last)
00430       {
00431         register bool lenTest = (numElements > subListSize);
00432         register int list0len_val0 = subListSize;
00433         register int list1len_val0 = numElements - subListSize;
00434         register int list0len_val1 = numElements;  // NOTE: list1len_val1 = 0
00435         register int list0len = __TERNARY_ASSIGN(lenTest, list0len_val0, list0len_val1);
00436         register int list1len = __TERNARY_ASSIGN(lenTest, list1len_val0, 0);
00437         list0ptr_end = list0ptr + list0len;
00438         list1ptr_end = list1ptr + list1len;
00439       }
00440 
00441       // The firstListOffset variable won't be used again until the next
00442       //   iteration, so go ahead and update it now...
00443       //   Move forward to the next pair of sub-lists
00444       firstListOffset += (2 * subListSize);
00445 
00447 
00448       // Pre-load values from both source arrays
00449       register BigReal sortValue0 = list0ptr->sortValue;
00450       register BigReal sortValue1 = list1ptr->sortValue;
00451       register int index0 = list0ptr->index;
00452       register int index1 = list1ptr->index;
00453 
00454       // While both lists have at least one element in them, compare the
00455       //   heads of each list and place the smaller of the two in the
00456       //   destination array.
00457       while (list0ptr < list0ptr_end && list1ptr < list1ptr_end) {
00458 
00459         // Compare the values
00460         register bool test = (sortValue0 < sortValue1);
00461 
00462         // Place the "winner" in the destination array
00463         dstptr->sortValue = __TERNARY_ASSIGN(test, sortValue0, sortValue1);
00464         dstptr->index = __TERNARY_ASSIGN(test, index0, index1);
00465         dstptr++;
00466 
00467         // Update the pointers
00468         list0ptr += __TERNARY_ASSIGN(test, 1, 0);
00469         list1ptr += __TERNARY_ASSIGN(test, 0, 1);
00470 
00471         // Refill the sortValue and index register
00472         // NOTE: These memory locations are likely to be in cache
00473         sortValue0 = list0ptr->sortValue;
00474         sortValue1 = list1ptr->sortValue;
00475         index0 = list0ptr->index;
00476         index1 = list1ptr->index;
00477 
00478       } // end while (list0ptr < list0ptr_end && list1ptr < list1ptr_end)
00479 
00480       // NOTE: At this point, at least one of the lists is empty so no
00481       //   more than one of the loops will be executed.
00482 
00483       // Drain the remaining elements from list0
00484       while (list0ptr < list0ptr_end) {
00485 
00486         // Place the value into the destination array
00487         dstptr->sortValue = sortValue0;
00488         dstptr->index = index0;
00489         dstptr++;
00490 
00491         // Load the next entry in list0
00492         list0ptr++;
00493         sortValue0 = list0ptr->sortValue;
00494         index0 = list0ptr->index;
00495 
00496       } // end while (list0ptr < list0ptr_end)
00497 
00498       // Drain the remaining elements from list1
00499       while (list1ptr < list1ptr_end) {
00500 
00501         // Place the value into the destination array
00502         dstptr->sortValue = sortValue1;
00503         dstptr->index = index1;
00504         dstptr++;
00505 
00506         // Load the next entry in list1
00507         list1ptr++;
00508         sortValue1 = list1ptr->sortValue;
00509         index1 = list1ptr->index;
00510 
00511       } // end while (list1ptr < list1ptr_end)
00512 
00513     } // end while (firstListOffset < seLen) {
00514 
00515     // Swap the dstArray and srcArray pointers
00516     register SortEntry* tmpPtr = dstArray;
00517     dstArray = srcArray;
00518     srcArray = tmpPtr;
00519 
00520     // Double the subListSize
00521     subListSize <<= 1;
00522 
00523   }  // end while (subListSize < seLen)
00524 
00525   // Set the sort values pointers (NOTE: srcArray and dstArray are
00526   //   swapped at the end of each iteration of the merge sort outer-loop).
00527   buf = dstArray;
00528   se = srcArray;
00529 
00530   #undef __TERNARY_ASSIGN
00531 }
00532 
00533 #endif  // NAMD_ComputeNonbonded_SortAtoms != 0
00534 
00535 
00536 #endif // COMPUTENONBONDEDINL_H
00537 

Generated on Tue Nov 24 04:07:43 2009 for NAMD by  doxygen 1.3.9.1