Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

utilities.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr                                                                       
00003  *cr            (C) Copyright 1995-2011 The Board of Trustees of the           
00004  *cr                        University of Illinois                       
00005  *cr                         All Rights Reserved                        
00006  *cr                                                                   
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: utilities.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.165 $      $Date: 2013/03/17 02:31:11 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * General utility routines and definitions.
00020  *
00021  ***************************************************************************/
00022 
00023 #define VMDUSESSE 1
00024 // #define VMDUSEAVX 1
00025 // #define VMDUSENEON 1
00026 
00027 #if defined(VMDUSESSE) && defined(__SSE2__)
00028 #include <emmintrin.h>
00029 #endif
00030 #if defined(VMDUSEAVX) && defined(__AVX__)
00031 #include <immintrin.h>
00032 #endif
00033 #if defined(VMDUSENEON) && defined(__ARM_NEON__)
00034 #include <arm_neon.h>
00035 #endif
00036 #include <string.h>
00037 #include <ctype.h>
00038 #include <math.h>
00039 #include <stdio.h>
00040 #include <stdlib.h>
00041 
00042 #if defined(_MSC_VER)
00043 #include <windows.h>
00044 #include <conio.h>
00045 #else
00046 #include <unistd.h>
00047 #include <sys/time.h>
00048 #include <errno.h>
00049 
00050 #if defined(ARCH_AIX4)
00051 #include <strings.h>
00052 #endif
00053 
00054 #if defined(__irix)
00055 #include <bstring.h>
00056 #endif
00057 
00058 #if defined(__hpux)
00059 #include <time.h>
00060 #endif // HPUX
00061 #endif // _MSC_VER
00062 
00063 #if defined(AIXUSEPERFSTAT)
00064 #include <libperfstat.h>
00065 #endif
00066 
00067 #if defined(__APPLE__)
00068 #include <sys/sysctl.h>
00069 #endif
00070 
00071 #include "utilities.h"
00072 
// Join argv[n] .. argv[argc-1] into a single newly allocated string, with
// one space between consecutive words.
//
//   argc, argv - argument count / vector, as passed to main()
//   n          - index of the first argument to include
//
// Returns a string allocated with new[] that the caller must release with
// delete [].  Returns NULL when n is out of range, argc is not positive, or
// the selected words contain no characters at all.
char *combine_arguments(int argc, const char **argv, int n) {
  if (argc <= 0 || n < 0 || n >= argc)
    return NULL;

  // total number of characters in the words to be joined
  size_t total = 0;
  for (int i = n; i < argc; i++)
    total += strlen(argv[i]);

  if (total == 0)
    return NULL;

  // words + one separator/terminator slot per word, plus the same 8 bytes
  // of slack the allocation has always carried
  char *newstr = new char[total + 8 + (size_t) (argc - n)];

  // append through a moving cursor so the buffer is never rescanned
  char *dst = newstr;
  for (int i = n; i < argc; i++) {
    if (i != n)
      *dst++ = ' ';
    const size_t len = strlen(argv[i]);
    memcpy(dst, argv[i], len);
    dst += len;
  }
  *dst = '\0';

  return newstr;
}
00100 
00101 
// Return a copy of s in storage obtained from new[]; the caller releases
// it with delete [].  A NULL argument yields NULL.
char *stringdup(const char *s) {
  if (s == NULL)
    return NULL;

  const size_t nbytes = strlen(s) + 1;  // include the terminating NUL
  char *copy = new char[nbytes];
  memcpy(copy, s, nbytes);
  return copy;
}
00114 
00115 
// Convert the string s to upper case in place and return s.
// A NULL argument is returned unchanged.
char *stringtoupper(char *s) {
  if (s == NULL)
    return s;

  for (char *p = s; *p != '\0'; p++) {
    // toupper() is only defined for EOF and unsigned char values, so
    // route plain (possibly negative) chars through unsigned char
    *p = (char) toupper((unsigned char) *p);
  }

  return s;
}
00127 
// Remove every trailing '/' character from str, in place.
// A string consisting only of slashes becomes the empty string.
// A NULL argument is ignored.
void stripslashes(char *str) {
  if (str == NULL)
    return;

  // measure once, then walk the end of the string backwards
  size_t len = strlen(str);
  while (len > 0 && str[len - 1] == '/') {
    str[--len] = '\0';
  }
}
00133 
// Compare two strings after converting each character to upper case.
//
// Returns zero when the strings match, a negative value when a sorts
// before b and a positive value when a sorts after b (strcmp() ordering).
// The comparison is done in place, without allocating temporary copies.
// A NULL pointer compares equal to another NULL and sorts before any
// non-NULL string.
int strupcmp(const char *a, const char *b) {
  if (a == NULL || b == NULL)
    return (a != NULL) - (b != NULL);

  for (;; a++, b++) {
    // go through unsigned char: that is both what toupper() requires and
    // the ordering strcmp() uses
    const int ca = toupper((unsigned char) *a);
    const int cb = toupper((unsigned char) *b);
    if (ca != cb)
      return ca - cb;
    if (ca == 0)
      return 0;   // reached the end of both strings
  }
}
00149 
00150 
// Case-insensitive comparison of at most n characters of a and b.
//
// Returns zero when the compared prefixes match, a negative value when a
// sorts before b and a positive value when a sorts after b.  Both branches
// below use this same sign convention.
int strupncmp(const char *a, const char *b, int n) {
#if defined(ARCH_AIX3) || defined(ARCH_AIX4) || defined(_MSC_VER)
   // hand-written fallback for platforms where strncasecmp() is not used
   while (n-- > 0) {
      // toupper() is only defined for unsigned char values
      const int ca = toupper((unsigned char) *a);
      const int cb = toupper((unsigned char) *b);
      if (ca != cb) {
         return ca - cb;   // same sign as strncasecmp(a, b, n)
      }
      if (*a == 0) return 0;
      a++; b++;
   }
   return 0;
#else
   return strncasecmp(a, b, n);
#endif
}
00166 
00167 
00168 // break a file name up into path + name, returning both in the specified
00169 //      character pointers.  This creates storage for the new strings
00170 //      by allocating space for them.
00171 void breakup_filename(const char *full, char **path, char **name) {
00172   const char *namestrt;
00173   int pathlen;
00174 
00175   if(full == NULL) {
00176     *path = *name = NULL;
00177     return;
00178   } else if (strlen(full) == 0) {
00179     *path = new char[1];
00180     *name = new char[1];
00181     (*path)[0] = (*name)[0] = '\0';
00182     return;
00183   }
00184 
00185   // find start of final file name
00186   if((namestrt = strrchr(full,'/')) != NULL && strlen(namestrt) > 0) {
00187     namestrt++;
00188   } else {
00189     namestrt = full;
00190   }
00191 
00192   // make a copy of the name
00193   *name = stringdup(namestrt);
00194 
00195   // make a copy of the path
00196   pathlen = strlen(full) - strlen(*name);
00197   *path = new char[pathlen + 1];
00198   strncpy(*path,full,pathlen);
00199   (*path)[pathlen] = '\0';
00200 } 
00201 
00202 // break a configuration line up into tokens.
00203 char *str_tokenize(const char *newcmd, int *argc, char *argv[]) {
00204   char *cmd; 
00205   const char *cmdstart;
00206   cmdstart = newcmd;
00207 
00208   // guarantee that the command string we return begins on the first
00209   // character returned by strtok(), otherwise the subsequent delete[]
00210   // calls will reference invalid memory blocks
00211   while (cmdstart != NULL &&
00212          (*cmdstart == ' '  ||
00213           *cmdstart == ','  ||
00214           *cmdstart == ';'  ||
00215           *cmdstart == '\t' ||
00216           *cmdstart == '\n')) {
00217     cmdstart++; // advance pointer to first command character
00218   } 
00219 
00220   cmd = stringdup(cmdstart);
00221   *argc = 0;
00222 
00223   // initialize tokenizing calls
00224   argv[*argc] = strtok(cmd, " ,;\t\n");
00225 
00226   // loop through words until end-of-string, or comment character, found
00227   while(argv[*argc] != NULL) {
00228     // see if the token starts with '#'
00229     if(argv[*argc][0] == '#') {
00230       break;                    // don't process any further tokens
00231     } else {
00232       (*argc)++;                // another token in list
00233     }
00234     
00235     // scan for next token
00236     argv[*argc] = strtok(NULL," ,;\t\n");
00237   }
00238 
00239   return (*argc > 0 ? argv[0] : (char *) NULL);
00240 }
00241 
00242 
// Return the current reading of the system clock, in seconds.
//
// On Windows builds the value is GetTickCount() converted from
// milliseconds to seconds; elsewhere it is the gettimeofday() time with
// microsecond resolution.
double time_of_day(void) {
#if defined(_MSC_VER)
  return ((double) GetTickCount()) / 1000.0;
#else
  struct timeval tm;

  // the timezone argument is obsolete; POSIX leaves the behavior
  // unspecified when it is not a null pointer
  gettimeofday(&tm, NULL);
  return((double)(tm.tv_sec) + (double)(tm.tv_usec)/1000000.0);
#endif
}
00260 
00261 
00262 int vmd_check_stdin(void) {
00263 #if defined(_MSC_VER)
00264   if (_kbhit() != 0)
00265     return TRUE;
00266   else
00267     return FALSE;
00268 #else
00269   fd_set readvec;
00270   struct timeval timeout;
00271   int ret, stdin_fd;
00272 
00273   timeout.tv_sec = 0;
00274   timeout.tv_usec = 0;
00275   stdin_fd = 0;
00276   FD_ZERO(&readvec);
00277   FD_SET(stdin_fd, &readvec);
00278 
00279 #if !defined(ARCH_AIX3)
00280   ret = select(16, &readvec, NULL, NULL, &timeout);
00281 #else
00282   ret = select(16, (int *)(&readvec), NULL, NULL, &timeout);
00283 #endif
00284  
00285   if (ret == -1) {  // got an error
00286     if (errno != EINTR)  // XXX: this is probably too lowlevel to be converted to Inform.h
00287       printf("select() error while attempting to read text input.\n");
00288     return FALSE;
00289   } else if (ret == 0) {
00290     return FALSE;  // select timed out
00291   }
00292   return TRUE;
00293 #endif
00294 }
00295 
00296 
00297 // return the username of the currently logged-on user
00298 char *vmd_username(void) {
00299 #if defined(_MSC_VER)
00300   char username[1024];
00301   unsigned long size = 1023;
00302 
00303   if (GetUserName((char *) &username, &size)) {
00304     return stringdup(username);
00305   }
00306   else { 
00307     return stringdup("Windows User");
00308   }
00309 #else
00310 #if defined(ARCH_FREEBSD) || defined(ARCH_FREEBSDAMD64) || defined(__APPLE__) || defined(__linux)
00311   return stringdup(getlogin());
00312 #else
00313   return stringdup(cuserid(NULL));
00314 #endif 
00315 #endif
00316 }
00317 
// Return the numeric user id of the calling process.
// Windows builds always report 0.
int vmd_getuid(void) {
#if !defined(_MSC_VER)
  return getuid();
#else
  return 0;
#endif
}
00325 
00326 
00327 #if 0
00328 //
00329 // XXX array init/copy routines that avoid polluting cache, where possible
00330 //
00331 // Fast 16-byte-aligned integer assignment loop for use in the
00332 // VMD color scale routines
00333 void set_1fv_aligned(const int *iv, int n, const int val) {
00334   int i=0;
00335 
00336 #if defined(VMDUSESSE) && defined(__SSE2__)
00337   __m128i = _mm_set_p
00338   // do groups of four elements
00339   for (; i<(n-3); i+=4) {
00340   }
00341 #endif
00342 }
00343 #endif
00344 
00345 
00346 #if defined(VMDUSESSE) || defined(VMDUSEAVX) || defined(VMDUSENEON)
00347 
00348 //
00349 // Helper routine for use when coping with unaligned
00350 // buffers returned by malloc() on many GNU systems:
00351 //   http://gcc.gnu.org/bugzilla/show_bug.cgi?id=24261
00352 //   http://www.sourceware.org/bugzilla/show_bug.cgi?id=206
00353 //
00354 // XXX until all compilers support uintptr_t, we have to do 
00355 //     dangerous and ugly things with pointer casting here...
00356 //
00357 #if 1
00358 /* sizeof(unsigned long) == sizeof(void*) */
00359 #define myintptrtype unsigned long
00360 #elif 1
00361 /* sizeof(size_t) == sizeof(void*) */
00362 #define myintptrtype size_t
00363 #else
00364 /* C99 */
00365 #define myintptrtype uintptr_t
00366 #endif
00367 
00368 #if 0
00369 // arbitrary pointer alignment test
00370 static int is_Nbyte_aligned(const void *ptr, int N) {
00371   return ((((myintptrtype) ptr) % N) == 0);
00372 }
00373 #endif
00374 
00375 // Aligment test routine for x86 16-byte SSE vector instructions
00376 static int is_16byte_aligned(const void *ptr) {
00377   return (((myintptrtype) ptr) == (((myintptrtype) ptr) & (~0xf)));
00378 }
00379 
00380 #if defined(VMDUSEAVX)
00381 // Aligment test routine for x86 32-byte AVX vector instructions
00382 static int is_32byte_aligned(const void *ptr) {
00383   return (((myintptrtype) ptr) == (((myintptrtype) ptr) & (~0x1f)));
00384 }
00385 #endif
00386 
00387 #if 0
00388 // Aligment test routine for x86 LRB/MIC 64-byte vector instructions
00389 static int is_64byte_aligned(const void *ptr) {
00390   return (((myintptrtype) ptr) == (((myintptrtype) ptr) & (~0x3f)));
00391 }
00392 #endif
00393 #endif 
00394 
00395 
00396 //
00397 // Small inlinable SSE helper routines to make code easier to read
00398 //
00399 #if defined(VMDUSESSE) && defined(__SSE2__)
00400 
// Debugging aid: print the four 32-bit lanes of mask4 in hexadecimal,
// lowest lane first.
static void print_m128i(__m128i mask4) {
  int lane[4];
  _mm_storeu_si128((__m128i *) lane, mask4);
  printf("vec: %08x %08x %08x %08x\n", lane[0], lane[1], lane[2], lane[3]);
}
00405 
00406 
// Horizontal AND: returns the bitwise AND of the four 32-bit lanes of mask4.
static int hand_m128i(__m128i mask4) {
  // AND each lane with its neighbour (lanes 0<->1 and 2<->3 swapped)
  const __m128i pairs =
    _mm_and_si128(mask4, _mm_shuffle_epi32(mask4, _MM_SHUFFLE(2, 3, 0, 1)));

  // AND the two pair results together (64-bit halves swapped);
  // every lane now holds the fully reduced value
  const __m128i all =
    _mm_and_si128(pairs, _mm_shuffle_epi32(pairs, _MM_SHUFFLE(1, 0, 3, 2)));

  return _mm_cvtsi128_si32(all); // lane zero
}
00419 
00420 
// Horizontal OR: returns the bitwise OR of the four 32-bit lanes of mask4.
static int hor_m128i(__m128i mask4) {
  // OR each lane with its neighbour (lanes 0<->1 and 2<->3 swapped)
  const __m128i pairs =
    _mm_or_si128(mask4, _mm_shuffle_epi32(mask4, _MM_SHUFFLE(2, 3, 0, 1)));

  // OR the two pair results together (64-bit halves swapped);
  // every lane now holds the fully reduced value
  const __m128i all =
    _mm_or_si128(pairs, _mm_shuffle_epi32(pairs, _MM_SHUFFLE(1, 0, 3, 2)));

  return _mm_cvtsi128_si32(all); // lane zero
}
00433 
00434 
// Horizontal add: returns the sum of the four 32-bit integer lanes of sum4.
static int hadd_m128i(__m128i sum4) {
  // add each lane to its neighbour (lanes 0<->1 and 2<->3 swapped)
  const __m128i pairs =
    _mm_add_epi32(sum4, _mm_shuffle_epi32(sum4, _MM_SHUFFLE(2, 3, 0, 1)));

  // add the two pair sums together (64-bit halves swapped);
  // every lane now holds the total
  const __m128i total =
    _mm_add_epi32(pairs, _mm_shuffle_epi32(pairs, _MM_SHUFFLE(1, 0, 3, 2)));

  return _mm_cvtsi128_si32(total); // lane zero
}
00447 
00448 
// Bitwise select: each result bit is taken from b where the matching bit
// of mask is set, and from a where it is clear.
static __m128i _mm_sel_m128i(const __m128i &a, const __m128i &b, const __m128i &mask) {
  // computes ((b ^ a) & mask) ^ a
  const __m128i differing = _mm_xor_si128(b, a);
  const __m128i chosen    = _mm_and_si128(mask, differing);
  return _mm_xor_si128(a, chosen);
}
00453 
00454 
// Bitwise select for float vectors: each result bit is taken from b where
// the matching bit of mask is set, and from a where it is clear.
static __m128 _mm_sel_ps(const __m128 &a, const __m128 &b, const __m128 &mask) {
  // computes ((b ^ a) & mask) ^ a
  const __m128 differing = _mm_xor_ps(b, a);
  const __m128 chosen    = _mm_and_ps(mask, differing);
  return _mm_xor_ps(a, chosen);
}
00459 
00460 
// Horizontal minimum: returns the smallest of the four floats in min4.
static float fmin_m128(__m128 min4) {
  // min of each lane with its neighbour (lanes 0<->1 and 2<->3 swapped)
  __m128 swapped = _mm_shuffle_ps(min4, min4, _MM_SHUFFLE(2, 3, 0, 1));
  const __m128 pairs = _mm_min_ps(min4, swapped);

  // min of the two pair results (64-bit halves swapped);
  // every lane now holds the overall minimum
  swapped = _mm_shuffle_ps(pairs, pairs, _MM_SHUFFLE(1, 0, 3, 2));
  const __m128 all = _mm_min_ps(pairs, swapped);

  float result;
  _mm_store_ss(&result, all);
  return result;
}
00476 
00477 
// Horizontal maximum: returns the largest of the four floats in max4.
static float fmax_m128(__m128 max4) {
  // max of each lane with its neighbour (lanes 0<->1 and 2<->3 swapped)
  __m128 swapped = _mm_shuffle_ps(max4, max4, _MM_SHUFFLE(2, 3, 0, 1));
  const __m128 pairs = _mm_max_ps(max4, swapped);

  // max of the two pair results (64-bit halves swapped);
  // every lane now holds the overall maximum
  swapped = _mm_shuffle_ps(pairs, pairs, _MM_SHUFFLE(1, 0, 3, 2));
  const __m128 all = _mm_max_ps(pairs, swapped);

  float result;
  _mm_store_ss(&result, all);
  return result;
}
00492 #endif
00493 
00494 
00495 //
00496 // Small inlinable ARM Neon helper routines to make code easier to read
00497 //
00498 #if defined(VMDUSENEON) && defined(__ARM_NEON__)
00499 
// helper routine to perform a min among all 4 elements of a float32x4_t
static float fmin_f32x4(float32x4_t min4) {
  const float *lane = (const float *) &min4;
  float lowest = lane[0];
  for (int k = 1; k < 4; k++) {
    if (lane[k] < lowest)
      lowest = lane[k];
  }
  return lowest;
}
00509 
// helper routine to perform a max among all 4 elements of a float32x4_t
static float fmax_f32x4(float32x4_t max4) {
  const float *lane = (const float *) &max4;
  float highest = lane[0];
  for (int k = 1; k < 4; k++) {
    if (lane[k] > highest)
      highest = lane[k];
  }
  return highest;
}
00518 
00519 #endif
00520 
00521 
// Find the first selected atom.
//
//   n        - number of selection flags in on[]
//   on       - selection flags; any nonzero entry counts as selected
//   firstsel - receives the index of the first nonzero flag
//
// Returns 0 when a selected atom was found.  Returns -1, with *firstsel
// set to 0, when no flag is set.
int find_first_selection_aligned(int n, const int *on, int *firstsel) {
  int i = 0;
  *firstsel = 0;

#if defined(VMDUSEAVX) && defined(__AVX__)
  // check flags one at a time until on[i] sits on a 32-byte boundary
  while ((i<n) && !is_32byte_aligned(&on[i])) {
    if (on[i]) {
      *firstsel = i; // found first selected atom
      return 0;
    }
    i++;
  }

  // AVX: step over groups of 8 flags for as long as they are all zero
  while (i<(n-7)) {
    // aligned load of 8 selection flags
    __m256i flags8 = _mm256_load_si256((__m256i*) &on[i]);
    if (!_mm256_testz_si256(flags8, flags8))
      break; // this group contains the first selected atom
    i += 8;
  }
#elif defined(VMDUSESSE) && defined(__SSE2__)
  // check flags one at a time until on[i] sits on a 16-byte boundary
  while ((i<n) && !is_16byte_aligned(&on[i])) {
    if (on[i]) {
      *firstsel = i; // found first selected atom
      return 0;
    }
    i++;
  }

  // SSE: step over groups of 4 flags for as long as they are all zero
  while (i<(n-3)) {
    // aligned load of 4 selection flags
    __m128i flags4 = _mm_load_si128((__m128i*) &on[i]);
    if (hor_m128i(flags4))
      break; // this group contains the first selected atom
    i += 4;
  }
#endif

  // plain C scan: the whole array when no vector path is compiled in,
  // otherwise the group located above plus any leftover tail
  for (; i<n; i++) {
    if (on[i]) {
      *firstsel = i; // found first selected atom
      return 0;
    }
  }

  // no atoms were selected if we got here
  *firstsel = 0;
  return -1;
}
00588 
00589 
// Find the last selected atom.
//
//   n       - number of selection flags in on[]
//   on      - selection flags; any nonzero entry counts as selected
//   lastsel - receives the index of the last nonzero flag
//
// Returns 0 when a selected atom was found.  Returns -1, with *lastsel
// set to -1, when no flag is set.  No element outside on[0..n-1] is read.
int find_last_selection_aligned(int n, const int *on, int *lastsel) {
  int i;
  *lastsel =  -1;

  // find the last selected atom, if any
#if defined(VMDUSEAVX) && defined(__AVX__)
  // AVX vectorized search loop
  // Roll down to next 32-byte boundary
  for (i=n-1; i>=0; i--) {
    if (on[i]) {
      *lastsel = i; // found last selected atom
      return 0;
    }

    // drop out of the alignment loop once we hit a 32-byte boundary
    if (is_32byte_aligned(&on[i]))
      break;
  }

  for (i-=8; i>=0; i-=8) {
    // aligned load of 8 selection flags
    __m256i on8 = _mm256_load_si256((__m256i*) &on[i]);
    if (!_mm256_testz_si256(on8, on8))
      break; // found a block containing the last selected atom
  }

  // i is now either the start of a block holding a selected atom, or it is
  // negative because the vector loop ran off the front of the array.  In
  // the latter case only the elements below the first aligned index (if
  // any) are still unchecked, so the scan must stop at index 0.
  int last8=i;
  for (i=last8+7; (i>=last8) && (i>=0); i--) {
    if (on[i]) {
      *lastsel = i; // found last selected atom
      return 0;
    }
  }
#elif defined(VMDUSESSE) && defined(__SSE2__)
  // SSE vectorized search loop
  // Roll down to next 16-byte boundary
  for (i=n-1; i>=0; i--) {
    if (on[i]) {
      *lastsel = i; // found last selected atom
      return 0;
    }

    // drop out of the alignment loop once we hit a 16-byte boundary
    if (is_16byte_aligned(&on[i]))
      break;
  }

  for (i-=4; i>=0; i-=4) {
    // aligned load of 4 selection flags
    __m128i on4 = _mm_load_si128((__m128i*) &on[i]);
    if (hor_m128i(on4))
      break; // found a block containing the last selected atom
  }

  // i is now either the start of a block holding a selected atom, or it is
  // negative because the vector loop ran off the front of the array.  In
  // the latter case only the elements below the first aligned index (if
  // any) are still unchecked, so the scan must stop at index 0.
  int last4=i;
  for (i=last4+3; (i>=last4) && (i>=0); i--) {
    if (on[i]) {
      *lastsel = i; // found last selected atom
      return 0;
    }
  }
#else
  // plain C...
  for (i=n-1; i>=0; i--) {
    if (on[i]) {
      *lastsel = i; // found last selected atom
      return 0;
    }
  }
#endif

  // no atoms were selected if we got here
  *lastsel = -1;
  return -1;
}
00666 
00667 
00668 // Find the first selected atom, the last selected atom,
00669 // and the total number of selected atoms.
00670 int analyze_selection_aligned(int n, const int *on, 
00671                               int *firstsel, int *lastsel, int *selected) {
00672   int sel   = *selected =  0;
00673   int first = *firstsel = 0;   // if we early-exit, firstsel is 0 
00674   int last  = *lastsel  = -1;  // and lastsel is -1
00675   int i;
00676 
00677   // find the first selected atom, if any
00678   if (find_first_selection_aligned(n, on, &first)) {
00679     return -1; // indicate that no selection was found
00680   }
00681 
00682   // find the last selected atom, if any
00683   if (find_last_selection_aligned(n, on, &last)) {
00684     return -1; // indicate that no selection was found
00685   }
00686 
00687   // count the number of selected atoms (there are only 0s and 1s)
00688   // and determine the index of the last selected atom
00689 
00690   // XXX the Intel 12.x compiler is able to beat this code in some
00691   //     cases, but GCC 4.x cannot, so for Intel C/C++ we use the plain C 
00692   //     loop and let it autovectorize, but for GCC we do it by hand.
00693 #if !defined(__INTEL_COMPILER) && defined(VMDUSESSE) && defined(__SSE2__)
00694   // SSE vectorized search loop
00695   // Roll up to next 16-byte boundary
00696   for (i=first; ((i<=last) && (!is_16byte_aligned(&on[i]))); i++) {
00697     sel += on[i];
00698   }
00699 
00700   // Process groups of 4 flags at a time
00701   for (; i<=(last-3); i+=4) {
00702     // aligned load of four selection flags
00703     __m128i on4 = _mm_load_si128((__m128i*) &on[i]);
00704 
00705     // count selected atoms
00706     sel += hadd_m128i(on4);
00707   }
00708 
00709   // check the very end of the array (non-divisible by four)
00710   for (; i<=last; i++) {
00711     sel += on[i];
00712   }
00713 #else
00714   // plain C...
00715   for (i=first; i<=last; i++) {
00716     sel += on[i];
00717   }
00718 #endif
00719 
00720   *selected = sel; 
00721   *firstsel = first;
00722   *lastsel = last;
00723 
00724   return 0;
00725 }
00726 
00727 
// Compute min/max values for a 16-byte-aligned array of floats
//
//   f    - input array
//   n    - number of elements in f
//   fmin - receives the smallest element
//   fmax - receives the largest element
//
// When n < 1 the function returns without writing *fmin or *fmax.
// Leading elements that are not on a 16-byte boundary are handled with
// scalar code before the vector loops start.
void minmax_1fv_aligned(const float *f, int n, float *fmin, float *fmax) {
  if (n < 1)
    return;

#if defined(VMDUSESSE) && defined(__SSE2__)
  float lo = f[0];
  float hi = f[0];
  int i = 0;

  // scalar walk up to the first 16-byte-aligned array index
  while ((i<n) && !is_16byte_aligned(&f[i])) {
    if (f[i] < lo) lo = f[i];
    if (f[i] > hi) hi = f[i];
    i++;
  }

  // seed the vector accumulators with the scalar results so far
  __m128 lo4 = _mm_set_ps1(lo);
  __m128 hi4 = _mm_set_ps1(hi);
  __m128 v;

  // hand-unrolled: eight aligned loads (32 elements) per iteration
  for (; i<(n-31); i+=32) {
    v = _mm_load_ps(&f[i   ]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);
    v = _mm_load_ps(&f[i+ 4]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);
    v = _mm_load_ps(&f[i+ 8]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);
    v = _mm_load_ps(&f[i+12]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);

    v = _mm_load_ps(&f[i+16]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);
    v = _mm_load_ps(&f[i+20]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);
    v = _mm_load_ps(&f[i+24]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);
    v = _mm_load_ps(&f[i+28]); lo4 = _mm_min_ps(lo4, v); hi4 = _mm_max_ps(hi4, v);
  }

  // one aligned load (4 elements) per iteration
  for (; i<(n-3); i+=4) {
    v = _mm_load_ps(&f[i]);
    lo4 = _mm_min_ps(lo4, v);
    hi4 = _mm_max_ps(hi4, v);
  }

  // leftover elements: broadcast each one and fold it in
  for (; i<n; i++) {
    v = _mm_set_ps1(f[i]);
    lo4 = _mm_min_ps(lo4, v);
    hi4 = _mm_max_ps(hi4, v);
  }

  // reduce the 4-element accumulators to single values
  *fmin = fmin_m128(lo4);
  *fmax = fmax_m128(hi4);
#elif defined(VMDUSENEON) && defined(__ARM_NEON__)
  float lo = f[0];
  float hi = f[0];
  int i = 0;

  // scalar walk up to the first 16-byte-aligned array index
  while ((i<n) && !is_16byte_aligned(&f[i])) {
    if (f[i] < lo) lo = f[i];
    if (f[i] > hi) hi = f[i];
    i++;
  }

  // seed the vector accumulators with the scalar results so far
  float32x4_t lo4 = vdupq_n_f32(lo);
  float32x4_t hi4 = vdupq_n_f32(hi);
  float32x4_t v;

  // hand-unrolled: eight loads (32 elements) per iteration
  for (; i<(n-31); i+=32) {
    v = vld1q_f32(&f[i   ]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);
    v = vld1q_f32(&f[i+ 4]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);
    v = vld1q_f32(&f[i+ 8]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);
    v = vld1q_f32(&f[i+12]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);

    v = vld1q_f32(&f[i+16]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);
    v = vld1q_f32(&f[i+20]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);
    v = vld1q_f32(&f[i+24]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);
    v = vld1q_f32(&f[i+28]); lo4 = vminq_f32(lo4, v); hi4 = vmaxq_f32(hi4, v);
  }

  // one load (4 elements) per iteration
  for (; i<(n-3); i+=4) {
    v = vld1q_f32(&f[i]);
    lo4 = vminq_f32(lo4, v);
    hi4 = vmaxq_f32(hi4, v);
  }

  // leftover elements: broadcast each one and fold it in
  for (; i<n; i++) {
    v = vdupq_n_f32(f[i]);
    lo4 = vminq_f32(lo4, v);
    hi4 = vmaxq_f32(hi4, v);
  }

  // reduce the 4-element accumulators to single values
  *fmin = fmin_f32x4(lo4);
  *fmax = fmax_f32x4(hi4);
#else
  // scalar min/max loop
  float lo = f[0];
  float hi = f[0];
  int i = 1;
  while (i < n) {
    if (f[i] < lo) lo = f[i];
    if (f[i] > hi) hi = f[i];
    i++;
  }
  *fmin = lo;
  *fmax = hi;
#endif
}
00870 
00871 
// Compute min/max values for a 16-byte-aligned array of float3s
//
//   f    - input array of n3 consecutive (x, y, z) triples
//   n3   - the number of 3-element vectors to process
//   fmin - receives the per-component minimum: fmin[0..2] = x, y, z
//   fmax - receives the per-component maximum: fmax[0..2] = x, y, z
//
// When n3 < 1 the function returns without writing fmin or fmax.
void minmax_3fv_aligned(const float *f, const int n3, float *fmin, float *fmax) {
  const int end = n3*3;

  if (n3 < 1)
    return;

  // start every running min/max at the first triple
  int i = 0;
  float xlo = f[0], xhi = f[0];
  float ylo = f[1], yhi = f[1];
  float zlo = f[2], zhi = f[2];

#if defined(VMDUSESSE) && defined(__SSE2__)
  // We may not start on a 16-byte boundary, so process whole triples with
  // plain C until the next triple begins on one.
  while ((i<end) && !is_16byte_aligned(&f[i])) {
    const float px = f[i  ];
    const float py = f[i+1];
    const float pz = f[i+2];

    if (px < xlo) xlo = px;
    if (px > xhi) xhi = px;
    if (py < ylo) ylo = py;
    if (py > yhi) yhi = py;
    if (pz < zlo) zlo = pz;
    if (pz > zhi) zhi = pz;

    i += 3;
  }

  // seed the vector accumulators with the scalar results so far
  __m128 xlo4 = _mm_set_ps1(xlo);
  __m128 xhi4 = _mm_set_ps1(xhi);
  __m128 ylo4 = _mm_set_ps1(ylo);
  __m128 yhi4 = _mm_set_ps1(yhi);
  __m128 zlo4 = _mm_set_ps1(zlo);
  __m128 zhi4 = _mm_set_ps1(zhi);

  // four triples (12 floats, three aligned loads) per iteration
  for (; i<(end-11); i+=12) {
    __m128 x0y0z0x1 = _mm_load_ps(&f[i  ]);
    __m128 y1z1x2y2 = _mm_load_ps(&f[i+4]);
    __m128 z2x3y3z3 = _mm_load_ps(&f[i+8]);

    // regroup the interleaved xyz data into one vector per component
    __m128 x2y2x3y3 = _mm_shuffle_ps(y1z1x2y2, z2x3y3z3, _MM_SHUFFLE(2, 1, 3, 2));
    __m128 y0z0y1z1 = _mm_shuffle_ps(x0y0z0x1, y1z1x2y2, _MM_SHUFFLE(1, 0, 2, 1));
    __m128 xs = _mm_shuffle_ps(x0y0z0x1, x2y2x3y3, _MM_SHUFFLE(2, 0, 3, 0)); // x0x1x2x3
    __m128 ys = _mm_shuffle_ps(y0z0y1z1, x2y2x3y3, _MM_SHUFFLE(3, 1, 2, 0)); // y0y1y2y3
    __m128 zs = _mm_shuffle_ps(y0z0y1z1, z2x3y3z3, _MM_SHUFFLE(3, 0, 3, 1)); // z0z1z2z3

    xlo4 = _mm_min_ps(xlo4, xs);
    xhi4 = _mm_max_ps(xhi4, xs);
    ylo4 = _mm_min_ps(ylo4, ys);
    yhi4 = _mm_max_ps(yhi4, ys);
    zlo4 = _mm_min_ps(zlo4, zs);
    zhi4 = _mm_max_ps(zhi4, zs);
  }

  // reduce each accumulator back to a single value
  xlo = fmin_m128(xlo4);
  ylo = fmin_m128(ylo4);
  zlo = fmin_m128(zlo4);

  xhi = fmax_m128(xhi4);
  yhi = fmax_m128(yhi4);
  zhi = fmax_m128(zhi4);
#endif

  // plain C: everything when no vector path is compiled in, otherwise
  // the triples left over after the vector loop
  while (i < end) {
    const float px = f[i  ];
    const float py = f[i+1];
    const float pz = f[i+2];

    if (px < xlo) xlo = px;
    if (px > xhi) xhi = px;
    if (py < ylo) ylo = py;
    if (py > yhi) yhi = py;
    if (pz < zlo) zlo = pz;
    if (pz > zhi) zhi = pz;

    i += 3;
  }

  fmin[0] = xlo;
  fmin[1] = ylo;
  fmin[2] = zlo;
  fmax[0] = xhi;
  fmax[1] = yhi;
  fmax[2] = zhi;
}
00970 
00971 
// Compute the per-component min/max over the selected entries of an array
// of packed 3-element float vectors.
//   f         - xyz coordinates, 3 floats per vector
//   on        - one selection flag per vector; any nonzero value means
//               "selected"
//   n3        - number of 3-element vectors in f (and of flags in on)
//   firstsel  - index of the first vector to examine.  Its coordinates seed
//               the running min/max unconditionally (on[firstsel] is not
//               checked), so this should be the first selected vector.
//   lastsel   - index of the last vector to examine (inclusive)
//   fmin/fmax - receive the 3-component minimum and maximum
// Returns 0 on success, or -1 (fmin/fmax untouched) if the index range is
// empty or out of bounds.
// The coordinate array does not have to start on a 16-byte boundary; the
// SSE path peels scalar iterations until it reaches one.
int minmax_selected_3fv_aligned(const float *f, const int *on, const int n3, 
                                const int firstsel, const int lastsel,
                                float *fmin, float *fmax) {
  float minx, maxx, miny, maxy, minz, maxz;

  if ((n3 < 1) || (firstsel < 0) || (lastsel < firstsel) || (lastsel >= n3))
    return -1;

  // start at first selected atom
  int i=firstsel;
  minx=maxx=f[i*3  ];
  miny=maxy=f[i*3+1];
  minz=maxz=f[i*3+2];

  int end=lastsel+1;

#if defined(VMDUSESSE) && defined(__SSE2__)
  // Since we may not be on a 16-byte boundary when we start, we roll
  // through the first few items with plain C until we get to one.
  // Only the coordinate array is brought into alignment: the flag array
  // can be misaligned differently and the two may never line up at the
  // same index, so the flags are always read with unaligned loads below.
  for (; i<end; i++) {
    int ind3 = i * 3;

    if (is_16byte_aligned(&f[ind3])) {
      break;
    }

    if (on[i]) {
      float tmpx = f[ind3  ];
      if (tmpx < minx) minx = tmpx;
      if (tmpx > maxx) maxx = tmpx;

      float tmpy = f[ind3+1];
      if (tmpy < miny) miny = tmpy;
      if (tmpy > maxy) maxy = tmpy;

      float tmpz = f[ind3+2];
      if (tmpz < minz) minz = tmpz;
      if (tmpz > maxz) maxz = tmpz;
    }
  }

  // initialize min/max values to results from scalar loop above
  __m128 xmin4 = _mm_set_ps1(minx);
  __m128 xmax4 = _mm_set_ps1(maxx);
  __m128 ymin4 = _mm_set_ps1(miny);
  __m128 ymax4 = _mm_set_ps1(maxy);
  __m128 zmin4 = _mm_set_ps1(minz);
  __m128 zmax4 = _mm_set_ps1(maxz);

  const __m128i zero4 = _mm_setzero_si128();
  const __m128i ones4 = _mm_set1_epi32(-1);

  // four vectors (12 floats = 48 bytes) per iteration, so the coordinate
  // loads stay 16-byte aligned once the first one is
  for (; i<(end-3); i+=4) {
    // unaligned load of four selection flags (see note above)
    __m128i on4 = _mm_loadu_si128((const __m128i*) &on[i]);

    // compute atom selection mask: all-ones in every lane whose flag is
    // nonzero, matching the "if (on[i])" test used by the scalar loops
    __m128i mask = _mm_andnot_si128(_mm_cmpeq_epi32(on4, zero4), ones4);
    if (!hor_m128i(mask))
      continue; // no atoms selected

    // reinterpret the integer lane mask as a float vector without
    // changing any bits
    const __m128 maskps = _mm_castsi128_ps(mask);

    // aligned load of four consecutive 3-element vectors into
    // three 4-element vectors
    int ind3 = i * 3;
    __m128 x0y0z0x1 = _mm_load_ps(&f[ind3+0]);
    __m128 y1z1x2y2 = _mm_load_ps(&f[ind3+4]);
    __m128 z2x3y3z3 = _mm_load_ps(&f[ind3+8]);

    // convert xyz AOS format to 4-element SOA vectors using shuffle instructions
    __m128 x2y2x3y3 = _mm_shuffle_ps(y1z1x2y2, z2x3y3z3, _MM_SHUFFLE(2, 1, 3, 2));
    __m128 y0z0y1z1 = _mm_shuffle_ps(x0y0z0x1, y1z1x2y2, _MM_SHUFFLE(1, 0, 2, 1));
    __m128 x        = _mm_shuffle_ps(x0y0z0x1, x2y2x3y3, _MM_SHUFFLE(2, 0, 3, 0)); // x0x1x2x3
    __m128 y        = _mm_shuffle_ps(y0z0y1z1, x2y2x3y3, _MM_SHUFFLE(3, 1, 2, 0)); // y0y1y2y3
    __m128 z        = _mm_shuffle_ps(y0z0y1z1, z2x3y3z3, _MM_SHUFFLE(3, 0, 3, 1)); // z0z1z2z3

    // compute mins and maxes, keeping the previous value in unselected lanes
    xmin4 = _mm_sel_ps(xmin4, _mm_min_ps(xmin4, x), maskps);
    xmax4 = _mm_sel_ps(xmax4, _mm_max_ps(xmax4, x), maskps);
    ymin4 = _mm_sel_ps(ymin4, _mm_min_ps(ymin4, y), maskps);
    ymax4 = _mm_sel_ps(ymax4, _mm_max_ps(ymax4, y), maskps);
    zmin4 = _mm_sel_ps(zmin4, _mm_min_ps(zmin4, z), maskps);
    zmax4 = _mm_sel_ps(zmax4, _mm_max_ps(zmax4, z), maskps);
  }

  // collapse the four lanes of each accumulator back to scalars
  minx = fmin_m128(xmin4);
  miny = fmin_m128(ymin4);
  minz = fmin_m128(zmin4);

  maxx = fmax_m128(xmax4);
  maxy = fmax_m128(ymax4);
  maxz = fmax_m128(zmax4);
#endif

  // regular C code: the whole range without SSE, otherwise the 0-3
  // vectors left over after the vectorized loop
  for (; i<end; i++) {
    if (on[i]) {
      int ind3 = i * 3;
      float tmpx = f[ind3  ];
      if (tmpx < minx) minx = tmpx;
      if (tmpx > maxx) maxx = tmpx;

      float tmpy = f[ind3+1];
      if (tmpy < miny) miny = tmpy;
      if (tmpy > maxy) maxy = tmpy;

      float tmpz = f[ind3+2];
      if (tmpz < minz) minz = tmpz;
      if (tmpz > maxz) maxz = tmpz;
    }
  }

  fmin[0] = minx;
  fmax[0] = maxx;
  fmin[1] = miny;
  fmax[1] = maxy;
  fmin[2] = minz;
  fmax[2] = maxz;

  return 0;
}
01113 
01114 
// Compute the cross product x2 x x3 of two 3-vectors and store it in x1.
// All three components are computed before anything is written, so x1 may
// point to the same memory as x2 and/or x3 (in-place use is safe).
// This returns a pointer to x1
float * cross_prod(float *x1, const float *x2, const float *x3)
{
  const float cx =  x2[1]*x3[2] - x3[1]*x2[2];
  const float cy = -x2[0]*x3[2] + x3[0]*x2[2];
  const float cz =  x2[0]*x3[1] - x3[0]*x2[1];

  x1[0] = cx;
  x1[1] = cy;
  x1[2] = cz;
  return x1;
}
01125 
// Scale a 3-vector to unit length IN PLACE and hand back the same pointer.
// A vector whose squared length is not positive is left untouched.
float * vec_normalize(float *vect) {
  const float lensq = vect[0]*vect[0] + vect[1]*vect[1] + vect[2]*vect[2];

  // nothing to scale, and this also avoids dividing by zero
  if (!(lensq > 0))
    return vect;

  const float inv = 1.0f / sqrtf(lensq);
  for (int k = 0; k < 3; k++)
    vect[k] *= inv;

  return vect;
}
01141 
01142 
// Euclidean length of a 3-vector
float norm(const float *vect) {
  const float sumsq = vect[0]*vect[0] + vect[1]*vect[1] + vect[2]*vect[2];
  return sqrtf(sumsq);
}
01147 
01148 
// Determine if a triangle is degenerate or not.
// Returns 1 if the triangle v0,v1,v2 is degenerate, 0 otherwise.
// A triangle is degenerate when the edges v0-v1 and v0-v2 are parallel or
// anti-parallel, or when the product of their lengths is not positive
// (v0 coincides with v1 or v2, or the product underflows or is NaN).
int tri_degenerate(const float * v0, const float * v1, const float * v2) {
  float s1[3], s2[3];

  /*
   various rendering packages have amusingly different ideas about what
   constitutes a degenerate triangle.  -1 and 1 work well.  numbers
   below 0.999 and -0.999 show up in OpenGL
   numbers as low as 0.98 have worked in POVRay with certain models while
   numbers as high as 0.999999 have produced massive holes in other
   models
         -matt 11/13/96
  */

  /**************************************************************/
  /*    turn the triangle into 2 normalized vectors.            */
  /*    If the dot product is 1 or -1 then                      */
  /*   the triangle is degenerate                               */
  /**************************************************************/
  s1[0] = v0[0] - v1[0];
  s1[1] = v0[1] - v1[1];
  s1[2] = v0[2] - v1[2];

  s2[0] = v0[0] - v2[0];
  s2[1] = v0[1] - v2[1];
  s2[2] = v0[2] - v2[2];

  const float s1_length = sqrtf(s1[0]*s1[0] + s1[1]*s1[1] + s1[2]*s1[2]);
  const float s2_length = sqrtf(s2[0]*s2[0] + s2[1]*s2[1] + s2[2]*s2[2]);
  const float lenprod = s1_length * s2_length;

  // A zero-length edge would otherwise give 1/0 and a NaN cosine, and
  // every comparison with NaN is false, so the triangle would wrongly be
  // reported as fine.
  if (!(lenprod > 0.0f))
    return 1;

  /**************************************************************/
  /*                   invert to avoid divides:                 */
  /*                         1.0/v1_length * 1.0/v2_length      */
  /**************************************************************/
  const float inv_lenprod = 1.0f / lenprod;
  const float cosang = inv_lenprod * (s1[0]*s2[0] + s1[1]*s2[1] + s1[2]*s2[2]);

  // cosine of +/-1 (or beyond, from rounding) means the edges are collinear
  if ((cosang >= 1.0 ) || (cosang <= -1.0)) 
    return 1;
  else
    return 0;
}
01193 
01194 
01195 // compute the angle (in degrees 0 to 180 ) between two vectors a & b
01196 float angle(const float *a, const float *b) {
01197   float ab[3];
01198   cross_prod(ab, a, b);
01199   float psin = sqrtf(dot_prod(ab, ab));
01200   float pcos = dot_prod(a, b);
01201   return 57.2958f * (float) atan2(psin, pcos);
01202 }
01203 
01204 
01205 // Compute the dihedral angle for the given atoms, returning a value between
01206 // -180 and 180.
01207 // faster, cleaner implementation based on atan2
01208 float dihedral(const float *a1,const float *a2,const float *a3,const float *a4)
01209 {
01210   float r1[3], r2[3], r3[3], n1[3], n2[3];
01211   vec_sub(r1, a2, a1);
01212   vec_sub(r2, a3, a2);
01213   vec_sub(r3, a4, a3);
01214   
01215   cross_prod(n1, r1, r2);
01216   cross_prod(n2, r2, r3);
01217   
01218   float psin = dot_prod(n1, r3) * sqrtf(dot_prod(r2, r2));
01219   float pcos = dot_prod(n1, n2);
01220 
01221   // atan2f would be faster, but we'll have to workaround the lack
01222   // of existence on some platforms.
01223   return 57.2958f * (float) atan2(psin, pcos);
01224 }
01225  
01226 // compute the distance between points a & b
01227 float distance(const float *a, const float *b) {
01228   return sqrtf(distance2(a,b));
01229 }
01230 
01231 char *vmd_tempfile(const char *s) {
01232   char *envtxt, *TempDir;
01233 
01234   if((envtxt = getenv("VMDTMPDIR")) != NULL) {
01235     TempDir = stringdup(envtxt);
01236   } else {
01237 #if defined(_MSC_VER)
01238     if ((envtxt = getenv("TMP")) != NULL) {
01239       TempDir = stringdup(envtxt);
01240     }
01241     else if ((envtxt = getenv("TEMP")) != NULL) {
01242       TempDir = stringdup(envtxt);
01243     }
01244     else {
01245       TempDir = stringdup("c:\\\\");
01246     }
01247 #else
01248     TempDir = stringdup("/tmp");
01249 #endif
01250   }
01251   stripslashes(TempDir); // strip out ending '/' chars.
01252 
01253   char *tmpfilebuf = new char[1024];
01254  
01255   // copy in temp string
01256   strcpy(tmpfilebuf, TempDir);
01257  
01258 #if defined(_MSC_VER)
01259   strcat(tmpfilebuf, "\\");
01260   strncat(tmpfilebuf, s, 1022 - strlen(TempDir));
01261 #else
01262   strcat(tmpfilebuf, "/");
01263   strncat(tmpfilebuf, s, 1022 - strlen(TempDir));
01264 #endif
01265  
01266   tmpfilebuf[1023] = '\0';
01267  
01268   delete [] TempDir;
01269 
01270   // return converted string
01271   return tmpfilebuf;
01272 }
01273 
01274 
// Delete the file at path.
// Windows builds return 0 on success and -1 on failure; other builds
// return whatever unlink() returns.
int vmd_delete_file(const char * path) {
#if defined(_MSC_VER)
  // DeleteFile() signals failure with a zero return value
  return (DeleteFile(path) == 0) ? -1 : 0;
#else
  const int rc = unlink(path);
  return rc;
#endif
}
01285 
// Suspend the calling thread for secs seconds
void vmd_sleep(int secs) {
#if defined(_MSC_VER)
  // Sleep() takes milliseconds
  const int msecs = secs * 1000;
  Sleep(msecs);
#else 
  (void) sleep(secs);
#endif
}
01293 
// Suspend the calling thread for msecs milliseconds.
// Negative values are treated as zero.
void vmd_msleep(int msecs) {
  if (msecs < 0)
    msecs = 0;

#if defined(_MSC_VER)
  Sleep(msecs);
#else 
  // Split into whole seconds plus leftover microseconds: tv_usec must
  // stay below one million, otherwise select() may reject the timeout
  // with EINVAL and return immediately.
  struct timeval timeout;
  timeout.tv_sec  = msecs / 1000;
  timeout.tv_usec = (msecs % 1000) * 1000;
  select(0, NULL, NULL, NULL, &timeout);
#endif // _MSC_VER
}
01304 
// Thin wrapper: hand cmd to the C library's system() and return its result
int vmd_system(const char* cmd) {
  const int status = system(cmd);
  return status;
}
01308 
01309 
// Return the next pseudo-random number: rand() on Windows builds,
// random() everywhere else.
long vmd_random(void) {
  long value;
#ifdef _MSC_VER
  value = rand();
#else
  value = random();
#endif
  return value;
}
01320 
// Seed the generator used by vmd_random(): srand() on Windows builds,
// srandom() everywhere else.
void vmd_srandom(unsigned int seed) {
#if defined(_MSC_VER)
  srand(seed);
#else
  srandom(seed);
#endif
}
01328 
01331 float vmd_random_gaussian() {
01332   static bool cache = false;
01333   static float cached_value;
01334   const float RAND_FACTOR = 2.f/VMD_RAND_MAX;
01335   float r, s, w;
01336   
01337   if (cache) {
01338     cache = false;
01339     return cached_value;
01340   }
01341   do {
01342     r = RAND_FACTOR*vmd_random()-1.f; 
01343     s = RAND_FACTOR*vmd_random()-1.f;
01344     w = r*r+s*s;
01345   } while (w >= 1.f);
01346   w = sqrtf(-2.f*logf(w)/w);
01347   cached_value = s * w;
01348   cache = true;
01349   return (r*w);
01350 }
01351 
01352 
// Return the total amount of physical memory installed in the machine,
// in megabytes, or -1 if it cannot be determined on this platform.
long vmd_get_total_physmem_mb(void) {
#if defined(_MSC_VER)
  MEMORYSTATUS memstat;
  GlobalMemoryStatus(&memstat);
  if (memstat.dwLength != sizeof(memstat))
    return -1; /* memstat result is wrong size! */
  return memstat.dwTotalPhys/(1024 * 1024);
#elif defined(__linux)
  FILE *fp;
  char meminfobuf[1024], *pos;
  size_t len;

  fp = fopen("/proc/meminfo", "r");
  if (fp == NULL)
    return -1;

  /* leave room for the terminator and terminate at the number of bytes */
  /* actually read, so strstr() never scans uninitialized buffer space  */
  len = fread(meminfobuf, 1, sizeof(meminfobuf) - 1, fp);
  fclose(fp);
  meminfobuf[len] = '\0';
  if (len == 0)
    return -1;

  pos = strstr(meminfobuf, "MemTotal:");
  if (pos == NULL) 
    return -1;
  pos += 9; /* skip tag */
  /* the value is divided by 1024 to convert it to megabytes */
  return strtol(pos, (char **)NULL, 10)/1024L;
#elif defined(AIXUSEPERFSTAT) && defined(_AIX)
  perfstat_memory_total_t minfo;
  perfstat_memory_total(NULL, &minfo, sizeof(perfstat_memory_total_t), 1);
  return minfo.real_total*(4096/1024)/1024;
#elif defined(_AIX)
  return (sysconf(_SC_AIX_REALMEM) / 1024);
#elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
  /* SysV Unix */
  long pgsz = sysconf(_SC_PAGESIZE);
  long physpgs = sysconf(_SC_PHYS_PAGES);
  if (pgsz < 0 || physpgs < 0)
    return -1; /* sysconf() could not provide one of the values */
  return ((pgsz / 1024) * physpgs) / 1024;
#elif defined(__APPLE__)
  /* MacOS X uses BSD sysctl */
  /* use hw.memsize, as it's a 64-bit value */
  uint64_t membytes;
  size_t len = sizeof(membytes);
  if (sysctlbyname("hw.memsize", &membytes, &len, NULL, 0)) 
    return -1;
  return (membytes / (1024*1024));
#else
  return -1; /* unrecognized system, no method to get this info */
#endif
}
01405 
01406 
01407 
// Return the amount of physical memory currently available, in
// megabytes, or -1 if it cannot be determined on this platform.
// On Linux this is the sum of the MemFree, Buffers and Cached entries of
// /proc/meminfo.
long vmd_get_avail_physmem_mb(void) {
#if defined(_MSC_VER)
  MEMORYSTATUS memstat;
  GlobalMemoryStatus(&memstat);
  if (memstat.dwLength != sizeof(memstat))
    return -1; /* memstat result is wrong size! */ 
  return memstat.dwAvailPhys / (1024 * 1024);
#elif defined(__linux)
  static const char *tags[] = { "MemFree:", "Buffers:", "Cached:" };
  FILE *fp;
  char meminfobuf[1024], *pos;
  size_t len, k;
  long val;

  fp = fopen("/proc/meminfo", "r");
  if (fp == NULL)
    return -1;

  /* leave room for the terminator and terminate at the number of bytes */
  /* actually read, so strstr() never scans uninitialized buffer space  */
  len = fread(meminfobuf, 1, sizeof(meminfobuf) - 1, fp);
  fclose(fp);
  meminfobuf[len] = '\0';
  if (len == 0)
    return -1;

  /* add up every entry that is present; each tag is skipped by its own */
  /* length so that no character of the value can be stepped over       */
  val = 0L;
  for (k = 0; k < sizeof(tags)/sizeof(tags[0]); k++) {
    pos = strstr(meminfobuf, tags[k]);
    if (pos != NULL)
      val += strtol(pos + strlen(tags[k]), (char **)NULL, 10);
  }
  /* the sum is divided by 1024 to convert it to megabytes */
  return val/1024L;
#elif defined(AIXUSEPERFSTAT) && defined(_AIX)
  perfstat_memory_total_t minfo;
  perfstat_memory_total(NULL, &minfo, sizeof(perfstat_memory_total_t), 1);
  return minfo.real_free*(4096/1024)/1024;
#elif defined(_SC_PAGESIZE) && defined(_SC_AVPHYS_PAGES)
  /* SysV Unix */
  long pgsz = sysconf(_SC_PAGESIZE);
  long avphyspgs = sysconf(_SC_AVPHYS_PAGES);
  if (pgsz < 0 || avphyspgs < 0)
    return -1; /* sysconf() could not provide one of the values */
  return ((pgsz / 1024) * avphyspgs) / 1024;
#elif defined(__APPLE__)
  /* BSD sysctl "hw.usermem" is deliberately not used here: it isn't    */
  /* really the amount of free memory, it's more a measure of the       */
  /* non-kernel memory.                                                 */
  return -1;
#else
  return -1; /* unrecognized system, no method to get this info */
#endif
}
01479 
01480 
01482 long vmd_get_avail_physmem_percent(void) {
01483   double total, avail;
01484   total = (double) vmd_get_total_physmem_mb();
01485   avail = (double) vmd_get_avail_physmem_mb();
01486   if (total > 0.0 && avail >= 0.0)
01487     return (long) (avail / (total / 100.0));
01488 
01489   return -1; /* return an error */
01490 }
01491 
01492 

Generated on Thu Jun 20 01:50:15 2013 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002