Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

Benchmark.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2008 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: Benchmark.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.3 $      $Date: 2008/03/27 19:36:35 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * Various CPU/memory subsystem benchmarking routines.
00020  * The peak performance numbers achieved within a VMD build can be 
00021  * used to determine how well the VMD build was optimized, the 
00022  * performance of the host CPU/memory systems, SMP scaling efficiency, etc.
00023  *
00024  * The streaming memory bandwidth tests are an alternative implementation 
00025  * of McCalpin's STREAM benchmark.
00026  *
00027  ***************************************************************************/
00028 
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include "utilities.h"
00032 
00033 
/*
 * RESTRICT is placed on the array parameters of the bandwidth kernels
 * in this file.  Enabling the first branch below makes it expand to the
 * C99 'restrict' keyword, which gives compilers that accept the keyword
 * additional help with optimization.  It currently expands to nothing:
 * the caller of the kernels is contained within this same source file,
 * so the extra hint shouldn't be necessary here.
 */
#if 0
#define RESTRICT restrict
#else
#define RESTRICT 
#endif
00045 
/*
 * VECTORIZEME is written on the line directly above each kernel loop in
 * this file.  Enabling the first branch below makes it expand to a
 * compiler-specific vectorization pragma to assist with achieving peak
 * performance.  It currently expands to nothing, since such help really
 * shouldn't be required.
 */
#if 0
#define VECTORIZEME _Pragma("vector always")
#else
#define VECTORIZEME 
#endif
00056 
00057 
00058 /*
00059  * Double precision stream bandwidth tests
00060  */
00061 
/*
 * dstream_init: give the three double precision work arrays their
 * starting contents.  The first N elements of a[] are set to 1.0, of
 * b[] to 2.0 and of c[] to 0.0; nothing is written when N <= 0.
 */
void dstream_init(double * RESTRICT a, double * RESTRICT b,
                  double * RESTRICT c, int N) {
VECTORIZEME
  for (int idx = 0; idx < N; ++idx) {
    a[idx] = 1.0;
    b[idx] = 2.0;
    c[idx] = 0.0;
  }
}
00072 
/*
 * dstream_copy: double precision "copy" kernel, a[j] = b[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b      - source array, read for j in [0, N)
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting two arrays (b read, a written) of N doubles
 */
void dstream_copy(double * RESTRICT a, const double * RESTRICT b, 
                 int N, double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = b[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (2.0 * sizeof(double) * moved) / (1024.0 * 1024.0);
}
00082 
/*
 * dstream_scale: double precision "scale" kernel, a[j] = scalar * b[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b      - source array, read for j in [0, N)
 *   scalar - factor applied to every element of b
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting two arrays (b read, a written) of N doubles
 */
void dstream_scale(double * RESTRICT a, const double * RESTRICT b, 
                  double scalar, int N, double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = scalar * b[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (2.0 * sizeof(double) * moved) / (1024.0 * 1024.0);
}
00092 
/*
 * dstream_add: double precision "add" kernel, a[j] = b[j] + c[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b, c   - source arrays, read for j in [0, N)
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting three arrays (b and c read, a written) of
 *            N doubles
 */
void dstream_add(double * RESTRICT a, const double * RESTRICT b, 
                const double * RESTRICT c, int N, double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = b[j] + c[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (3.0 * sizeof(double) * moved) / (1024.0 * 1024.0);
}
00102 
/*
 * dstream_triad: double precision "triad" kernel,
 * a[j] = b[j] + scalar * c[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b, c   - source arrays, read for j in [0, N)
 *   scalar - factor applied to every element of c
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting three arrays (b and c read, a written) of
 *            N doubles
 */
void dstream_triad(double * RESTRICT a, const double * RESTRICT b, 
                  const double * RESTRICT c, double scalar, int N, 
                  double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = b[j] + scalar * c[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (3.0 * sizeof(double) * moved) / (1024.0 * 1024.0);
}
00113 
00114 
00115 
00116 /*
00117  * Single precision stream bandwidth tests
00118  */
00119 
/*
 * fstream_init: give the three single precision work arrays their
 * starting contents.  The first N elements of a[] are set to 1.0f, of
 * b[] to 2.0f and of c[] to 0.0f; nothing is written when N <= 0.
 */
void fstream_init(float * RESTRICT a, float * RESTRICT b,
                  float * RESTRICT c, int N) {
VECTORIZEME
  for (int idx = 0; idx < N; ++idx) {
    a[idx] = 1.0f;
    b[idx] = 2.0f;
    c[idx] = 0.0f;
  }
}
00130 
/*
 * fstream_copy: single precision "copy" kernel, a[j] = b[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b      - source array, read for j in [0, N)
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting two arrays (b read, a written) of N floats
 */
void fstream_copy(float * RESTRICT a, const float * RESTRICT b, 
                 int N, double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = b[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (2.0 * sizeof(float) * moved) / (1024.0 * 1024.0);
}
00140 
/*
 * fstream_scale: single precision "scale" kernel, a[j] = scalar * b[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b      - source array, read for j in [0, N)
 *   scalar - factor applied to every element of b
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting two arrays (b read, a written) of N floats
 */
void fstream_scale(float * RESTRICT a, const float * RESTRICT b, 
                   float scalar, int N, double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = scalar * b[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (2.0 * sizeof(float) * moved) / (1024.0 * 1024.0);
}
00150 
/*
 * fstream_add: single precision "add" kernel, a[j] = b[j] + c[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b, c   - source arrays, read for j in [0, N)
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting three arrays (b and c read, a written) of
 *            N floats
 */
void fstream_add(float * RESTRICT a, const float * RESTRICT b, 
                 const float * RESTRICT c, int N, double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = b[j] + c[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (3.0 * sizeof(float) * moved) / (1024.0 * 1024.0);
}
00160 
/*
 * fstream_triad: single precision "triad" kernel,
 * a[j] = b[j] + scalar * c[j].
 *
 *   a      - destination array, written for j in [0, N)
 *   b, c   - source arrays, read for j in [0, N)
 *   scalar - factor applied to every element of c
 *   N      - number of elements to process; nothing is done when N <= 0
 *   mbsize - receives the amount of data touched, in units of 1024*1024
 *            bytes, counting three arrays (b and c read, a written) of
 *            N floats
 */
void fstream_triad(float * RESTRICT a, const float * RESTRICT b, 
                  const float * RESTRICT c, float scalar, int N, 
                  double *mbsize) {
  int j;
VECTORIZEME
  for (j=0; j<N; j++)
    a[j] = b[j] + scalar * c[j];

  /*
   * Do the size arithmetic in double precision on a non-negative count.
   * Multiplying the signed N by sizeof() converts it to size_t, so a
   * negative N would wrap around and report an enormous transfer even
   * though the loop above moved no data.
   */
  const int moved = (N > 0) ? N : 0;
  *mbsize = (3.0 * sizeof(float) * moved) / (1024.0 * 1024.0);
}
00171 
00172 
00173 /*
00174  * run the benchmark
00175  */
00176 int stream_bench(int N, double *time, double *mbsec) {
00177   double *da, *db, *dc;
00178   float *fa, *fb, *fc;
00179   vmd_timerhandle timer;
00180   int rc = 0;
00181 
00182   timer = vmd_timer_create();
00183 
00184   /*
00185    * run double precision benchmarks
00186    */
00187   da = (double *) malloc(N * sizeof(double));
00188   db = (double *) malloc(N * sizeof(double));
00189   dc = (double *) malloc(N * sizeof(double));
00190 
00191   if ((da != NULL) && (db != NULL) && (dc != NULL)) {
00192     double mbsz;
00193 
00194     dstream_init(da, db, dc, N);
00195 
00196     vmd_timer_start(timer);
00197     dstream_copy(da, db, N, &mbsz);
00198     vmd_timer_stop(timer);
00199     time[0] = vmd_timer_time(timer);
00200     mbsec[0] = mbsz / time[0];
00201 
00202     vmd_timer_start(timer);
00203     dstream_scale(da, db, 2.0, N, &mbsz);
00204     vmd_timer_stop(timer);
00205     time[1] = vmd_timer_time(timer);
00206     mbsec[1] = mbsz / time[1];
00207 
00208     vmd_timer_start(timer);
00209     dstream_add(da, db, dc, N, &mbsz);
00210     vmd_timer_stop(timer);
00211     time[2] = vmd_timer_time(timer);
00212     mbsec[2] = mbsz / time[2];
00213 
00214     vmd_timer_start(timer);
00215     dstream_triad(da, db, dc, 2.0, N, &mbsz);
00216     vmd_timer_stop(timer);
00217     time[3] = vmd_timer_time(timer);
00218     mbsec[3] = mbsz / time[3];
00219   } else {
00220     rc = -1;
00221   }
00222 
00223   if (da)
00224     free(da);
00225   if (db)
00226     free(db);
00227   if (dc)
00228     free(dc);
00229 
00230   if (rc) {
00231     vmd_timer_destroy(timer);
00232     return rc;
00233   }
00234 
00235   /*
00236    * run float precision benchmarks
00237    */
00238   fa = (float *) malloc(N * sizeof(float));
00239   fb = (float *) malloc(N * sizeof(float));
00240   fc = (float *) malloc(N * sizeof(float));
00241 
00242   if ((fa != NULL) && (fb != NULL) && (fc != NULL)) {
00243     double mbsz;
00244 
00245     fstream_init(fa, fb, fc, N);
00246 
00247     vmd_timer_start(timer);
00248     fstream_copy(fa, fb, N, &mbsz);
00249     vmd_timer_stop(timer);
00250     time[4] = vmd_timer_time(timer);
00251     mbsec[4] = mbsz / time[4];
00252 
00253     vmd_timer_start(timer);
00254     fstream_scale(fa, fb, 2.0, N, &mbsz);
00255     vmd_timer_stop(timer);
00256     time[5] = vmd_timer_time(timer);
00257     mbsec[5] = mbsz / time[5];
00258 
00259     vmd_timer_start(timer);
00260     fstream_add(fa, fb, fc, N, &mbsz);
00261     vmd_timer_stop(timer);
00262     time[6] = vmd_timer_time(timer);
00263     mbsec[6] = mbsz / time[6];
00264 
00265     vmd_timer_start(timer);
00266     fstream_triad(fa, fb, fc, 2.0, N, &mbsz);
00267     vmd_timer_stop(timer);
00268     time[7] = vmd_timer_time(timer);
00269     mbsec[7] = mbsz / time[7];
00270   } else {
00271     rc = -1;
00272   }
00273 
00274   if (fa)
00275     free(fa);
00276   if (fb)
00277     free(fb);
00278   if (fc)
00279     free(fc);
00280 
00281   vmd_timer_destroy(timer);
00282 
00283   return rc;
00284 }
00285 
00286 
00287 
00288 
00289 

Generated on Fri Aug 29 01:26:47 2008 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002