Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

Benchmark.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2019 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: Benchmark.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.13 $      $Date: 2020/07/28 08:21:19 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *
00019  * Various CPU/memory subsystem benchmarking routines.
00020  * The peak performance numbers achieved within a VMD build can be 
00021  * used to determine how well the VMD build was optimized, the 
00022  * performance of the host CPU/memory systems, SMP scaling efficiency, etc.
00023  *
00024  * The streaming memory bandwidth tests are an alternative implementation 
00025  * of McCalpin's STREAM benchmark.
00026  *
00027  ***************************************************************************/
00028 
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include "WKFUtils.h"
00032 #include "WKFThreads.h"
00033 #include "utilities.h"
00034 
00035 /*
00036  * On compilers that accept the C99 'restrict' keyword, we can give
00037  * the compiler additional help with optimization.  Since the caller is
00038  * contained within the same source file, this shouldn't be necessary
00039  * in the current case however. 
00040  */
00041 #if 0
00042 #define RESTRICT restrict
00043 #else
00044 #define RESTRICT 
00045 #endif
00046 
00047 /*
00048  * If we want, we can create compiler-specific vectorization 
00049  * helper macros to assist with achieving peak performance, though 
00050  * this really shouldn't be required.
00051  */
00052 #if 0
00053 #define VECTORIZEME _Pragma("vector always")
00054 #else
00055 #define VECTORIZEME 
00056 #endif
00057 
00058 
00059 /*
00060  * Double precision stream bandwidth tests
00061  */
00062 
00063 void dstream_init(double * RESTRICT a, double * RESTRICT b,
00064                   double * RESTRICT c, int N) {
00065   int j;
00066 VECTORIZEME
00067   for (j=0; j<N; j++) {
00068     a[j] = 1.0;
00069     b[j] = 2.0;
00070     c[j] = 0.0;
00071   }
00072 }
00073 
00074 void dstream_copy(double * RESTRICT a, const double * RESTRICT b, 
00075                  int N, double *mbsize) {
00076   int j;
00077 VECTORIZEME
00078   for (j=0; j<N; j++)
00079     a[j] = b[j];
00080 
00081   *mbsize = (2L * sizeof(double) * N) / (1024.0 * 1024.0);
00082 }
00083 
00084 void dstream_scale(double * RESTRICT a, const double * RESTRICT b, 
00085                   double scalar, int N, double *mbsize) {
00086   int j;
00087 VECTORIZEME
00088   for (j=0; j<N; j++)
00089     a[j] = scalar * b[j];
00090 
00091   *mbsize = (2L * sizeof(double) * N) / (1024.0 * 1024.0);
00092 }
00093 
00094 void dstream_add(double * RESTRICT a, const double * RESTRICT b, 
00095                 const double * RESTRICT c, int N, double *mbsize) {
00096   int j;
00097 VECTORIZEME
00098   for (j=0; j<N; j++)
00099     a[j] = b[j] + c[j];
00100 
00101   *mbsize = (3L * sizeof(double) * N) / (1024.0 * 1024.0);
00102 }
00103 
00104 void dstream_triad(double * RESTRICT a, const double * RESTRICT b, 
00105                   const double * RESTRICT c, double scalar, int N, 
00106                   double *mbsize) {
00107   int j;
00108 VECTORIZEME
00109   for (j=0; j<N; j++)
00110     a[j] = b[j] + scalar * c[j];
00111 
00112   *mbsize = (3L * sizeof(double) * N) / (1024.0 * 1024.0);
00113 }
00114 
00115 
00116 
00117 /*
00118  * Single precision stream bandwidth tests
00119  */
00120 
00121 void fstream_init(float * RESTRICT a, float * RESTRICT b,
00122                   float * RESTRICT c, int N) {
00123   int j;
00124 VECTORIZEME
00125   for (j=0; j<N; j++) {
00126     a[j] = 1.0f;
00127     b[j] = 2.0f;
00128     c[j] = 0.0f;
00129   }
00130 }
00131 
00132 void fstream_copy(float * RESTRICT a, const float * RESTRICT b, 
00133                  int N, double *mbsize) {
00134   int j;
00135 VECTORIZEME
00136   for (j=0; j<N; j++)
00137     a[j] = b[j];
00138 
00139   *mbsize = (2L * sizeof(float) * N) / (1024.0 * 1024.0);
00140 }
00141 
00142 void fstream_scale(float * RESTRICT a, const float * RESTRICT b, 
00143                    float scalar, int N, double *mbsize) {
00144   int j;
00145 VECTORIZEME
00146   for (j=0; j<N; j++)
00147     a[j] = scalar * b[j];
00148 
00149   *mbsize = (2L * sizeof(float) * N) / (1024.0 * 1024.0);
00150 }
00151 
00152 void fstream_add(float * RESTRICT a, const float * RESTRICT b, 
00153                  const float * RESTRICT c, int N, double *mbsize) {
00154   int j;
00155 VECTORIZEME
00156   for (j=0; j<N; j++)
00157     a[j] = b[j] + c[j];
00158 
00159   *mbsize = (3L * sizeof(float) * N) / (1024.0 * 1024.0);
00160 }
00161 
00162 void fstream_triad(float * RESTRICT a, const float * RESTRICT b, 
00163                   const float * RESTRICT c, float scalar, int N, 
00164                   double *mbsize) {
00165   int j;
00166 VECTORIZEME
00167   for (j=0; j<N; j++)
00168     a[j] = b[j] + scalar * c[j];
00169 
00170   *mbsize = (3L * sizeof(float) * N) / (1024.0 * 1024.0);
00171 }
00172 
00173 
00174 /*
00175  * run the benchmark
00176  */
00177 int stream_bench(int N, double *time, double *mbsec) {
00178   double *da, *db, *dc;
00179   float *fa, *fb, *fc;
00180   wkf_timerhandle timer;
00181   int rc = 0;
00182 
00183   timer = wkf_timer_create();
00184 
00185   /*
00186    * run double precision benchmarks
00187    */
00188   da = (double *) malloc(N * sizeof(double));
00189   db = (double *) malloc(N * sizeof(double));
00190   dc = (double *) malloc(N * sizeof(double));
00191 
00192   if ((da != NULL) && (db != NULL) && (dc != NULL)) {
00193     double mbsz;
00194 
00195     dstream_init(da, db, dc, N);
00196 
00197     wkf_timer_start(timer);
00198     dstream_copy(da, db, N, &mbsz);
00199     wkf_timer_stop(timer);
00200     time[0] = wkf_timer_time(timer);
00201     mbsec[0] = mbsz / time[0];
00202 
00203     wkf_timer_start(timer);
00204     dstream_scale(da, db, 2.0, N, &mbsz);
00205     wkf_timer_stop(timer);
00206     time[1] = wkf_timer_time(timer);
00207     mbsec[1] = mbsz / time[1];
00208 
00209     wkf_timer_start(timer);
00210     dstream_add(da, db, dc, N, &mbsz);
00211     wkf_timer_stop(timer);
00212     time[2] = wkf_timer_time(timer);
00213     mbsec[2] = mbsz / time[2];
00214 
00215     wkf_timer_start(timer);
00216     dstream_triad(da, db, dc, 2.0, N, &mbsz);
00217     wkf_timer_stop(timer);
00218     time[3] = wkf_timer_time(timer);
00219     mbsec[3] = mbsz / time[3];
00220   } else {
00221     rc = -1;
00222   }
00223 
00224   if (da)
00225     free(da);
00226   if (db)
00227     free(db);
00228   if (dc)
00229     free(dc);
00230 
00231   if (rc) {
00232     wkf_timer_destroy(timer);
00233     return rc;
00234   }
00235 
00236   /*
00237    * run float precision benchmarks
00238    */
00239   fa = (float *) malloc(N * sizeof(float));
00240   fb = (float *) malloc(N * sizeof(float));
00241   fc = (float *) malloc(N * sizeof(float));
00242 
00243   if ((fa != NULL) && (fb != NULL) && (fc != NULL)) {
00244     double mbsz;
00245 
00246     fstream_init(fa, fb, fc, N);
00247 
00248     wkf_timer_start(timer);
00249     fstream_copy(fa, fb, N, &mbsz);
00250     wkf_timer_stop(timer);
00251     time[4] = wkf_timer_time(timer);
00252     mbsec[4] = mbsz / time[4];
00253 
00254     wkf_timer_start(timer);
00255     fstream_scale(fa, fb, 2.0, N, &mbsz);
00256     wkf_timer_stop(timer);
00257     time[5] = wkf_timer_time(timer);
00258     mbsec[5] = mbsz / time[5];
00259 
00260     wkf_timer_start(timer);
00261     fstream_add(fa, fb, fc, N, &mbsz);
00262     wkf_timer_stop(timer);
00263     time[6] = wkf_timer_time(timer);
00264     mbsec[6] = mbsz / time[6];
00265 
00266     wkf_timer_start(timer);
00267     fstream_triad(fa, fb, fc, 2.0, N, &mbsz);
00268     wkf_timer_stop(timer);
00269     time[7] = wkf_timer_time(timer);
00270     mbsec[7] = mbsz / time[7];
00271   } else {
00272     rc = -1;
00273   }
00274 
00275   if (fa)
00276     free(fa);
00277   if (fb)
00278     free(fb);
00279   if (fc)
00280     free(fc);
00281 
00282   wkf_timer_destroy(timer);
00283 
00284   return rc;
00285 }
00286 
00287 
00288 
00289 void vmdbench_minmax_1fv(int sz, int reps, double &runtime, double &bwmbsec) {
00290   int i;
00291   float minf=0, maxf=0;
00292 
00293   // generate trivial test array
00294   float *fv = (float *) malloc(sz * sizeof(float));
00295   for (i=0; i<sz; i++) {
00296     fv[i] = (float) i;
00297   }
00298 
00299   wkf_timerhandle timer;
00300   timer = wkf_timer_create();
00301   wkf_timer_start(timer);
00302   int r;
00303   for (r=0; r<reps; r++)
00304     minmax_1fv_aligned(fv, sz, &minf, &maxf);
00305   wkf_timer_stop(timer);
00306   runtime = wkf_timer_time(timer);
00307 
00308 //  printf("minmax_1fv_aligned: %f\n", runtime);
00309 //  printf("  min: %f max: %f\n", minf, maxf);
00310 
00311   bwmbsec = (reps * sz * sizeof(float) / (1024.0 * 1024.0)) / runtime;
00312 //  printf("  BW: %.1f MB/sec\n", bwmbsec);
00313 
00314   free(fv);
00315   wkf_timer_destroy(timer);
00316 }
00317 
00318 
00319 void vmdbench_minmaxmean_1fv(int sz, int reps,
00320                              double &runtime, double &bwmbsec) {
00321   int i;
00322   float minf=0, maxf=0, meanf=0;
00323 
00324   // generate trivial test array
00325   float *fv = (float *) malloc(sz * sizeof(float));
00326   for (i=0; i<sz; i++) {
00327     fv[i] = (float) i;
00328   }
00329 
00330   wkf_timerhandle timer;
00331   timer = wkf_timer_create();
00332   wkf_timer_start(timer);
00333   int r;
00334   for (r=0; r<reps; r++)
00335     minmaxmean_1fv_aligned(fv, sz, &minf, &maxf, &meanf);
00336   wkf_timer_stop(timer);
00337   runtime = wkf_timer_time(timer);
00338 
00339 //  printf("minmaxmean_1fv_aligned: %f\n", runtime);
00340 //  printf("  min: %f max: %f mean: %f\n", minf, maxf, meanf);
00341 
00342   bwmbsec = (reps * sz * sizeof(float) / (1024.0 * 1024.0)) / runtime;
00343 //  printf("  BW: %.1f MB/sec\n", bw);
00344 
00345   free(fv);
00346   wkf_timer_destroy(timer);
00347 }
00348 
00349 
00350 void vmdbench_minmax_3fv(int sz, int reps, double &runtime, double &bwmbsec) {
00351   int i;
00352   float minfv[3] = { 0 }, maxfv[3] = { 0 };
00353 
00354   // generate trivial test array
00355   float *fv = (float *) malloc(3L * sz * sizeof(float));
00356   for (i=0; i<sz * 3L; i++) {
00357     fv[i] = (float) i;
00358   }
00359 
00360   wkf_timerhandle timer;
00361   timer = wkf_timer_create();
00362   wkf_timer_start(timer);
00363   int r;
00364   for (r=0; r<reps; r++)
00365     minmax_3fv_aligned(fv, sz, minfv, maxfv);
00366   wkf_timer_stop(timer);
00367   runtime = wkf_timer_time(timer);
00368 
00369 //  printf("minmax_3fv_aligned: %f\n", wkf_timer_time(timer));
00370 //  int i;
00371 //  for (i=0; i<3; i++) {
00372 //    minf += minfv[i];
00373 //    maxf += maxfv[i];
00374 //  }
00375 //  printf("  min: %f max: %f\n", minf, maxf);
00376 
00377   bwmbsec = (reps * 3L * sz * sizeof(float) / (1024.0 * 1024.0)) / runtime;
00378 //  printf("  BW: %.1f MB/sec\n", bwmbsec);
00379 
00380   free(fv);
00381   wkf_timer_destroy(timer);
00382 }
00383 
00384 
00385 void vmdbench_analyze_selection(int sz, int reps,
00386                                 double &runtime, double &bwmbsec) {
00387   int i;
00388   int first=0, last=-1, selected=0;
00389   int *on = (int *) calloc(1, sz * sizeof(int));
00390 
00391   // set one atom per group of 8 rotating through all lanes
00392   int lane=0;
00393   for (i=sz/2; i<(sz-7); i+=8) {
00394     on[i+lane] = 1;
00395     lane = (lane+1) & 0x7; // swizzle through lanes
00396   }
00397 
00398   wkf_timerhandle timer;
00399   timer = wkf_timer_create();
00400   wkf_timer_start(timer);
00401   int r;
00402   for (r=0; r<reps; r++)
00403     analyze_selection_aligned(sz, on, &first, &last, &selected);
00404   wkf_timer_stop(timer);
00405   runtime = wkf_timer_time(timer);
00406 
00407 //  printf("selection stats: first %d  last %d  sel %d  time: %f\n",
00408 //         first, last, selected, runtime);
00409   bwmbsec = (reps * sz * sizeof(int) / (1024.0 * 1024.0)) / runtime;
00410 //  printf("  BW: %.1f MB/sec\n", bwmbsec);
00411 
00412   free(on);
00413   wkf_timer_destroy(timer);
00414 }
00415 
00416 
00417 
00418 
00419 
00420 

Generated on Thu Apr 18 02:44:01 2024 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002