00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include "utilities.h"
00032
00033
00034
00035
00036
00037
00038
00039
/*
 * RESTRICT is placed on the pointer parameters of the stream kernels.
 * With the condition below left at 0 it expands to nothing; changing the
 * condition to a non-zero value makes it expand to the 'restrict' keyword.
 */
#if 0
#  define RESTRICT restrict
#else
#  define RESTRICT
#endif
00045
00046
00047
00048
00049
00050
/*
 * VECTORIZEME is written immediately before each kernel loop.
 * With the condition below left at 0 it expands to nothing; changing the
 * condition to a non-zero value makes it emit a "vector always" pragma
 * ahead of the loop (a compiler-specific vectorization hint).
 */
#if 0
#  define VECTORIZEME _Pragma("vector always")
#else
#  define VECTORIZEME
#endif
00056
00057
00058
00059
00060
00061
00062 void dstream_init(double * RESTRICT a, double * RESTRICT b,
00063 double * RESTRICT c, int N) {
00064 int j;
00065 VECTORIZEME
00066 for (j=0; j<N; j++) {
00067 a[j] = 1.0;
00068 b[j] = 2.0;
00069 c[j] = 0.0;
00070 }
00071 }
00072
00073 void dstream_copy(double * RESTRICT a, const double * RESTRICT b,
00074 int N, double *mbsize) {
00075 int j;
00076 VECTORIZEME
00077 for (j=0; j<N; j++)
00078 a[j] = b[j];
00079
00080 *mbsize = (2 * sizeof(double) * N) / (1024.0 * 1024.0);
00081 }
00082
00083 void dstream_scale(double * RESTRICT a, const double * RESTRICT b,
00084 double scalar, int N, double *mbsize) {
00085 int j;
00086 VECTORIZEME
00087 for (j=0; j<N; j++)
00088 a[j] = scalar * b[j];
00089
00090 *mbsize = (2 * sizeof(double) * N) / (1024.0 * 1024.0);
00091 }
00092
00093 void dstream_add(double * RESTRICT a, const double * RESTRICT b,
00094 const double * RESTRICT c, int N, double *mbsize) {
00095 int j;
00096 VECTORIZEME
00097 for (j=0; j<N; j++)
00098 a[j] = b[j] + c[j];
00099
00100 *mbsize = (3 * sizeof(double) * N) / (1024.0 * 1024.0);
00101 }
00102
00103 void dstream_triad(double * RESTRICT a, const double * RESTRICT b,
00104 const double * RESTRICT c, double scalar, int N,
00105 double *mbsize) {
00106 int j;
00107 VECTORIZEME
00108 for (j=0; j<N; j++)
00109 a[j] = b[j] + scalar * c[j];
00110
00111 *mbsize = (3 * sizeof(double) * N) / (1024.0 * 1024.0);
00112 }
00113
00114
00115
00116
00117
00118
00119
00120 void fstream_init(float * RESTRICT a, float * RESTRICT b,
00121 float * RESTRICT c, int N) {
00122 int j;
00123 VECTORIZEME
00124 for (j=0; j<N; j++) {
00125 a[j] = 1.0f;
00126 b[j] = 2.0f;
00127 c[j] = 0.0f;
00128 }
00129 }
00130
00131 void fstream_copy(float * RESTRICT a, const float * RESTRICT b,
00132 int N, double *mbsize) {
00133 int j;
00134 VECTORIZEME
00135 for (j=0; j<N; j++)
00136 a[j] = b[j];
00137
00138 *mbsize = (2 * sizeof(float) * N) / (1024.0 * 1024.0);
00139 }
00140
00141 void fstream_scale(float * RESTRICT a, const float * RESTRICT b,
00142 float scalar, int N, double *mbsize) {
00143 int j;
00144 VECTORIZEME
00145 for (j=0; j<N; j++)
00146 a[j] = scalar * b[j];
00147
00148 *mbsize = (2 * sizeof(float) * N) / (1024.0 * 1024.0);
00149 }
00150
00151 void fstream_add(float * RESTRICT a, const float * RESTRICT b,
00152 const float * RESTRICT c, int N, double *mbsize) {
00153 int j;
00154 VECTORIZEME
00155 for (j=0; j<N; j++)
00156 a[j] = b[j] + c[j];
00157
00158 *mbsize = (3 * sizeof(float) * N) / (1024.0 * 1024.0);
00159 }
00160
00161 void fstream_triad(float * RESTRICT a, const float * RESTRICT b,
00162 const float * RESTRICT c, float scalar, int N,
00163 double *mbsize) {
00164 int j;
00165 VECTORIZEME
00166 for (j=0; j<N; j++)
00167 a[j] = b[j] + scalar * c[j];
00168
00169 *mbsize = (3 * sizeof(float) * N) / (1024.0 * 1024.0);
00170 }
00171
00172
00173
00174
00175
00176 int stream_bench(int N, double *time, double *mbsec) {
00177 double *da, *db, *dc;
00178 float *fa, *fb, *fc;
00179 vmd_timerhandle timer;
00180 int rc = 0;
00181
00182 timer = vmd_timer_create();
00183
00184
00185
00186
00187 da = (double *) malloc(N * sizeof(double));
00188 db = (double *) malloc(N * sizeof(double));
00189 dc = (double *) malloc(N * sizeof(double));
00190
00191 if ((da != NULL) && (db != NULL) && (dc != NULL)) {
00192 double mbsz;
00193
00194 dstream_init(da, db, dc, N);
00195
00196 vmd_timer_start(timer);
00197 dstream_copy(da, db, N, &mbsz);
00198 vmd_timer_stop(timer);
00199 time[0] = vmd_timer_time(timer);
00200 mbsec[0] = mbsz / time[0];
00201
00202 vmd_timer_start(timer);
00203 dstream_scale(da, db, 2.0, N, &mbsz);
00204 vmd_timer_stop(timer);
00205 time[1] = vmd_timer_time(timer);
00206 mbsec[1] = mbsz / time[1];
00207
00208 vmd_timer_start(timer);
00209 dstream_add(da, db, dc, N, &mbsz);
00210 vmd_timer_stop(timer);
00211 time[2] = vmd_timer_time(timer);
00212 mbsec[2] = mbsz / time[2];
00213
00214 vmd_timer_start(timer);
00215 dstream_triad(da, db, dc, 2.0, N, &mbsz);
00216 vmd_timer_stop(timer);
00217 time[3] = vmd_timer_time(timer);
00218 mbsec[3] = mbsz / time[3];
00219 } else {
00220 rc = -1;
00221 }
00222
00223 if (da)
00224 free(da);
00225 if (db)
00226 free(db);
00227 if (dc)
00228 free(dc);
00229
00230 if (rc) {
00231 vmd_timer_destroy(timer);
00232 return rc;
00233 }
00234
00235
00236
00237
00238 fa = (float *) malloc(N * sizeof(float));
00239 fb = (float *) malloc(N * sizeof(float));
00240 fc = (float *) malloc(N * sizeof(float));
00241
00242 if ((fa != NULL) && (fb != NULL) && (fc != NULL)) {
00243 double mbsz;
00244
00245 fstream_init(fa, fb, fc, N);
00246
00247 vmd_timer_start(timer);
00248 fstream_copy(fa, fb, N, &mbsz);
00249 vmd_timer_stop(timer);
00250 time[4] = vmd_timer_time(timer);
00251 mbsec[4] = mbsz / time[4];
00252
00253 vmd_timer_start(timer);
00254 fstream_scale(fa, fb, 2.0, N, &mbsz);
00255 vmd_timer_stop(timer);
00256 time[5] = vmd_timer_time(timer);
00257 mbsec[5] = mbsz / time[5];
00258
00259 vmd_timer_start(timer);
00260 fstream_add(fa, fb, fc, N, &mbsz);
00261 vmd_timer_stop(timer);
00262 time[6] = vmd_timer_time(timer);
00263 mbsec[6] = mbsz / time[6];
00264
00265 vmd_timer_start(timer);
00266 fstream_triad(fa, fb, fc, 2.0, N, &mbsz);
00267 vmd_timer_stop(timer);
00268 time[7] = vmd_timer_time(timer);
00269 mbsec[7] = mbsz / time[7];
00270 } else {
00271 rc = -1;
00272 }
00273
00274 if (fa)
00275 free(fa);
00276 if (fb)
00277 free(fb);
00278 if (fc)
00279 free(fc);
00280
00281 vmd_timer_destroy(timer);
00282
00283 return rc;
00284 }
00285
00286
00287
00288
00289