Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Related Pages  

cmd_vmdbench.C

Go to the documentation of this file.
00001 /***************************************************************************
00002  *cr
00003  *cr            (C) Copyright 1995-2011 The Board of Trustees of the
00004  *cr                        University of Illinois
00005  *cr                         All Rights Reserved
00006  *cr
00007  ***************************************************************************/
00008 
00009 /***************************************************************************
00010  * RCS INFORMATION:
00011  *
00012  *      $RCSfile: cmd_vmdbench.C,v $
00013  *      $Author: johns $        $Locker:  $             $State: Exp $
00014  *      $Revision: 1.22 $       $Date: 2010/12/16 04:08:54 $
00015  *
00016  ***************************************************************************
00017  * DESCRIPTION:
00018  *   text commands for benchmarking hardware performance
00019  ***************************************************************************/
00020 
00021 #include <tcl.h>
00022 #include <ctype.h>
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <string.h>
00026 #include "Benchmark.h"
00027 #include "config.h"
00028 #include "VMDApp.h"
00029 #include "TclCommands.h"
00030 #include "CUDAKernels.h"
00031 #include "CUDAAccel.h"
00032 #include "WKFThreads.h"
00033 
00034 static void cmd_vmdbench_usage(Tcl_Interp *interp) {
00035   Tcl_AppendResult(interp,
00036       "usage: vmdbench <command> [args...]\n"
00037       "vmdbench stream        [N]       - built-in STREAM memory bandwidth test\n",
00038       "vmdbench cudamadd      [devices] - CUDA multiply-add arithmetic (*)\n",
00039       "vmdbench cudabusbw     [devices] - CUDA host/device bus bandwidth (*)\n",
00040       "vmdbench cudaglobmembw [devices] - CUDA global memory bandwidth (*)\n",
00041       "vmdbench cudadevpool   [N]       - CUDA threadpool run-cycle latency (*)\n",
00042       "(*) Only available in CUDA-enabled builds of VMD\n",
00043       NULL);
00044 }
00045 
00046 int text_cmd_vmdbench(ClientData cd, Tcl_Interp *interp, int argc, 
00047                       const char *argv[]) {
00048 
00049   VMDApp *app = (VMDApp *)cd;
00050 
00051   if (argc == 1) {
00052     cmd_vmdbench_usage(interp);
00053     return TCL_ERROR;
00054   }
00055 
00056   if (argc >= 2) {
00057     if (!strupncmp(argv[1], "stream", CMDLEN)) {
00058       double times[8], mbsec[8];
00059       int N = 1024*1024 * 16;
00060 
00061       if (argc == 3) {
00062         if (Tcl_GetInt(interp, argv[2], &N) != TCL_OK) {
00063           Tcl_AppendResult(interp, " in vmdbench stream", NULL);
00064           return TCL_ERROR;
00065         }
00066       }
00067 
00068       int rc = stream_bench(N, times, mbsec);
00069       if (rc) {
00070         Tcl_AppendResult(interp,
00071           "unable to complete stream benchmark, out of memory", NULL);
00072         return TCL_ERROR;
00073       }
00074 
00075       Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00076       const char *benchnames[] = {
00077         "copy (double)",
00078         "scale (double)",
00079         "add (double)",
00080         "triad (double)",
00081         "copy (float)",
00082         "scale (float)",
00083         "add (float)",
00084         "triad (float)"
00085       };
00086 
00087       Tcl_Obj *colNameObj = Tcl_NewListObj(0, NULL);
00088       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Test", -1)); 
00089       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Time", -1)); 
00090       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("MB/sec", -1)); 
00091       Tcl_ListObjAppendElement(interp, tcl_result, colNameObj);
00092 
00093       int i;     
00094       for (i=0; i<8; i++) {
00095         Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00096         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewStringObj(benchnames[i], -1)); 
00097         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(times[i])); 
00098         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(mbsec[i])); 
00099         Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00100 
00101       }
00102       Tcl_SetObjResult(interp, tcl_result);
00103 
00104       return TCL_OK;
00105     } else if (!strupncmp(argv[1], "cudamadd", CMDLEN)) {
00106 #if defined(VMDCUDA)
00107       int numdevs, physnumdevs;
00108       int *devlist = NULL;
00109       vmd_cuda_num_devices(&physnumdevs);
00110       numdevs = physnumdevs;
00111 #if !defined(VMDTHREADS)
00112       numdevs = 1;
00113 #endif
00114 
00115       // handle optional device list arguments
00116       if (argc > 2) {
00117         if ((argc-2) > numdevs) {
00118           Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00119           return TCL_ERROR;
00120         } else {
00121           numdevs = argc-2;
00122         }
00123         devlist = (int *) malloc(numdevs * sizeof(int));
00124         int arg, dev;
00125         for (arg=0; arg<numdevs; arg++) {
00126           if (Tcl_GetInt(interp, argv[arg+2], &dev) != TCL_OK) {
00127             Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00128             free(devlist);
00129             return TCL_ERROR;
00130           }
00131           if (dev < 0 || dev >= physnumdevs) {
00132             Tcl_AppendResult(interp, "vmdbench: device argument out of range", NULL);
00133             free(devlist);
00134             return TCL_ERROR;
00135           }
00136           devlist[arg] = dev;
00137         } 
00138       }
00139 
00140       double *gflops = (double *) malloc(numdevs * sizeof(double));
00141       int testloops=1;
00142       if (getenv("VMDMADDLOOPS") != NULL)
00143         testloops = atoi(getenv("VMDMADDLOOPS"));
00144 
00145       vmd_cuda_madd_gflops(numdevs, devlist, gflops, testloops);
00146 
00147       Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00148       Tcl_Obj *colNameObj = Tcl_NewListObj(0, NULL);
00149       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device", -1));
00150       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("GFLOPS", -1));
00151       Tcl_ListObjAppendElement(interp, tcl_result, colNameObj);
00152 
00153       int i;
00154       for (i=0; i<numdevs; i++) {
00155         Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00156         if (devlist != NULL) 
00157           Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(devlist[i]));
00158         else
00159           Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(i));
00160         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(gflops[i]));
00161         Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00162       }
00163       Tcl_SetObjResult(interp, tcl_result);
00164 
00165       if (devlist)
00166         free(devlist);
00167 
00168       return TCL_OK;
00169 #else 
00170       Tcl_AppendResult(interp, "CUDA Acceleration not available in this build", NULL);
00171       return TCL_ERROR;
00172 #endif
00173     } else if (!strupncmp(argv[1], "cudabusbw", CMDLEN)) {
00174 #if defined(VMDCUDA)
00175       int numdevs, physnumdevs;
00176       int *devlist = NULL;
00177       vmd_cuda_num_devices(&physnumdevs);
00178       numdevs = physnumdevs;
00179 #if !defined(VMDTHREADS)
00180       numdevs = 1;
00181 #endif
00182 
00183       // handle optional device list arguments
00184       if (argc > 2) {
00185         if ((argc-2) > numdevs) {
00186           Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00187           return TCL_ERROR;
00188         } else {
00189           numdevs = argc-2;
00190         }
00191         devlist = (int *) malloc(numdevs * sizeof(int));
00192         int arg, dev;
00193         for (arg=0; arg<numdevs; arg++) {
00194           if (Tcl_GetInt(interp, argv[arg+2], &dev) != TCL_OK) {
00195             Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00196             free(devlist);
00197             return TCL_ERROR;
00198           }
00199           if (dev < 0 || dev >= physnumdevs) {
00200             Tcl_AppendResult(interp, "vmdbench: device argument out of range", NULL);
00201             free(devlist);
00202             return TCL_ERROR;
00203           }
00204           devlist[arg] = dev;
00205         } 
00206       }
00207 
00208       double *hdmbsec = (double *) malloc(numdevs * sizeof(double));
00209       double *hdlatusec = (double *) malloc(numdevs * sizeof(double));
00210       double *phdmbsec = (double *) malloc(numdevs * sizeof(double));
00211       double *phdlatusec = (double *) malloc(numdevs * sizeof(double));
00212       double *dhmbsec = (double *) malloc(numdevs * sizeof(double));
00213       double *dhlatusec = (double *) malloc(numdevs * sizeof(double));
00214       double *pdhmbsec = (double *) malloc(numdevs * sizeof(double));
00215       double *pdhlatusec = (double *) malloc(numdevs * sizeof(double));
00216 
00217       vmd_cuda_bus_bw(numdevs, devlist, 
00218                       hdmbsec, hdlatusec, phdmbsec, phdlatusec,
00219                       dhmbsec, dhlatusec, pdhmbsec, pdhlatusec);
00220 
00221       Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00222       Tcl_Obj *colNameObj = Tcl_NewListObj(0, NULL);
00223       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device", -1));
00224       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Host-device bandwidth (MB/sec)", -1));
00225       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Host-device latency (usec)", -1));
00226       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Host-device pinned bandwidth (MB/sec)", -1));
00227       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Host-device pinned latency (usec)", -1));
00228       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device-host bandwidth (MB/sec)", -1));
00229       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device-host latency (usec)", -1));
00230       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device-host pinned bandwidth (MB/sec)", -1));
00231       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device-host pinned latency (usec)", -1));
00232       Tcl_ListObjAppendElement(interp, tcl_result, colNameObj);
00233 
00234       int i;
00235       for (i=0; i<numdevs; i++) {
00236         Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00237         if (devlist != NULL) 
00238           Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(devlist[i]));
00239         else
00240           Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(i));
00241 
00242         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(hdmbsec[i]));
00243         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(hdlatusec[i]));
00244         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(phdmbsec[i]));
00245         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(phdlatusec[i]));
00246         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(dhmbsec[i]));
00247         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(dhlatusec[i]));
00248         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(pdhmbsec[i]));
00249         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(pdhlatusec[i]));
00250         Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00251       }
00252       Tcl_SetObjResult(interp, tcl_result);
00253       return TCL_OK;
00254 #else 
00255       Tcl_AppendResult(interp, "CUDA Acceleration not available in this build", NULL);
00256       return TCL_ERROR;
00257 #endif
00258     } else if (!strupncmp(argv[1], "cudaglobmembw", CMDLEN)) {
00259 #if defined(VMDCUDA)
00260       int numdevs, physnumdevs;
00261       int *devlist = NULL;
00262       vmd_cuda_num_devices(&physnumdevs);
00263       numdevs = physnumdevs;
00264 #if !defined(VMDTHREADS)
00265       numdevs = 1;
00266 #endif
00267 
00268       // handle optional device list arguments
00269       if (argc > 2) {
00270         if ((argc-2) > numdevs) {
00271           Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00272           return TCL_ERROR;
00273         } else {
00274           numdevs = argc-2;
00275         }
00276         devlist = (int *) malloc(numdevs * sizeof(int));
00277         int arg, dev;
00278         for (arg=0; arg<numdevs; arg++) {
00279           if (Tcl_GetInt(interp, argv[arg+2], &dev) != TCL_OK) {
00280             Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00281             free(devlist);
00282             return TCL_ERROR;
00283           }
00284           if (dev < 0 || dev >= physnumdevs) {
00285             Tcl_AppendResult(interp, "vmdbench: device argument out of range", NULL);
00286             free(devlist);
00287             return TCL_ERROR;
00288           }
00289           devlist[arg] = dev;
00290         } 
00291       }
00292 
00293       double *memsetgbsec = (double *) malloc(numdevs * sizeof(double));
00294       double *memcpygbsec = (double *) malloc(numdevs * sizeof(double));
00295 
00296       vmd_cuda_globmem_bw(numdevs, devlist, memsetgbsec, memcpygbsec);
00297 
00298       Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00299       Tcl_Obj *colNameObj = Tcl_NewListObj(0, NULL);
00300       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device", -1));
00301       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Memory set bandwidth (GB/sec)", -1));
00302       Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Memory copy bandwidth (GB/sec)", -1));
00303       Tcl_ListObjAppendElement(interp, tcl_result, colNameObj);
00304 
00305       int i;
00306       for (i=0; i<numdevs; i++) {
00307         Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00308         if (devlist != NULL) 
00309           Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(devlist[i]));
00310         else
00311           Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(i));
00312 
00313         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(memsetgbsec[i]));
00314         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(memcpygbsec[i]));
00315         Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00316       }
00317       Tcl_SetObjResult(interp, tcl_result);
00318       return TCL_OK;
00319 #else 
00320       Tcl_AppendResult(interp, "CUDA Acceleration not available in this build", NULL);
00321       return TCL_ERROR;
00322 #endif
00323     } else if (!strupncmp(argv[1], "cudadevpool", CMDLEN)) {
00324 #if defined(VMDCUDA)
00325       int N=1;
00326       if (argc == 3) {
00327         if (Tcl_GetInt(interp, argv[2], &N) != TCL_OK) {
00328           Tcl_AppendResult(interp, " in vmdbench cudadevpool", NULL);
00329           return TCL_ERROR;
00330         }
00331       }
00332 
00333       wkf_threadpool_t * devpool = app->cuda->get_cuda_devpool();
00334       Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00335       Tcl_ListObjAppendElement(interp, tcl_result, Tcl_NewStringObj("Empty kernel launch latency (usec)", -1));
00336       Tcl_ListObjAppendElement(interp, tcl_result, Tcl_NewStringObj("Device pool barrier latency (usec)", -1));
00337       Tcl_ListObjAppendElement(interp, tcl_result, Tcl_NewStringObj("Device pool empty run cycle latency (usec)", -1));
00338       Tcl_ListObjAppendElement(interp, tcl_result, Tcl_NewStringObj("Device pool tile run latency (usec)", -1));
00339       Tcl_ListObjAppendElement(interp, tcl_result, Tcl_NewStringObj("Device pool GPU kernel tile latency (usec)", -1));
00340 
00341       int i;
00342       double kernlaunchlatency, barlatency;
00343       double cyclelatency, tilelatency;
00344       double kernellatency;
00345       for (i=0; i<2; i++) {
00346         vmd_cuda_devpool_latency(devpool, N, &kernlaunchlatency,
00347                                  &barlatency, &cyclelatency, 
00348                                  &tilelatency, &kernellatency);
00349 
00350         // do one warmup pass before we report the benchmark numbers
00351         if (i < 1)
00352           continue;
00353 
00354         // report the results
00355         Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00356         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(kernlaunchlatency*1000000));
00357         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(barlatency*1000000));
00358         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(cyclelatency*1000000));
00359         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(tilelatency*1000000));
00360         Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(kernellatency*1000000));
00361         Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00362       }
00363 
00364       Tcl_SetObjResult(interp, tcl_result);
00365       return TCL_OK;
00366 #else 
00367       Tcl_AppendResult(interp, "CUDA Acceleration not available in this build", NULL);
00368       return TCL_ERROR;
00369 #endif
00370 
00371     } else {
00372       cmd_vmdbench_usage(interp);
00373       return TCL_ERROR;
00374     }
00375   } else {
00376     cmd_vmdbench_usage(interp);
00377     return TCL_ERROR;
00378   }
00379   
00380   // if here, everything worked out ok
00381   return TCL_OK;
00382 }
00383 
00384 

Generated on Sat May 26 01:47:46 2012 for VMD (current) by doxygen 1.2.14, written by Dimitri van Heesch, © 1997-2002