00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <tcl.h>
00022 #include <ctype.h>
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <string.h>
00026 #include "Benchmark.h"
00027 #include "config.h"
00028 #include "VMDApp.h"
00029 #include "TclCommands.h"
00030 #include "CUDAKernels.h"
00031 #include "VMDThreads.h"
00032
00033 static void cmd_vmdbench_usage(Tcl_Interp *interp) {
00034 Tcl_AppendResult(interp,
00035 "usage: vmdbench <command> [args...]\n"
00036 "vmdbench stream [N] -- built-in STREAM memory bandwidth test\n",
00037 "vmdbench cudamadd [devices] -- CUDA multiply-add arithmetic (*)\n",
00038 "vmdbench cudabusbw [devices] -- CUDA host/device bus bandwidth (*)\n",
00039 "(*) Only available in CUDA-enabled builds of VMD\n",
00040 NULL);
00041 }
00042
00043 int text_cmd_vmdbench(ClientData cd, Tcl_Interp *interp, int argc,
00044 const char *argv[]) {
00045
00046
00047
00048 if (argc == 1) {
00049 cmd_vmdbench_usage(interp);
00050 return TCL_ERROR;
00051 }
00052
00053 if (argc >= 2) {
00054 if (!strupncmp(argv[1], "stream", CMDLEN)) {
00055 double times[8], mbsec[8];
00056 int N = 1024*1024 * 16;
00057
00058 if (argc == 3) {
00059 if (Tcl_GetInt(interp, argv[2], &N) != TCL_OK) {
00060 Tcl_AppendResult(interp, " in vmdbench stream", NULL);
00061 return TCL_ERROR;
00062 }
00063 }
00064
00065 int rc = stream_bench(N, times, mbsec);
00066 if (rc) {
00067 Tcl_AppendResult(interp,
00068 "unable to complete stream benchmark, out of memory", NULL);
00069 return TCL_ERROR;
00070 }
00071
00072 Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00073 const char *benchnames[] = {
00074 "copy (double)",
00075 "scale (double)",
00076 "add (double)",
00077 "triad (double)",
00078 "copy (float)",
00079 "scale (float)",
00080 "add (float)",
00081 "triad (float)"
00082 };
00083
00084 Tcl_Obj *colNameObj = Tcl_NewListObj(0, NULL);
00085 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Test", -1));
00086 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Time", -1));
00087 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("MB/sec", -1));
00088 Tcl_ListObjAppendElement(interp, tcl_result, colNameObj);
00089
00090 int i;
00091 for (i=0; i<8; i++) {
00092 Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00093 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewStringObj(benchnames[i], -1));
00094 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(times[i]));
00095 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(mbsec[i]));
00096 Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00097
00098 }
00099 Tcl_SetObjResult(interp, tcl_result);
00100
00101 return TCL_OK;
00102 } else if (!strupncmp(argv[1], "cudamadd", CMDLEN)) {
00103 #if defined(VMDCUDA)
00104 int numdevs, physnumdevs;
00105 int *devlist = NULL;
00106 vmd_cuda_num_devices(&physnumdevs);
00107 numdevs = physnumdevs;
00108 #if !defined(VMDTHREADS)
00109 numdevs = 1;
00110 #endif
00111
00112
00113 if (argc > 2) {
00114 if ((argc-2) > numdevs) {
00115 Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00116 return TCL_ERROR;
00117 } else {
00118 numdevs = argc-2;
00119 }
00120 devlist = (int *) malloc(numdevs * sizeof(int));
00121 int arg, dev;
00122 for (arg=0; arg<numdevs; arg++) {
00123 if (Tcl_GetInt(interp, argv[arg+2], &dev) != TCL_OK) {
00124 Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00125 free(devlist);
00126 return TCL_ERROR;
00127 }
00128 if (dev < 0 || dev >= physnumdevs) {
00129 Tcl_AppendResult(interp, "vmdbench: device argument out of range", NULL);
00130 free(devlist);
00131 return TCL_ERROR;
00132 }
00133 devlist[arg] = dev;
00134 }
00135 }
00136
00137 double *gflops = (double *) malloc(numdevs * sizeof(double));
00138 vmd_cuda_madd_gflops(numdevs, devlist, gflops);
00139
00140 Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00141 Tcl_Obj *colNameObj = Tcl_NewListObj(0, NULL);
00142 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device", -1));
00143 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("GFLOPS", -1));
00144 Tcl_ListObjAppendElement(interp, tcl_result, colNameObj);
00145
00146 int i;
00147 for (i=0; i<numdevs; i++) {
00148 Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00149 if (devlist != NULL)
00150 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(devlist[i]));
00151 else
00152 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(i));
00153 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(gflops[i]));
00154 Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00155 }
00156 Tcl_SetObjResult(interp, tcl_result);
00157
00158 if (devlist)
00159 free(devlist);
00160
00161 return TCL_OK;
00162 #else
00163 Tcl_AppendResult(interp, "CUDA Acceleration not available in this build", NULL);
00164 return TCL_ERROR;
00165 #endif
00166 } else if (!strupncmp(argv[1], "cudabusbw", CMDLEN)) {
00167 #if defined(VMDCUDA)
00168 int numdevs, physnumdevs;
00169 int *devlist = NULL;
00170 vmd_cuda_num_devices(&physnumdevs);
00171 numdevs = physnumdevs;
00172 #if !defined(VMDTHREADS)
00173 numdevs = 1;
00174 #endif
00175
00176
00177 if (argc > 2) {
00178 if ((argc-2) > numdevs) {
00179 Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00180 return TCL_ERROR;
00181 } else {
00182 numdevs = argc-2;
00183 }
00184 devlist = (int *) malloc(numdevs * sizeof(int));
00185 int arg, dev;
00186 for (arg=0; arg<numdevs; arg++) {
00187 if (Tcl_GetInt(interp, argv[arg+2], &dev) != TCL_OK) {
00188 Tcl_AppendResult(interp, "vmdbench: bad device argument", NULL);
00189 free(devlist);
00190 return TCL_ERROR;
00191 }
00192 if (dev < 0 || dev >= physnumdevs) {
00193 Tcl_AppendResult(interp, "vmdbench: device argument out of range", NULL);
00194 free(devlist);
00195 return TCL_ERROR;
00196 }
00197 devlist[arg] = dev;
00198 }
00199 }
00200
00201 double *hdmbsec = (double *) malloc(numdevs * sizeof(double));
00202 double *phdmbsec = (double *) malloc(numdevs * sizeof(double));
00203 double *dhmbsec = (double *) malloc(numdevs * sizeof(double));
00204 double *pdhmbsec = (double *) malloc(numdevs * sizeof(double));
00205
00206 vmd_cuda_bus_bw(numdevs, devlist, hdmbsec, phdmbsec, dhmbsec, pdhmbsec);
00207
00208 Tcl_Obj *tcl_result = Tcl_NewListObj(0, NULL);
00209 Tcl_Obj *colNameObj = Tcl_NewListObj(0, NULL);
00210 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device", -1));
00211 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Host-device bandwidth (MB/sec)", -1));
00212 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Host-device pinned bandwidth (MB/sec)", -1));
00213 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device-host bandwidth (MB/sec)", -1));
00214 Tcl_ListObjAppendElement(interp, colNameObj, Tcl_NewStringObj("Device-host pinned bandwidth (MB/sec)", -1));
00215 Tcl_ListObjAppendElement(interp, tcl_result, colNameObj);
00216
00217 int i;
00218 for (i=0; i<numdevs; i++) {
00219 Tcl_Obj *rowListObj = Tcl_NewListObj(0, NULL);
00220 if (devlist != NULL)
00221 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(devlist[i]));
00222 else
00223 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewIntObj(i));
00224
00225 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(hdmbsec[i]));
00226 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(phdmbsec[i]));
00227 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(dhmbsec[i]));
00228 Tcl_ListObjAppendElement(interp, rowListObj, Tcl_NewDoubleObj(pdhmbsec[i]));
00229 Tcl_ListObjAppendElement(interp, tcl_result, rowListObj);
00230 }
00231 Tcl_SetObjResult(interp, tcl_result);
00232 return TCL_OK;
00233 #else
00234 Tcl_AppendResult(interp, "CUDA Acceleration not available in this build", NULL);
00235 return TCL_ERROR;
00236 #endif
00237 } else {
00238 cmd_vmdbench_usage(interp);
00239 return TCL_ERROR;
00240 }
00241 } else {
00242 cmd_vmdbench_usage(interp);
00243 return TCL_ERROR;
00244 }
00245
00246
00247 return TCL_OK;
00248 }
00249
00250