Updated the timing report and benchmark scripts. - Instead of averaging 7 queries for the benchmark, we average 3 queries. - The timing report includes the coefficient of variation to alert when the test has been compromised.
Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/b611a895 Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/b611a895 Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/b611a895 Branch: refs/heads/master Commit: b611a895585d615106291ed803535e651e345f57 Parents: 49ce42c Author: Preston Carman <[email protected]> Authored: Tue Apr 22 16:39:37 2014 -0700 Committer: Preston Carman <[email protected]> Committed: Thu May 8 14:15:35 2014 -0700 ---------------------------------------------------------------------- .../noaa-ghcn-daily/scripts/run_benchmark.sh | 4 +- .../scripts/run_benchmark_cluster.sh | 51 ++++++++-------- .../java/org/apache/vxquery/cli/VXQuery.java | 62 +++++++++++++------- 3 files changed, 71 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/b611a895/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh index 2dd070c..6e1e7b7 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh @@ -25,6 +25,8 @@ # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138" # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03 # +REPEAT=5 +FRAME_SIZE=10000 if [ -z "${1}" ] then @@ -40,7 +42,7 @@ do log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" log_base_path=$(dirname ${j/queries/query_logs}) mkdir -p ${log_base_path} - time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} 
${2} -timing -showquery -showoet -showrp -frame-size 10000 -repeatexec 10 > ${log_base_path}/${log_file} 2>&1 + time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 fi; done http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/b611a895/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh index a77f3c2..b4a770d 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh @@ -25,8 +25,8 @@ # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138" # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03 # - -CLUSTER_COUNT=5 +REPEAT=5 +FRAME_SIZE=10000 if [ -z "${1}" ] then @@ -34,30 +34,33 @@ then exit fi -# Run queries for each number of nodes. -for (( i = 0; i < ${CLUSTER_COUNT}; i++ )) -do - echo "Starting ${i} cluster nodes" - python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${i}nodes.xml -a start +if [ -z "${2}" ] +then + echo "Please the number of nodes (start at 0)." + exit +fi + +# Run queries for the specified number of nodes. +echo "Starting ${2} cluster nodes" +python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a start - for j in $(find ${1} -name '*q??.xq') - do - # Only work with i nodes. - if [[ "${j}" =~ "${i}nodes" ]] +for j in $(find ${1} -name '*q??.xq') +do + # Only work with i nodes. 
+ if [[ "${j}" =~ "${2}nodes" ]] + then + # Only run for specified queries. + if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]] then - # Only run for specified queries. - if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]] - then - echo "Running query: ${j}" - log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" - log_base_path=$(dirname ${j/queries/query_logs}) - mkdir -p ${log_base_path} - time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${2} -timing -showquery -showoet -showrp -frame-size 10000 -repeatexec 10 > ${log_base_path}/${log_file} 2>&1 - fi; + echo "Running query: ${j}" + log_file="$(basename ${j}).$(date +%Y%m%d%H%M).log" + log_base_path=$(dirname ${j/queries/query_logs}) + mkdir -p ${log_base_path} + time sh ./vxquery-cli/target/appassembler/bin/vxq ${j} ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -repeatexec ${REPEAT} > ${log_base_path}/${log_file} 2>&1 fi; - done - - # Stop cluster. - python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${i}nodes.xml -a stop + fi; done + +# Stop cluster. 
+python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-server/src/main/resources/conf/${2}nodes.xml -a stop http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/b611a895/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java ---------------------------------------------------------------------- diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java index ec3ed68..94efba2 100644 --- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java +++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java @@ -78,9 +78,12 @@ public class VXQuery { private IHyracksDataset hds; private ResultSetId resultSetId; - private static List<String> timing; - private static int totalTiming; - private static String message; + private static List<String> timingMessages; + private static long sumTiming; + private static long sumSquaredTiming; + private static long minTiming = Long.MAX_VALUE; + private static long maxTiming = Long.MIN_VALUE; + private static byte TIMING_QUERIES_TO_IGNORE = 2; /** * Constructor to use command line options passed. 
@@ -90,7 +93,7 @@ public class VXQuery { */ public VXQuery(CmdLineOptions opts) { this.opts = opts; - timing = new ArrayList<String>(); + timingMessages = new ArrayList<String>(); } /** @@ -120,16 +123,18 @@ public class VXQuery { // if -timing argument passed, show the starting and ending times if (opts.timing) { Date end = new Date(); - message = "Execution time: " + (end.getTime() - start.getTime()) + "ms"; - System.out.println(message); - timing.add(message); - if (opts.repeatExec > 3) { - message = "Average execution time: " + (totalTiming / (opts.repeatExec - 3)) + "ms"; - System.out.println(message); - timing.add(message); + timingMessage("Execution time: " + (end.getTime() - start.getTime()) + " ms"); + if (opts.repeatExec > TIMING_QUERIES_TO_IGNORE) { + long mean = sumTiming / (opts.repeatExec - TIMING_QUERIES_TO_IGNORE); + double sd = Math.sqrt(sumSquaredTiming / (opts.repeatExec - new Byte(TIMING_QUERIES_TO_IGNORE).doubleValue()) - mean * mean); + timingMessage("Average execution time: " + mean + " ms"); + timingMessage("Standard deviation: " + String.format( "%.4f", sd)); + timingMessage("Coefficient of variation: " + String.format( "%.4f", (sd / mean))); + timingMessage("Minimum execution time: " + minTiming + " ms"); + timingMessage("Maximum execution time: " + maxTiming + " ms"); } System.out.println("Timing Summary:"); - for (String time : timing) { + for (String time : timingMessages) { System.out.println(" " + time); } } @@ -253,7 +258,8 @@ public class VXQuery { }; start = opts.timing ? 
new Date() : null; - XMLQueryCompiler compiler = new XMLQueryCompiler(listener, getNodeList(), opts.frameSize, opts.availableProcessors); + XMLQueryCompiler compiler = new XMLQueryCompiler(listener, getNodeList(), opts.frameSize, + opts.availableProcessors); resultSetId = createResultSetId(); CompilerControlBlock ccb = new CompilerControlBlock(new StaticContextImpl(RootStaticContextImpl.INSTANCE), resultSetId, null); @@ -261,9 +267,7 @@ public class VXQuery { // if -timing argument passed, show the starting and ending times if (opts.timing) { end = new Date(); - message = "Compile time: " + (end.getTime() - start.getTime()) + "ms"; - System.out.println(message); - timing.add(message); + timingMessage("Compile time: " + (end.getTime() - start.getTime()) + " ms"); } if (opts.compileOnly) { continue; @@ -283,12 +287,18 @@ public class VXQuery { // if -timing argument passed, show the starting and ending times if (opts.timing) { end = new Date(); - if ((i + 1) > 3) { - totalTiming += end.getTime() - start.getTime(); + long currentRun = end.getTime() - start.getTime(); + if ((i + 1) > TIMING_QUERIES_TO_IGNORE) { + sumTiming += currentRun; + sumSquaredTiming += currentRun * currentRun; + if (currentRun < minTiming) { + minTiming = currentRun; + } + if (maxTiming < currentRun) { + maxTiming = currentRun; + } } - message = "Job (" + (i + 1) + ") execution time: " + (end.getTime() - start.getTime()) + "ms"; - System.out.println(message); - timing.add(message); + timingMessage("Job (" + (i + 1) + ") execution time: " + currentRun + " ms"); } } } @@ -414,6 +424,16 @@ public class VXQuery { } /** + * Save and print out the timing message. + * + * @param message + */ + private static void timingMessage(String message) { + System.out.println(message); + timingMessages.add(message); + } + + /** * Helper class with fields and methods to handle all command line options */ private static class CmdLineOptions {
