GIRAPH-905 Giraph Debugger
Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/8675c84a Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/8675c84a Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/8675c84a Branch: refs/heads/trunk Commit: 8675c84a8eb3693efa4aa319860a2714556de02e Parents: fda1bb3 Author: Sergey Edunov <[email protected]> Authored: Mon Dec 8 11:21:19 2014 -0800 Committer: Sergey Edunov <[email protected]> Committed: Mon Dec 8 11:21:19 2014 -0800 ---------------------------------------------------------------------- CHANGELOG | 2 + .../org/apache/giraph/master/MasterCompute.java | 6 +- .../worker/WorkerAggregatorDelegator.java | 2 +- giraph-debugger/.gitignore | 8 + giraph-debugger/README.md | 139 +++ giraph-debugger/giraph-debug | 376 ++++++++ giraph-debugger/gui | 1 + giraph-debugger/pom.xml | 222 +++++ giraph-debugger/src/main/assembly/compile.xml | 39 + .../org/apache/giraph/debugger/CommandLine.java | 223 +++++ .../org/apache/giraph/debugger/DebugConfig.java | 438 ++++++++++ .../examples/BipartiteGraphInputFormat.java | 97 +++ .../examples/BipartiteGraphOutputFormat.java | 67 ++ .../examples/TextAdjacencyListInputFormat.java | 93 ++ ...cyListLongIDDoubleEdgeWeightInputFormat.java | 97 +++ .../examples/bipartitematching/Message.java | 119 +++ .../RandomizedMaximalMatchingComputation.java | 186 ++++ ...zedMaximalMatchingComputation2FixedLeft.java | 186 ++++ ...edMaximalMatchingComputation3FixedRight.java | 187 ++++ .../examples/bipartitematching/VertexValue.java | 77 ++ .../bipartitematching/bipartitegraph-1.json | 6 + .../bipartitematching/package-info.java | 24 + .../BuggySimpleTriangleClosingComputation.java | 206 +++++ .../SimpleTriangleClosingDebugConfig.java | 34 + .../examples/exceptiondebug/package-info.java | 23 + .../graphcoloring/GraphColoringComputation.java | 236 +++++ .../graphcoloring/GraphColoringDebugConfig.java | 56 ++ .../graphcoloring/GraphColoringMaster.java | 163 ++++ 
...aphColoringMessageConstraintDebugConfig.java | 44 + ...oloringVertexValueConstraintDebugConfig.java | 43 + .../examples/graphcoloring/Message.java | 114 +++ .../examples/graphcoloring/VertexValue.java | 125 +++ .../examples/graphcoloring/package-info.java | 24 + ...ectedComponentsDebugComputationModified.java | 104 +++ ...leShortestPathsDebugComputationModified.java | 116 +++ ...TriangleClosingDebugComputationModified.java | 216 +++++ .../examples/instrumented/package-info.java | 22 + .../BuggyConnectedComponentsComputation.java | 99 +++ ...ssingReverseEdgeMsgIntegrityDebugConfig.java | 47 + .../ConnectedComponentsDebugConfig.java | 55 ++ ...nectedComponentsMsgIntegrityDebugConfig.java | 54 ++ ...ctedComponentsRandomVerticesDebugConfig.java | 38 + ...tedComponentsVValueIntegrityDebugConfig.java | 54 ++ .../examples/integrity/package-info.java | 24 + .../debugger/examples/mwm/MWMComputation.java | 111 +++ .../debugger/examples/mwm/MWMDebugConfig.java | 54 ++ .../mwm/MWMMessageConstraintDebugConfig.java | 43 + .../MWMVertexValueConstraintDebugConfig.java | 43 + .../debugger/examples/mwm/VertexValue.java | 89 ++ .../debugger/examples/mwm/package-info.java | 26 + .../giraph/debugger/examples/package-info.java | 23 + .../pagerank/SimplePageRankComputation.java | 94 ++ .../pagerank/SimplePageRankMasterCompute.java | 41 + .../examples/pagerank/package-info.java | 22 + .../randomwalk/RandomWalkComputation.java | 145 ++++ .../randomwalk/RandomWalkDebugConfig.java | 55 ++ .../RandomWalkMessageConstraintDebugConfig.java | 56 ++ ...domWalkVertexValueConstraintDebugConfig.java | 57 ++ .../examples/randomwalk/package-info.java | 22 + .../BuggySimpleShortestPathsComputation.java | 106 +++ .../SimpleShortestPathsDebugConfig.java | 43 + .../simpledebug/SimpleShortestPathsMaster.java | 75 ++ .../examples/simpledebug/package-info.java | 22 + .../org/apache/giraph/debugger/gui/Server.java | 513 +++++++++++ .../giraph/debugger/gui/ServerHttpHandler.java | 186 ++++ 
.../apache/giraph/debugger/gui/ServerUtils.java | 610 +++++++++++++ .../giraph/debugger/gui/package-info.java | 22 + .../AbstractInterceptingComputation.java | 650 ++++++++++++++ .../AbstractInterceptingMasterCompute.java | 127 +++ .../BottomInterceptingComputation.java | 112 +++ .../BottomInterceptingMasterCompute.java | 52 ++ .../CommonVertexMasterInterceptionUtil.java | 186 ++++ .../instrumenter/InstrumentGiraphClasses.java | 413 +++++++++ .../giraph/debugger/instrumenter/Intercept.java | 42 + .../debugger/instrumenter/UserComputation.java | 52 ++ .../instrumenter/UserMasterCompute.java | 43 + .../debugger/instrumenter/package-info.java | 22 + .../mock/ComputationComputeTestGenerator.java | 389 +++++++++ .../giraph/debugger/mock/FormatHelper.java | 162 ++++ .../mock/MasterComputeTestGenerator.java | 105 +++ .../mock/PrefixedClasspathResourceLoader.java | 42 + .../giraph/debugger/mock/TestGenerator.java | 197 +++++ .../debugger/mock/TestGraphGenerator.java | 308 +++++++ .../debugger/mock/VelocityBasedGenerator.java | 42 + .../giraph/debugger/mock/package-info.java | 22 + .../apache/giraph/debugger/package-info.java | 23 + .../debugger/utils/AggregatedValueWrapper.java | 110 +++ .../debugger/utils/AggregatorWrapper.java | 117 +++ .../debugger/utils/AsyncHDFSWriteService.java | 105 +++ .../utils/BaseScenarioAndIntegrityWrapper.java | 69 ++ .../giraph/debugger/utils/BaseWrapper.java | 205 +++++ .../utils/CommonVertexMasterContextWrapper.java | 216 +++++ .../giraph/debugger/utils/DebuggerUtils.java | 375 ++++++++ .../giraph/debugger/utils/ExceptionWrapper.java | 113 +++ .../utils/GiraphMasterScenarioWrapper.java | 148 ++++ .../utils/GiraphVertexScenarioWrapper.java | 819 +++++++++++++++++ .../utils/MsgIntegrityViolationWrapper.java | 313 +++++++ .../giraph/debugger/utils/package-info.java | 22 + .../src/main/protobuf/giraph_aggregator.proto | 12 + .../src/main/protobuf/integrity.proto | 18 + .../src/main/protobuf/scenario.proto | 81 ++ 
.../org/apache/giraph/debugger/gui/css/app.css | 254 ++++++ .../giraph/debugger/gui/css/slider/slider.css | 140 +++ .../apache/giraph/debugger/gui/css/valpanel.css | 72 ++ .../org/apache/giraph/debugger/gui/index.html | 319 +++++++ .../apache/giraph/debugger/gui/js/debugger.js | 870 +++++++++++++++++++ .../giraph/debugger/gui/js/editor.core.js | 634 ++++++++++++++ .../giraph/debugger/gui/js/editor.utils.js | 791 +++++++++++++++++ .../debugger/gui/js/slider/bootstrap-slider.js | 394 +++++++++ .../org/apache/giraph/debugger/gui/js/utils.js | 256 ++++++ .../debugger/gui/js/utils.sampleGraphs.js | 25 + .../apache/giraph/debugger/gui/js/valpanel.js | 430 +++++++++ .../debugger/mock/ComputeSetUpFuncTemplate.vm | 14 + .../debugger/mock/ComputeTestFuncTemplate.vm | 55 ++ .../giraph/debugger/mock/ComputeTestTemplate.vm | 41 + .../debugger/mock/MasterComputeTestTemplate.vm | 57 ++ .../mock/ReadWritableFromByteArrayTemplate.vm | 16 + .../mock/ReadWritableFromStringTemplate.vm | 16 + .../giraph/debugger/mock/TestGraphTemplate.vm | 10 + .../test/basecompute/BaseComputation.java | 61 ++ .../test/basecompute/CommonDebugConfig.java | 36 + .../test/basecompute/DerivedComputation.java | 99 +++ .../test/basecompute/package-info.java | 22 + 123 files changed, 17270 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 69618fd..7b54584 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 1.2.0 - unreleased + GIRAPH-905: Giraph Debugger (netj via edunov) + GIRAPH-966: Add a way to ignore some thread exceptions (majakabiljo) GIRAPH-964: Remove quotes from output partition specification in hive-io (majakabiljo) 
http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-core/src/main/java/org/apache/giraph/master/MasterCompute.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/master/MasterCompute.java b/giraph-core/src/main/java/org/apache/giraph/master/MasterCompute.java index eb4144a..85496c2 100644 --- a/giraph-core/src/main/java/org/apache/giraph/master/MasterCompute.java +++ b/giraph-core/src/main/java/org/apache/giraph/master/MasterCompute.java @@ -252,15 +252,15 @@ public abstract class MasterCompute serviceMaster.getJobProgressTracker().logInfo(line); } - final void setGraphState(GraphState graphState) { + public final void setGraphState(GraphState graphState) { this.graphState = graphState; } - final void setMasterService(CentralizedServiceMaster serviceMaster) { + public final void setMasterService(CentralizedServiceMaster serviceMaster) { this.serviceMaster = serviceMaster; } - final void setSuperstepClasses(SuperstepClasses superstepClasses) { + public final void setSuperstepClasses(SuperstepClasses superstepClasses) { this.superstepClasses = superstepClasses; } } http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-core/src/main/java/org/apache/giraph/worker/WorkerAggregatorDelegator.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/worker/WorkerAggregatorDelegator.java b/giraph-core/src/main/java/org/apache/giraph/worker/WorkerAggregatorDelegator.java index 6472850..1b2e749 100644 --- a/giraph-core/src/main/java/org/apache/giraph/worker/WorkerAggregatorDelegator.java +++ b/giraph-core/src/main/java/org/apache/giraph/worker/WorkerAggregatorDelegator.java @@ -69,7 +69,7 @@ public abstract class WorkerAggregatorDelegator<I extends WritableComparable, } @Override - public final <A extends Writable> A getAggregatedValue(String name) { + public <A extends Writable> A 
getAggregatedValue(String name) { AggregatorBroadcast<A> broadcast = workerGlobalCommUsage.getBroadcast(name); return broadcast.getValue(); } http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/.gitignore ---------------------------------------------------------------------- diff --git a/giraph-debugger/.gitignore b/giraph-debugger/.gitignore new file mode 100644 index 0000000..c2a737f --- /dev/null +++ b/giraph-debugger/.gitignore @@ -0,0 +1,8 @@ +# Maven stuffs +/target + +# Eclipse files +/.classpath +/.project +/.settings +/.checkstyle http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/README.md ---------------------------------------------------------------------- diff --git a/giraph-debugger/README.md b/giraph-debugger/README.md new file mode 100644 index 0000000..4e62041 --- /dev/null +++ b/giraph-debugger/README.md @@ -0,0 +1,139 @@ +# Giraph Debugger + +## Overview +Graft is a debugging and testing tool for programs written for [Apache Giraph](https://giraph.apache.org/). In particular Graft helps users find bugs in their [_Computation.compute()_](http://giraph.apache.org/apidocs/org/apache/giraph/graph/Computation.html) and [_Master.compute()_](https://giraph.apache.org/giraph-core/apidocs/org/apache/giraph/master/MasterCompute.html) methods that result in an incorrect computation being made on the graph, such as incorrect messages being sent between vertices, vertices being assigned incorrect vertex values, or aggregators being updated in an incorrect way. Graft is NOT designed for identifying performance bottlenecks in Giraph programs or Giraph itself. For more information, visit [Graft's wiki](https://github.com/semihsalihoglu/graft/wiki). 
+ + + +## Quick Start + +### Installing Graft + +#### Get Prerequisites +The following are required to build and run Graft: + +* Protocol Buffers +* JDK 7 +* Maven 3 +* Git + +Please check the [wiki page for detailed installation instructions](https://github.com/semihsalihoglu/graft/wiki/Installing-Graft). + +Make sure everything required for Giraph is also installed, such as: + +* Hadoop + + +#### Get Giraph Trunk +Graft must be built as a module for Giraph trunk, so let's grab a copy of it: +```bash +git clone https://github.com/apache/giraph.git -b trunk +cd giraph +mvn -DskipTests --projects ./giraph-core install +``` + +#### Get Graft under Giraph, Build and Install It +Get a copy of Graft as giraph-debugger module in Giraph trunk: +```bash +git clone https://github.com/semihsalihoglu/graft.git giraph-debugger +cd giraph-debugger +mvn -DskipTests compile +``` + +Add current directory to PATH, so we can easily run giraph-debug later: +```bash +PATH=$PWD:$PATH +``` +You can add the line to your shell configuration. For example, if you use bash: +```bash +echo PATH=$PWD:\$PATH >>~/.bash_profile +``` +Now, let's debug an example Giraph job. 
+ + +### Launching Giraph Jobs with Graft + +#### Download a Sample Graph +Before we move on, let's download a small sample graph: +```bash +curl -L http://ece.northwestern.edu/~aching/shortestPathsInputGraph.tar.gz | tar xfz - +hadoop fs -put shortestPathsInputGraph shortestPathsInputGraph +``` +You must have your system configured to use a Hadoop cluster, or run one on your local machine with the following command: +```bash +start-all.sh +``` + +#### Launch Giraph's Shortest Path Example +Next, let's compile the giraph-examples module: +```bash +cd ../giraph-examples +mvn -DskipTests compile +``` + +Here's how you would typically launch a Giraph job with GiraphRunner class (the simple shortest paths example): +```bash +hadoop jar \ + target/giraph-examples-*.jar org.apache.giraph.GiraphRunner \ + org.apache.giraph.examples.SimpleShortestPathsComputation \ + -vif org.apache.giraph.io.formats.JsonLongDoubleFloatDoubleVertexInputFormat \ + -vip shortestPathsInputGraph \ + -vof org.apache.giraph.io.formats.IdWithValueTextOutputFormat \ + -op shortestPathsOutputGraph.$RANDOM \ + -w 1 \ + -ca giraph.SplitMasterWorker=false \ + # +``` + +#### Launch It in Debugging Mode with Graft +Now, you can launch the Giraph job in debugging mode by simply replacing the first two words (`hadoop jar`) of the command with `giraph-debug`: + +```bash +giraph-debug \ + target/giraph-examples-*.jar org.apache.giraph.GiraphRunner \ + org.apache.giraph.examples.SimpleShortestPathsComputation \ + # ... rest are the same as above +``` +Find the job identifier from the output, e.g., `job_201405221715_0005` and copy it for later. + +You can optionally specify the supersteps and vertex IDs to debug: +```bash +giraph-debug -S{0,1,2} -V{1,2,3,4,5} -S 2 \ + target/giraph-examples-*.jar org.apache.giraph.GiraphRunner \ + org.apache.giraph.examples.SimpleShortestPathsComputation \ + # ... 
+``` + +### Accessing Captured Debug Traces with Graft + +#### Launch Debugger GUI +Launch the debugger GUI with the following command: +```bash +giraph-debug gui +``` +Then open <http://localhost:8000> from your web browser, and paste the job ID to browse it after the job has finished. + +If necessary, you can specify a different port number when you launch the GUI. +```bash +giraph-debug gui 12345 +``` + +#### Or, Stay on the Command-line to Debug +You can access all information that has been recorded by the debugging Giraph job using the following commands. + +##### List Recorded Traces +```bash +giraph-debug list +giraph-debug list job_201405221715_0005 +``` + +##### Dump a Trace +```bash +giraph-debug dump job_201405221715_0005 0 6 +``` + +##### Generate JUnit Test Case Code from a Trace +```bash +giraph-debug mktest job_201405221715_0005 0 6 Test_job_201405221715_0005_S0_V6 +giraph-debug mktest-master job_201405221715_0005 0 TestMaster_job_201405221715_0005_S0 +``` http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/giraph-debug ---------------------------------------------------------------------- diff --git a/giraph-debugger/giraph-debug b/giraph-debugger/giraph-debug new file mode 100644 index 0000000..53a7c93 --- /dev/null +++ b/giraph-debugger/giraph-debug @@ -0,0 +1,376 @@ +#!/usr/bin/env bash +# giraph-debug -- a script for launching Giraph jar with our debugger +# +# To debug your Giraph computation, simply run: +# +# giraph-debug [DEBUG_OPTIONS] [DEBUG_CONFIG_CLASS] \ +# JAR_FILE org.apache.giraph.GiraphRunner [HADOOP_OPTS] \ +# COMPUTATION_CLASS GIRAPH_RUNNER_ARGS... +# +# Instead of running GiraphRunner with the hadoop jar command: +# +# hadoop jar \ +# JAR_FILE org.apache.giraph.GiraphRunner [HADOOP_OPTS] \ +# COMPUTATION_CLASS GIRAPH_RUNNER_ARGS... 
+# +# DEBUG_OPTIONS can be a set of the following options: +# -S SUPERSTEP_NO To debug only the given supersteps +# -V VERTEX_ID To debug only the given vertices +# -R # To debug a certain number of random vertices +# -N To also debug the neighbors of the given vertices +# -E To disable the exceptions from being captured +# -m # To limit the maximum number of captured vertices +# -M # To limit the maximum number of captured violations +# -C CLASS Name of Computation classes to debug +# (if MasterCompute uses many) +# -f Force instrumentation, don't use cached one +# +# For VERTEX_ID, only LongWritable and IntWritable are supported. All +# supersteps will be captured if none were specified, and only the specified +# vertices will be captured. +# +# If the DEBUG_OPTIONS are insufficient, a custom code that can specify more +# complex conditions for capturing traces can be written and passed as +# DEBUG_CONFIG_CLASS, which extends +# org.apache.giraph.debugger.DebugConfig. +# +# By default all trace data for debugging will be stored under +# /user/$USER/giraph-debug-traces at HDFS. To change this path set the environment +# variable TRACE_ROOT to the desired path. +# +# +# To list available traces for a Giraph job, run the following command: +# +# giraph-debug list JOB_ID +# +# It will show a list of TRACE_IDs. +# +# +# To browse what has been captured in an individual trace, run: +# +# giraph-debug dump JOB_ID SUPERSTEP VERTEX_ID +# +# +# To generate a JUnit test case for a vertex Computation from a trace, run: +# +# giraph-debug mktest JOB_ID SUPERSTEP VERTEX_ID TEST_NAME +# +# To generate a JUnit test case for a MasterCompute from a trace, run: +# +# giraph-debug mktest-master JOB_ID SUPERSTEP TEST_NAME +# +# It will generate TEST_NAME.java and other necessary files as TEST_NAME.*. +# +# +# To launch the debugger GUI, run: +# +# giraph-debug gui [PORT] +# +# and open the URL in your web browser. 
+# +# +# Author: Jaeho Shin <[email protected]> +# Created: 2014-05-09 +set -eu + +# some defaults +: ${TRACE_ROOT:=/user/$USER/giraph-debug-traces} # HDFS path to where the traces are stored +: ${CLASSNAME_SUFFIX:=Original} # A suffix for user computation class used by instrumenter +: ${JARCACHE_HDFS:=$TRACE_ROOT/jars} # HDFS path to where the jars are cached +: ${JARCACHE_LOCAL:=~/.giraph-debugger/jars} # local path to where the jars are cached +DEFAULT_DEBUG_CONFIG=org.apache.giraph.debugger.DebugConfig + +msg() { echo >&2 "giraph-debug:" "$@"; } +error() { local msg=; for msg; do echo >&2 "$msg"; done; false; } +usage() { + sed -n '2,/^#$/ s/^# //p' <"$0" + [ $# -eq 0 ] || error "$@" +} + +# show usage unless we have enough arguments +if [ $# -lt 1 ]; then + usage + exit 1 +fi + +Here=$(cd "$(dirname "$0")" && pwd -P) +cps=("$Here"/target/giraph-debugger-*-jar-with-dependencies.jar) +[ -e "${cps[0]}" ] || cps=("$Here"/target/classes) +CLASSPATH="${CLASSPATH:+$CLASSPATH:}$(IFS=:; echo "${cps[*]}"):$(hadoop classpath)" +javaOpts=( + -D"giraph.debugger.traceRootAtHDFS=$TRACE_ROOT" # pass the TRACE_ROOT at HDFS + -D"giraph.debugger.jarCacheLocal=$JARCACHE_LOCAL" + -D"giraph.debugger.jarCacheAtHDFS=$JARCACHE_HDFS" +) +exec_java() { exec java -cp "$CLASSPATH" "${javaOpts[@]}" "$@"; } +exec_java_command_line() { + local jobId=${2:-} + if [ -n "$jobId" ] && + jarFileSig=$(hadoop fs -cat "$TRACE_ROOT"/"$jobId"/jar.signature); then + # get a copy of the job's jar in local cache if necessary + mkdir -p "$JARCACHE_LOCAL" + jarFileCachedLocal="$JARCACHE_LOCAL"/"$jarFileSig".jar + [ -e "$jarFileCachedLocal" ] || + hadoop fs -get "$JARCACHE_HDFS"/"$jarFileSig".jar "$jarFileCachedLocal" + CLASSPATH="$CLASSPATH:$jarFileCachedLocal" + fi + exec_java org.apache.giraph.debugger.CommandLine "$@" +} + +# handle modes other than launching GiraphJob first +case $1 in + gui) + GUI_PORT=${2:-8000} + msg "Starting Debugger GUI at http://$HOSTNAME:$GUI_PORT/" + exec_java \ + 
-D"giraph.debugger.guiPort=$GUI_PORT" \ + org.apache.giraph.debugger.gui.Server + ;; + + ls|list) + shift + if [ $# -gt 0 ]; then + JobId=$1; shift + exec_java_command_line list \ + "$JobId" "$@" + else + set -o pipefail + hadoop fs -ls "$TRACE_ROOT" | + grep -v "$JARCACHE_HDFS" | + tail -n +2 | sed 's:.*/:list :' + exit $? + fi + ;; + + dump|mktest) + Mode=$1; shift + [ $# -gt 0 ] || usage "JOB_ID is missing" + JobId=$1 + [ $# -gt 1 ] || usage "SUPERSTEP is missing" + Superstep=$2 + [ $# -gt 2 ] || usage "VERTEX_ID is missing" + VertexId=$3 + case $Mode in + mktest*) + [ $# -gt 3 ] || usage "TEST_NAME prefix for output is missing" + TestName=$4 + esac + exec_java_command_line $Mode "$@" + ;; + + dump-master|mktest-master) + Mode=$1; shift + [ $# -gt 0 ] || usage "JOB_ID is missing" + JobId=$1 + [ $# -gt 1 ] || usage "SUPERSTEP is missing" + Superstep=$2 + case $Mode in + mktest*) + [ $# -gt 2 ] || usage "TEST_NAME prefix for output is missing" + TestName=$3 + esac + exec_java_command_line $Mode "$@" + ;; + + + *) + # otherwise, instrument and launch the job +esac + +# parse options first +SuperstepsToDebug=() +VerticesToDebug=() +ComputationClasses=() +NoDebugNeighbors=true +CaptureExceptions=true +UseCachedJars=true +NumVerticesToLog= +NumViolationsToLog= +NumRandomVerticesToDebug= +while getopts "S:V:C:NEm:M:R:f" o; do + case $o in + S) SuperstepsToDebug+=("$OPTARG") ;; + V) VerticesToDebug+=("$OPTARG") ;; + C) ComputationClasses+=("$OPTARG") ;; + N) NoDebugNeighbors=false ;; + E) CaptureExceptions=false ;; + f) UseCachedJars=false ;; + m) NumVerticesToLog=$OPTARG ;; + M) NumViolationsToLog=$OPTARG ;; + R) NumRandomVerticesToDebug=$OPTARG ;; + *) + error "$o: Unrecognized option" + esac +done +shift $(($OPTIND - 1)) + +# parse arguments +[ $# -gt 2 ] || + usage "giraph-debug $1: Unrecognized mode" +debugConfigClassName=$1; shift +if [ -f "$debugConfigClassName" ]; then + # the DebugConfig class name is optional, and + # we use the default DebugConfig if the 
first argument seems to be a jar file + jarFile=$debugConfigClassName + debugConfigClassName=$DEFAULT_DEBUG_CONFIG +else + jarFile=$1; shift + [ -f "$jarFile" ] || + error "$jarFile: Not an existing jar file" +fi +giraphRunnerClass=$1 +case $giraphRunnerClass in + org.apache.giraph.GiraphRunner) ;; + *) + error \ + "Error: Unrecognized way to start Giraph job: $giraphRunnerClass" \ + "" \ + "Only the following form is supported:" \ + " giraph-debug [DEBUG_OPTIONS] [DEBUG_CONFIG_CLASS] JAR_FILE org.apache.giraph.GiraphRunner COMPUTATION_CLASS GIRAPH_RUNNER_ARG..." \ + # +esac +# skip hadoop jar options +hadoopJarOpts=( + $giraphRunnerClass + "${javaOpts[@]}" +) +while shift; do + case $1 in + -conf|-D|-fs|-jt|-files|-libjars|-archives) + hadoopJarOpts+=("$1"); shift ;; + -D?*) ;; + *) break + esac + hadoopJarOpts+=("$1") +done +origClassName=$1; shift + +# parse GiraphRunner arguments to find if there's a MasterCompute class +find_master_compute() { + while [ $# -gt 0 ]; do + case $1 in + -mc) shift; + echo "$1" + return + ;; + *) shift 2 # XXX assuming other GiraphRunner options always have arguments + esac + done +} +masterComputeClassName=$(find_master_compute "$@") + +# pass DebugConfig options via GiraphRunner's -ca (custom argument) options +# the class name for debug configuration +set -- "$@" -ca "giraph.debugger.configClass=$debugConfigClassName" +# superstepsToDebug +[ ${#SuperstepsToDebug[@]} -eq 0 ] || + set -- "$@" -ca "giraph.debugger.superstepsToDebug=$(IFS=:; echo "${SuperstepsToDebug[*]}")" +# verticesToDebug +if [ ${#VerticesToDebug[@]} -gt 0 ]; then + set -- "$@" -ca "giraph.debugger.debugAllVertices=false" \ + -ca "giraph.debugger.verticesToDebug=$(IFS=:; echo "${VerticesToDebug[*]}")" +elif [ x"$debugConfigClassName" = x"$DEFAULT_DEBUG_CONFIG" ]; then + # debug all vertices if none were specified and default DebugConfig is being used + set -- "$@" -ca "giraph.debugger.debugAllVertices=true" +fi +[ -z "$NumRandomVerticesToDebug" ] || + set -- 
"$@" -ca "giraph.debugger.debugAllVertices=false" \ + -ca "giraph.debugger.numRandomVerticesToDebug=$NumRandomVerticesToDebug" +# debugNeighbors +$NoDebugNeighbors || + set -- "$@" -ca "giraph.debugger.debugNeighbors=true" +# don't capture exceptions +$CaptureExceptions || + set -- "$@" -ca "giraph.debugger.captureExceptions=false" +# limit number of captures +[ -z "$NumVerticesToLog" ] || + set -- "$@" -ca "giraph.debugger.numVerticesToLog=$NumVerticesToLog" +[ -z "$NumViolationsToLog" ] || + set -- "$@" -ca "giraph.debugger.numViolationsToLog=$NumViolationsToLog" + +# set up environment +export HADOOP_CLASSPATH="${HADOOP_CLASSPATH:+$HADOOP_CLASSPATH:}$jarFile" + +# first, instrument the given class +jarFileSig=$( + { + echo "$origClassName" + echo "$masterComputeClassName" + cat "$jarFile" + } | (sha1sum || shasum) 2>/dev/null +) +jarFileSig=${jarFileSig%%[[:space:]]*} +instrumentedClassName="$origClassName" +instrumentedJarFileCached="$JARCACHE_LOCAL/$jarFileSig-instrumented.jar" +if $UseCachedJars && [ "$instrumentedJarFileCached" -nt "$jarFile" ] && + [ "$instrumentedJarFileCached" -nt "${cps[0]}" ]; then + # pick up the previously instrumented jar + instrumentedJarFile=$instrumentedJarFileCached + msg "Using previously instrumented jar: $instrumentedJarFile" +else + tmpDir=$(mktemp -d "${TMPDIR:-/tmp}/giraph-debug.XXXXXX") + trap 'rm -rf "$tmpDir"' EXIT + instrumentedJarFile="$tmpDir/$(basename "$jarFile" .jar)-instrumented.jar" + instrumenterArgs=("$origClassName" "$tmpDir"/classes.instrumented $masterComputeClassName) + [ ${#ComputationClasses[@]} -eq 0 ] || instrumenterArgs+=("${ComputationClasses[@]}") + java -cp "$HADOOP_CLASSPATH${CLASSPATH:+:$CLASSPATH}" \ + -D"giraph.debugger.classNameSuffix=$CLASSNAME_SUFFIX" \ + org.apache.giraph.debugger.instrumenter.InstrumentGiraphClasses \ + "${instrumenterArgs[@]}" + + # And, create a new jar that contains all the instrumented code + msg "Creating instrumented jar: $instrumentedJarFile" + # (To make sure 
giraph-debugger classes are included in the final + # instrumented jar, we update giraph-debugger jar with user's jar contents + # and the instrumented code.) + if [ -d "${cps[0]}" ]; then + jar cf "$instrumentedJarFile" "${cps[0]}" + else + cp -f "${cps[0]}" "$instrumentedJarFile" + fi + # To embed giraph-debugger classes, we need to extract user's jar. + # TODO This is very inefficient. We should definitely figure out how to send + # multiple jars without manipulating them. + ( + mkdir -p "$tmpDir"/classes.orig + jarFile="$(cd "$(dirname "$jarFile")" && pwd)/$(basename "$jarFile")" + cd "$tmpDir"/classes.orig/ + jar xf "$jarFile" + ) + jar uf "$instrumentedJarFile" -C "$tmpDir"/classes.orig . + jar uf "$instrumentedJarFile" -C "$tmpDir"/classes.instrumented . + # cache the instrumentedJarFile for repeated debugging + ( set +e + msg "Caching instrumented jar: $instrumentedJarFileCached" + mkdir -p "$(dirname "$instrumentedJarFileCached")" + cp -f "$instrumentedJarFile" "$instrumentedJarFileCached" + ) +fi +runJar=$instrumentedJarFile +# TODO can we create a thin new jar and send it with -libjars to shadow the original classes? +#jar cf "$instrumentedJarFile" -C "$tmpDir"/classes . 
+#runJar=$jarFile +#hadoopJarOpts+=(-libjars "$instrumentedJarFile") + +# keep the submitted jar file around, in order to read the captured traces later +jarFileCachedLocal="$JARCACHE_LOCAL"/"$jarFileSig".jar +jarFileCachedHDFS="$JARCACHE_HDFS"/"$jarFileSig".jar +msg "Caching job jar locally: $jarFileCachedLocal" +[ -e "$jarFileCachedLocal" ] || { + mkdir -p "$(dirname "$jarFileCachedLocal")" + ln -f "$jarFile" "$jarFileCachedLocal" 2>/dev/null || + cp -f "$jarFile" "$jarFileCachedLocal" +} +msg "Caching job jar at HDFS: $jarFileCachedHDFS" +hadoop fs -test -e "$jarFileCachedHDFS" || { + hadoop fs -mkdir "$(dirname "$jarFileCachedHDFS")" 2>/dev/null || true + hadoop fs -put "$jarFile" "$jarFileCachedHDFS" +} +# let AbstractInterceptingComputation record the jar signature under the job trace dir +hadoopJarOpts+=(-D"giraph.debugger.jarSignature=$jarFileSig") + +# submit a job to run the new instrumented jar with the original +HADOOP_CLASSPATH="$runJar:$HADOOP_CLASSPATH" \ +exec \ +hadoop jar "$runJar" "${hadoopJarOpts[@]}" \ + "$instrumentedClassName" "$@" http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/gui ---------------------------------------------------------------------- diff --git a/giraph-debugger/gui b/giraph-debugger/gui new file mode 100644 index 0000000..16e1b3c --- /dev/null +++ b/giraph-debugger/gui @@ -0,0 +1 @@ +src/main/resources/org/apache/giraph/debugger/gui \ No newline at end of file http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/pom.xml ---------------------------------------------------------------------- diff --git a/giraph-debugger/pom.xml b/giraph-debugger/pom.xml new file mode 100644 index 0000000..bc66635 --- /dev/null +++ b/giraph-debugger/pom.xml @@ -0,0 +1,222 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.giraph</groupId> + <artifactId>giraph-parent</artifactId> + <version>1.2.0-SNAPSHOT</version> + </parent> + <artifactId>giraph-debugger</artifactId> + <packaging>jar</packaging> + + <name>Apache Giraph Debugger</name> + <url>https://github.com/semihsalihoglu/graft</url> + <description>A Debugger for Giraph</description> + + <properties> + <top.dir>${project.basedir}/..</top.dir> + <project.build.targetJdk>1.7</project.build.targetJdk> + + <dep.commons-lang.version>2.4</dep.commons-lang.version> + <dep.protobuf.version>2.5.0</dep.protobuf.version> + <dep.javassist.version>3.18.1-GA</dep.javassist.version> + <dep.velocity.version>1.7</dep.velocity.version> + + <!-- already declared in Giraph parent POM + <dep.commons-cli.version>1.2</dep.commons-cli.version> + <dep.commons-collections.version>3.2.1</dep.commons-collections.version> + <dep.commons-io.version>2.1</dep.commons-io.version> + <dep.guava.version>14.0.1</dep.guava.version> + <dep.json.version>20090211</dep.json.version> + <dep.junit.version>4.11</dep.junit.version> + <dep.log4j.version>1.2.17</dep.log4j.version> + --> + </properties> + + <build> + 
<finalName>giraph-debugger-${project.version}-${forHadoop}</finalName> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-assembly-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-checkstyle-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-enforcer-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-site-plugin</artifactId> + <configuration> + <siteDirectory>${project.basedir}/src/site</siteDirectory> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <version>2.6</version> + <configuration> + <systemProperties> + <property> + <name>prop.jarLocation</name> + <value>${project.jar}</value> + </property> + </systemProperties> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + </plugin> + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <id>generate-sources</id> + <phase>generate-sources</phase> + <configuration> + <tasks> + <mkdir dir="target/generated-sources"/> + <exec executable="protoc"> + <arg value="--java_out=target/generated-sources"/> + <arg value="--proto_path=src/main/protobuf/"/> + <arg value="src/main/protobuf/giraph_aggregator.proto"/> + <arg value="src/main/protobuf/scenario.proto"/> + <arg 
value="src/main/protobuf/integrity.proto"/> + <!-- mvn compile assembly:single; --> + </exec> + </tasks> + <sourceRoot>target/generated-sources</sourceRoot> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>findbugs-maven-plugin</artifactId> + <version>2.5.1</version> + <configuration> + <xmlOutput>true</xmlOutput> + <findbugsXmlOutput>false</findbugsXmlOutput> + <excludeFilterFile>${top.dir}/findbugs-exclude.xml</excludeFilterFile> + </configuration> + <executions> + <execution> + <phase>verify</phase> + <goals> + <goal>check</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + + <dependencies> + <dependency> + <groupId>org.apache.giraph</groupId> + <artifactId>giraph-core</artifactId> + </dependency> + <dependency> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + <version>${dep.log4j.version}</version> + </dependency> + <dependency> + <groupId>commons-lang</groupId> + <artifactId>commons-lang</artifactId> + <version>${dep.commons-lang.version}</version> + </dependency> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>${dep.commons-io.version}</version> + </dependency> + <dependency> + <groupId>commons-collections</groupId> + <artifactId>commons-collections</artifactId> + <version>${dep.commons-collections.version}</version> + </dependency> + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>${dep.guava.version}</version> + </dependency> + <dependency> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java</artifactId> + <version>${dep.protobuf.version}</version> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${dep.junit.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.velocity</groupId> + 
<artifactId>velocity</artifactId> + <version>${dep.velocity.version}</version> + </dependency> + <dependency> + <groupId>org.json</groupId> + <artifactId>json</artifactId> + <version>${dep.json.version}</version> + </dependency> + <dependency> + <groupId>org.javassist</groupId> + <artifactId>javassist</artifactId> + <version>${dep.javassist.version}</version> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <scope>test</scope> + </dependency> + </dependencies> +</project> http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/assembly/compile.xml ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/assembly/compile.xml b/giraph-debugger/src/main/assembly/compile.xml new file mode 100644 index 0000000..b2e620f --- /dev/null +++ b/giraph-debugger/src/main/assembly/compile.xml @@ -0,0 +1,39 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+--> +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.0 http://maven.apache.org/xsd/assembly-1.1.0.xsd"> + <id>jar-with-dependencies</id> + <formats> + <format>jar</format> + </formats> + <includeBaseDirectory>false</includeBaseDirectory> + + <dependencySets> + <dependencySet> + <useProjectArtifact>true</useProjectArtifact> + <outputDirectory>/</outputDirectory> + <unpackOptions> + <excludes> + <exclude>META-INF/LICENSE</exclude> + </excludes> + </unpackOptions> + <unpack>true</unpack> + <scope>runtime</scope> + </dependencySet> + </dependencySets> +</assembly> http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/java/org/apache/giraph/debugger/CommandLine.java ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/java/org/apache/giraph/debugger/CommandLine.java b/giraph-debugger/src/main/java/org/apache/giraph/debugger/CommandLine.java new file mode 100644 index 0000000..357d626 --- /dev/null +++ b/giraph-debugger/src/main/java/org/apache/giraph/debugger/CommandLine.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */
package org.apache.giraph.debugger;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;

import org.apache.giraph.debugger.gui.ServerUtils;
import org.apache.giraph.debugger.mock.ComputationComputeTestGenerator;
import org.apache.giraph.debugger.mock.MasterComputeTestGenerator;
import org.apache.giraph.debugger.utils.DebuggerUtils.DebugTrace;
import org.apache.giraph.debugger.utils.GiraphMasterScenarioWrapper;
import org.apache.giraph.debugger.utils.GiraphVertexScenarioWrapper;
import org.apache.log4j.Logger;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * This main class is the command line interface for the debugger. The
 * supported commands are:
 * <pre>
 * list          &lt;job_id&gt;
 * dump          &lt;job_id&gt; &lt;superstep&gt; &lt;vertex&gt;
 * mktest        &lt;job_id&gt; &lt;superstep&gt; &lt;vertex&gt; &lt;output_prefix&gt;
 * dump-master   &lt;job_id&gt; &lt;superstep&gt;
 * mktest-master &lt;job_id&gt; &lt;superstep&gt; &lt;output_prefix&gt;
 * </pre>
 * Whenever the arguments are missing or not understood, the help text is
 * logged and the JVM exits with status 1. When the requested trace cannot be
 * found the JVM exits with status 2.
 */
public final class CommandLine {

  /**
   * Logger for this class.
   */
  private static final Logger LOG = Logger.getLogger(CommandLine.class);

  /**
   * Should not instantiate.
   */
  private CommandLine() {
  }

  /**
   * Main function of the CommandLine.
   * @param args command line arguments.
   */
  public static void main(final String[] args) {
    // The argument count has to be checked BEFORE args[0] is read; reading it
    // first makes an empty command line fail with an
    // ArrayIndexOutOfBoundsException instead of showing the help text.
    // Every command also needs at least a job id.
    if (args.length <= 1 || !isSupportedMode(args[0])) {
      printHelp();
      return;
    }

    final String mode = args[0];
    final String jobId = args[1];

    if (mode.equalsIgnoreCase("list")) {
      try {
        listTraces(jobId);
      } catch (IOException e) {
        LOG.error("Failed to list the traces of job " + jobId, e);
      }
      return;
    }

    // Every remaining command needs a superstep number.
    if (args.length <= 2) {
      printHelp();
      return;
    }
    final Long superstepNo;
    try {
      superstepNo = Long.valueOf(args[2]);
    } catch (NumberFormatException e) {
      LOG.error("Superstep must be an integer, but was: " + args[2]);
      printHelp();
      return;
    }

    try {
      if (mode.equalsIgnoreCase("dump") || mode.equalsIgnoreCase("mktest")) {
        handleVertexCommand(mode, jobId, superstepNo, args);
      } else {
        // isSupportedMode() guarantees this is dump-master or mktest-master.
        handleMasterCommand(mode, jobId, superstepNo, args);
      }
    } catch (ClassNotFoundException | InstantiationException |
      IllegalAccessException | IOException e) {
      LOG.error("Failed to execute '" + mode + "' for job " + jobId +
        " at superstep " + superstepNo, e);
    }
  }

  /**
   * Checks whether the given word is one of the commands understood by
   * {@link #main(String[])}. The comparison ignores case.
   *
   * @param mode first command line argument
   * @return true if the command is supported
   */
  private static boolean isSupportedMode(String mode) {
    return mode.equalsIgnoreCase("list") ||
      mode.equalsIgnoreCase("dump") ||
      mode.equalsIgnoreCase("mktest") ||
      mode.equalsIgnoreCase("dump-master") ||
      mode.equalsIgnoreCase("mktest-master");
  }

  /**
   * Implements the "list" command: for every captured superstep of the job,
   * in ascending order, logs the dump/mktest command lines that can be used
   * to inspect the captured master and vertex traces.
   *
   * @param jobId id of the job whose traces are listed
   * @throws IOException if the traces cannot be read
   */
  private static void listTraces(String jobId) throws IOException {
    List<Long> superstepsDebuggedMaster = ServerUtils
      .getSuperstepsMasterDebugged(jobId);
    // Union of the supersteps captured for vertices and for the master.
    Set<Long> superstepsDebugged = Sets.newHashSet(ServerUtils
      .getSuperstepsDebugged(jobId));
    superstepsDebugged.addAll(superstepsDebuggedMaster);
    List<Long> allSupersteps = Lists.newArrayList(superstepsDebugged);
    Collections.sort(allSupersteps);

    // Kinds of vertex traces that are listed; the same for every superstep.
    List<DebugTrace> debugTraces = Arrays.asList(
      DebugTrace.INTEGRITY_MESSAGE_SINGLE_VERTEX,
      DebugTrace.INTEGRITY_VERTEX,
      DebugTrace.VERTEX_EXCEPTION,
      DebugTrace.VERTEX_REGULAR);

    for (Long superstepNo : allSupersteps) {
      if (superstepsDebuggedMaster.contains(superstepNo)) {
        LOG.info(String.format("%-15s %s %4d ",
          "dump-master", jobId, superstepNo));
        LOG.info(String.format(
          "%-15s %s %4d TestMaster_%s_S%d", "mktest-master",
          jobId, superstepNo, jobId, superstepNo));
      }
      for (DebugTrace debugTrace : debugTraces) {
        for (String vertexId : ServerUtils.getVerticesDebugged(jobId,
          superstepNo, debugTrace)) {
          LOG.info(String.format("%-15s %s %4d %8s # %s", "dump",
            jobId, superstepNo, vertexId, debugTrace.getLabel() == null ?
              "" : "captured " + debugTrace.getLabel()));
          LOG.info(String.format(
            "%-15s %s %4d %8s Test_%s_S%d_V%s", "mktest", jobId,
            superstepNo, vertexId, jobId, superstepNo, vertexId));
        }
      }
    }
  }

  /**
   * Implements the "dump" and "mktest" commands, which work on the trace of a
   * single vertex.
   *
   * @param mode either "dump" or "mktest" (case-insensitive)
   * @param jobId id of the job
   * @param superstepNo superstep of the trace
   * @param args all command line arguments; args[3] is the vertex id and, for
   *        "mktest", args[4] is the output prefix
   * @throws IOException if the trace cannot be read or the test not written
   * @throws ClassNotFoundException propagated from reading the trace or
   *         generating the test
   * @throws InstantiationException propagated from reading the trace or
   *         generating the test
   * @throws IllegalAccessException propagated from reading the trace or
   *         generating the test
   */
  private static void handleVertexCommand(String mode, String jobId,
    Long superstepNo, String[] args) throws IOException,
    ClassNotFoundException, InstantiationException, IllegalAccessException {
    if (args.length <= 3) {
      printHelp();
      return;
    }
    String vertexId = args[3];
    // Read scenario.
    // TODO: rename ServerUtils to Utils
    @SuppressWarnings("rawtypes")
    GiraphVertexScenarioWrapper scenarioWrapper = ServerUtils
      .readScenarioFromTrace(jobId, superstepNo, vertexId,
        DebugTrace.VERTEX_ALL);
    if (scenarioWrapper == null) {
      LOG.error("The trace file does not exist.");
      System.exit(2);
    }

    if (mode.equalsIgnoreCase("dump")) {
      LOG.info(scenarioWrapper);
    } else if (mode.equalsIgnoreCase("mktest")) {
      // Read output prefix and test class.
      if (args.length <= 4) {
        printHelp();
        return;
      }
      String outputPrefix = args[4].trim();
      // The test class is named after the last path element of the prefix.
      String testClassName = new File(outputPrefix).getName();
      // Generate test case.
      String generatedTestCase = new ComputationComputeTestGenerator()
        .generateTest(scenarioWrapper, null, testClassName);
      outputTestCase(outputPrefix, generatedTestCase);
    }
  }

  /**
   * Implements the "dump-master" and "mktest-master" commands, which work on
   * the trace of the master computation.
   *
   * @param mode either "dump-master" or "mktest-master" (case-insensitive)
   * @param jobId id of the job
   * @param superstepNo superstep of the trace
   * @param args all command line arguments; for "mktest-master", args[3] is
   *        the output prefix
   * @throws IOException if the trace cannot be read or the test not written
   * @throws ClassNotFoundException propagated from reading the trace or
   *         generating the test
   * @throws InstantiationException propagated from reading the trace or
   *         generating the test
   * @throws IllegalAccessException propagated from reading the trace or
   *         generating the test
   */
  private static void handleMasterCommand(String mode, String jobId,
    Long superstepNo, String[] args) throws IOException,
    ClassNotFoundException, InstantiationException, IllegalAccessException {
    GiraphMasterScenarioWrapper scenarioWrapper = ServerUtils
      .readMasterScenarioFromTrace(jobId, superstepNo,
        DebugTrace.MASTER_ALL);
    if (scenarioWrapper == null) {
      LOG.error("The trace file does not exist.");
      System.exit(2);
    }

    if (mode.equalsIgnoreCase("dump-master")) {
      LOG.info(scenarioWrapper);
    } else if (mode.equalsIgnoreCase("mktest-master")) {
      if (args.length <= 3) {
        printHelp();
        return;
      }
      String outputPrefix = args[3].trim();
      String testClassName = new File(outputPrefix).getName();
      String generatedTestCase = new MasterComputeTestGenerator()
        .generateTest(scenarioWrapper, null, testClassName);
      outputTestCase(outputPrefix, generatedTestCase);
    }
  }

  /**
   * Writes the generated test case to the specified output prefix. The output
   * file name is {outputPrefix}.java. If the prefix is null the test case is
   * logged instead of being written to a file.
   *
   * @param outputPrefix prefix of the output file
   * @param generatedTestCase contents of the test case file
   * @throws IOException if the output file cannot be created or written
   */
  protected static void outputTestCase(String outputPrefix,
    String generatedTestCase) throws IOException {
    if (outputPrefix != null) {
      String filename = outputPrefix + ".java";
      try (PrintWriter writer = new PrintWriter(new FileWriter(new File(
        filename)))) {
        writer.append(generatedTestCase);
      }
      // Successful completion is informational, not an error.
      LOG.info("Wrote " + filename);
    } else {
      LOG.info(generatedTestCase);
    }
  }

  /**
   * Help output when the given command by the user is not recognized. Logs
   * the supported commands and terminates the JVM with exit status 1, so this
   * method never returns normally.
   */
  private static void printHelp() {
    LOG.info("Supported commands: ");
    LOG.info("\tlist <job_id>");
    LOG.info(
      "\t\tList available traces/scenarios (supersteps/vertices) for a job");
    LOG.info("\tdump <job_id> <superstep> <vertex>");
    LOG.info("\t\tDump a trace in textual form");
    LOG.info("\tmktest <job_id> <superstep> <vertex> <output_prefix>");
    LOG.info("\t\tGenerate a JUnit test case code from a trace. If an " +
      "output_prefix is provided, a .java file is generated at the " +
      "specified path.");
    LOG.info("\tdump-master <job_id> <superstep>");
    LOG.info("\t\tDump a master trace in textual form");
    LOG.info("\tmktest-master <job_id> <superstep> <output_prefix>");
    LOG.info("\t\tGenerate a JUnit test case code from a master trace");
    System.exit(1);
  }
}
http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/java/org/apache/giraph/debugger/DebugConfig.java ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/java/org/apache/giraph/debugger/DebugConfig.java b/giraph-debugger/src/main/java/org/apache/giraph/debugger/DebugConfig.java new file mode 100644 index 0000000..a822579 --- /dev/null +++ b/giraph-debugger/src/main/java/org/apache/giraph/debugger/DebugConfig.java @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
package org.apache.giraph.debugger;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

import org.apache.giraph.conf.GiraphConfiguration;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.graph.Computation;
import org.apache.giraph.graph.Vertex;
import org.apache.giraph.utils.ReflectionUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;

/**
 * This class is used by programmers to configure what they want to be debugged.
 * Programmers can either extend this class and implement their own debug
 * configurations or use a few hadoop config parameters to use this one. If
 * programmers implement their own config, they can do the following:
 * <ul>
 * <li>Configure which vertices to debug by looking at the whole {@link Vertex}
 * object.
 * <li>Configure which supersteps to debug.
 * <li>Add a message integrity constraint by making
 * {@link #shouldCheckMessageIntegrity()} return true and then overriding
 * {@link #isMessageCorrect(WritableComparable, WritableComparable, Writable,
 * long)}.
 * <li>Add a vertex value integrity constraint by making
 * {@link #shouldCheckVertexValueIntegrity()} return true and then overriding
 * {@link #isVertexValueCorrect(WritableComparable, Writable)}.
 * </ul>
 *
 * If instead the programmers use this class without extending it, they can
 * configure it as follows:
 * <ul>
 * <li>By passing -D{@code giraph.debugger.verticesToDebug}=v1:v2:..:vn,
 * specify a colon-separated set of integer or long vertex IDs to debug. The
 * {@link Computation} class has to have either a {@link LongWritable} or
 * {@link IntWritable} vertex id. By default no vertices are debugged.
 * <li>By passing -D{@code giraph.debugger.debugNeighbors}=true/false specify
 * whether the in-neighbors of vertices that were configured to be debugged
 * should also be debugged. By default this flag is set to false.
 * <li>By passing -D{@code giraph.debugger.superstepsToDebug}=s1:s2:...:sm
 * specify a colon-separated set of supersteps to debug. By default all
 * supersteps are debugged.
 * </ul>
 *
 * Note that if programmers use this class directly, then by default the
 * debugger will capture exceptions (see
 * {@link #readConfig(GiraphConfiguration, long, int)}).
 *
 * @param <I>
 *          Vertex id
 * @param <V>
 *          Vertex data
 * @param <E>
 *          Edge data
 * @param <M1>
 *          Incoming message type
 * @param <M2>
 *          Outgoing message type
 */
@SuppressWarnings({ "rawtypes" })
public class DebugConfig<I extends WritableComparable, V extends Writable,
  E extends Writable, M1 extends Writable, M2 extends Writable> {

  /**
   * String constant for splitting the parameter specifying which
   * supersteps should be debugged.
   */
  private static final String SUPERSTEP_DELIMITER = ":";
  /**
   * String constant for splitting the parameter specifying which
   * vertices should be debugged.
   */
  private static final String VERTEX_ID_DELIMITER = ":";

  /**
   * String constant for specifying the subset of vertices to debug
   * when the user chooses not to debug all vertices
   */
  private static final String VERTICES_TO_DEBUG_FLAG =
    "giraph.debugger.verticesToDebug";
  /**
   * String constant for specifying whether the neighbors of specified
   * vertices should be debugged.
   */
  private static final String DEBUG_NEIGHBORS_FLAG =
    "giraph.debugger.debugNeighbors";
  /**
   * String constant for specifying the subset of supersteps to debug
   * when the user chooses not to debug the vertices in all supersteps.
   */
  private static final String SUPERSTEPS_TO_DEBUG_FLAG =
    "giraph.debugger.superstepsToDebug";
  /**
   * String constant for specifying whether exceptions should be captured.
   */
  private static final String CATCH_EXCEPTIONS_FLAG =
    "giraph.debugger.catchExceptions";
  /**
   * String constant for specifying whether all vertices should be debugged.
   */
  private static final String DEBUG_ALL_VERTICES_FLAG =
    "giraph.debugger.debugAllVertices";
  /**
   * String constant for specifying the maximum number of vertices to capture.
   */
  private static final String NUM_VERTICES_TO_LOG =
    "giraph.debugger.numVerticesToLog";
  /**
   * String constant for specifying the maximum number of violations to capture.
   */
  private static final String NUM_VIOLATIONS_TO_LOG =
    "giraph.debugger.numViolationsToLog";
  /**
   * String constant for specifying the number of vertices to randomly capture.
   */
  private static final String NUM_RANDOM_VERTICES_TO_DEBUG =
    "giraph.debugger.numRandomVerticesToDebug";

  /**
   * Stores the set of specified vertices to debug, when VERTICES_TO_DEBUG_FLAG
   * is specified. Null when no vertex has been singled out.
   */
  private Set<I> verticesToDebugSet;

  /**
   * The number of vertices to randomly capture for debugging.
   */
  private int numRandomVerticesToDebug;

  /**
   * Stores the set of specified supersteps to debug in, when
   * SUPERSTEPS_TO_DEBUG_FLAG is specified. Null means all supersteps.
   */
  private Set<Long> superstepsToDebugSet;
  /**
   * Whether the user has specified to debug the neighbors of the vertices
   * that have been specified to be debugged, i.e. whether DEBUG_NEIGHBORS_FLAG
   * is set to true.
   */
  private boolean debugNeighborsOfVerticesToDebug;
  /**
   * Whether the user has specified to debug all vertices, i.e., whether
   * DEBUG_ALL_VERTICES_FLAG is set to true.
   */
  private boolean debugAllVertices = false;
  /**
   * Maximum number of vertices to capture by each thread of every worker.
   */
  private int numVerticesToLog;
  /**
   * Maximum number of violations to capture by each thread of every worker.
   */
  private int numViolationsToLog;
  /**
   * Whether to capture exceptions or not.
   */
  private boolean shouldCatchExceptions;

  /**
   * Default public constructor. Configures not to debug any vertex in
   * any superstep and not to catch exceptions. Note that
   * {@link #readConfig(GiraphConfiguration, long, int)} turns exception
   * catching on unless the job configuration explicitly disables it, so once
   * the configuration has been read Graft catches exceptions by default.
   */
  public DebugConfig() {
    verticesToDebugSet = null;
    debugAllVertices = false;
    debugNeighborsOfVerticesToDebug = false;
    shouldCatchExceptions = false;
    superstepsToDebugSet = null;
    numVerticesToLog = 3;
    numViolationsToLog = 3;
    numRandomVerticesToDebug = 0;
  }

  /**
   * Configures this class through a {@link GiraphConfiguration}, which may
   * contain some flags passed in by the user.
   * @param config a {@link GiraphConfiguration} object.
   * @param totalNumberOfVertices in the graph to use when picking a random
   *        number of vertices to capture.
   * @param jobId id of the job to use as seed, when generating a number.
   * @throws IllegalArgumentException if vertices are selected by id (either
   *         explicitly or randomly) but the vertex id type of the computation
   *         is neither {@link LongWritable} nor {@link IntWritable}.
   * @throws NumberFormatException if a configured superstep or vertex id is
   *         not a number.
   */
  public final void readConfig(GiraphConfiguration config,
    long totalNumberOfVertices, int jobId) {
    this.debugNeighborsOfVerticesToDebug = config.getBoolean(
      DEBUG_NEIGHBORS_FLAG, false);
    this.numRandomVerticesToDebug = config.getInt(
      NUM_RANDOM_VERTICES_TO_DEBUG, 0);

    this.shouldCatchExceptions = config.getBoolean(CATCH_EXCEPTIONS_FLAG, true);

    String superstepsToDebugStr = config.get(SUPERSTEPS_TO_DEBUG_FLAG, null);
    if (superstepsToDebugStr == null) {
      superstepsToDebugSet = null;
    } else {
      superstepsToDebugSet = new HashSet<>();
      for (String superstepStr : superstepsToDebugStr
        .split(SUPERSTEP_DELIMITER)) {
        // Tolerate blanks around the delimiter, e.g. "1: 2".
        superstepsToDebugSet.add(Long.valueOf(superstepStr.trim()));
      }
    }

    debugAllVertices = config.getBoolean(DEBUG_ALL_VERTICES_FLAG, false);
    if (!debugAllVertices) {
      String verticesToDebugStr = config.get(VERTICES_TO_DEBUG_FLAG, null);
      Class<? extends Computation> userComputationClass = config
        .getComputationClass();
      Class<?>[] typeArguments = ReflectionUtils.getTypeArguments(
        Computation.class, userComputationClass);
      // The first type argument of Computation is the vertex id type.
      Class<?> idType = typeArguments[0];
      if (verticesToDebugStr != null) {
        this.verticesToDebugSet = new HashSet<>();
        for (String idString : verticesToDebugStr
          .split(VERTEX_ID_DELIMITER)) {
          insertIDIntoVerticesToDebugSetIfLongOrInt(idType, idString.trim());
        }
      }
      int numRandomVertices = numberOfRandomVerticesToCapture();
      if (numRandomVertices > 0) {
        if (this.verticesToDebugSet == null) {
          this.verticesToDebugSet = new HashSet<>();
        }
        int idBound = randomVertexIdBound(totalNumberOfVertices);
        // Random.nextInt(bound) rejects a non-positive bound, so there is
        // nothing to pick from when the graph is empty.
        if (idBound > 0) {
          // Seeding with the job id makes every worker pick the same ids.
          Random random = new Random(jobId);
          for (int i = 0; i < numRandomVertices; ++i) {
            insertIDIntoVerticesToDebugSetIfLongOrInt(idType,
              String.valueOf(random.nextInt(idBound)));
          }
        }
      }
    }

    numVerticesToLog = config.getInt(NUM_VERTICES_TO_LOG, 3);
    numViolationsToLog = config.getInt(NUM_VIOLATIONS_TO_LOG, 3);
  }

  /**
   * Computes the exclusive upper bound for randomly generated vertex ids.
   * A plain {@code (int)} cast keeps only the low 32 bits of the count, which
   * for graphs with more than {@link Integer#MAX_VALUE} vertices yields an
   * arbitrary (possibly very small) bound; such counts are clamped instead.
   *
   * @param totalNumberOfVertices number of vertices in the graph
   * @return {@link Integer#MAX_VALUE} if the count is negative or does not
   *         fit into an int, otherwise the count itself (which may be 0)
   */
  private static int randomVertexIdBound(long totalNumberOfVertices) {
    if (totalNumberOfVertices < 0 ||
      totalNumberOfVertices > Integer.MAX_VALUE) {
      return Integer.MAX_VALUE;
    }
    return (int) totalNumberOfVertices;
  }

  /**
   * Add given string to the vertex set for debugging.
   *
   * @param idType type of vertex id
   * @param idString string representation of the vertex to add
   * @throws IllegalArgumentException if the id type is neither
   *         {@link LongWritable} nor {@link IntWritable}
   */
  @SuppressWarnings("unchecked")
  private void insertIDIntoVerticesToDebugSetIfLongOrInt(Class<?> idType,
    String idString) {
    if (LongWritable.class.isAssignableFrom(idType)) {
      verticesToDebugSet
        .add((I) new LongWritable(Long.valueOf(idString)));
    } else if (IntWritable.class.isAssignableFrom(idType)) {
      verticesToDebugSet.add((I) new IntWritable(Integer
        .valueOf(idString)));
    } else {
      throw new IllegalArgumentException(
        "When using the giraph.debugger.verticesToDebug argument, the " +
          "vertex IDs of the computation class needs to be LongWritable" +
          " or IntWritable.");
    }
  }

  /**
   * Whether vertices should be debugged in the specified superstep.
   * @param superstepNo superstep number.
   * @return whether the superstep should be debugged.
   */
  public boolean shouldDebugSuperstep(long superstepNo) {
    return superstepsToDebugSet == null ||
      superstepsToDebugSet.contains(superstepNo);
  }

  /**
   * @return the number of random vertices that Graft should capture.
   */
  public int numberOfRandomVerticesToCapture() {
    return numRandomVerticesToDebug;
  }

  /**
   * Whether the specified vertex should be debugged.
   * @param vertex a vertex.
   * @param superstepNo the superstep number.
   * @return whether the vertex should be debugged.
   */
  public boolean shouldDebugVertex(Vertex<I, V, E> vertex, long superstepNo) {
    if (vertex.isHalted()) {
      // If vertex has already halted before a superstep, we probably won't
      // want to debug it.
      return false;
    }
    if (debugAllVertices) {
      return true;
    }
    // Should not debug all vertices. Check if any vertices were special cased.
    if (verticesToDebugSet == null) {
      return false;
    }
    if (superstepNo == 0 && debugNeighborsOfVerticesToDebug) {
      // If it's the first superstep and we should capture neighbors
      // of vertices, then we check if this vertex is a neighbor of a vertex
      // that is already specified (or randomly picked). If so we add the
      // vertex to the verticesToDebugSet.
      addVertexToVerticesToDebugSetIfNeighbor(vertex);
    }
    return verticesToDebugSet.contains(vertex.getId());
  }

  /**
   * Checks whether the given vertex has an edge to a vertex that has been
   * configured to be debugged. If so then the given vertex is added to the
   * set of vertices to debug as well.
   * @param vertex a vertex.
   */
  private void addVertexToVerticesToDebugSetIfNeighbor(Vertex<I, V, E> vertex) {
    for (Edge<I, E> edge : vertex.getEdges()) {
      if (verticesToDebugSet.contains(edge.getTargetVertexId())) {
        // Remember the vertex so that later lookups are a plain set lookup,
        // and stop: one matching edge is enough.
        verticesToDebugSet.add(vertex.getId());
        return;
      }
    }
  }

  /**
   * @return whether exceptions should be caught.
   */
  public boolean shouldCatchExceptions() {
    return shouldCatchExceptions;
  }

  /**
   * @return whether message integrity constraints should be checked, i.e.,
   * whether Graft should call the {@link #isMessageCorrect(WritableComparable,
   * WritableComparable, Writable, long)} method on this message.
   */
  public boolean shouldCheckMessageIntegrity() {
    return false;
  }

  /**
   * @param srcId source id of the message.
   * @param dstId destination id of the message.
   * @param message message sent between srcId and dstId.
   * @param superstepNo executing superstep number.
   * @return whether this message is correct, i.e, does not violate a
   * constraint.
   */
  public boolean isMessageCorrect(I srcId, I dstId, M1 message,
    long superstepNo) {
    return true;
  }

  /**
   * @return whether a vertex value integrity constraints should be checked,
   * i.e., whether Graft should call the {@link #isVertexValueCorrect(
   * WritableComparable, Writable)} method on this vertex.
   */
  public boolean shouldCheckVertexValueIntegrity() {
    return false;
  }

  /**
   * @param vertexId id of the vertex.
   * @param value value of the vertex.
   * @return whether this vertex's value is correct, i.e, does not violate a
   * constraint.
   */
  public boolean isVertexValueCorrect(I vertexId, V value) {
    return true;
  }

  /**
   * @return Maximum number of vertices to capture by each thread of every
   * worker
   */
  public int getNumberOfVerticesToLog() {
    return numVerticesToLog;
  }

  /**
   * @return Maximum number of violations to capture by each thread of every
   * worker
   */
  public int getNumberOfViolationsToLog() {
    return numViolationsToLog;
  }

  /**
   * Warning: This function should not be called by classes outside of
   * org.apache.giraph.debugger package.
   * @return verticesToDebugSet maintained by this DebugConfig.
   */
  public Set<I> getVerticesToDebugSet() {
    return verticesToDebugSet;
  }

  @Override
  public String toString() {
    // Fields are separated by ", " so that the values do not run together.
    StringBuilder stringBuilder = new StringBuilder();
    stringBuilder.append("superstepsToDebug: ")
      .append(superstepsToDebugSet == null ? "all supersteps" : Arrays
        .toString(superstepsToDebugSet.toArray()));
    stringBuilder.append(", verticesToDebug: ")
      .append(verticesToDebugSet == null ? null : Arrays
        .toString(verticesToDebugSet.toArray()));
    stringBuilder.append(", debugNeighborsOfVerticesToDebug: ")
      .append(debugNeighborsOfVerticesToDebug);
    stringBuilder.append(", shouldCatchExceptions: ")
      .append(shouldCatchExceptions());
    stringBuilder.append(", shouldCheckMessageIntegrity: ")
      .append(shouldCheckMessageIntegrity());
    stringBuilder.append(", shouldCheckVertexValueIntegrity: ")
      .append(shouldCheckVertexValueIntegrity());
    return stringBuilder.toString();
  }
}
http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphInputFormat.java b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphInputFormat.java new file mode 100644 index 0000000..57fc970 --- /dev/null +++ b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphInputFormat.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */
package org.apache.giraph.debugger.examples;

import java.io.IOException;
import java.util.List;

import org.apache.giraph.edge.Edge;
import org.apache.giraph.edge.EdgeFactory;
import org.apache.giraph.io.formats.TextVertexInputFormat;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.json.JSONArray;
import org.json.JSONException;

import com.google.common.collect.Lists;

/**
 * Vertex input format for the bipartite matching examples. Every input line
 * holds one vertex encoded as a JSON array of three elements: the vertex id,
 * the vertex value (which is not read) and an array with the ids of the
 * neighboring vertices.
 *
 * @param <V> vertex value type; vertices are always created with a null value
 */
public class BipartiteGraphInputFormat<V extends Writable>
  extends
  TextVertexInputFormat<LongWritable, V, NullWritable> {

  @Override
  public TextVertexReader createVertexReader(InputSplit split,
    TaskAttemptContext context) throws IOException {
    return new JsonBipartiteVertexReader();
  }

  /**
   * Line-by-line reader that turns each JSON array into a vertex of the
   * BipartiteGraphInputFormat.
   */
  private class JsonBipartiteVertexReader
    extends
    TextVertexReaderFromEachLineProcessedHandlingExceptions<JSONArray,
    JSONException> {

    /** Parses the text of the line as a JSON array. */
    @Override
    protected JSONArray preprocessLine(Text line) throws JSONException,
      IOException {
      String jsonText = line.toString();
      return new JSONArray(jsonText);
    }

    /** The vertex id is the first element of the JSON array. */
    @Override
    protected LongWritable getId(JSONArray jsonVertex) throws JSONException,
      IOException {
      long vertexId = jsonVertex.getLong(0);
      return new LongWritable(vertexId);
    }

    /** The second element of the JSON array is skipped; no value is set. */
    @Override
    protected V getValue(JSONArray jsonVertex) throws JSONException,
      IOException {
      // Ignoring jsonVertex.getJSONArray(1)
      return null;
    }

    /**
     * The third element of the JSON array lists the neighbor ids; one edge
     * without a value is created per neighbor.
     */
    @Override
    protected Iterable<Edge<LongWritable, NullWritable>> getEdges(
      JSONArray jsonVertex) throws JSONException, IOException {
      JSONArray neighborIds = jsonVertex.getJSONArray(2);
      List<Edge<LongWritable, NullWritable>> result = Lists
        .newArrayListWithCapacity(neighborIds.length());
      int index = 0;
      while (index < neighborIds.length()) {
        LongWritable targetId = new LongWritable(neighborIds.getLong(index));
        result.add(EdgeFactory.create(targetId, NullWritable.get()));
        ++index;
      }
      return result;
    }

  }

}
http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphOutputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphOutputFormat.java b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphOutputFormat.java new file mode 100644 index 0000000..a0d0acd --- /dev/null +++ b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/BipartiteGraphOutputFormat.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.debugger.examples; + +import java.io.IOException; + +import org.apache.giraph.debugger.examples.bipartitematching.VertexValue; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.formats.TextVertexOutputFormat; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.json.JSONArray; + +/** + * Output format that dumps the Bipartite graph after matching where each + * vertex is a JSON array of three elements: vertex id, vertex value, and an + * array of neighbor vertex ids. + */ +public class BipartiteGraphOutputFormat + extends + TextVertexOutputFormat<LongWritable, + VertexValue, NullWritable> { + + @Override + public TextVertexWriter createVertexWriter(TaskAttemptContext context) { + return new BipartiteVertexWriter(); + } + + /** + * A JSON vertex writer for the BipartiteGraphOutputFormat. 
+ */ + private class BipartiteVertexWriter extends TextVertexWriterToEachLine { + @Override + public Text convertVertexToLine( + Vertex<LongWritable, VertexValue, + NullWritable> vertex) + throws IOException { + JSONArray jsonVertex = new JSONArray(); + jsonVertex.put(vertex.getId().get()); + jsonVertex.put(vertex.getValue()); + JSONArray jsonEdgeArray = new JSONArray(); + for (Edge<LongWritable, NullWritable> edge : vertex.getEdges()) { + jsonEdgeArray.put(edge.getTargetVertexId().get()); + } + jsonVertex.put(jsonEdgeArray); + return new Text(jsonVertex.toString()); + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListInputFormat.java b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListInputFormat.java new file mode 100644 index 0000000..4802c97 --- /dev/null +++ b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListInputFormat.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.debugger.examples; + +import java.io.IOException; +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.io.formats.TextVertexInputFormat; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +import com.google.common.collect.Lists; + +/** + * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for + * unweighted graphs with long ids. + * + * Each line consists of: vertex neighbor1 neighbor2 ... + * + * @param <V> + */ +public class TextAdjacencyListInputFormat<V extends Writable> extends + TextVertexInputFormat<LongWritable, V, NullWritable> { + /** Separator of the vertex and neighbors */ + private static final Pattern SEPARATOR = Pattern.compile("[\t ]"); + + @Override + public TextVertexReader createVertexReader(InputSplit split, + TaskAttemptContext context) + throws IOException { + return new LongLongNullVertexReader(); + } + + /** + * Vertex reader associated with {@link TextAdjacencyListInputFormat}. 
+ */ + public class LongLongNullVertexReader extends + TextVertexReaderFromEachLineProcessed<String[]> { + /** Cached vertex id for the current line */ + private LongWritable id; + + @Override + protected String[] preprocessLine(Text line) throws IOException { + String[] tokens = SEPARATOR.split(line.toString()); + id = new LongWritable(Long.parseLong(tokens[0])); + return tokens; + } + + @Override + protected LongWritable getId(String[] tokens) throws IOException { + return id; + } + + @Override + protected V getValue(String[] tokens) throws IOException { + return getConf().createVertexValue(); + } + + @Override + protected Iterable<Edge<LongWritable, NullWritable>> getEdges( + String[] tokens) throws IOException { + List<Edge<LongWritable, NullWritable>> edges = + Lists.newArrayListWithCapacity(tokens.length - 1); + for (int n = 1; n < tokens.length; n++) { + edges.add(EdgeFactory.create( + new LongWritable(Long.parseLong(tokens[n])))); + } + return edges; + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListLongIDDoubleEdgeWeightInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListLongIDDoubleEdgeWeightInputFormat.java b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListLongIDDoubleEdgeWeightInputFormat.java new file mode 100644 index 0000000..91b030b --- /dev/null +++ b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/TextAdjacencyListLongIDDoubleEdgeWeightInputFormat.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.debugger.examples; + +import java.io.IOException; +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.io.formats.TextVertexInputFormat; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +import com.google.common.collect.Lists; + +/** + * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for + * double weighted graphs with long ids. + * + * Each line consists of: vertex neighbor1 weight1 neighbor2 weight2 ... + * + * @param <V> + */ +public class TextAdjacencyListLongIDDoubleEdgeWeightInputFormat< + V extends Writable> extends TextVertexInputFormat< + LongWritable, V, DoubleWritable> { + /** Separator of the vertex and neighbors */ + private static final Pattern SEPARATOR = Pattern.compile("[\t ]"); + + @Override + public TextVertexReader createVertexReader(InputSplit split, + TaskAttemptContext context) + throws IOException { + return new LongIDDoubleEdgeWeightVertexReader(); + } + + /** + * Vertex reader associated with + * {@link TextAdjacencyListLongIDDoubleEdgeWeightInputFormat}. 
+ */ + public class LongIDDoubleEdgeWeightVertexReader extends + TextVertexReaderFromEachLineProcessed<String[]> { + /** Cached vertex id for the current line */ + private LongWritable id; + + @Override + protected String[] preprocessLine(Text line) throws IOException { + String[] tokens = SEPARATOR.split(line.toString()); + id = new LongWritable(Long.parseLong(tokens[0])); + return tokens; + } + + @Override + protected LongWritable getId(String[] tokens) throws IOException { + return id; + } + + @Override + protected V getValue(String[] tokens) throws IOException { + return getConf().createVertexValue(); + } + + @Override + protected Iterable<Edge<LongWritable, DoubleWritable>> getEdges( + String[] tokens) throws IOException { + List<Edge<LongWritable, DoubleWritable>> edges = + Lists.newArrayListWithCapacity((tokens.length - 1) / 2); + for (int n = 1; n < tokens.length;) { + edges.add(EdgeFactory.create( + new LongWritable(Long.parseLong(tokens[n])), + new DoubleWritable(Double.parseDouble(tokens[n + 1])))); + n += 2; + } + return edges; + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/8675c84a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/bipartitematching/Message.java ---------------------------------------------------------------------- diff --git a/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/bipartitematching/Message.java b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/bipartitematching/Message.java new file mode 100644 index 0000000..f24a433 --- /dev/null +++ b/giraph-debugger/src/main/java/org/apache/giraph/debugger/examples/bipartitematching/Message.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.debugger.examples.bipartitematching; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.giraph.graph.Vertex; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; + +/** + * Message for bipartite matching. + */ +public class Message implements Writable { + + /** + * Id of the vertex sending this message. + */ + private long senderVertex; + + /** + * Type of the message. + */ + private enum Type { + /** + * Match request message sent by left vertices. + */ + MATCH_REQUEST, + /** + * Grant reply message sent by right and left vertices. + */ + REQUEST_GRANTED, + /** + * Denial reply message sent by right vertices. + */ + REQUEST_DENIED + } + + /** + * Whether this message is a match request (null), or a message that grants + * (true) or denies (false) another one. + */ + private Message.Type type = Type.MATCH_REQUEST; + + /** + * Default constructor. + */ + public Message() { + } + + /** + * Constructs a match request message. + * + * @param vertex + * Sending vertex + */ + public Message(Vertex<LongWritable, VertexValue, NullWritable> vertex) { + senderVertex = vertex.getId().get(); + type = Type.MATCH_REQUEST; + } + + /** + * Constructs a match granting or denying message. 
+ * + * @param vertex + * Sending vertex + * @param isGranting + * True iff it is a granting message + */ + public Message(Vertex<LongWritable, VertexValue, NullWritable> vertex, + boolean isGranting) { + this(vertex); + type = isGranting ? Type.REQUEST_GRANTED : Type.REQUEST_DENIED; + } + + public long getSenderVertex() { + return senderVertex; + } + + public boolean isGranting() { + return type.equals(Type.REQUEST_GRANTED); + } + + @Override + public String toString() { + return type + " from " + senderVertex; + } + + @Override + public void readFields(DataInput in) throws IOException { + senderVertex = in.readLong(); + type = Type.values()[in.readInt()]; + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeLong(senderVertex); + out.writeInt(type.ordinal()); + } + +}
