Updated Branches: refs/heads/trunk cd16dac76 -> bf0d07420
GIRAPH-604: Clean up benchmarks (majakabiljo) Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/bf0d0742 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/bf0d0742 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/bf0d0742 Branch: refs/heads/trunk Commit: bf0d07420d50e816060fbcbc5c09f93b571a8947 Parents: cd16dac Author: Maja Kabiljo <[email protected]> Authored: Thu Apr 4 23:36:36 2013 -0700 Committer: Maja Kabiljo <[email protected]> Committed: Thu Apr 4 23:36:36 2013 -0700 ---------------------------------------------------------------------- CHANGELOG | 2 + .../giraph/benchmark/AggregatorsBenchmark.java | 117 ++------- .../apache/giraph/benchmark/BenchmarkOption.java | 190 +++++++++++++ .../apache/giraph/benchmark/GiraphBenchmark.java | 117 ++++++++ .../apache/giraph/benchmark/PageRankBenchmark.java | 205 ++++----------- .../giraph/benchmark/RandomMessageBenchmark.java | 180 ++++---------- .../giraph/benchmark/ShortestPathsBenchmark.java | 126 ++------- 7 files changed, 468 insertions(+), 469 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/bf0d0742/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index d619155..0d5a671 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 0.2.0 - unreleased + GIRAPH-604: Clean up benchmarks (majakabiljo) + GIRAPH-605: Worker crashes if its vertices have no edges when using edge input (majakabiljo) GIRAPH-603: AbstractVertexToHive doesn't need message type (majakabiljo) http://git-wip-us.apache.org/repos/asf/giraph/blob/bf0d0742/giraph-core/src/main/java/org/apache/giraph/benchmark/AggregatorsBenchmark.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/benchmark/AggregatorsBenchmark.java 
b/giraph-core/src/main/java/org/apache/giraph/benchmark/AggregatorsBenchmark.java index 7043338..12304bb 100644 --- a/giraph-core/src/main/java/org/apache/giraph/benchmark/AggregatorsBenchmark.java +++ b/giraph-core/src/main/java/org/apache/giraph/benchmark/AggregatorsBenchmark.java @@ -19,37 +19,35 @@ package org.apache.giraph.benchmark; import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.PosixParser; import org.apache.giraph.aggregators.LongSumAggregator; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.conf.GiraphConstants; import org.apache.giraph.io.formats.PseudoRandomInputFormatConstants; import org.apache.giraph.io.formats.PseudoRandomVertexInputFormat; -import org.apache.giraph.job.GiraphJob; import org.apache.giraph.master.DefaultMasterCompute; import org.apache.giraph.graph.Vertex; import org.apache.giraph.worker.DefaultWorkerContext; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.log4j.Logger; + +import com.google.common.collect.Sets; import java.io.IOException; +import java.util.Set; /** * Benchmark for aggregators. Also checks the correctness. 
*/ -public class AggregatorsBenchmark implements Tool { - /** Class logger */ - private static final Logger LOG = - Logger.getLogger(AggregatorsBenchmark.class); +public class AggregatorsBenchmark extends GiraphBenchmark { /** Number of aggregators setting */ private static final String AGGREGATORS_NUM = "aggregatorsbenchmark.num"; - /** Configuration */ - private Configuration conf; + + /** Option for number of aggregators */ + private static final BenchmarkOption AGGREGATORS = + new BenchmarkOption("a", "aggregators", + true, "Aggregators", "Need to set number of aggregators (-a)"); /** * Vertex class for AggregatorsBenchmark @@ -78,16 +76,6 @@ public class AggregatorsBenchmark implements Tool { } } - @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - /** * MasterCompute class for AggregatorsBenchmark */ @@ -206,75 +194,22 @@ public class AggregatorsBenchmark implements Tool { } @Override - public final int run(final String[] args) throws Exception { - Options options = new Options(); - options.addOption("h", "help", false, "Help"); - options.addOption("v", "verbose", false, "Verbose"); - options.addOption("w", - "workers", - true, - "Number of workers"); - options.addOption("V", - "aggregateVertices", - true, - "Aggregate vertices"); - options.addOption("a", - "aggregators", - true, - "Aggregators"); - HelpFormatter formatter = new HelpFormatter(); - if (args.length == 0) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - CommandLineParser parser = new PosixParser(); - CommandLine cmd = parser.parse(options, args); - if (cmd.hasOption('h')) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - if (!cmd.hasOption('w')) { - LOG.info("Need to choose the number of workers (-w)"); - return -1; - } - if (!cmd.hasOption('V')) { - LOG.info("Need to set the aggregate vertices (-V)"); - return -1; - } - if 
(!cmd.hasOption('a')) { - LOG.info("Need to set number of aggregators (-a)"); - return -1; - } - - int workers = Integer.parseInt(cmd.getOptionValue('w')); - GiraphJob job = new GiraphJob(getConf(), getClass().getName()); - job.getConfiguration().setVertexClass(AggregatorsBenchmarkVertex.class); - job.getConfiguration().setMasterComputeClass( - AggregatorsBenchmarkMasterCompute.class); - job.getConfiguration().setVertexInputFormatClass( - PseudoRandomVertexInputFormat.class); - job.getConfiguration().setWorkerContextClass( - AggregatorsBenchmarkWorkerContext.class); - job.getConfiguration().setWorkerConfiguration(workers, workers, 100.0f); - job.getConfiguration().setLong( - PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, - Long.parseLong(cmd.getOptionValue('V'))); - job.getConfiguration().setLong( - PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, - 1); - job.getConfiguration().setInt(AGGREGATORS_NUM, - Integer.parseInt(cmd.getOptionValue('a'))); - job.getConfiguration().setInt("workers", workers); + public Set<BenchmarkOption> getBenchmarkOptions() { + return Sets.newHashSet(BenchmarkOption.VERTICES, AGGREGATORS); + } - boolean isVerbose = false; - if (cmd.hasOption('v')) { - isVerbose = true; - } - if (job.run(isVerbose)) { - return 0; - } else { - return -1; - } + @Override + protected void prepareConfiguration(GiraphConfiguration conf, + CommandLine cmd) { + conf.setVertexClass(AggregatorsBenchmarkVertex.class); + conf.setMasterComputeClass(AggregatorsBenchmarkMasterCompute.class); + conf.setVertexInputFormatClass(PseudoRandomVertexInputFormat.class); + conf.setWorkerContextClass(AggregatorsBenchmarkWorkerContext.class); + conf.setLong(PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, + BenchmarkOption.VERTICES.getOptionLongValue(cmd)); + conf.setLong(PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, 1); + conf.setInt(AGGREGATORS_NUM, AGGREGATORS.getOptionIntValue(cmd)); + conf.setInt("workers", conf.getInt(GiraphConstants.MAX_WORKERS, -1)); } /** 
http://git-wip-us.apache.org/repos/asf/giraph/blob/bf0d0742/giraph-core/src/main/java/org/apache/giraph/benchmark/BenchmarkOption.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/benchmark/BenchmarkOption.java b/giraph-core/src/main/java/org/apache/giraph/benchmark/BenchmarkOption.java new file mode 100644 index 0000000..0771ca2 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/benchmark/BenchmarkOption.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.benchmark; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.log4j.Logger; + +/** + * Command line options for benchmarks + */ +public class BenchmarkOption { + /** Option for help */ + public static final BenchmarkOption HELP = + new BenchmarkOption("h", "help", false, "Help"); + /** Option for verbose */ + public static final BenchmarkOption VERBOSE = + new BenchmarkOption("v", "verbose", false, "Verbose"); + /** Option for number of workers */ + public static final BenchmarkOption WORKERS = + new BenchmarkOption("w", "workers", true, "Number of workers", + "Need to choose the number of workers (-w)"); + /** Option for number of supersteps */ + public static final BenchmarkOption SUPERSTEPS = + new BenchmarkOption("s", "supersteps", true, + "Supersteps to execute before finishing", + "Need to set the number of supersteps (-s)"); + /** Option for number of vertices */ + public static final BenchmarkOption VERTICES = + new BenchmarkOption("V", "aggregateVertices", true, + "Aggregate vertices", "Need to set the aggregate vertices (-V)"); + /** Option for number of edges per vertex */ + public static final BenchmarkOption EDGES_PER_VERTEX = + new BenchmarkOption("e", "edgesPerVertex", true, + "Edges per vertex", + "Need to set the number of edges per vertex (-e)"); + + /** Short option */ + private String shortOption; + /** Long option */ + private String longOption; + /** True iff option requires an argument */ + private boolean hasArgument; + /** Description of the option */ + private String description; + /** + * Message to print if the option is missing, null if the option is not + * required + */ + private String missingMessage; + + /** + * Constructor for option which is not required + * + * @param shortOption Short option + * @param longOption Long option + * @param hasArgument True iff option requires argument + * @param description Description of the option + */ + public 
BenchmarkOption(String shortOption, String longOption, + boolean hasArgument, String description) { + this(shortOption, longOption, hasArgument, description, null); + } + + /** + * Constructor for option which is required + * + * @param shortOption Short option + * @param longOption Long option + * @param hasArgument True iff option requires argument + * @param description Description of the option + * @param missingMessage Message to print if the option is missing + */ + public BenchmarkOption(String shortOption, String longOption, + boolean hasArgument, String description, String missingMessage) { + this.shortOption = shortOption; + this.longOption = longOption; + this.hasArgument = hasArgument; + this.description = description; + this.missingMessage = missingMessage; + } + + /** + * Check if the option is required + * + * @return True iff the option is required + */ + public boolean isRequired() { + return missingMessage != null; + } + + /** + * Add option to cli Options + * + * @param options Cli Options + */ + public void addToOptions(Options options) { + options.addOption(shortOption, longOption, hasArgument, description); + } + + /** + * If option is not required just return true. + * If option is required, check if it's present in CommandLine, + * and if it's not print the missingMessage to log. 
+ * + * @param cmd CommandLine + * @param log Logger to print the missing message to + * @return False iff the option is required but is not specified in cmd + */ + public boolean checkOption(CommandLine cmd, Logger log) { + if (!isRequired()) { + return true; + } + if (!cmd.hasOption(shortOption)) { + log.info(missingMessage); + return false; + } + return true; + } + + /** + * Check if the option is present in CommandLine + * + * @param cmd CommandLine + * @return True iff the option is present in CommandLine + */ + public boolean optionTurnedOn(CommandLine cmd) { + return cmd.hasOption(shortOption); + } + + /** + * Retrieve the argument, if any, of this option + * + * @param cmd CommandLine + * @return Value of the argument if option is set and has an argument, + * otherwise null + */ + public String getOptionValue(CommandLine cmd) { + return cmd.getOptionValue(shortOption); + } + + /** + * Retrieve the argument of this option as integer value + * + * @param cmd CommandLine + * @return Value of the argument as integer value + */ + public int getOptionIntValue(CommandLine cmd) { + return Integer.parseInt(getOptionValue(cmd)); + } + + /** + * Retrieve the argument of this option as integer value, + * or default value if option is not set + * + * @param cmd CommandLine + * @param defaultValue Default value + * @return Value of the argument as integer value, + * or default value if option is not set + */ + public int getOptionIntValue(CommandLine cmd, int defaultValue) { + return optionTurnedOn(cmd) ? 
getOptionIntValue(cmd) : defaultValue; + } + + /** + * Retrieve the argument of this option as long value + * + * @param cmd CommandLine + * @return Value of the argument as long value + */ + public long getOptionLongValue(CommandLine cmd) { + return Long.parseLong(getOptionValue(cmd)); + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/bf0d0742/giraph-core/src/main/java/org/apache/giraph/benchmark/GiraphBenchmark.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/benchmark/GiraphBenchmark.java b/giraph-core/src/main/java/org/apache/giraph/benchmark/GiraphBenchmark.java new file mode 100644 index 0000000..4631065 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/benchmark/GiraphBenchmark.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.benchmark; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.PosixParser; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.job.GiraphJob; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Tool; +import org.apache.log4j.Logger; + +import java.util.Set; + +/** + * Abstract class which benchmarks should extend. + */ +public abstract class GiraphBenchmark implements Tool { + /** Class logger */ + private static final Logger LOG = Logger.getLogger(GiraphBenchmark.class); + /** Configuration */ + private Configuration conf; + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public int run(String[] args) throws Exception { + Set<BenchmarkOption> giraphOptions = getBenchmarkOptions(); + giraphOptions.add(BenchmarkOption.HELP); + giraphOptions.add(BenchmarkOption.VERBOSE); + giraphOptions.add(BenchmarkOption.WORKERS); + Options options = new Options(); + for (BenchmarkOption giraphOption : giraphOptions) { + giraphOption.addToOptions(options); + } + + HelpFormatter formatter = new HelpFormatter(); + if (args.length == 0) { + formatter.printHelp(getClass().getName(), options, true); + return 0; + } + CommandLineParser parser = new PosixParser(); + CommandLine cmd = parser.parse(options, args); + for (BenchmarkOption giraphOption : giraphOptions) { + if (!giraphOption.checkOption(cmd, LOG)) { + return -1; + } + } + if (BenchmarkOption.HELP.optionTurnedOn(cmd)) { + formatter.printHelp(getClass().getName(), options, true); + return 0; + } + + GiraphJob job = new GiraphJob(getConf(), getClass().getName()); + int workers = Integer.parseInt(BenchmarkOption.WORKERS.getOptionValue(cmd)); + 
job.getConfiguration().setWorkerConfiguration(workers, workers, 100.0f); + prepareConfiguration(job.getConfiguration(), cmd); + + boolean isVerbose = false; + if (BenchmarkOption.VERBOSE.optionTurnedOn(cmd)) { + isVerbose = true; + } + if (job.run(isVerbose)) { + return 0; + } else { + return -1; + } + } + + /** + * Get the options to use in this benchmark. + * BenchmarkOption.VERBOSE, BenchmarkOption.HELP and BenchmarkOption.WORKERS + * will be added automatically, so you don't have to specify those. + * + * @return Options to use in this benchmark + */ + public abstract Set<BenchmarkOption> getBenchmarkOptions(); + + /** + * Process options from CommandLine and prepare configuration for running + * the job. + * BenchmarkOption.VERBOSE, BenchmarkOption.HELP and BenchmarkOption.WORKERS + * will be processed automatically so you don't have to process them. + * + * @param conf Configuration + * @param cmd Command line + */ + protected abstract void prepareConfiguration(GiraphConfiguration conf, + CommandLine cmd); +} http://git-wip-us.apache.org/repos/asf/giraph/blob/bf0d0742/giraph-core/src/main/java/org/apache/giraph/benchmark/PageRankBenchmark.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/benchmark/PageRankBenchmark.java b/giraph-core/src/main/java/org/apache/giraph/benchmark/PageRankBenchmark.java index b5d7e1e..fbb2516 100644 --- a/giraph-core/src/main/java/org/apache/giraph/benchmark/PageRankBenchmark.java +++ b/giraph-core/src/main/java/org/apache/giraph/benchmark/PageRankBenchmark.java @@ -18,10 +18,6 @@ package org.apache.giraph.benchmark; import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.PosixParser; import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.conf.GiraphConstants; import 
org.apache.giraph.combiner.DoubleSumCombiner; @@ -30,161 +26,74 @@ import org.apache.giraph.edge.ByteArrayEdges; import org.apache.giraph.edge.HashMapEdges; import org.apache.giraph.edge.LongDoubleArrayEdges; import org.apache.giraph.io.formats.PseudoRandomInputFormatConstants; -import org.apache.giraph.job.GiraphJob; import org.apache.giraph.io.formats.JsonBase64VertexOutputFormat; import org.apache.giraph.io.formats.PseudoRandomEdgeInputFormat; import org.apache.giraph.io.formats.PseudoRandomVertexInputFormat; import org.apache.giraph.partition.SimpleLongRangePartitionerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; +import com.google.common.collect.Sets; + +import java.util.Set; + /** * Default Pregel-style PageRank computation. */ -public class PageRankBenchmark implements Tool { - /** - * Class logger - */ +public class PageRankBenchmark extends GiraphBenchmark { + /** Class logger */ private static final Logger LOG = Logger.getLogger(PageRankBenchmark.class); - /** - * Configuration from Configurable - */ - private Configuration conf; - @Override - public Configuration getConf() { - return conf; - } + /** Option for VertexEdges class */ + private static final BenchmarkOption EDGES_CLASS = new BenchmarkOption( + "c", "edgesClass", true, + "Vertex edges class (0 for LongDoubleArrayEdges," + + "1 for ByteArrayEdges, " + + "2 for ByteArrayEdges with unsafe serialization, " + + "3 for ArrayListEdges, " + + "4 for HashMapVertex"); + /** Option for using edge input */ + private static final BenchmarkOption EDGE_INPUT = new BenchmarkOption( + "ei", "edgeInput", false, + "Use edge-based input instead of vertex-based input."); + /** Option for minimum ratio of partition-local edges */ + private static final BenchmarkOption LOCAL_EDGES_MIN_RATIO = + new BenchmarkOption( + "l", "localEdgesMinRatio", true, + "Minimum ratio of partition-local edges 
(default is 0)"); + /** Option for partitioning algorithm */ + private static final BenchmarkOption PARTITIONER = new BenchmarkOption( + "p", "partitioner", true, + "Partitioning algorithm (0 for hash partitioning (default), " + + "1 for range partitioning)"); + /** Option for type of combiner */ + private static final BenchmarkOption COMBINER_TYPE = new BenchmarkOption( + "t", "combinerType", true, + "Combiner type (0 for no combiner, 1 for DoubleSumCombiner (default)"); + /** Option for output format */ + private static final BenchmarkOption OUTPUT_FORMAT = new BenchmarkOption( + "o", "vertexOutputFormat", true, + "0 for JsonBase64VertexOutputFormat"); @Override - public void setConf(Configuration conf) { - this.conf = conf; - } - - @Override - public final int run(final String[] args) throws Exception { - Options options = new Options(); - options.addOption("h", "help", false, "Help"); - options.addOption("v", "verbose", false, "Verbose"); - options.addOption("w", - "workers", - true, - "Number of workers"); - options.addOption("s", - "supersteps", - true, - "Supersteps to execute before finishing"); - options.addOption("V", - "aggregateVertices", - true, - "Aggregate vertices"); - options.addOption("e", - "edgesPerVertex", - true, - "Edges per vertex"); - options.addOption("c", - "edgesClass", - true, - "Vertex edges class (0 for LongDoubleArrayEdges," + - "1 for ByteArrayEdges, " + - "2 for ByteArrayEdges with unsafe serialization, " + - "3 for ArrayListEdges, " + - "4 for HashMapVertex"); - options.addOption("ei", - "edgeInput", - false, - "Use edge-based input instead of vertex-based input."); - options.addOption("l", - "localEdgesMinRatio", - true, - "Minimum ratio of partition-local edges (default is 0)"); - options.addOption("p", - "partitioner", - true, - "Partitioning algorithm (0 for hash partitioning (default), " + - "1 for range partitioning)"); - options.addOption("N", - "name", - true, - "Name of the job"); - options.addOption("t", - 
"combinerType", - true, - "Combiner type (0 for no combiner, 1 for DoubleSumCombiner (default)"); - options.addOption("o", - "vertexOutputFormat", - true, - "0 for JsonBase64VertexOutputFormat"); - - HelpFormatter formatter = new HelpFormatter(); - if (args.length == 0) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - CommandLineParser parser = new PosixParser(); - CommandLine cmd = parser.parse(options, args); - if (cmd.hasOption('h')) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - if (!cmd.hasOption('w')) { - LOG.info("Need to choose the number of workers (-w)"); - return -1; - } - if (!cmd.hasOption('s')) { - LOG.info("Need to set the number of supersteps (-s)"); - return -1; - } - if (!cmd.hasOption('V')) { - LOG.info("Need to set the aggregate vertices (-V)"); - return -1; - } - if (!cmd.hasOption('e')) { - LOG.info("Need to set the number of edges " + - "per vertex (-e)"); - return -1; - } - - int workers = Integer.parseInt(cmd.getOptionValue('w')); - String name = getClass().getName(); - if (cmd.hasOption("N")) { - name = name + " " + cmd.getOptionValue("N"); - } - - GiraphJob job = new GiraphJob(getConf(), name); - GiraphConfiguration configuration = job.getConfiguration(); - setClassesAndParameters(cmd, configuration); - - configuration.setWorkerConfiguration(workers, workers, 100.0f); - configuration.setInt( - PageRankVertex.SUPERSTEP_COUNT, - Integer.parseInt(cmd.getOptionValue('s'))); - - boolean isVerbose = false; - if (cmd.hasOption('v')) { - isVerbose = true; - } - if (job.run(isVerbose)) { - return 0; - } else { - return -1; - } + public Set<BenchmarkOption> getBenchmarkOptions() { + return Sets.newHashSet( + BenchmarkOption.SUPERSTEPS, BenchmarkOption.VERTICES, + BenchmarkOption.EDGES_PER_VERTEX, EDGES_CLASS, EDGE_INPUT, + LOCAL_EDGES_MIN_RATIO, PARTITIONER, COMBINER_TYPE, OUTPUT_FORMAT); } /** * Set vertex edges, input format, partitioner classes and related parameters * based on 
command-line arguments. * - * @param cmd Command line arguments + * @param cmd Command line arguments * @param configuration Giraph job configuration */ - protected void setClassesAndParameters( - CommandLine cmd, GiraphConfiguration configuration) { + protected void prepareConfiguration(GiraphConfiguration configuration, + CommandLine cmd) { configuration.setVertexClass(PageRankVertex.class); - int edgesClassOption = cmd.hasOption('c') ? Integer.parseInt( - cmd.getOptionValue('c')) : 1; + int edgesClassOption = EDGES_CLASS.getOptionIntValue(cmd, 1); switch (edgesClassOption) { case 0: configuration.setVertexEdgesClass(LongDoubleArrayEdges.class); @@ -210,12 +119,11 @@ public class PageRankBenchmark implements Tool { LOG.info("Using edges class " + GiraphConstants.VERTEX_EDGES_CLASS.get(configuration)); - if (!cmd.hasOption('t') || - (Integer.parseInt(cmd.getOptionValue('t')) == 1)) { + if (COMBINER_TYPE.getOptionIntValue(cmd, 1) == 1) { configuration.setVertexCombinerClass(DoubleSumCombiner.class); } - if (cmd.hasOption("ei")) { + if (EDGE_INPUT.optionTurnedOn(cmd)) { configuration.setEdgeInputFormatClass(PseudoRandomEdgeInputFormat.class); } else { configuration.setVertexInputFormatClass( @@ -224,31 +132,32 @@ public class PageRankBenchmark implements Tool { configuration.setLong( PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, - Long.parseLong(cmd.getOptionValue('V'))); + BenchmarkOption.VERTICES.getOptionLongValue(cmd)); configuration.setLong( PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, - Long.parseLong(cmd.getOptionValue('e'))); - if (cmd.hasOption('l')) { - float localEdgesMinRatio = Float.parseFloat(cmd.getOptionValue('l')); + BenchmarkOption.EDGES_PER_VERTEX.getOptionLongValue(cmd)); + if (LOCAL_EDGES_MIN_RATIO.optionTurnedOn(cmd)) { + float localEdgesMinRatio = + Float.parseFloat(LOCAL_EDGES_MIN_RATIO.getOptionValue(cmd)); configuration.setFloat( PseudoRandomInputFormatConstants.LOCAL_EDGES_MIN_RATIO, localEdgesMinRatio); } - int 
vertexOutputClassOption = - cmd.hasOption('o') ? Integer.parseInt(cmd.getOptionValue('o')) : -1; - if (vertexOutputClassOption == 0) { + if (OUTPUT_FORMAT.getOptionIntValue(cmd, -1) == 0) { LOG.info("Using vertex output format class " + JsonBase64VertexOutputFormat.class.getName()); configuration.setVertexOutputFormatClass( JsonBase64VertexOutputFormat.class); } - if (cmd.hasOption('p') && - Integer.parseInt(cmd.getOptionValue('p')) == 1) { + if (PARTITIONER.getOptionIntValue(cmd, 0) == 1) { configuration.setGraphPartitionerFactoryClass( SimpleLongRangePartitionerFactory.class); } + + configuration.setInt(PageRankVertex.SUPERSTEP_COUNT, + BenchmarkOption.SUPERSTEPS.getOptionIntValue(cmd)); } /** http://git-wip-us.apache.org/repos/asf/giraph/blob/bf0d0742/giraph-core/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java b/giraph-core/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java index ff65ab9..5c7e019 100644 --- a/giraph-core/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java +++ b/giraph-core/src/main/java/org/apache/giraph/benchmark/RandomMessageBenchmark.java @@ -19,32 +19,29 @@ package org.apache.giraph.benchmark; import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.PosixParser; import org.apache.giraph.aggregators.LongSumAggregator; +import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.conf.GiraphConstants; import org.apache.giraph.io.formats.PseudoRandomInputFormatConstants; import org.apache.giraph.io.formats.PseudoRandomVertexInputFormat; -import org.apache.giraph.job.GiraphJob; import org.apache.giraph.master.DefaultMasterCompute; import 
org.apache.giraph.graph.Vertex; import org.apache.giraph.worker.WorkerContext; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; +import com.google.common.collect.Sets; + import java.util.Random; +import java.util.Set; /** * Random Message Benchmark for evaluating the messaging performance. */ -public class RandomMessageBenchmark implements Tool { +public class RandomMessageBenchmark extends GiraphBenchmark { /** How many supersteps to run */ public static final String SUPERSTEP_COUNT = "giraph.randomMessageBenchmark.superstepCount"; @@ -74,12 +71,23 @@ public class RandomMessageBenchmark implements Tool { /** All millis during this application */ public static final String AGG_TOTAL_MILLIS = "total millis"; /** Workers for that superstep */ - public static final String WORKERS = "workers"; + public static final String WORKERS_NUM = "workers"; + + /** Option for number of bytes per message */ + private static final BenchmarkOption BYTES_PER_MESSAGE = new BenchmarkOption( + "b", "bytes", true, "Message bytes per memssage", + "Need to set the number of message bytes (-b)"); + /** Option for number of messages per edge */ + private static final BenchmarkOption MESSAGES_PER_EDGE = new BenchmarkOption( + "n", "number", true, "Number of messages per edge", + "Need to set the number of messages per edge (-n)"); + /** Option for number of flush threads */ + private static final BenchmarkOption FLUSH_THREADS = new BenchmarkOption( + "f", "flusher", true, "Number of flush threads"); + /** Class logger */ private static final Logger LOG = Logger.getLogger(RandomMessageBenchmarkWorkerContext.class); - /** Configuration from Configurable */ - private Configuration conf; /** * {@link WorkerContext} forRandomMessageBenchmark. 
@@ -129,7 +137,7 @@ public class RandomMessageBenchmark implements Tool { getAggregatedValue(AGG_SUPERSTEP_TOTAL_MESSAGES).get(); long superstepMillis = this.<LongWritable> getAggregatedValue(AGG_SUPERSTEP_TOTAL_MILLIS).get(); - long workers = this.<LongWritable>getAggregatedValue(WORKERS).get(); + long workers = this.<LongWritable>getAggregatedValue(WORKERS_NUM).get(); // For timing and tracking the supersteps // - superstep 0 starts the time, but cannot display any stats @@ -157,7 +165,7 @@ public class RandomMessageBenchmark implements Tool { LOG.info(AGG_TOTAL_MESSAGES + " : " + totalMessages); LOG.info(AGG_SUPERSTEP_TOTAL_MILLIS + " : " + superstepMillis); LOG.info(AGG_TOTAL_MILLIS + " : " + totalMillis); - LOG.info(WORKERS + " : " + workers); + LOG.info(WORKERS_NUM + " : " + workers); LOG.info("Superstep megabytes / second = " + superstepMegabytesPerSecond); LOG.info("Total megabytes / second = " + @@ -177,7 +185,7 @@ public class RandomMessageBenchmark implements Tool { } } - aggregate(WORKERS, new LongWritable(1)); + aggregate(WORKERS_NUM, new LongWritable(1)); } @Override @@ -241,7 +249,7 @@ public class RandomMessageBenchmark implements Tool { LongSumAggregator.class); registerAggregator(AGG_SUPERSTEP_TOTAL_MILLIS, LongSumAggregator.class); - registerAggregator(WORKERS, + registerAggregator(WORKERS_NUM, LongSumAggregator.class); } } @@ -273,128 +281,32 @@ public class RandomMessageBenchmark implements Tool { } @Override - public Configuration getConf() { - return conf; - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; + public Set<BenchmarkOption> getBenchmarkOptions() { + return Sets.newHashSet(BenchmarkOption.SUPERSTEPS, + BenchmarkOption.VERTICES, BenchmarkOption.EDGES_PER_VERTEX, + BYTES_PER_MESSAGE, MESSAGES_PER_EDGE, FLUSH_THREADS); } @Override - public int run(String[] args) throws Exception { - Options options = new Options(); - options.addOption("h", "help", false, "Help"); - options.addOption("v", "verbose", 
false, "Verbose"); - options.addOption("w", - "workers", - true, - "Number of workers"); - options.addOption("b", - "bytes", - true, - "Message bytes per memssage"); - options.addOption("n", - "number", - true, - "Number of messages per edge"); - options.addOption("s", - "supersteps", - true, - "Supersteps to execute before finishing"); - options.addOption("V", - "aggregateVertices", - true, - "Aggregate vertices"); - options.addOption("e", - "edgesPerVertex", - true, - "Edges per vertex"); - options.addOption("f", - "flusher", - true, - "Number of flush threads"); - - HelpFormatter formatter = new HelpFormatter(); - if (args.length == 0) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - CommandLineParser parser = new PosixParser(); - CommandLine cmd = parser.parse(options, args); - if (cmd.hasOption('h')) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - if (!cmd.hasOption('w')) { - LOG.info("Need to choose the number of workers (-w)"); - return -1; - } - if (!cmd.hasOption('s')) { - LOG.info("Need to set the number of supersteps (-s)"); - return -1; - } - if (!cmd.hasOption('V')) { - LOG.info("Need to set the aggregate vertices (-V)"); - return -1; - } - if (!cmd.hasOption('e')) { - LOG.info("Need to set the number of edges " + - "per vertex (-e)"); - return -1; - } - if (!cmd.hasOption('b')) { - LOG.info("Need to set the number of message bytes (-b)"); - return -1; - } - if (!cmd.hasOption('n')) { - LOG.info("Need to set the number of messages per edge (-n)"); - return -1; - } - int workers = Integer.parseInt(cmd.getOptionValue('w')); - GiraphJob job = new GiraphJob(getConf(), getClass().getName()); - job.getConfiguration().setVertexClass(RandomMessageVertex.class); - job.getConfiguration().setVertexInputFormatClass( - PseudoRandomVertexInputFormat.class); - job.getConfiguration().setWorkerContextClass( - RandomMessageBenchmarkWorkerContext.class); - job.getConfiguration().setMasterComputeClass( - 
RandomMessageBenchmarkMasterCompute.class); - job.getConfiguration().setWorkerConfiguration(workers, workers, 100.0f); - job.getConfiguration().setLong( - PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, - Long.parseLong(cmd.getOptionValue('V'))); - job.getConfiguration().setLong( - PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, - Long.parseLong(cmd.getOptionValue('e'))); - job.getConfiguration().setInt( - SUPERSTEP_COUNT, - Integer.parseInt(cmd.getOptionValue('s'))); - job.getConfiguration().setInt( - RandomMessageBenchmark.NUM_BYTES_PER_MESSAGE, - Integer.parseInt(cmd.getOptionValue('b'))); - job.getConfiguration().setInt( - RandomMessageBenchmark.NUM_MESSAGES_PER_EDGE, - Integer.parseInt(cmd.getOptionValue('n'))); - - boolean isVerbose = false; - if (cmd.hasOption('v')) { - isVerbose = true; - } - if (cmd.hasOption('s')) { - getConf().setInt(SUPERSTEP_COUNT, - Integer.parseInt(cmd.getOptionValue('s'))); - } - if (cmd.hasOption('f')) { - job.getConfiguration().setInt( - GiraphConstants.MSG_NUM_FLUSH_THREADS, - Integer.parseInt(cmd.getOptionValue('f'))); - } - if (job.run(isVerbose)) { - return 0; - } else { - return -1; + protected void prepareConfiguration(GiraphConfiguration conf, + CommandLine cmd) { + conf.setVertexClass(RandomMessageVertex.class); + conf.setVertexInputFormatClass(PseudoRandomVertexInputFormat.class); + conf.setWorkerContextClass(RandomMessageBenchmarkWorkerContext.class); + conf.setMasterComputeClass(RandomMessageBenchmarkMasterCompute.class); + conf.setLong(PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, + BenchmarkOption.VERTICES.getOptionLongValue(cmd)); + conf.setLong(PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, + BenchmarkOption.EDGES_PER_VERTEX.getOptionLongValue(cmd)); + conf.setInt(SUPERSTEP_COUNT, + BenchmarkOption.SUPERSTEPS.getOptionIntValue(cmd)); + conf.setInt(RandomMessageBenchmark.NUM_BYTES_PER_MESSAGE, + BYTES_PER_MESSAGE.getOptionIntValue(cmd)); + conf.setInt(RandomMessageBenchmark.NUM_MESSAGES_PER_EDGE, + 
MESSAGES_PER_EDGE.getOptionIntValue(cmd)); + if (FLUSH_THREADS.optionTurnedOn(cmd)) { + conf.setInt(GiraphConstants.MSG_NUM_FLUSH_THREADS, + FLUSH_THREADS.getOptionIntValue(cmd)); } } http://git-wip-us.apache.org/repos/asf/giraph/blob/bf0d0742/giraph-core/src/main/java/org/apache/giraph/benchmark/ShortestPathsBenchmark.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/benchmark/ShortestPathsBenchmark.java b/giraph-core/src/main/java/org/apache/giraph/benchmark/ShortestPathsBenchmark.java index c3c714e..58d3fee 100644 --- a/giraph-core/src/main/java/org/apache/giraph/benchmark/ShortestPathsBenchmark.java +++ b/giraph-core/src/main/java/org/apache/giraph/benchmark/ShortestPathsBenchmark.java @@ -19,126 +19,60 @@ package org.apache.giraph.benchmark; import org.apache.commons.cli.CommandLine; -import org.apache.commons.cli.CommandLineParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.PosixParser; import org.apache.giraph.combiner.MinimumDoubleCombiner; +import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.conf.GiraphConstants; import org.apache.giraph.edge.ArrayListEdges; import org.apache.giraph.edge.HashMapEdges; import org.apache.giraph.io.formats.PseudoRandomInputFormatConstants; import org.apache.giraph.io.formats.PseudoRandomVertexInputFormat; -import org.apache.giraph.job.GiraphJob; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; +import com.google.common.collect.Sets; + +import java.util.Set; + /** * Single-source shortest paths benchmark. 
*/ -public class ShortestPathsBenchmark implements Tool { +public class ShortestPathsBenchmark extends GiraphBenchmark { /** Class logger */ private static final Logger LOG = Logger.getLogger(ShortestPathsBenchmark.class); - /** Configuration */ - private Configuration conf; - @Override - public Configuration getConf() { - return conf; - } + /** Option for VertexEdges class */ + private static final BenchmarkOption EDGES_CLASS = new BenchmarkOption( + "c", "edgesClass", true, + "Vertex edges class (0 for HashMapEdges, 1 for ArrayListEdges)"); + /** Option for not using combiner */ + private static final BenchmarkOption NO_COMBINER = new BenchmarkOption( + "nc", "noCombiner", false, "Don't use a combiner"); @Override - public void setConf(Configuration conf) { - this.conf = conf; + public Set<BenchmarkOption> getBenchmarkOptions() { + return Sets.newHashSet(BenchmarkOption.VERTICES, + BenchmarkOption.EDGES_PER_VERTEX, EDGES_CLASS, NO_COMBINER); } @Override - public final int run(final String[] args) throws Exception { - Options options = new Options(); - options.addOption("h", "help", false, "Help"); - options.addOption("v", "verbose", false, "Verbose"); - options.addOption("w", - "workers", - true, - "Number of workers"); - options.addOption("V", - "aggregateVertices", - true, - "Aggregate vertices"); - options.addOption("e", - "edgesPerVertex", - true, - "Edges per vertex"); - options.addOption("c", - "edgesClass", - true, - "Vertex edges class (0 for HashMapEdges, 1 for ArrayListEdges)"); - options.addOption("nc", - "noCombiner", - false, - "Don't use a combiner"); - HelpFormatter formatter = new HelpFormatter(); - if (args.length == 0) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - CommandLineParser parser = new PosixParser(); - CommandLine cmd = parser.parse(options, args); - if (cmd.hasOption('h')) { - formatter.printHelp(getClass().getName(), options, true); - return 0; - } - if (!cmd.hasOption('w')) { - LOG.info("Need to 
choose the number of workers (-w)"); - return -1; - } - if (!cmd.hasOption('V')) { - LOG.info("Need to set the aggregate vertices (-V)"); - return -1; - } - if (!cmd.hasOption('e')) { - LOG.info("Need to set the number of edges " + - "per vertex (-e)"); - return -1; - } - - int workers = Integer.parseInt(cmd.getOptionValue('w')); - GiraphJob job = new GiraphJob(getConf(), getClass().getName()); - job.getConfiguration().setVertexClass(ShortestPathsVertex.class); - if (!cmd.hasOption('c') || - (Integer.parseInt(cmd.getOptionValue('c')) == 1)) { - job.getConfiguration().setVertexEdgesClass(ArrayListEdges.class); + protected void prepareConfiguration(GiraphConfiguration conf, + CommandLine cmd) { + conf.setVertexClass(ShortestPathsVertex.class); + if (EDGES_CLASS.getOptionIntValue(cmd, 1) == 1) { + conf.setVertexEdgesClass(ArrayListEdges.class); } else { - job.getConfiguration().setVertexEdgesClass(HashMapEdges.class); - } - LOG.info("Using class " + - GiraphConstants.VERTEX_CLASS.get(job.getConfiguration())); - job.getConfiguration().setVertexInputFormatClass( - PseudoRandomVertexInputFormat.class); - if (!cmd.hasOption("nc")) { - job.getConfiguration().setVertexCombinerClass( - MinimumDoubleCombiner.class); + conf.setVertexEdgesClass(HashMapEdges.class); } - job.getConfiguration().setWorkerConfiguration(workers, workers, 100.0f); - job.getConfiguration().setLong( - PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, - Long.parseLong(cmd.getOptionValue('V'))); - job.getConfiguration().setLong( - PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, - Long.parseLong(cmd.getOptionValue('e'))); - - boolean isVerbose = false; - if (cmd.hasOption('v')) { - isVerbose = true; - } - if (job.run(isVerbose)) { - return 0; - } else { - return -1; + LOG.info("Using class " + GiraphConstants.VERTEX_CLASS.get(conf)); + conf.setVertexInputFormatClass(PseudoRandomVertexInputFormat.class); + if (!NO_COMBINER.optionTurnedOn(cmd)) { + 
conf.setVertexCombinerClass(MinimumDoubleCombiner.class); } + conf.setLong(PseudoRandomInputFormatConstants.AGGREGATE_VERTICES, + BenchmarkOption.VERTICES.getOptionLongValue(cmd)); + conf.setLong(PseudoRandomInputFormatConstants.EDGES_PER_VERTEX, + BenchmarkOption.EDGES_PER_VERTEX.getOptionLongValue(cmd)); } /**
