Updated Branches: refs/heads/trunk f4deb5ca8 -> 2a6c9d563
GIRAPH-594: auto set reusing objects (nitay) Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/2a6c9d56 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/2a6c9d56 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/2a6c9d56 Branch: refs/heads/trunk Commit: 2a6c9d5630508439c9fc4cee175617ff63d59a7d Parents: f4deb5c Author: Nitay Joffe <[email protected]> Authored: Thu Mar 28 14:09:35 2013 -0400 Committer: Nitay Joffe <[email protected]> Committed: Thu Mar 28 14:10:00 2013 -0400 ---------------------------------------------------------------------- CHANGELOG | 2 + .../java/org/apache/giraph/conf/GiraphClasses.java | 2 +- .../apache/giraph/conf/GiraphConfiguration.java | 48 +++++++++++++++ .../conf/ImmutableClassesGiraphConfiguration.java | 31 ++-------- .../giraph/partition/ByteArrayPartition.java | 10 ++- .../giraph/partition/ReusesObjectsPartition.java | 36 +++++++++++ .../org/apache/giraph/hive/HiveGiraphRunner.java | 2 +- .../hive/input/vertex/SimpleHiveToVertex.java | 6 +-- 8 files changed, 100 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 8da6a12..ab59833 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ Giraph Change Log Release 0.2.0 - unreleased + GIRAPH-594: auto set reusing objects (nitay) + GIRAPH-597: Don't reuse vertex by default in SimpleHiveToVertex (majakabiljo) GIRAPH-588: More flexible Hive input (majakabiljo) http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java index 23dab79..e03859a 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphClasses.java @@ -147,7 +147,7 @@ public class GiraphClasses<I extends WritableComparable, * * @param conf Configuration to read from. */ - public void readFromConf(Configuration conf) { + private void readFromConf(Configuration conf) { // set pre-validated generic parameter types into Configuration vertexClass = (Class<? extends Vertex<I, V, E, M>>) VERTEX_CLASS.get(conf); List<Class<?>> classList = ReflectionUtils.getTypeArguments(Vertex.class, http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java index dee8e98..963b82a 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java @@ -20,6 +20,7 @@ package org.apache.giraph.conf; import org.apache.giraph.aggregators.AggregatorWriter; import org.apache.giraph.combiner.Combiner; +import org.apache.giraph.edge.ReuseObjectsVertexEdges; import org.apache.giraph.edge.VertexEdges; import org.apache.giraph.graph.Vertex; import org.apache.giraph.graph.VertexResolver; @@ -33,6 +34,7 @@ import org.apache.giraph.master.MasterObserver; import org.apache.giraph.partition.GraphPartitionerFactory; import org.apache.giraph.partition.Partition; import org.apache.giraph.partition.PartitionContext; +import org.apache.giraph.partition.ReusesObjectsPartition; import org.apache.giraph.worker.WorkerContext; import org.apache.giraph.worker.WorkerObserver; import org.apache.hadoop.conf.Configuration; @@ -85,6 +87,15 @@ public class GiraphConfiguration extends Configuration } /** + * Get the vertex edges class + * + * @return vertex edges class + */ + public Class<? extends VertexEdges> getVertexEdgesClass() { + return VERTEX_EDGES_CLASS.get(this); + } + + /** * Set the vertex edges class * * @param vertexEdgesClass Determines the way edges are stored @@ -106,6 +117,43 @@ public class GiraphConfiguration extends Configuration } /** + * True if the {@link VertexEdges} implementation copies the passed edges + * to its own data structure, i.e. it doesn't keep references to Edge + * objects, target vertex ids or edge values passed to add() or + * initialize(). + * This makes it possible to reuse edge objects passed to the data + * structure, to minimize object instantiation (see for example + * EdgeStore#addPartitionEdges()). + * + * @return True iff we can reuse the edge objects + */ + public boolean reuseEdgeObjects() { + return ReuseObjectsVertexEdges.class.isAssignableFrom( + getVertexEdgesClass()); + } + + /** + * True if the {@link Partition} implementation copies the passed vertices + * to its own data structure, i.e. it doesn't keep references to Vertex + * objects passed to it. + * This makes it possible to reuse vertex objects passed to the data + * structure, to minimize object instantiation. + * + * @return True iff we can reuse the vertex objects + */ + public boolean reuseVertexObjects() { + return ReusesObjectsPartition.class.isAssignableFrom(getPartitionClass()); + } + + /** + * Get Partition class used + * @return Partition class + */ + public Class<? extends Partition> getPartitionClass() { + return PARTITION_CLASS.get(this); + } + + /** * Set the vertex input format class (required) * * @param vertexInputFormatClass Determines how graph is input http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java index 0af8b97..76f6105 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/ImmutableClassesGiraphConfiguration.java @@ -20,9 +20,12 @@ package org.apache.giraph.conf; import org.apache.giraph.aggregators.AggregatorWriter; import org.apache.giraph.combiner.Combiner; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; import org.apache.giraph.edge.ReusableEdge; -import org.apache.giraph.edge.ReuseObjectsVertexEdges; +import org.apache.giraph.edge.VertexEdges; import org.apache.giraph.graph.GraphState; +import org.apache.giraph.graph.Vertex; import org.apache.giraph.graph.VertexResolver; import org.apache.giraph.graph.VertexValueFactory; import org.apache.giraph.io.EdgeInputFormat; @@ -41,10 +44,6 @@ import org.apache.giraph.utils.ExtendedDataOutput; import org.apache.giraph.utils.ReflectionUtils; import org.apache.giraph.utils.UnsafeByteArrayInputStream; import org.apache.giraph.utils.UnsafeByteArrayOutputStream; -import org.apache.giraph.edge.Edge; -import org.apache.giraph.edge.EdgeFactory; -import org.apache.giraph.graph.Vertex; -import org.apache.giraph.edge.VertexEdges; import org.apache.giraph.worker.WorkerContext; import org.apache.giraph.worker.WorkerObserver; import org.apache.hadoop.conf.Configuration; @@ -566,11 +565,7 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable, } } - /** - * Get the user's subclassed {@link VertexEdges} - * - * @return User's vertex edges class - */ + @Override public Class<? extends VertexEdges<I, E>> getVertexEdgesClass() { return classes.getVertexEdgesClass(); } @@ -595,22 +590,6 @@ public class ImmutableClassesGiraphConfiguration<I extends WritableComparable, } /** - * True if the {@link VertexEdges} implementation copies the passed edges - * to its own data structure, i.e. it doesn't keep references to Edge - * objects, target vertex ids or edge values passed to add() or - * initialize(). - * This makes it possible to reuse edge objects passed to the data - * structure, to minimize object instantiation (see for example - * EdgeStore#addPartitionEdges()). - * - * @return True iff we can reuse the edge objects - */ - public boolean reuseEdgeObjects() { - return ReuseObjectsVertexEdges.class.isAssignableFrom( - getVertexEdgesClass()); - } - - /** * Create a user {@link VertexEdges} * * @return Instantiated user VertexEdges http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java b/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java index 12aa6fb..dd8c974 100644 --- a/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java +++ b/giraph-core/src/main/java/org/apache/giraph/partition/ByteArrayPartition.java @@ -17,15 +17,16 @@ */ package org.apache.giraph.partition; -import com.google.common.collect.MapMaker; -import com.google.common.primitives.Ints; +import org.apache.giraph.graph.Vertex; import org.apache.giraph.utils.UnsafeByteArrayInputStream; import org.apache.giraph.utils.WritableUtils; -import org.apache.giraph.graph.Vertex; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.util.Progressable; +import com.google.common.collect.MapMaker; +import com.google.common.primitives.Ints; + import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; @@ -45,7 +46,8 @@ import java.util.concurrent.ConcurrentMap; */ public class ByteArrayPartition<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> - extends BasicPartition<I, V, E, M> { + extends BasicPartition<I, V, E, M> + implements ReusesObjectsPartition<I, V, E, M> { /** * Vertex map for this range (keyed by index). Note that the byte[] is a * serialized vertex with the first four bytes as the length of the vertex http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java b/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java new file mode 100644 index 0000000..4153d55 --- /dev/null +++ b/giraph-core/src/main/java/org/apache/giraph/partition/ReusesObjectsPartition.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.partition; + +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; + +/** + * Empty interface to characterize {@link Partition} implementations that + * don't keep references to the Vertex objects they are passed. + * The Giraph infrastructure can exploit this characteristic by reusing Vertex + * objects. + * + * @param <I> Vertex id + * @param <V> Vertex value + * @param <E> Edge value + * @param <M> Message value + */ +public interface ReusesObjectsPartition<I extends WritableComparable, + V extends Writable, E extends Writable, M extends Writable> + extends Partition<I, V, E, M> { } http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java ---------------------------------------------------------------------- diff --git a/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java b/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java index 0039dd6..63e9f95 100644 --- a/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java +++ b/giraph-hive/src/main/java/org/apache/giraph/hive/HiveGiraphRunner.java @@ -26,6 +26,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.giraph.conf.GiraphClasses; import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.graph.Vertex; import org.apache.giraph.hive.common.HiveProfiles; import org.apache.giraph.hive.input.edge.HiveEdgeInputFormat; import org.apache.giraph.hive.input.edge.HiveEdgeReader; @@ -37,7 +38,6 @@ import org.apache.giraph.hive.output.HiveVertexOutputFormat; import org.apache.giraph.hive.output.HiveVertexWriter; import org.apache.giraph.hive.output.VertexToHive; import org.apache.giraph.job.GiraphJob; -import org.apache.giraph.graph.Vertex; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.util.Tool; http://git-wip-us.apache.org/repos/asf/giraph/blob/2a6c9d56/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java ---------------------------------------------------------------------- diff --git a/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java b/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java index 651aefd..a4acd2f 100644 --- a/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java +++ b/giraph-hive/src/main/java/org/apache/giraph/hive/input/vertex/SimpleHiveToVertex.java @@ -18,7 +18,6 @@ package org.apache.giraph.hive.input.vertex; -import org.apache.giraph.conf.BooleanConfOption; import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; import org.apache.giraph.edge.Edge; import org.apache.giraph.graph.Vertex; @@ -42,9 +41,6 @@ import java.util.Iterator; public abstract class SimpleHiveToVertex<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> extends AbstractHiveToVertex<I, V, E, M> { - /** Configuration option for whether to reuse vertex */ - public static final BooleanConfOption REUSE_VERTEX_KEY = - new BooleanConfOption("giraph.hive.reuse.vertex", false); /** Hive records which we are reading from */ private Iterator<HiveRecord> records; @@ -78,7 +74,7 @@ public abstract class SimpleHiveToVertex<I extends WritableComparable, @Override public void setConf(ImmutableClassesGiraphConfiguration<I, V, E, M> conf) { super.setConf(conf); - if (REUSE_VERTEX_KEY.get(conf)) { + if (conf.reuseVertexObjects()) { reusableVertex = getConf().createVertex(); } }
