Updated Branches: refs/heads/trunk ca442deeb -> 28f4d1b09
GIRAPH-820: add a configuration option to skip creating source vertices present only in edge input (pavanka via majakabiljo) Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/28f4d1b0 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/28f4d1b0 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/28f4d1b0 Branch: refs/heads/trunk Commit: 28f4d1b09b712c02c2003a413a1118fd218030cf Parents: ca442de Author: Maja Kabiljo <[email protected]> Authored: Wed Jan 8 15:21:40 2014 -0800 Committer: Maja Kabiljo <[email protected]> Committed: Wed Jan 8 15:21:40 2014 -0800 ---------------------------------------------------------------------- CHANGELOG | 3 + .../apache/giraph/conf/GiraphConfiguration.java | 16 ++ .../org/apache/giraph/conf/GiraphConstants.java | 9 ++ .../java/org/apache/giraph/edge/EdgeStore.java | 13 +- .../giraph/io/TestCreateSourceVertex.java | 156 +++++++++++++++++++ 5 files changed, 193 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 6d77b5b..16a8777 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,9 @@ Giraph Change Log Release 1.1.0 - unreleased + GIRAPH-820: add a configuration option to skip creating source vertices present only + in edge input (pavanka via majakabiljo) + GIRAPH-742: Worker task finishSuperstep status reporting the wrong superstep number (cmuchins via majakabiljo) http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java 
index d066513..c8b7d36 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConfiguration.java @@ -1161,4 +1161,20 @@ public class GiraphConfiguration extends Configuration public boolean isOneToAllMsgSendingEnabled() { return ONE_TO_ALL_MSG_SENDING.isTrue(this); } + + /** + * Get option whether to create a source vertex present only in edge input + * @return CREATE_EDGE_SOURCE_VERTICES option + */ + public boolean getCreateSourceVertex() { + return CREATE_EDGE_SOURCE_VERTICES.get(this); + } + + /** + * set option whether to create a source vertex present only in edge input + * @param createVertex create source vertex option + */ + public void setCreateSourceVertex(boolean createVertex) { + CREATE_EDGE_SOURCE_VERTICES.set(this, createVertex); + } } http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java index 3f379f1..63f38df 100644 --- a/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java +++ b/giraph-core/src/main/java/org/apache/giraph/conf/GiraphConstants.java @@ -970,6 +970,15 @@ public interface GiraphConstants { "one-to-all message sending strategy"); /** + * This option can be used to specify if a source vertex present in edge + * input but not in vertex input can be created + */ + BooleanConfOption CREATE_EDGE_SOURCE_VERTICES = + new BooleanConfOption("giraph.createEdgeSourceVertices", true, + "Create a source vertex if present in edge input but not " + + "necessarily in vertex input"); + + /** * This counter group will contain one counter whose name is the ZooKeeper * server:port which this job is using */ 
http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java b/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java index 1694d36..dd8f2a3 100644 --- a/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java +++ b/giraph-core/src/main/java/org/apache/giraph/edge/EdgeStore.java @@ -156,6 +156,8 @@ public class EdgeStore<I extends WritableComparable, * Note: this method is not thread-safe. */ public void moveEdgesToVertices() { + final boolean createSourceVertex = configuration. + getCreateSourceVertex(); if (transientEdges.isEmpty()) { if (LOG.isInfoEnabled()) { LOG.info("moveEdgesToVertices: No edges to move"); @@ -191,10 +193,13 @@ public class EdgeStore<I extends WritableComparable, // If the source vertex doesn't exist, create it. Otherwise, // just set the edges. 
if (vertex == null) { - vertex = configuration.createVertex(); - vertex.initialize(vertexId, configuration.createVertexValue(), - outEdges); - partition.putVertex(vertex); + if (createSourceVertex) { + // createVertex only if it is allowed by configuration + vertex = configuration.createVertex(); + vertex.initialize(vertexId, + configuration.createVertexValue(), outEdges); + partition.putVertex(vertex); + } } else { // A vertex may exist with or without edges initially // and optimize the case of no initial edges http://git-wip-us.apache.org/repos/asf/giraph/blob/28f4d1b0/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java ---------------------------------------------------------------------- diff --git a/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java b/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java new file mode 100644 index 0000000..039e975 --- /dev/null +++ b/giraph-core/src/test/java/org/apache/giraph/io/TestCreateSourceVertex.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.io; + +import com.google.common.collect.Maps; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.edge.ByteArrayEdges; +import org.apache.giraph.io.formats.IdWithValueTextOutputFormat; +import org.apache.giraph.io.formats.IntIntNullTextVertexInputFormat; +import org.apache.giraph.io.formats.IntNullTextEdgeInputFormat; +import org.apache.giraph.utils.ComputationCountEdges; +import org.apache.giraph.utils.IntIntNullNoOpComputation; +import org.apache.giraph.utils.InternalVertexRunner; +import org.junit.Test; + +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Test createSourceVertex configuration option + */ +public class TestCreateSourceVertex { + @Test + public void testPositiveCreateSourceVertex() throws Exception { + String [] vertices = new String[] { + "1 0", + "2 0", + "3 0", + "4 0", + }; + String [] edges = new String[] { + "1 2", + "1 5", + "2 4", + "2 1", + "3 4", + "4 1", + "4 5", + "6 2", + "7 8", + "4 8", + }; + + GiraphConfiguration conf = getConf(); + conf.setCreateSourceVertex(false); + + Iterable<String> results = InternalVertexRunner.run(conf, vertices, edges); + Map<Integer, Integer> values = parseResults(results); + + // Check that only vertices from vertex input are present in output graph + assertEquals(4, values.size()); + // Check that the ids of vertices in output graph exactly match vertex input + assertTrue(values.containsKey(1)); + assertTrue(values.containsKey(2)); + assertTrue(values.containsKey(3)); + assertTrue(values.containsKey(4)); + + conf.setComputationClass(ComputationCountEdges.class); + results = InternalVertexRunner.run(conf, vertices, edges); + values = parseResults(results); + + // Check the number of edges of each vertex + assertEquals(2, (int) values.get(1)); + assertEquals(2, (int) values.get(2)); + assertEquals(1, (int) values.get(3)); + assertEquals(3, (int) values.get(4)); + } + + 
@Test + public void testNegativeCreateSourceVertex() throws Exception { + String [] vertices = new String[] { + "1 0", + "2 0", + "3 0", + "4 0", + }; + String [] edges = new String[] { + "1 2", + "1 5", + "2 4", + "2 1", + "3 4", + "4 1", + "4 5", + "6 2", + "7 8", + "4 8", + }; + + GiraphConfiguration conf = getConf(); + + Iterable<String> results = InternalVertexRunner.run(conf, vertices, edges); + Map<Integer, Integer> values = parseResults(results); + + // Check that edge-only source vertices (6, 7) are created along with vertex input + assertEquals(6, values.size()); + // Check that output ids are those from vertex input plus edge-only source vertices + assertTrue(values.containsKey(1)); + assertTrue(values.containsKey(2)); + assertTrue(values.containsKey(3)); + assertTrue(values.containsKey(4)); + assertTrue(values.containsKey(6)); + assertTrue(values.containsKey(7)); + + conf.setComputationClass(ComputationCountEdges.class); + results = InternalVertexRunner.run(conf, vertices, edges); + values = parseResults(results); + + // Check the number of edges of each vertex + assertEquals(2, (int) values.get(1)); + assertEquals(2, (int) values.get(2)); + assertEquals(1, (int) values.get(3)); + assertEquals(3, (int) values.get(4)); + assertEquals(1, (int) values.get(6)); + assertEquals(1, (int) values.get(7)); + } + + private GiraphConfiguration getConf() { + GiraphConfiguration conf = new GiraphConfiguration(); + conf.setComputationClass(IntIntNullNoOpComputation.class); + conf.setOutEdgesClass(ByteArrayEdges.class); + conf.setVertexInputFormatClass(IntIntNullTextVertexInputFormat.class); + conf.setEdgeInputFormatClass(IntNullTextEdgeInputFormat.class); + conf.setVertexOutputFormatClass(IdWithValueTextOutputFormat.class); + return conf; + } + + private static Map<Integer, Integer> parseResults(Iterable<String> results) { + Map<Integer, Integer> values = Maps.newHashMap(); + for (String line : results) { + String[] tokens = line.split("\\s+"); + int id = Integer.valueOf(tokens[0]); 
+ int value = Integer.valueOf(tokens[1]); + values.put(id, value); + } + return values; + } +}
