Updated Branches: refs/heads/trunk c9a368144 -> a3a6e9a8f
GIRAPH-844: TextInputFormat for SimpleShortestPaths Project: http://git-wip-us.apache.org/repos/asf/giraph/repo Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/a3a6e9a8 Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/a3a6e9a8 Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/a3a6e9a8 Branch: refs/heads/trunk Commit: a3a6e9a8fff39ea32e2ceeda8bd2b85d9df660ad Parents: c9a3681 Author: ssc <ssc@krimskrams.(none)> Authored: Mon Feb 10 14:32:41 2014 +0100 Committer: ssc <ssc@krimskrams.(none)> Committed: Mon Feb 10 22:40:18 2014 +0100 ---------------------------------------------------------------------- CHANGELOG | 3 + .../LongDoubleDoubleTextInputFormat.java | 1 + .../LongDoubleFloatTextInputFormat.java | 110 +++++++++++++++++++ .../SimpleShortestPathsComputationTest.java | 67 +++++++++-- 4 files changed, 173 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/CHANGELOG ---------------------------------------------------------------------- diff --git a/CHANGELOG b/CHANGELOG index 7dc93d0..feb9193 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,9 @@ Giraph Change Log Release 1.1.0 - unreleased + + GIRAPH-844: TextInputFormat for SimpleShortestPaths (ssc) + GIRAPH-841: fix missing munge symbols error for non-munge-using profiles (Eugene Koontz via rvs) GIRAPH-794: add support for generic hadoop1 and hadoop2 profiles (rvs) http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java index 18cc8bc..d943df2 100644 --- 
a/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java +++ b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java @@ -45,6 +45,7 @@ public class LongDoubleDoubleTextInputFormat DoubleWritable> implements ImmutableClassesGiraphConfigurable<LongWritable, DoubleWritable, DoubleWritable> { + /** Configuration. */ private ImmutableClassesGiraphConfiguration<LongWritable, DoubleWritable, DoubleWritable> conf; http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java ---------------------------------------------------------------------- diff --git a/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java new file mode 100644 index 0000000..c7a2e07 --- /dev/null +++ b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.giraph.examples; + +import com.google.common.collect.Lists; +import org.apache.giraph.conf.ImmutableClassesGiraphConfigurable; +import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.edge.EdgeFactory; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.formats.TextVertexInputFormat; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +import java.io.IOException; +import java.util.List; +import java.util.regex.Pattern; + +/** + * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for + * weighted graphs with long ids. Each line consists of: vertex neighbor1:weight + * neighbor2:weight ... + */ +public class LongDoubleFloatTextInputFormat + extends TextVertexInputFormat<LongWritable, DoubleWritable, FloatWritable> + implements ImmutableClassesGiraphConfigurable<LongWritable, DoubleWritable, + FloatWritable> { + /** Configuration. */ + private ImmutableClassesGiraphConfiguration<LongWritable, DoubleWritable, + FloatWritable> conf; + + @Override + public TextVertexReader createVertexReader(InputSplit split, + TaskAttemptContext context) throws IOException { + return new LongDoubleFloatVertexReader(); + } + + @Override + public void setConf(ImmutableClassesGiraphConfiguration<LongWritable, + DoubleWritable, FloatWritable> configuration) { + this.conf = configuration; + } + + @Override + public ImmutableClassesGiraphConfiguration<LongWritable, DoubleWritable, + FloatWritable> getConf() { + return conf; + } + + /** + * Vertex reader associated with + * {@link LongDoubleFloatTextInputFormat}. 
+ */ + public class LongDoubleFloatVertexReader extends + TextVertexInputFormat<LongWritable, DoubleWritable, + FloatWritable>.TextVertexReader { + /** Separator of the vertex and neighbors */ + private final Pattern neighborSeparator = Pattern.compile("[\t ]"); + /** Separator of a neighbor and its weight */ + private final Pattern weightSeparator = Pattern.compile("[:]"); + + @Override + public Vertex<LongWritable, DoubleWritable, FloatWritable> + getCurrentVertex() throws IOException, InterruptedException { + Vertex<LongWritable, DoubleWritable, FloatWritable> + vertex = conf.createVertex(); + + String[] tokens = neighborSeparator.split(getRecordReader() + .getCurrentValue().toString()); + List<Edge<LongWritable, FloatWritable>> edges = + Lists.newArrayListWithCapacity(tokens.length - 1); + + for (int n = 1; n < tokens.length; n++) { + String[] parts = weightSeparator.split(tokens[n]); + edges.add(EdgeFactory.create( + new LongWritable(Long.parseLong(parts[0])), + new FloatWritable(Float.parseFloat(parts[1])))); + } + + LongWritable vertexId = new LongWritable(Long.parseLong(tokens[0])); + vertex.initialize(vertexId, new DoubleWritable(), edges); + + return vertex; + } + + @Override + public boolean nextVertex() throws IOException, InterruptedException { + return getRecordReader().nextKeyValue(); + } + } +} http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java ---------------------------------------------------------------------- diff --git a/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java b/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java index 6a74c21..dc6c84a 100644 --- a/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java +++ 
b/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java @@ -40,6 +40,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import java.util.Map; +import java.util.regex.Pattern; import static org.apache.giraph.examples.SimpleShortestPathsComputation.SOURCE_ID; import static org.junit.Assert.assertEquals; @@ -114,7 +115,7 @@ public class SimpleShortestPathsComputationTest { * A local integration test on toy data */ @Test - public void testToyData() throws Exception { + public void testToyDataJson() throws Exception { // a small four vertex graph String[] graph = new String[] { @@ -137,18 +138,18 @@ public class SimpleShortestPathsComputationTest { // run internally Iterable<String> results = InternalVertexRunner.run(conf, graph); - Map<Long, Double> distances = parseDistances(results); + Map<Long, Double> distances = parseDistancesJson(results); // verify results assertNotNull(distances); - assertEquals(4, (int) distances.size()); - assertEquals(0.0, (double) distances.get(1L), 0d); - assertEquals(1.0, (double) distances.get(2L), 0d); - assertEquals(2.0, (double) distances.get(3L), 0d); - assertEquals(4.0, (double) distances.get(4L), 0d); + assertEquals(4, distances.size()); + assertEquals(0.0, distances.get(1L), 0d); + assertEquals(1.0, distances.get(2L), 0d); + assertEquals(2.0, distances.get(3L), 0d); + assertEquals(4.0, distances.get(4L), 0d); } - private Map<Long, Double> parseDistances(Iterable<String> results) { + private Map<Long, Double> parseDistancesJson(Iterable<String> results) { Map<Long, Double> distances = Maps.newHashMapWithExpectedSize(Iterables.size(results)); for (String line : results) { @@ -162,4 +163,54 @@ public class SimpleShortestPathsComputationTest { } return distances; } + + /** + * A local integration test on toy data + */ + @Test + public void testToyData() throws Exception { + + // a small four vertex graph + String[] graph = new String[] { + "1 2:1.0 3:3.0", + 
"2 3:1.0 4:10.0", + "3 4:2.0", + "4" + }; + + GiraphConfiguration conf = new GiraphConfiguration(); + // start from vertex 1 + SOURCE_ID.set(conf, 1); + conf.setComputationClass(SimpleShortestPathsComputation.class); + conf.setOutEdgesClass(ByteArrayEdges.class); + conf.setVertexInputFormatClass(LongDoubleFloatTextInputFormat.class); + conf.setVertexOutputFormatClass( + VertexWithDoubleValueNullEdgeTextOutputFormat.class); + + // run internally + Iterable<String> results = InternalVertexRunner.run(conf, graph); + + Map<Long, Double> distances = parseDistances(results); + + // verify results + assertNotNull(distances); + assertEquals(4, distances.size()); + assertEquals(0.0, distances.get(1L), 0d); + assertEquals(1.0, distances.get(2L), 0d); + assertEquals(2.0, distances.get(3L), 0d); + assertEquals(4.0, distances.get(4L), 0d); + } + + private Map<Long, Double> parseDistances(Iterable<String> results) { + Map<Long, Double> distances = + Maps.newHashMapWithExpectedSize(Iterables.size(results)); + + Pattern separator = Pattern.compile("[\t]"); + + for (String line : results) { + String[] tokens = separator.split(line); + distances.put(Long.parseLong(tokens[0]), Double.parseDouble(tokens[1])); + } + return distances; + } }
