Updated Branches:
  refs/heads/trunk c9a368144 -> a3a6e9a8f

GIRAPH-844: TextInputFormat for SimpleShortestPaths


Project: http://git-wip-us.apache.org/repos/asf/giraph/repo
Commit: http://git-wip-us.apache.org/repos/asf/giraph/commit/a3a6e9a8
Tree: http://git-wip-us.apache.org/repos/asf/giraph/tree/a3a6e9a8
Diff: http://git-wip-us.apache.org/repos/asf/giraph/diff/a3a6e9a8

Branch: refs/heads/trunk
Commit: a3a6e9a8fff39ea32e2ceeda8bd2b85d9df660ad
Parents: c9a3681
Author: ssc <ssc@krimskrams.(none)>
Authored: Mon Feb 10 14:32:41 2014 +0100
Committer: ssc <ssc@krimskrams.(none)>
Committed: Mon Feb 10 22:40:18 2014 +0100

----------------------------------------------------------------------
 CHANGELOG                                       |   3 +
 .../LongDoubleDoubleTextInputFormat.java        |   1 +
 .../LongDoubleFloatTextInputFormat.java         | 110 +++++++++++++++++++
 .../SimpleShortestPathsComputationTest.java     |  67 +++++++++--
 4 files changed, 173 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 7dc93d0..feb9193 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,9 @@
 Giraph Change Log
 
 Release 1.1.0 - unreleased
+
+  GIRAPH-844: TextInputFormat for SimpleShortestPaths (ssc)
+
   GIRAPH-841: fix missing munge symbols error for non-munge-using profiles (Eugene Koontz via rvs)
 
   GIRAPH-794: add support for generic hadoop1 and hadoop2 profiles (rvs)

http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java
index 18cc8bc..d943df2 100644
--- a/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java
+++ b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleDoubleTextInputFormat.java
@@ -45,6 +45,7 @@ public class LongDoubleDoubleTextInputFormat
     DoubleWritable>
     implements ImmutableClassesGiraphConfigurable<LongWritable, DoubleWritable,
     DoubleWritable> {
+
   /** Configuration. */
   private ImmutableClassesGiraphConfiguration<LongWritable, DoubleWritable,
       DoubleWritable> conf;

http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java
----------------------------------------------------------------------
diff --git a/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java
new file mode 100644
index 0000000..c7a2e07
--- /dev/null
+++ b/giraph-examples/src/main/java/org/apache/giraph/examples/LongDoubleFloatTextInputFormat.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.giraph.examples;
+
+import com.google.common.collect.Lists;
+import org.apache.giraph.conf.ImmutableClassesGiraphConfigurable;
+import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration;
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.edge.EdgeFactory;
+import org.apache.giraph.graph.Vertex;
+import org.apache.giraph.io.formats.TextVertexInputFormat;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * Simple text-based {@link org.apache.giraph.io.VertexInputFormat} for
+ * weighted graphs with long ids. Each line consists of: vertex neighbor1:weight
+ * neighbor2:weight ...
+ */
+public class LongDoubleFloatTextInputFormat
+    extends TextVertexInputFormat<LongWritable, DoubleWritable, FloatWritable>
+    implements ImmutableClassesGiraphConfigurable<LongWritable, DoubleWritable,
+    FloatWritable> {
+  /** Configuration set through setConf; used to create vertices. */
+  private ImmutableClassesGiraphConfiguration<LongWritable, DoubleWritable,
+      FloatWritable> conf;
+
+  @Override
+  public TextVertexReader createVertexReader(InputSplit split,
+      TaskAttemptContext context) throws IOException {
+    return new LongDoubleFloatVertexReader();
+  }
+
+  @Override
+  public void setConf(ImmutableClassesGiraphConfiguration<LongWritable,
+      DoubleWritable, FloatWritable> configuration) {
+    this.conf = configuration;
+  }
+
+  @Override
+  public ImmutableClassesGiraphConfiguration<LongWritable, DoubleWritable,
+      FloatWritable> getConf() {
+    return conf;
+  }
+
+  /**
+   * Vertex reader associated with
+   * {@link LongDoubleFloatTextInputFormat}.
+   */
+  public class LongDoubleFloatVertexReader extends
+      TextVertexInputFormat<LongWritable, DoubleWritable,
+          FloatWritable>.TextVertexReader {
+    /** Separator of the vertex and neighbors: a single tab or space */
+    private final Pattern neighborSeparator = Pattern.compile("[\t ]");
+    /** Separator of a neighbor id and its edge weight: a colon */
+    private final Pattern weightSeparator = Pattern.compile("[:]");
+
+    @Override
+    public Vertex<LongWritable, DoubleWritable, FloatWritable>
+    getCurrentVertex() throws IOException, InterruptedException {
+      Vertex<LongWritable, DoubleWritable, FloatWritable>
+          vertex = conf.createVertex();
+
+      String[] tokens = neighborSeparator.split(getRecordReader()
+          .getCurrentValue().toString());
+      List<Edge<LongWritable, FloatWritable>> edges =
+          Lists.newArrayListWithCapacity(tokens.length - 1);
+
+      for (int n = 1; n < tokens.length; n++) { // token 0 is the vertex id
+        String[] parts = weightSeparator.split(tokens[n]);
+        edges.add(EdgeFactory.create(
+            new LongWritable(Long.parseLong(parts[0])),
+            new FloatWritable(Float.parseFloat(parts[1]))));
+      }
+
+      LongWritable vertexId = new LongWritable(Long.parseLong(tokens[0]));
+      vertex.initialize(vertexId, new DoubleWritable(), edges);
+
+      return vertex;
+    }
+
+    @Override
+    public boolean nextVertex() throws IOException, InterruptedException {
+      return getRecordReader().nextKeyValue();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/giraph/blob/a3a6e9a8/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java
----------------------------------------------------------------------
diff --git a/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java b/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java
index 6a74c21..dc6c84a 100644
--- a/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java
+++ b/giraph-examples/src/test/java/org/apache/giraph/examples/SimpleShortestPathsComputationTest.java
@@ -40,6 +40,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 
 import java.util.Map;
+import java.util.regex.Pattern;
 
 import static org.apache.giraph.examples.SimpleShortestPathsComputation.SOURCE_ID;
 import static org.junit.Assert.assertEquals;
@@ -114,7 +115,7 @@ public class SimpleShortestPathsComputationTest {
    * A local integration test on toy data
    */
   @Test
-  public void testToyData() throws Exception {
+  public void testToyDataJson() throws Exception {
 
     // a small four vertex graph
     String[] graph = new String[] {
@@ -137,18 +138,18 @@ public class SimpleShortestPathsComputationTest {
     // run internally
     Iterable<String> results = InternalVertexRunner.run(conf, graph);
 
-    Map<Long, Double> distances = parseDistances(results);
+    Map<Long, Double> distances = parseDistancesJson(results);
 
     // verify results
     assertNotNull(distances);
-    assertEquals(4, (int) distances.size());
-    assertEquals(0.0, (double) distances.get(1L), 0d);
-    assertEquals(1.0, (double) distances.get(2L), 0d);
-    assertEquals(2.0, (double) distances.get(3L), 0d);
-    assertEquals(4.0, (double) distances.get(4L), 0d);
+    assertEquals(4, distances.size());
+    assertEquals(0.0, distances.get(1L), 0d);
+    assertEquals(1.0, distances.get(2L), 0d);
+    assertEquals(2.0, distances.get(3L), 0d);
+    assertEquals(4.0, distances.get(4L), 0d);
   }
 
-  private Map<Long, Double> parseDistances(Iterable<String> results) {
+  private Map<Long, Double> parseDistancesJson(Iterable<String> results) {
     Map<Long, Double> distances =
         Maps.newHashMapWithExpectedSize(Iterables.size(results));
     for (String line : results) {
@@ -162,4 +163,54 @@ public class SimpleShortestPathsComputationTest {
     }
     return distances;
   }
+
+  /**
+   * A local integration test on toy data read via the text input format
+   */
+  @Test
+  public void testToyData() throws Exception {
+
+    // a small four vertex graph; each line: vertex neighbor:weight ...
+    String[] graph = new String[] {
+        "1 2:1.0 3:3.0",
+        "2 3:1.0 4:10.0",
+        "3 4:2.0",
+        "4"
+    };
+
+    GiraphConfiguration conf = new GiraphConfiguration();
+    // start from vertex 1
+    SOURCE_ID.set(conf, 1);
+    conf.setComputationClass(SimpleShortestPathsComputation.class);
+    conf.setOutEdgesClass(ByteArrayEdges.class);
+    conf.setVertexInputFormatClass(LongDoubleFloatTextInputFormat.class);
+    conf.setVertexOutputFormatClass(
+        VertexWithDoubleValueNullEdgeTextOutputFormat.class);
+
+    // run internally
+    Iterable<String> results = InternalVertexRunner.run(conf, graph);
+
+    Map<Long, Double> distances = parseDistances(results);
+
+    // verify results: expected distance from vertex 1 to each vertex
+    assertNotNull(distances);
+    assertEquals(4, distances.size());
+    assertEquals(0.0, distances.get(1L), 0d);
+    assertEquals(1.0, distances.get(2L), 0d);
+    assertEquals(2.0, distances.get(3L), 0d);
+    assertEquals(4.0, distances.get(4L), 0d);
+  }
+
+  private Map<Long, Double> parseDistances(Iterable<String> results) {
+    Map<Long, Double> distances =
+        Maps.newHashMapWithExpectedSize(Iterables.size(results));
+
+    Pattern separator = Pattern.compile("[\t]"); // vertex id <TAB> distance
+
+    for (String line : results) {
+      String[] tokens = separator.split(line);
+      distances.put(Long.parseLong(tokens[0]), Double.parseDouble(tokens[1]));
+    }
+    return distances;
+  }
 }

Reply via email to