[
https://issues.apache.org/jira/browse/TINKERPOP3-1015?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Marko A. Rodriguez updated TINKERPOP3-1015:
-------------------------------------------
Description:
I just recently added {{ToyGraphInputRDD}} to test InputRDD stuff against the
full test suite. Check out how it works:
{code}
public final class ToyGraphInputRDD implements InputRDD {
public static final String GREMLIN_SPARK_TOY_GRAPH =
"gremlin.spark.toyGraph";
@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration
configuration, final JavaSparkContext sparkContext) {
final List<Vertex> vertices;
if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.MODERN.toString()))
vertices =
IteratorUtils.list(TinkerFactory.createModern().vertices());
else if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.CLASSIC.toString()))
vertices =
IteratorUtils.list(TinkerFactory.createClassic().vertices());
else if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.CREW.toString()))
vertices =
IteratorUtils.list(TinkerFactory.createTheCrew().vertices());
else if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.GRATEFUL.toString()))
{
try {
final Graph graph = TinkerGraph.open();
graph.io(GryoIo.build()).readGraph(GryoResourceAccess.class.getResource("grateful-dead.kryo").getFile());
vertices = IteratorUtils.list(graph.vertices());
} catch (final IOException e) {
throw new IllegalStateException(e.getMessage(), e);
}
} else
throw new IllegalArgumentException("No legal toy graph was provided
to load: " + configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH));
return
sparkContext.parallelize(vertices.stream().map(VertexWritable::new).collect(Collectors.toList())).mapToPair(vertex
-> new Tuple2<>(vertex.get().id(), vertex));
}
}
{code}
In principle, we could have a {{DefaultInputRDD}} and {{DefaultInputFormat}}
that do this:
{code}
public final class DefaultInputRDD implements InputRDD {
@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration
configuration, final JavaSparkContext sparkContext) {
Graph graph = GraphFactory.open(configuration);
return
sparkContext.parallelize(graph.vertices().stream().map(VertexWritable::new).collect(Collectors.toList())).mapToPair(vertex
-> new Tuple2<>(vertex.get().id(), vertex));
}
}
{code}
It would be a serial/single-threaded load, but it would allow any OLTP graph
system to use Spark/Giraph/etc.
was:
I just recently added {{ToyGraphInputRDD}} to test InputRDD stuff against the
full test suite. Check out it works:
{code}
public final class ToyGraphInputRDD implements InputRDD {
public static final String GREMLIN_SPARK_TOY_GRAPH =
"gremlin.spark.toyGraph";
@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration
configuration, final JavaSparkContext sparkContext) {
final List<Vertex> vertices;
if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.MODERN.toString()))
vertices =
IteratorUtils.list(TinkerFactory.createModern().vertices());
else if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.CLASSIC.toString()))
vertices =
IteratorUtils.list(TinkerFactory.createClassic().vertices());
else if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.CREW.toString()))
vertices =
IteratorUtils.list(TinkerFactory.createTheCrew().vertices());
else if
(configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.GRATEFUL.toString()))
{
try {
final Graph graph = TinkerGraph.open();
graph.io(GryoIo.build()).readGraph(GryoResourceAccess.class.getResource("grateful-dead.kryo").getFile());
vertices = IteratorUtils.list(graph.vertices());
} catch (final IOException e) {
throw new IllegalStateException(e.getMessage(), e);
}
} else
throw new IllegalArgumentException("No legal toy graph was provided
to load: " + configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH));
return
sparkContext.parallelize(vertices.stream().map(VertexWritable::new).collect(Collectors.toList())).mapToPair(vertex
-> new Tuple2<>(vertex.get().id(), vertex));
}
}
{code}
In principle, we could have a {{DefaultInputRDD}} and {{DefaultInputFormat}}
that do this:
{code}
public final class ToyGraphInputRDD implements InputRDD {
public static final String GREMLIN_SPARK_TOY_GRAPH =
"gremlin.spark.toyGraph";
@Override
public JavaPairRDD<Object, VertexWritable> readGraphRDD(final Configuration
configuration, final JavaSparkContext sparkContext) {
Graph graph = GraphFactory.open(configuration);
return
sparkContext.parallelize(graph.vertices().stream().map(VertexWritable::new).collect(Collectors.toList())).mapToPair(vertex
-> new Tuple2<>(vertex.get().id(), vertex));
}
}
{code}
It would be a serial/single-threaded load, but it would allow any OLTP graph
system to use Spark/Giraph/etc.
> InputRDD and InputFormat to load into HadoopGraph from any Graph System
> -----------------------------------------------------------------------
>
> Key: TINKERPOP3-1015
> URL: https://issues.apache.org/jira/browse/TINKERPOP3-1015
> Project: TinkerPop 3
> Issue Type: Improvement
> Components: hadoop
> Affects Versions: 3.1.0-incubating
> Reporter: Marko A. Rodriguez
>
> I just recently added {{ToyGraphInputRDD}} to test InputRDD stuff against the
> full test suite. Check out how it works:
> {code}
> public final class ToyGraphInputRDD implements InputRDD {
> public static final String GREMLIN_SPARK_TOY_GRAPH =
> "gremlin.spark.toyGraph";
> @Override
> public JavaPairRDD<Object, VertexWritable> readGraphRDD(final
> Configuration configuration, final JavaSparkContext sparkContext) {
> final List<Vertex> vertices;
> if
> (configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.MODERN.toString()))
> vertices =
> IteratorUtils.list(TinkerFactory.createModern().vertices());
> else if
> (configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.CLASSIC.toString()))
> vertices =
> IteratorUtils.list(TinkerFactory.createClassic().vertices());
> else if
> (configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.CREW.toString()))
> vertices =
> IteratorUtils.list(TinkerFactory.createTheCrew().vertices());
> else if
> (configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH).equals(LoadGraphWith.GraphData.GRATEFUL.toString()))
> {
> try {
> final Graph graph = TinkerGraph.open();
>
> graph.io(GryoIo.build()).readGraph(GryoResourceAccess.class.getResource("grateful-dead.kryo").getFile());
> vertices = IteratorUtils.list(graph.vertices());
> } catch (final IOException e) {
> throw new IllegalStateException(e.getMessage(), e);
> }
> } else
> throw new IllegalArgumentException("No legal toy graph was
> provided to load: " + configuration.getProperty(GREMLIN_SPARK_TOY_GRAPH));
> return
> sparkContext.parallelize(vertices.stream().map(VertexWritable::new).collect(Collectors.toList())).mapToPair(vertex
> -> new Tuple2<>(vertex.get().id(), vertex));
> }
> }
> {code}
> In principle, we could have a {{DefaultInputRDD}} and {{DefaultInputFormat}}
> that do this:
> {code}
> public final class DefaultInputRDD implements InputRDD {
> @Override
> public JavaPairRDD<Object, VertexWritable> readGraphRDD(final
> Configuration configuration, final JavaSparkContext sparkContext) {
> Graph graph = GraphFactory.open(configuration);
> return
> sparkContext.parallelize(graph.vertices().stream().map(VertexWritable::new).collect(Collectors.toList())).mapToPair(vertex
> -> new Tuple2<>(vertex.get().id(), vertex));
> }
> }
> {code}
> It would be a serial/single-threaded load, but it would allow any OLTP graph
> system to use Spark/Giraph/etc.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)