Added script and configuration file that can be used for OLAP CSV exports.
Project: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/commit/e13f91aa Tree: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/tree/e13f91aa Diff: http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/diff/e13f91aa Branch: refs/heads/TINKERPOP-1298 Commit: e13f91aacddd4cb5722ac3a6e100fdf70b0c33fd Parents: 8ad5b62 Author: Daniel Kuppitz <daniel_kupp...@hotmail.com> Authored: Mon May 23 18:24:22 2016 +0200 Committer: Daniel Kuppitz <daniel_kupp...@hotmail.com> Committed: Tue May 24 19:51:22 2016 +0200 ---------------------------------------------------------------------- data/script-csv-export.groovy | 43 +++++++++++++++ .../conf/hadoop-csv-export.properties | 56 ++++++++++++++++++++ 2 files changed, 99 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/e13f91aa/data/script-csv-export.groovy ---------------------------------------------------------------------- diff --git a/data/script-csv-export.groovy b/data/script-csv-export.groovy new file mode 100644 index 0000000..7a6da22 --- /dev/null +++ b/data/script-csv-export.groovy @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +@Grab(group = 'com.opencsv', module = 'opencsv', version = '3.7') +import com.opencsv.* + +import org.apache.tinkerpop.gremlin.process.computer.bulkdumping.BulkExportVertexProgram +
+/**
+ * Renders the traversers halted at the given vertex as CSV text.
+ *
+ * @param vertex the vertex whose halted traversers are exported
+ * @return one CSV record per halted traverser, with surrounding whitespace
+ *         trimmed from the combined text, or null when the vertex has no
+ *         HALTED_TRAVERSERS property
+ */
+def stringify(vertex) {
+    def halted = vertex.property(TraversalVertexProgram.HALTED_TRAVERSERS)
+    if (!halted.isPresent()) {
+        return null
+    }
+    // Column specs are separated by "\1"; each spec is split once on "\2"
+    // into the path label to read and the format string to apply.
+    def spec = vertex.value(BulkExportVertexProgram.BULK_EXPORT_PROPERTIES)
+    def columns = spec.split("\1").collect { it.split("\2", 2).toList() }
+    def buffer = new StringWriter()
+    def csv = new CSVWriter(buffer)
+    for (traverser in halted.value()) {
+        def row = columns.collect { label, pattern ->
+            def cell = traverser.path(label)
+            // An empty format string means "use the value's own toString()".
+            pattern.isEmpty() ? cell.toString() : String.format(pattern, cell)
+        }
+        // Second argument is passed as false, as in the original call
+        // (presumably opencsv's applyQuotesToAll flag -- confirm against opencsv 3.7).
+        csv.writeNext((String[]) row, false)
+    }
+    def csvText = buffer.toString().trim()
+    buffer.close()
+    return csvText
+}
http://git-wip-us.apache.org/repos/asf/incubator-tinkerpop/blob/e13f91aa/hadoop-gremlin/conf/hadoop-csv-export.properties ---------------------------------------------------------------------- diff --git a/hadoop-gremlin/conf/hadoop-csv-export.properties b/hadoop-gremlin/conf/hadoop-csv-export.properties new file mode 100644 index 0000000..3e1f8da --- /dev/null +++ b/hadoop-gremlin/conf/hadoop-csv-export.properties @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph +gremlin.hadoop.graphReader=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat +gremlin.hadoop.graphWriter=org.apache.tinkerpop.gremlin.hadoop.structure.io.script.ScriptOutputFormat +gremlin.hadoop.jarsInDistributedCache=true +gremlin.hadoop.defaultGraphComputer=org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer + +gremlin.hadoop.inputLocation=output +gremlin.hadoop.scriptOutputFormat.script=script-csv-export.groovy +gremlin.hadoop.outputLocation=export + +#################################### +# SparkGraphComputer Configuration # +#################################### +spark.master=local[4] +spark.executor.memory=1g +spark.serializer=org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoSerializer +# spark.kryo.registrationRequired=true +# spark.storage.memoryFraction=0.2 +# spark.eventLog.enabled=true +# spark.eventLog.dir=/tmp/spark-event-logs +# spark.ui.killEnabled=true + +##################################### +# GiraphGraphComputer Configuration # +##################################### +giraph.minWorkers=2 +giraph.maxWorkers=2 +giraph.useOutOfCoreGraph=true +giraph.useOutOfCoreMessages=true +mapreduce.map.java.opts=-Xmx1024m +mapreduce.reduce.java.opts=-Xmx1024m +giraph.numInputThreads=2 +giraph.numComputeThreads=2 +# giraph.maxPartitionsInMemory=1 +# giraph.userPartitionCount=2 +## MapReduce of GiraphGraphComputer ## +# mapreduce.job.maps=2 +# mapreduce.job.reduces=1 + +