Repository: giraph Updated Branches: refs/heads/trunk 819d6d38d -> ad27a2914
http://git-wip-us.apache.org/repos/asf/giraph/blob/ad27a291/giraph-hive/src/test/java/org/apache/giraph/hive/output/HiveOutputTest.java ---------------------------------------------------------------------- diff --git a/giraph-hive/src/test/java/org/apache/giraph/hive/output/HiveOutputTest.java b/giraph-hive/src/test/java/org/apache/giraph/hive/output/HiveOutputTest.java deleted file mode 100644 index 39390f1..0000000 --- a/giraph-hive/src/test/java/org/apache/giraph/hive/output/HiveOutputTest.java +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.giraph.hive.output; - -import org.apache.giraph.conf.GiraphConfiguration; -import org.apache.giraph.conf.GiraphConstants; -import org.apache.giraph.edge.ByteArrayEdges; -import org.apache.giraph.hive.GiraphHiveTestBase; -import org.apache.giraph.hive.Helpers; -import org.apache.giraph.hive.common.GiraphHiveConstants; -import org.apache.giraph.hive.computations.ComputationCountEdges; -import org.apache.giraph.hive.output.examples.HiveOutputIntIntVertex; -import org.apache.giraph.io.formats.IntNullTextEdgeInputFormat; -import org.apache.giraph.utils.InternalVertexRunner; -import org.apache.thrift.TException; -import org.junit.Before; -import org.junit.Test; - -import com.facebook.hiveio.common.HiveMetastores; -import com.facebook.hiveio.input.HiveInput; -import com.facebook.hiveio.input.HiveInputDescription; -import com.facebook.hiveio.record.HiveReadableRecord; -import com.facebook.hiveio.testing.LocalHiveServer; -import com.google.common.collect.Maps; - -import java.io.IOException; -import java.util.Map; - -import static org.junit.Assert.assertEquals; - -import junit.framework.Assert; - -public class HiveOutputTest extends GiraphHiveTestBase { - private LocalHiveServer hiveServer = new LocalHiveServer("giraph-hive"); - - @Before - public void setUp() throws IOException, TException { - hiveServer.init(); - HiveMetastores.setTestClient(hiveServer.getClient()); - } - - @Test - public void testHiveOutput() throws Exception - { - String tableName = "test1"; - hiveServer.createTable("CREATE TABLE " + tableName + - " (i1 BIGINT, i2 BIGINT) "); - - GiraphConfiguration conf = new GiraphConfiguration(); - runJob(tableName, conf); - - HiveInputDescription inputDesc = new HiveInputDescription(); - inputDesc.getTableDesc().setTableName(tableName); - - verifyRecords(inputDesc); - } - - @Test - public void testHiveOutputWithPartitions() throws Exception - { - String tableName = "test1"; - hiveServer.createTable("CREATE TABLE " + tableName + - " (i1 BIGINT, i2 BIGINT) " + - " PARTITIONED BY (ds STRING) "); - - GiraphConfiguration conf = new GiraphConfiguration(); - GiraphHiveConstants.HIVE_VERTEX_OUTPUT_PARTITION.set(conf, "ds=foobar"); - - runJob(tableName, conf); - - HiveInputDescription inputDesc = new HiveInputDescription(); - inputDesc.getTableDesc().setTableName(tableName); - inputDesc.setPartitionFilter("ds='foobar'"); - - verifyRecords(inputDesc); - } - - @Test - public void testHiveMultithreadedOutput() throws Exception - { - String tableName = "test1"; - hiveServer.createTable("CREATE TABLE " + tableName + - " (i1 BIGINT, i2 BIGINT) "); - - GiraphConfiguration conf = new GiraphConfiguration(); - conf.setVertexOutputFormatThreadSafe(true); - conf.setNumOutputThreads(2); - GiraphConstants.USER_PARTITION_COUNT.set(conf, 4); - runJob(tableName, conf); - - HiveInputDescription inputDesc = new HiveInputDescription(); - inputDesc.getTableDesc().setTableName(tableName); - - verifyRecords(inputDesc); - } - - private void runJob(String tableName, GiraphConfiguration conf) throws Exception { - String[] edges = new String[] { - "1 2", - "2 3", - "2 4", - "4 1" - }; - - GiraphHiveConstants.HIVE_VERTEX_OUTPUT_TABLE.set(conf, tableName); - GiraphHiveConstants.VERTEX_TO_HIVE_CLASS.set(conf, HiveOutputIntIntVertex.class); - - conf.setComputationClass(ComputationCountEdges.class); - conf.setOutEdgesClass(ByteArrayEdges.class); - conf.setEdgeInputFormatClass(IntNullTextEdgeInputFormat.class); - conf.setVertexOutputFormatClass(HiveVertexOutputFormat.class); - InternalVertexRunner.run(conf, null, edges); - - Helpers.commitJob(conf); - } - - private void verifyRecords(HiveInputDescription inputDesc) - throws IOException, InterruptedException - { - Iterable<HiveReadableRecord> records = HiveInput.readTable(inputDesc); - Map<Long, Long> data = Maps.newHashMap(); - - // Records are in an unknown sort order so we grab their values here - for (HiveReadableRecord record : records) { - if (data.put(record.getLong(0), record.getLong(1)) != null) { - Assert.fail("Id " + record.getLong(0) + " appears twice in the output"); - } - } - - assertEquals(3, data.size()); - assertEquals(1L, (long) data.get(1L)); - assertEquals(2L, (long) data.get(2L)); - assertEquals(1L, (long) data.get(4L)); - } -} http://git-wip-us.apache.org/repos/asf/giraph/blob/ad27a291/giraph-hive/src/test/java/org/apache/giraph/hive/output/package-info.java ---------------------------------------------------------------------- diff --git a/giraph-hive/src/test/java/org/apache/giraph/hive/output/package-info.java b/giraph-hive/src/test/java/org/apache/giraph/hive/output/package-info.java deleted file mode 100644 index 35d2c63..0000000 --- a/giraph-hive/src/test/java/org/apache/giraph/hive/output/package-info.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * Hive output related tests. - */ -package org.apache.giraph.hive.output; http://git-wip-us.apache.org/repos/asf/giraph/blob/ad27a291/giraph-hive/src/test/resources/org/apache/giraph/jython/count-edges-launcher.py ---------------------------------------------------------------------- diff --git a/giraph-hive/src/test/resources/org/apache/giraph/jython/count-edges-launcher.py b/giraph-hive/src/test/resources/org/apache/giraph/jython/count-edges-launcher.py deleted file mode 100644 index f5cf685..0000000 --- a/giraph-hive/src/test/resources/org/apache/giraph/jython/count-edges-launcher.py +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from org.apache.giraph.combiner import DoubleSumMessageCombiner -from org.apache.giraph.edge import ByteArrayEdges -from org.apache.giraph.jython import JythonJob -from org.apache.hadoop.io import IntWritable -from org.apache.hadoop.io import NullWritable - - -def prepare(job): - job.hive_database = "default" - job.workers = 3 - - job.computation_name = "CountEdges" - - job.vertex_id.type = IntWritable - job.vertex_value.type = IntWritable - job.edge_value.type = NullWritable - job.message_value.type = NullWritable - - edge_input = JythonJob.EdgeInput() - edge_input.table = "count_edges_edge_input" - edge_input.source_id_column = "source_edge_id" - edge_input.target_id_column = "target_edge_id" - job.edge_inputs.add(edge_input) - - job.vertex_output.table = "count_edges_output" - job.vertex_output.id_column = "vertex_id" - job.vertex_output.value_column = "num_edges" http://git-wip-us.apache.org/repos/asf/giraph/blob/ad27a291/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-launcher.py ---------------------------------------------------------------------- diff --git a/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-launcher.py b/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-launcher.py deleted file mode 100644 index 2d1c381..0000000 --- a/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-launcher.py +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from org.apache.hadoop.io import IntWritable -from org.apache.giraph.jython import JythonJob - - -def prepare(job): - job.hive_database = "default" - job.workers = 5 - - job.computation_name = "FakeLabelPropagation" - - job.vertex_id.type = IntWritable - - job.vertex_value.type = FakeLPVertexValue - job.vertex_value.hive_io = FakeLPVertexValueHive - - job.edge_value.type = FakeLPEdgeValue - job.edge_value.hive_reader = FakeLPEdgeReader - - job.message_value.type = "FakeLPMessageValue" - - edge_input = JythonJob.EdgeInput() - edge_input.table = "flp_edges" - edge_input.source_id_column = "source_id" - edge_input.target_id_column = "target_id" - edge_input.value_column = "value" - job.edge_inputs.add(edge_input) - - vertex_input = JythonJob.VertexInput() - vertex_input.table = "flp_vertexes" - vertex_input.id_column = "id" - vertex_input.value_column = "value" - job.vertex_inputs.add(vertex_input) - - job.vertex_output.table = "flp_output" - job.vertex_output.id_column = "id" - job.vertex_output.value_column = "value" http://git-wip-us.apache.org/repos/asf/giraph/blob/ad27a291/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-worker.py ---------------------------------------------------------------------- diff --git a/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-worker.py b/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-worker.py deleted file mode 100644 index 9499a35..0000000 --- a/giraph-hive/src/test/resources/org/apache/giraph/jython/fake-label-propagation-worker.py +++ /dev/null @@ -1,91 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from org.apache.hadoop.io import Writable -from org.apache.giraph.jython import JythonComputation -from org.apache.giraph.hive.jython import JythonHiveIO -from org.apache.giraph.hive.jython import JythonHiveReader - - -# Implements HiveColumnIO to tell Giraph how to read/write from Hive -class FakeLPVertexValue: - def __init__(self): - self.labels = {} - self.dog = 'cat' - - def add(self, message): - for label, weight in message.labels.iteritems(): - if label in self.labels: - self.labels[label] += weight - else: - self.labels[label] = weight - - -# Hive reader/writer for vertexes -class FakeLPVertexValueHive(JythonHiveIO): - def readFromHive(self, vertex_value, column): - vertex_value.labels = column.getMap() - - def writeToHive(self, vertex_value, column): - column.setMap(vertex_value.labels) - - -# Implements Writable to override default Jython serialization which grabs all -# of the data in an object. -# Also implements HiveColumnReadable to read from Hive. -class FakeLPEdgeValue(Writable): - def __init__(self): - self.value = 2.13 - self.foo = "bar" - - def readFields(self, data_input): - self.value = data_input.readFloat() - self.foo = "read_in" - - def write(self, data_output): - data_output.writeFloat(self.value) - self.foo = "wrote_out" - - -# Hive reader for edges -class FakeLPEdgeReader(JythonHiveReader): - def readFromHive(self, edge_value, column): - edge_value.value = column.getFloat() - - -# Doesn't implement anything. Use default Jython serialization. -# No need for I/O with Hive -class FakeLPMessageValue: - def __init__(self): - self.labels = {} - - -# Implements BasicComputation to be a Computation Giraph can use -class FakeLabelPropagation(JythonComputation): - def compute(self, vertex, messages): - if self.superstep == 0: - self.send_msg(vertex) - elif self.superstep < self.conf.getInt("supersteps", 3): - for message in messages: - vertex.value.add(message) - self.send_msg(vertex) - else: - vertex.voteToHalt() - - def send_msg(self, vertex): - msg = FakeLPMessageValue() - msg.labels = vertex.value.labels - self.sendMessageToAllEdges(vertex, msg) http://git-wip-us.apache.org/repos/asf/giraph/blob/ad27a291/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 4198f13..c32dfc7 100644 --- a/pom.xml +++ b/pom.xml @@ -931,7 +931,6 @@ under the License. <module>giraph-accumulo</module> <module>giraph-hbase</module> <module>giraph-hcatalog</module> - <module>giraph-hive</module> <module>giraph-gora</module> <module>giraph-rexster</module> <module>giraph-dist</module> @@ -983,7 +982,6 @@ under the License. <module>giraph-accumulo</module> <module>giraph-hbase</module> <module>giraph-hcatalog</module> - <module>giraph-hive</module> <module>giraph-gora</module> <module>giraph-rexster</module> <module>giraph-dist</module> @@ -1038,7 +1036,6 @@ under the License. <module>giraph-accumulo</module> <module>giraph-hbase</module> <module>giraph-hcatalog</module> - <module>giraph-hive</module> <module>giraph-rexster</module> <module>giraph-dist</module> </modules> @@ -1063,9 +1060,6 @@ under the License. <profile> <id>hadoop_facebook</id> - <modules> - <module>giraph-hive</module> - </modules> <properties> <hadoop.version>0.20.0</hadoop.version> <munge.symbols>HADOOP_NON_SECURE,HADOOP_NON_JOBCONTEXT_IS_INTERFACE,HADOOP_JOB_ID_AVAILABLE,STATIC_SASL_SYMBOL</munge.symbols> @@ -1299,7 +1293,6 @@ under the License. <module>giraph-accumulo</module> <module>giraph-hbase</module> <module>giraph-hcatalog</module> - <module>giraph-hive</module> <module>giraph-gora</module> <module>giraph-rexster</module> <module>giraph-dist</module> @@ -1740,11 +1733,6 @@ under the License. </dependency> <dependency> <groupId>org.apache.giraph</groupId> - <artifactId>giraph-hive</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>org.apache.giraph</groupId> <artifactId>giraph-hbase</artifactId> <version>${project.version}</version> </dependency> http://git-wip-us.apache.org/repos/asf/giraph/blob/ad27a291/src/site/xdoc/javadoc_modules.xml ---------------------------------------------------------------------- diff --git a/src/site/xdoc/javadoc_modules.xml b/src/site/xdoc/javadoc_modules.xml index 7e5a8c3..3176564 100644 --- a/src/site/xdoc/javadoc_modules.xml +++ b/src/site/xdoc/javadoc_modules.xml @@ -31,7 +31,6 @@ under the License. <p>This is a multi-module maven project where the parent is composed of several modules.</p> <ul> <li><strong>giraph-core</strong> contains the core Giraph classes and its javadoc can be found <a href="giraph-core/apidocs/overview-summary.html">here</a>.</li> - <li><strong>giraph-hive</strong> contains the Giraph hive input/output classes and its javadoc can be found <a href="giraph-hive/apidocs/overview-summary.html">here</a>.</li> <li><strong>giraph-examples</strong> contains many toy examples and its javadoc can be found <a href="giraph-examples/apidocs/package-summary.html">here</a>.</li> </ul> </section>
