This is an automated email from the ASF dual-hosted git repository. bhavanisudha pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push: new e21441a Add changes for presto mor queries (#1578) e21441a is described below commit e21441ad8317f302fed947c414e059a332e4d1ef Author: bschell <bdschel...@gmail.com> AuthorDate: Mon May 4 11:27:14 2020 -0700 Add changes for presto mor queries (#1578) Adds the necessary changes to hudi for support of presto querying hudi merge-on-read table's realtime view. Co-authored-by: Brandon Scheller <bsche...@amazon.com> --- .../hadoop/UseRecordReaderFromInputFormat.java | 38 ++++++++++++++++++++++ .../realtime/HoodieParquetRealtimeInputFormat.java | 2 ++ .../org/apache/hudi/hadoop/TestAnnotation.java | 23 +++++++++++-- packaging/hudi-presto-bundle/pom.xml | 18 ++++++++++ 4 files changed, 79 insertions(+), 2 deletions(-) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/UseRecordReaderFromInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/UseRecordReaderFromInputFormat.java new file mode 100644 index 0000000..fe87323 --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/UseRecordReaderFromInputFormat.java @@ -0,0 +1,38 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hudi.hadoop; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Inherited; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** +* When annotated on a InputFormat, informs the query engines, that they should use the RecordReader provided by the input +* format to execute the queries. +*/ +@Inherited +@Documented +@Target(ElementType.TYPE) +@Retention(RetentionPolicy.RUNTIME) +public @interface UseRecordReaderFromInputFormat { + +} \ No newline at end of file diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java index ce86807..ae3fb5c 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java @@ -47,6 +47,7 @@ import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; +import org.apache.hudi.hadoop.UseRecordReaderFromInputFormat; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -63,6 +64,7 @@ import java.util.stream.Stream; /** * Input Format, that provides a real-time view of data in a Hoodie table. 
*/ +@UseRecordReaderFromInputFormat @UseFileSplitsFromInputFormat public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat implements Configurable { diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java index 15d0a6c..1f74c7a 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestAnnotation.java @@ -19,7 +19,7 @@ package org.apache.hudi.hadoop; import org.junit.jupiter.api.Test; - +import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; import java.lang.annotation.Annotation; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -27,7 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class TestAnnotation { @Test - public void testAnnotation() { + public void testHoodieParquetInputFormatAnnotation() { assertTrue(HoodieParquetInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class)); Annotation[] annotations = HoodieParquetInputFormat.class.getAnnotations(); boolean found = false; @@ -38,4 +38,23 @@ public class TestAnnotation { } assertTrue(found); } + + @Test + public void testHoodieParquetRealtimeInputFormatAnnotations() { + assertTrue(HoodieParquetRealtimeInputFormat.class.isAnnotationPresent(UseFileSplitsFromInputFormat.class)); + assertTrue(HoodieParquetRealtimeInputFormat.class.isAnnotationPresent(UseRecordReaderFromInputFormat.class)); + Annotation[] annotations = HoodieParquetRealtimeInputFormat.class.getAnnotations(); + boolean foundFileSplitsAnnotation = false; + boolean foundRecordReaderAnnotation = false; + for (Annotation annotation : annotations) { + if ("UseFileSplitsFromInputFormat".equals(annotation.annotationType().getSimpleName())) { + foundFileSplitsAnnotation = true; + } + if ("UseRecordReaderFromInputFormat".equals(annotation.annotationType().getSimpleName())) { + 
foundRecordReaderAnnotation = true; + } + } + assertTrue(foundFileSplitsAnnotation); + assertTrue(foundRecordReaderAnnotation); + } } diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml index cccde22..c51c22b 100644 --- a/packaging/hudi-presto-bundle/pom.xml +++ b/packaging/hudi-presto-bundle/pom.xml @@ -68,6 +68,7 @@ <include>org.apache.hudi:hudi-hadoop-mr</include> <include>org.apache.parquet:parquet-avro</include> + <include>org.apache.avro:avro</include> <include>com.esotericsoftware:kryo-shaded</include> <include>org.objenesis:objenesis</include> <include>com.esotericsoftware:minlog</include> @@ -76,6 +77,10 @@ <relocations> <relocation> + <pattern>org.apache.avro.</pattern> + <shadedPattern>org.apache.hudi.org.apache.avro.</shadedPattern> + </relocation> + <relocation> <pattern>com.esotericsoftware.kryo.</pattern> <shadedPattern>org.apache.hudi.com.esotericsoftware.kryo.</shadedPattern> </relocation> @@ -128,5 +133,18 @@ <artifactId>hudi-hadoop-mr-bundle</artifactId> <version>${project.version}</version> </dependency> + + <!-- Parquet --> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-avro</artifactId> + <scope>compile</scope> + </dependency> + + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <scope>compile</scope> + </dependency> </dependencies> </project>