Repository: hive Updated Branches: refs/heads/master 6adab1c2a -> 24e16cc57
HIVE-19899: Support stored as JsonFile (Aihua Xu, reviewed by Yongzhi Chen, BELUGA BEHR) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/24e16cc5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/24e16cc5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/24e16cc5 Branch: refs/heads/master Commit: 24e16cc57293ea6771cd55009f8cfd29870a39ee Parents: 6adab1c Author: Aihua Xu <aihu...@apache.org> Authored: Thu Jun 14 13:35:49 2018 -0700 Committer: Aihua Xu <aihu...@apache.org> Committed: Thu Jun 21 14:36:07 2018 -0700 ---------------------------------------------------------------------- .../hcatalog/pig/AbstractHCatStorerTest.java | 2 +- .../pig/TestHCatLoaderComplexSchema.java | 3 ++ .../hive/hcatalog/pig/TestHCatStorer.java | 4 +- .../apache/hadoop/hive/ql/io/IOConstants.java | 1 + .../ql/io/JsonFileStorageFormatDescriptor.java | 51 ++++++++++++++++++++ ...he.hadoop.hive.ql.io.StorageFormatDescriptor | 1 + .../hive/ql/io/TestStorageFormatDescriptor.java | 3 ++ .../test/queries/clientpositive/json_serde1.q | 9 ++-- .../results/clientpositive/json_serde1.q.out | 44 ++++++++++++++++- 9 files changed, 109 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java ---------------------------------------------------------------------- diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java index 97277b5..a5cf3a5 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/AbstractHCatStorerTest.java @@ -54,7 +54,7 @@ import org.slf4j.LoggerFactory; public abstract class AbstractHCatStorerTest extends HCatBaseTest { static Logger LOG = LoggerFactory.getLogger(AbstractHCatStorerTest.class); static final String INPUT_FILE_NAME = TEST_DATA_DIR + "/input.data"; - String storageFormat; + protected String storageFormat; public AbstractHCatStorerTest() { storageFormat = getStorageFormat(); http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java ---------------------------------------------------------------------- diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java index 8f06d39..37e670c 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatLoaderComplexSchema.java @@ -75,6 +75,9 @@ public class TestHCatLoaderComplexSchema { put(IOConstants.PARQUETFILE, new HashSet<String>() {{ add("testMapNullKey"); }}); + put(IOConstants.JSONFILE, new HashSet<String>() {{ + add("testMapNullKey"); + }}); }}; private String storageFormat; http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java ---------------------------------------------------------------------- diff --git a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java index 477ea66..cb02139 100644 --- a/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java +++ b/hcatalog/hcatalog-pig-adapter/src/test/java/org/apache/hive/hcatalog/pig/TestHCatStorer.java @@ -86,8 +86,6 @@ public class TestHCatStorer extends AbstractHCatStorerTest { } }; - private String storageFormat; - @Parameterized.Parameters public static Collection<Object[]> generateParameters() { return StorageFormats.names(); @@ -99,7 +97,7 @@ public class TestHCatStorer extends AbstractHCatStorerTest { @Override String getStorageFormat() { - return null; + return this.storageFormat; } @Test http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java index f60d296..2be864e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/IOConstants.java @@ -35,6 +35,7 @@ public final class IOConstants { public static final String PARQUETFILE = "PARQUETFILE"; public static final String AVRO = "AVRO"; public static final String AVROFILE = "AVROFILE"; + public static final String JSONFILE = "JSONFILE"; /** * The desired TABLE column names and types for input format schema evolution. http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java new file mode 100644 index 0000000..00c6178 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/JsonFileStorageFormatDescriptor.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io; + +import java.util.Set; + +import org.apache.hadoop.hive.serde2.JsonSerDe; + +import com.google.common.collect.ImmutableSet; + +/** + * A storage format descriptor class to support "STORED AS JSONFILE" syntax. + * + */ +public class JsonFileStorageFormatDescriptor extends AbstractStorageFormatDescriptor { + @Override + public Set<String> getNames() { + return ImmutableSet.of(IOConstants.JSONFILE); + } + + @Override + public String getInputFormat() { + return IOConstants.TEXTFILE_INPUT; + } + + @Override + public String getOutputFormat() { + return IOConstants.TEXTFILE_OUTPUT; + } + + @Override + public String getSerde() { + return JsonSerDe.class.getName(); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor ---------------------------------------------------------------------- diff --git a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor index d858a95..c28a302 100644 --- a/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor +++ b/ql/src/main/resources/META-INF/services/org.apache.hadoop.hive.ql.io.StorageFormatDescriptor @@ -4,3 +4,4 @@ org.apache.hadoop.hive.ql.io.RCFileStorageFormatDescriptor org.apache.hadoop.hive.ql.io.ORCFileStorageFormatDescriptor org.apache.hadoop.hive.ql.io.ParquetFileStorageFormatDescriptor org.apache.hadoop.hive.ql.io.AvroStorageFormatDescriptor +org.apache.hadoop.hive.ql.io.JsonFileStorageFormatDescriptor \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java index 72acaad..86d3703 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/TestStorageFormatDescriptor.java @@ -40,5 +40,8 @@ public class TestStorageFormatDescriptor { (new ParquetFileStorageFormatDescriptor()).getNames()); Assert.assertEquals(Sets.newHashSet(IOConstants.AVRO, IOConstants.AVROFILE), (new AvroStorageFormatDescriptor()).getNames()); + Assert.assertEquals(Sets.newHashSet(IOConstants.JSONFILE), + (new JsonFileStorageFormatDescriptor()).getNames()); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/queries/clientpositive/json_serde1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/json_serde1.q b/ql/src/test/queries/clientpositive/json_serde1.q index b805925..fcbf1c0 100644 --- a/ql/src/test/queries/clientpositive/json_serde1.q +++ b/ql/src/test/queries/clientpositive/json_serde1.q @@ -1,9 +1,8 @@ --! qt:dataset:src -add jar ${system:maven.local.repository}/org/apache/hive/hcatalog/hive-hcatalog-core/${system:hive.version}/hive-hcatalog-core-${system:hive.version}.jar; - drop table if exists json_serde1_1; drop table if exists json_serde1_2; +drop table if exists json_serde1_3; create table json_serde1_1 (a array<string>,b map<string,int>) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'; @@ -17,7 +16,7 @@ create table json_serde1_2 ( a array<int>, b map<int,date>, c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>> -) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'; +) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'; insert into table json_serde1_2 select @@ -33,5 +32,9 @@ insert into table json_serde1_2 select * from json_serde1_2; +create table json_serde1_3 (c1 int, c2 string) stored as jsonfile; +show create table json_serde1_3; + drop table json_serde1_1; drop table json_serde1_2; +drop table json_serde1_3; http://git-wip-us.apache.org/repos/asf/hive/blob/24e16cc5/ql/src/test/results/clientpositive/json_serde1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/json_serde1.q.out b/ql/src/test/results/clientpositive/json_serde1.q.out index e14d674..341a494 100644 --- a/ql/src/test/results/clientpositive/json_serde1.q.out +++ b/ql/src/test/results/clientpositive/json_serde1.q.out @@ -6,6 +6,10 @@ PREHOOK: query: drop table if exists json_serde1_2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table if exists json_serde1_2 POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists json_serde1_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists json_serde1_3 +POSTHOOK: type: DROPTABLE PREHOOK: query: create table json_serde1_1 (a array<string>,b map<string,int>) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' PREHOOK: type: CREATETABLE @@ -42,7 +46,7 @@ PREHOOK: query: create table json_serde1_2 ( a array<int>, b map<int,date>, c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>> -) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' +) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe' PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@json_serde1_2 @@ -50,7 +54,7 @@ POSTHOOK: query: create table json_serde1_2 ( a array<int>, b map<int,date>, c struct<c1:int, c2:string, c3:array<string>, c4:map<string, int>, c5:struct<c5_1:string, c5_2:int>> -) row format serde 'org.apache.hive.hcatalog.data.JsonSerDe' +) row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe' POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@json_serde1_2 @@ -95,6 +99,34 @@ POSTHOOK: Input: default@json_serde1_2 #### A masked pattern was here #### [3,2,1] {1:"2001-01-01",2:null} {"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}} [3,2,1] {1:"2001-01-01",2:null} {"c1":123456,"c2":"hello","c3":["aa","bb","cc"],"c4":{"abc":123,"xyz":456},"c5":{"c5_1":"bye","c5_2":88}} +PREHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@json_serde1_3 +POSTHOOK: query: create table json_serde1_3 (c1 int, c2 string) stored as jsonfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@json_serde1_3 +PREHOOK: query: show create table json_serde1_3 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@json_serde1_3 +POSTHOOK: query: show create table json_serde1_3 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@json_serde1_3 +CREATE TABLE `json_serde1_3`( + `c1` int COMMENT 'from deserializer', + `c2` string COMMENT 'from deserializer') +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.JsonSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' +LOCATION +#### A masked pattern was here #### +TBLPROPERTIES ( + 'bucketing_version'='2', +#### A masked pattern was here #### PREHOOK: query: drop table json_serde1_1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@json_serde1_1 @@ -111,3 +143,11 @@ POSTHOOK: query: drop table json_serde1_2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@json_serde1_2 POSTHOOK: Output: default@json_serde1_2 +PREHOOK: query: drop table json_serde1_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@json_serde1_3 +PREHOOK: Output: default@json_serde1_3 +POSTHOOK: query: drop table json_serde1_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@json_serde1_3 +POSTHOOK: Output: default@json_serde1_3