This is an automated email from the ASF dual-hosted git repository.

jark pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d81374  [FLINK-13012][hive] Handle default partition name of Hive table
1d81374 is described below

commit 1d81374ce344e7bde410792c8a2efd6f4e707b9f
Author: Rui Li <li...@apache.org>
AuthorDate: Thu Jul 11 21:08:22 2019 +0800

    [FLINK-13012][hive] Handle default partition name of Hive table
    
    This closes #9088
---
 .../connectors/hive/HiveTableOutputFormat.java     | 15 +++-
 .../batch/connectors/hive/HiveTableSource.java     | 11 ++-
 .../connectors/hive/TableEnvHiveConnectorTest.java | 88 ++++++++++++++++++++++
 3 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableOutputFormat.java b/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableOutputFormat.java
index acb1bf9..ade5830 100644
--- a/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableOutputFormat.java
+++ b/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableOutputFormat.java
@@ -141,6 +141,9 @@ public class HiveTableOutputFormat extends HadoopOutputFormatCommonBase<Row> imp
        // to convert Flink object to Hive object
        private transient HiveObjectConversion[] hiveConversions;
 
+       // used when a partition value is null or empty
+       private transient String defaultPartitionName;
+
        public HiveTableOutputFormat(JobConf jobConf, ObjectPath tablePath, CatalogTable table, HiveTablePartition hiveTablePartition,
                                                                Properties tableProperties, boolean overwrite) {
                super(jobConf.getCredentials());
@@ -298,6 +301,8 @@ public class HiveTableOutputFormat extends HadoopOutputFormatCommonBase<Row> imp
                        rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
                                Arrays.asList(rowTypeInfo.getFieldNames()).subList(0, rowTypeInfo.getArity() - partitionColumns.size()),
                                objectInspectors);
+                       defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
+                                       HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
                }
        }
 
@@ -310,10 +315,12 @@ public class HiveTableOutputFormat extends HadoopOutputFormatCommonBase<Row> imp
                                // only need to check the dynamic partitions
                                final int numStaticPart = hiveTablePartition.getPartitionSpec().size();
                                for (int i = dynamicPartitionOffset; i < record.getArity(); i++) {
-                                       // TODO: seems Hive also just calls toString(), need further investigation to confirm
-                                       // TODO: validate partition value
-                                       String partVal = record.getField(i).toString();
-                                       dynPartSpec.put(partitionColumns.get(i - dynamicPartitionOffset + numStaticPart), partVal);
+                                       Object field = record.getField(i);
+                                       String partitionValue = field != null ? field.toString() : null;
+                                       if (partitionValue == null || partitionValue.isEmpty()) {
+                                               partitionValue = defaultPartitionName;
+                                       }
+                                       dynPartSpec.put(partitionColumns.get(i - dynamicPartitionOffset + numStaticPart), partitionValue);
                                }
                                String partName = Warehouse.makePartPath(dynPartSpec);
                                partitionWriter = partitionToWriter.get(partName);
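
[Editor's note] The write-side change above boils down to a small fallback: when a dynamic partition value is null or empty, the configured default partition name (hive.exec.default.partition.name, normally __HIVE_DEFAULT_PARTITION__) is used instead, so a valid partition path can still be built. Below is a minimal standalone sketch of that logic, not the committed code; it assumes Hive and Hadoop on the classpath, and the class and method names are made up for illustration:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.mapred.JobConf;

    public class DefaultPartitionNameSketch {

        // Resolve the value used for one dynamic partition column of a record.
        static String resolvePartitionValue(JobConf jobConf, Object field) {
            // hive.exec.default.partition.name, falling back to Hive's built-in default
            String defaultPartitionName = jobConf.get(
                    HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
                    HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
            String partitionValue = field != null ? field.toString() : null;
            // a null or empty value would otherwise produce an invalid partition path
            return (partitionValue == null || partitionValue.isEmpty())
                    ? defaultPartitionName
                    : partitionValue;
        }
    }
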
diff --git a/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableSource.java b/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableSource.java
index 706442d..e0734f4 100644
--- a/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableSource.java
+++ b/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableSource.java
@@ -157,6 +157,8 @@ public class HiveTableSource extends InputFormatTableSource<Row> implements Part
                        String tableName = tablePath.getObjectName();
                        List<String> partitionColNames = catalogTable.getPartitionKeys();
                        if (partitionColNames != null && partitionColNames.size() > 0) {
+                               final String defaultPartitionName = jobConf.get(HiveConf.ConfVars.DEFAULTPARTITIONNAME.varname,
+                                               HiveConf.ConfVars.DEFAULTPARTITIONNAME.defaultStrVal);
                                List<Partition> partitions =
                                                client.listPartitions(dbName, tableName, (short) -1);
                                for (Partition partition : partitions) {
@@ -168,7 +170,14 @@ public class HiveTableSource extends InputFormatTableSource<Row> implements Part
                                                String partitionValue = partition.getValues().get(i);
                                                partitionSpec.put(partitionColName, partitionValue);
                                                DataType type = catalogTable.getSchema().getFieldDataType(partitionColName).get();
-                                               Object partitionObject = restorePartitionValueFromFromType(partitionValue, type);
+                                               Object partitionObject;
+                                               if (defaultPartitionName.equals(partitionValue)) {
+                                                       LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
+                                                       // while this is in line with Hive, it seems it should be null for string columns as well
+                                                       partitionObject = typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR ? defaultPartitionName : null;
+                                               } else {
+                                                       partitionObject = restorePartitionValueFromFromType(partitionValue, type);
+                                               }
                                                partitionColValues.put(partitionColName, partitionObject);
                                        }
                                        HiveTablePartition hiveTablePartition = new HiveTablePartition(sd, partitionColValues);
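
[Editor's note] On the read side, the symmetric mapping is applied when partition values are restored from the metastore: a value equal to the default partition name becomes null for non-string partition columns, and stays the literal name for CHAR/VARCHAR columns (matching Hive, as the inline comment observes). A rough sketch of just that branch follows; it is not the committed code, the helper name is hypothetical, and the regular type-aware conversion is stubbed out:

    import org.apache.flink.table.types.DataType;
    import org.apache.flink.table.types.logical.LogicalTypeRoot;

    final class PartitionValueRestoreSketch {

        // Map a stored partition value back to an object for the given column type.
        static Object restore(String partitionValue, String defaultPartitionName, DataType type) {
            LogicalTypeRoot typeRoot = type.getLogicalType().getTypeRoot();
            if (defaultPartitionName.equals(partitionValue)) {
                // string columns keep the literal default name, everything else becomes null
                return typeRoot == LogicalTypeRoot.CHAR || typeRoot == LogicalTypeRoot.VARCHAR
                        ? defaultPartitionName : null;
            }
            // the real code converts to the column's type here (restorePartitionValueFromFromType)
            return partitionValue;
        }
    }
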
diff --git a/flink-connectors/flink-connector-hive/src/test/java/org/apache/flink/batch/connectors/hive/TableEnvHiveConnectorTest.java b/flink-connectors/flink-connector-hive/src/test/java/org/apache/flink/batch/connectors/hive/TableEnvHiveConnectorTest.java
new file mode 100644
index 0000000..2f910a9
--- /dev/null
+++ b/flink-connectors/flink-connector-hive/src/test/java/org/apache/flink/batch/connectors/hive/TableEnvHiveConnectorTest.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.batch.connectors.hive;
+
+import org.apache.flink.table.api.TableEnvironment;
+import org.apache.flink.table.catalog.hive.HiveCatalog;
+import org.apache.flink.table.catalog.hive.HiveTestUtils;
+import org.apache.flink.table.catalog.hive.client.HiveMetastoreClientFactory;
+import org.apache.flink.table.catalog.hive.client.HiveMetastoreClientWrapper;
+
+import com.klarna.hiverunner.HiveShell;
+import com.klarna.hiverunner.annotations.HiveSQL;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import java.util.Arrays;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Tests the Hive connector with the Table API.
+ */
+@RunWith(FlinkStandaloneHiveRunner.class)
+public class TableEnvHiveConnectorTest {
+
+       @HiveSQL(files = {})
+       private static HiveShell hiveShell;
+
+       private static HiveCatalog hiveCatalog;
+       private static HiveMetastoreClientWrapper hmsClient;
+
+       @BeforeClass
+       public static void setup() {
+               HiveConf hiveConf = hiveShell.getHiveConf();
+               hiveCatalog = HiveTestUtils.createHiveCatalog(hiveConf);
+               hiveCatalog.open();
+               hmsClient = HiveMetastoreClientFactory.create(hiveConf, null);
+       }
+
+       @Test
+       public void testDefaultPartitionName() throws Exception {
+               hiveShell.execute("create database db1");
+               hiveShell.execute("create table db1.src (x int, y int)");
+               hiveShell.execute("create table db1.part (x int) partitioned by (y int)");
+               hiveShell.insertInto("db1", "src").addRow(1, 1).addRow(2, null).commit();
+
+               TableEnvironment tableEnv = HiveTestUtils.createTableEnv();
+               tableEnv.registerCatalog(hiveCatalog.getName(), hiveCatalog);
+               tableEnv.useCatalog(hiveCatalog.getName());
+
+               // test generating partitions with default name
+               tableEnv.sqlUpdate("insert into db1.part select * from db1.src");
+               tableEnv.execute("mytest");
+               HiveConf hiveConf = hiveShell.getHiveConf();
+               String defaultPartName = hiveConf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
+               Table hiveTable = hmsClient.getTable("db1", "part");
+               Path defaultPartPath = new Path(hiveTable.getSd().getLocation(), "y=" + defaultPartName);
+               FileSystem fs = defaultPartPath.getFileSystem(hiveConf);
+               assertTrue(fs.exists(defaultPartPath));
+
+               // TODO: test reading from Flink when https://issues.apache.org/jira/browse/FLINK-13279 is fixed
+               assertEquals(Arrays.asList("1\t1", "2\tNULL"), hiveShell.executeQuery("select * from db1.part"));
+
+               hiveShell.execute("drop database db1 cascade");
+       }
+}
