This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new e5a7ce2f091 HIVE-21100: Allow flattening of table subdirectories resulted when using TEZ engine and UNION clause. (#4730). (Ayush Saxena, reviewed by Laszlo Bodor) e5a7ce2f091 is described below commit e5a7ce2f091da1f8a324da6e489cda59b9e4bfc6 Author: Ayush Saxena <ayushsax...@apache.org> AuthorDate: Fri Sep 22 14:59:06 2023 +0530 HIVE-21100: Allow flattening of table subdirectories resulted when using TEZ engine and UNION clause. (#4730). (Ayush Saxena, reviewed by Laszlo Bodor) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../apache/hive/common/util/MockFileSystem.java | 11 +- .../test/resources/testconfiguration.properties | 1 + .../org/apache/hadoop/hive/ql/exec/MoveTask.java | 46 + .../apache/hadoop/hive/ql/exec/TestMoveTask.java | 47 + .../queries/clientpositive/flatten_union_subdir.q | 117 +++ .../clientpositive/tez/flatten_union_subdir.q.out | 1030 ++++++++++++++++++++ 7 files changed, 1255 insertions(+), 1 deletion(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0d7a8d072e5..d4d6429c46d 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2622,6 +2622,10 @@ public class HiveConf extends Configuration { "If the user has set hive.merge.mapfiles to true and hive.merge.mapredfiles to false, the idea was the\n" + "number of reducers are few, so the number of files anyway are small. However, with this optimization,\n" + "we are increasing the number of files possibly by a big margin. So, we merge aggressively."), + HIVE_TEZ_UNION_FLATTEN_SUBDIRECTORIES("hive.tez.union.flatten.subdirectories", false, + "By default, when writing data into a table and UNION ALL is the last step of the query, Hive on Tez will\n" + + "create a subdirectory for each branch of the UNION ALL. When this property is enabled,\n" + + "the subdirectories are removed, and the files are renamed and moved to the parent directory"), HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit intra-query correlations."), HIVE_OPTIMIZE_LIMIT_TRANSPOSE("hive.optimize.limittranspose", false, diff --git a/common/src/test/org/apache/hive/common/util/MockFileSystem.java b/common/src/test/org/apache/hive/common/util/MockFileSystem.java index 1a0b41902d8..2e5d9414edc 100644 --- a/common/src/test/org/apache/hive/common/util/MockFileSystem.java +++ b/common/src/test/org/apache/hive/common/util/MockFileSystem.java @@ -45,6 +45,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; +import org.mockito.MockitoAnnotations; public class MockFileSystem extends FileSystem { final List<MockFile> files = new ArrayList<MockFile>(); @@ -165,7 +166,15 @@ public class MockFileSystem extends FileSystem { public boolean rename(Path path, Path path2) throws IOException { statistics.incrementWriteOps(1); checkAccess(); - return false; + + MockFile file = findFile(path); + if (file == null || findFile(path2) != null) { + return false; + } + + files.add(new MockFile(path2.toString(), file.blockSize, file.content)); + files.remove(file); + return true; } @Override diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 46d76e8b40d..e56f6ba8bdb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -24,6 +24,7 @@ minitez.query.files=\ explainanalyze_4.q,\ explainanalyze_5.q,\ explainuser_3.q,\ + flatten_union_subdir.q,\ limit_bailout.q,\ mapjoin_addjar.q,\ orc_merge12.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index 9cdf7f05d22..9bc6aa1b41f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -24,7 +24,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hive.common.BlobStorageUtils; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.common.HiveStatsUtils; @@ -93,6 +95,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Properties; +import java.util.Set; import static org.apache.hadoop.hive.ql.exec.Utilities.BLOB_MANIFEST_FILE; @@ -131,6 +134,40 @@ public class MoveTask extends Task<MoveWork> implements Serializable { return false; } + public void flattenUnionSubdirectories(Path sourcePath) throws HiveException { + try { + FileSystem fs = sourcePath.getFileSystem(conf); + LOG.info("Checking {} for subdirectories to flatten", sourcePath); + Set<Path> unionSubdirs = new HashSet<>(); + if (fs.exists(sourcePath)) { + RemoteIterator<LocatedFileStatus> i = fs.listFiles(sourcePath, true); + String prefix = AbstractFileMergeOperator.UNION_SUDBIR_PREFIX; + while (i.hasNext()) { + Path path = i.next().getPath(); + Path parent = path.getParent(); + if (parent.getName().startsWith(prefix)) { + // We do rename by including the name of parent directory into the filename so that there are no clashes + // when we move the files to the parent directory. Ex. HIVE_UNION_SUBDIR_1/000000_0 -> 1_000000_0 + String parentOfParent = parent.getParent().toString(); + String parentNameSuffix = parent.getName().substring(prefix.length()); + + fs.rename(path, new Path(parentOfParent + "/" + parentNameSuffix + "_" + path.getName())); + + unionSubdirs.add(parent); + } + } + + // remove the empty union subdirectories + for (Path path : unionSubdirs) { + LOG.info("This subdirectory has been flattened: " + path.toString()); + fs.delete(path, true); + } + } + } catch (Exception e) { + throw new HiveException("Unable to flatten " + sourcePath, e); + } + } + private void moveFile(Path sourcePath, Path targetPath, boolean isDfsDir) throws HiveException { try { @@ -357,6 +394,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { return processHiveException(he); } + boolean shouldFlattenUnionSubdirectories = + HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TEZ_UNION_FLATTEN_SUBDIRECTORIES); + if (Utilities.FILE_OP_LOGGER.isTraceEnabled()) { Utilities.FILE_OP_LOGGER.trace("Executing MoveWork " + System.identityHashCode(work) + " with " + work.getLoadFileWork() + "; " + work.getLoadTableWork() + "; " @@ -384,6 +424,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { Utilities.FILE_OP_LOGGER.debug("MoveTask not moving " + sourcePath); } else { Utilities.FILE_OP_LOGGER.debug("MoveTask moving " + sourcePath + " to " + targetPath); + if (shouldFlattenUnionSubdirectories) { + flattenUnionSubdirectories(sourcePath); + } if(lfd.getWriteType() == AcidUtils.Operation.INSERT) { //'targetPath' is table root of un-partitioned table or partition //'sourcePath' result of 'select ...' part of CTAS statement @@ -461,6 +504,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { boolean isFullAcidOp = work.getLoadTableWork().getWriteType() != AcidUtils.Operation.NOT_ACID && !tbd.isMmTable(); //it seems that LoadTableDesc has Operation.INSERT only for CTAS... + if (shouldFlattenUnionSubdirectories) { + flattenUnionSubdirectories(tbd.getSourcePath()); + } // Create a data container DataContainer dc = null; if (tbd.getPartitionSpec().size() == 0) { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMoveTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMoveTask.java new file mode 100644 index 00000000000..faa623fa6d3 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMoveTask.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hive.common.util.MockFileSystem; +import org.junit.Test; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +/** + * Tests the method MoveTask.flattenUnionSubdirectories(). + */ +public class TestMoveTask { + @Test + public void flattenUnionSubdirectories() throws IOException, HiveException { + String initialPath = "/table_users/" + AbstractFileMergeOperator.UNION_SUDBIR_PREFIX + "1/000000_0"; + String flattenPath = "/table_users/1_000000_0"; + + MockFileSystem.MockFile file1 = new MockFileSystem.MockFile("mock://" + initialPath, 0, new byte[1]); + MockFileSystem fs = new MockFileSystem(new Configuration(), file1); + + new MoveTask().flattenUnionSubdirectories(new MockFileSystem.MockPath(fs, initialPath)); + + assertFalse(fs.exists(new MockFileSystem.MockPath(fs, initialPath))); + assertTrue(fs.exists(new MockFileSystem.MockPath(fs, flattenPath))); + } +} diff --git a/ql/src/test/queries/clientpositive/flatten_union_subdir.q b/ql/src/test/queries/clientpositive/flatten_union_subdir.q new file mode 100644 index 00000000000..49455227267 --- /dev/null +++ b/ql/src/test/queries/clientpositive/flatten_union_subdir.q @@ -0,0 +1,117 @@ +set hive.tez.union.flatten.subdirectories=true; +set hive.support.concurrency=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.acid.direct.insert.enabled=true; +set hive.auto.convert.join=true; + +create table test1 (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only'); +insert into test1 partition (dt='20230817') values ("val1"), ("val2"); + + +-- TEST FOR EXTERNAL TABLE + +create table union_target_nonacid_directinsert_flattened (val string) partitioned by (dt string) stored as avro; + +explain insert overwrite table union_target_nonacid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_nonacid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert overwrite table union_target_nonacid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_nonacid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_nonacid_directinsert_flattened; + +select * from union_target_nonacid_directinsert_flattened; + +-- TESTS FOR DIRECT & FLATTENED + +create table union_target_mm_directinsert_flattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only'); + +explain insert overwrite table union_target_mm_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert overwrite table union_target_mm_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_mm_directinsert_flattened; + +select * from union_target_mm_directinsert_flattened; + +create table union_target_acid_directinsert_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true'); + +explain insert into table union_target_acid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert into table union_target_acid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_acid_directinsert_flattened; + +select * from union_target_acid_directinsert_flattened; + +-- TESTS FOR NON DIRECT & FLATTENED + +set hive.acid.direct.insert.enabled=false; + +create table union_target_mm_flattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only'); + +explain insert overwrite table union_target_mm_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert overwrite table union_target_mm_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_mm_flattened; + +select * from union_target_mm_flattened; + +create table union_target_acid_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true'); + +explain insert into table union_target_acid_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert into table union_target_acid_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_acid_flattened; + +select * from union_target_acid_flattened; + + +-- TESTS FOR NON DIRECT & NON FLATTENED + +set hive.tez.union.flatten.subdirectories=false; + +create table union_target_mm_unflattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only'); + +explain insert overwrite table union_target_mm_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert overwrite table union_target_mm_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_mm_unflattened; + +select * from union_target_mm_unflattened; + +create table union_target_acid_unflattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true'); + +explain insert into table union_target_acid_unflattened partition (dt='20230817') select ful.* from (select val from union_target_acid_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert into table union_target_acid_unflattened partition (dt='20230817') select ful.* from (select val from union_target_acid_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_acid_unflattened; + +select * from union_target_acid_unflattened; + +-- TESTS FOR DIRECT & NON FLATTENED + +set hive.acid.direct.insert.enabled=true; + +create table union_target_mm_directinsert_unflattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only'); + +explain insert overwrite table union_target_mm_directinsert_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert overwrite table union_target_mm_directinsert_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_mm_directinsert_unflattened; + +select * from union_target_mm_directinsert_unflattened; + +create table union_target_acid_nondirectinsert_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true'); + +explain insert into table union_target_acid_nondirectinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_nondirectinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +insert into table union_target_acid_nondirectinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_nondirectinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817'; + +dfs -ls -R ${hiveconf:hive.metastore.warehouse.dir}/union_target_acid_nondirectinsert_flattened; + +select * from union_target_acid_nondirectinsert_flattened; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out b/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out new file mode 100644 index 00000000000..51fdf23c6d8 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out @@ -0,0 +1,1030 @@ +PREHOOK: query: create table test1 (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: create table test1 (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: insert into test1 partition (dt='20230817') values ("val1"), ("val2") +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test1@dt=20230817 +POSTHOOK: query: insert into test1 partition (dt='20230817') values ("val1"), ("val2") +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test1@dt=20230817 +POSTHOOK: Lineage: test1 PARTITION(dt=20230817).val SCRIPT [] +PREHOOK: query: create table union_target_nonacid_directinsert_flattened (val string) partitioned by (dt string) stored as avro +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_nonacid_directinsert_flattened +POSTHOOK: query: create table union_target_nonacid_directinsert_flattened (val string) partitioned by (dt string) stored as avro +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_nonacid_directinsert_flattened +PREHOOK: query: explain insert overwrite table union_target_nonacid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_nonacid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_nonacid_directinsert_flattened +PREHOOK: Output: default@union_target_nonacid_directinsert_flattened@dt=20230817 +POSTHOOK: query: explain insert overwrite table union_target_nonacid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_nonacid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_nonacid_directinsert_flattened +POSTHOOK: Output: default@union_target_nonacid_directinsert_flattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_nonacid_directinsert_flattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_nonacid_directinsert_flattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_nonacid_directinsert_flattened,union_target_nonacid_directinsert_flattened,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_nonacid_directinsert_flattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert overwrite table union_target_nonacid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_nonacid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_nonacid_directinsert_flattened +PREHOOK: Output: default@union_target_nonacid_directinsert_flattened@dt=20230817 +POSTHOOK: query: insert overwrite table union_target_nonacid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_nonacid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_nonacid_directinsert_flattened +POSTHOOK: Output: default@union_target_nonacid_directinsert_flattened@dt=20230817 +POSTHOOK: Lineage: union_target_nonacid_directinsert_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_nonacid_directinsert_flattened)union_target_nonacid_directinsert_flattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 311 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_nonacid_directinsert_flattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_nonacid_directinsert_flattened +PREHOOK: Input: default@union_target_nonacid_directinsert_flattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_nonacid_directinsert_flattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_nonacid_directinsert_flattened +POSTHOOK: Input: default@union_target_nonacid_directinsert_flattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_mm_directinsert_flattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_mm_directinsert_flattened +POSTHOOK: query: create table union_target_mm_directinsert_flattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_mm_directinsert_flattened +PREHOOK: query: explain insert overwrite table union_target_mm_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_directinsert_flattened +PREHOOK: Output: default@union_target_mm_directinsert_flattened@dt=20230817 +POSTHOOK: query: explain insert overwrite table union_target_mm_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_directinsert_flattened +POSTHOOK: Output: default@union_target_mm_directinsert_flattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_mm_directinsert_flattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_mm_directinsert_flattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_mm_directinsert_flattened,union_target_mm_directinsert_flattened, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_mm_directinsert_flattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert overwrite table union_target_mm_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_directinsert_flattened +PREHOOK: Output: default@union_target_mm_directinsert_flattened@dt=20230817 +POSTHOOK: query: insert overwrite table union_target_mm_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_directinsert_flattened +POSTHOOK: Output: default@union_target_mm_directinsert_flattened@dt=20230817 +POSTHOOK: Lineage: union_target_mm_directinsert_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_directinsert_flattened)union_target_mm_directinsert_flattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 306 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_mm_directinsert_flattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_mm_directinsert_flattened +PREHOOK: Input: default@union_target_mm_directinsert_flattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_mm_directinsert_flattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_mm_directinsert_flattened +POSTHOOK: Input: default@union_target_mm_directinsert_flattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_acid_directinsert_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_acid_directinsert_flattened +POSTHOOK: query: create table union_target_acid_directinsert_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_acid_directinsert_flattened +PREHOOK: query: explain insert into table union_target_acid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_directinsert_flattened +PREHOOK: Output: default@union_target_acid_directinsert_flattened@dt=20230817 +POSTHOOK: query: explain insert into table union_target_acid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_directinsert_flattened +POSTHOOK: Output: default@union_target_acid_directinsert_flattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_acid_directinsert_flattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_acid_directinsert_flattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_acid_directinsert_flattened,union_target_acid_directinsert_flattened, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_acid_directinsert_flattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert into table union_target_acid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_directinsert_flattened +PREHOOK: Output: default@union_target_acid_directinsert_flattened@dt=20230817 +POSTHOOK: query: insert into table union_target_acid_directinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_directinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_directinsert_flattened +POSTHOOK: Output: default@union_target_acid_directinsert_flattened@dt=20230817 +POSTHOOK: Lineage: union_target_acid_directinsert_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_directinsert_flattened)union_target_acid_directinsert_flattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 699 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_acid_directinsert_flattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_acid_directinsert_flattened +PREHOOK: Input: default@union_target_acid_directinsert_flattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_acid_directinsert_flattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_acid_directinsert_flattened +POSTHOOK: Input: default@union_target_acid_directinsert_flattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_mm_flattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_mm_flattened +POSTHOOK: query: create table union_target_mm_flattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_mm_flattened +PREHOOK: query: explain insert overwrite table union_target_mm_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_flattened +PREHOOK: Output: default@union_target_mm_flattened@dt=20230817 +POSTHOOK: query: explain insert overwrite table union_target_mm_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_flattened +POSTHOOK: Output: default@union_target_mm_flattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_mm_flattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_mm_flattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_mm_flattened,union_target_mm_flattened, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_mm_flattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert overwrite table union_target_mm_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_flattened +PREHOOK: Output: default@union_target_mm_flattened@dt=20230817 +POSTHOOK: query: insert overwrite table union_target_mm_flattened partition (dt='20230817') select ful.* from (select val from union_target_mm_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_flattened +POSTHOOK: Output: default@union_target_mm_flattened@dt=20230817 +POSTHOOK: Lineage: union_target_mm_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_flattened)union_target_mm_flattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 293 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_mm_flattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_mm_flattened +PREHOOK: Input: default@union_target_mm_flattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_mm_flattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_mm_flattened +POSTHOOK: Input: default@union_target_mm_flattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_acid_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_acid_flattened +POSTHOOK: query: create table union_target_acid_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_acid_flattened +PREHOOK: query: explain insert into table union_target_acid_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_flattened +PREHOOK: Output: default@union_target_acid_flattened@dt=20230817 +POSTHOOK: query: explain insert into table union_target_acid_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_flattened +POSTHOOK: Output: default@union_target_acid_flattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_acid_flattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_acid_flattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_acid_flattened,union_target_acid_flattened, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_acid_flattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert into table union_target_acid_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_flattened +PREHOOK: Output: default@union_target_acid_flattened@dt=20230817 +POSTHOOK: query: insert into table union_target_acid_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_flattened +POSTHOOK: Output: default@union_target_acid_flattened@dt=20230817 +POSTHOOK: Lineage: union_target_acid_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_flattened)union_target_acid_flattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 699 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_acid_flattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_acid_flattened +PREHOOK: Input: default@union_target_acid_flattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_acid_flattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_acid_flattened +POSTHOOK: Input: default@union_target_acid_flattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_mm_unflattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_mm_unflattened +POSTHOOK: query: create table union_target_mm_unflattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_mm_unflattened +PREHOOK: query: explain insert overwrite table union_target_mm_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_unflattened +PREHOOK: Output: default@union_target_mm_unflattened@dt=20230817 +POSTHOOK: query: explain insert overwrite table union_target_mm_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_unflattened +POSTHOOK: Output: default@union_target_mm_unflattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_mm_unflattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_mm_unflattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_mm_unflattened,union_target_mm_unflattened, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_mm_unflattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert overwrite table union_target_mm_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_unflattened +PREHOOK: Output: default@union_target_mm_unflattened@dt=20230817 +POSTHOOK: query: insert overwrite table union_target_mm_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_unflattened +POSTHOOK: Output: default@union_target_mm_unflattened@dt=20230817 +POSTHOOK: Lineage: union_target_mm_unflattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_unflattened)union_target_mm_unflattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 295 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_mm_unflattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_mm_unflattened +PREHOOK: Input: default@union_target_mm_unflattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_mm_unflattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_mm_unflattened +POSTHOOK: Input: default@union_target_mm_unflattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_acid_unflattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_acid_unflattened +POSTHOOK: query: create table union_target_acid_unflattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_acid_unflattened +PREHOOK: query: explain insert into table union_target_acid_unflattened partition (dt='20230817') select ful.* from (select val from union_target_acid_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_unflattened +PREHOOK: Output: default@union_target_acid_unflattened@dt=20230817 +POSTHOOK: query: explain insert into table union_target_acid_unflattened partition (dt='20230817') select ful.* from (select val from union_target_acid_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_unflattened +POSTHOOK: Output: default@union_target_acid_unflattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_acid_unflattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_acid_unflattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_acid_unflattened,union_target_acid_unflattened, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_acid_unflattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert into table union_target_acid_unflattened partition (dt='20230817') select ful.* from (select val from union_target_acid_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_unflattened +PREHOOK: Output: default@union_target_acid_unflattened@dt=20230817 +POSTHOOK: query: insert into table union_target_acid_unflattened partition (dt='20230817') select ful.* from (select val from union_target_acid_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_unflattened +POSTHOOK: Output: default@union_target_acid_unflattened@dt=20230817 +POSTHOOK: Lineage: union_target_acid_unflattened PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_unflattened)union_target_acid_unflattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 699 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_acid_unflattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_acid_unflattened +PREHOOK: Input: default@union_target_acid_unflattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_acid_unflattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_acid_unflattened +POSTHOOK: Input: default@union_target_acid_unflattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_mm_directinsert_unflattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_mm_directinsert_unflattened +POSTHOOK: query: create table union_target_mm_directinsert_unflattened (val string) partitioned by (dt string) stored as avro TBLPROPERTIES ('transactional'='true', 'transactional_properties'='insert_only') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_mm_directinsert_unflattened +PREHOOK: query: explain insert overwrite table union_target_mm_directinsert_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_directinsert_unflattened +PREHOOK: Output: default@union_target_mm_directinsert_unflattened@dt=20230817 +POSTHOOK: query: explain insert overwrite table union_target_mm_directinsert_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_directinsert_unflattened +POSTHOOK: Output: default@union_target_mm_directinsert_unflattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_mm_directinsert_unflattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_mm_directinsert_unflattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_mm_directinsert_unflattened,union_target_mm_directinsert_unflattened, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_mm_directinsert_unflattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert overwrite table union_target_mm_directinsert_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_mm_directinsert_unflattened +PREHOOK: Output: default@union_target_mm_directinsert_unflattened@dt=20230817 +POSTHOOK: query: insert overwrite table union_target_mm_directinsert_unflattened partition (dt='20230817') select ful.* from (select val from union_target_mm_directinsert_unflattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_mm_directinsert_unflattened +POSTHOOK: Output: default@union_target_mm_directinsert_unflattened@dt=20230817 +POSTHOOK: Lineage: union_target_mm_directinsert_unflattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_directinsert_unflattened)union_target_mm_directinsert_unflattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 308 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_mm_directinsert_unflattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_mm_directinsert_unflattened +PREHOOK: Input: default@union_target_mm_directinsert_unflattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_mm_directinsert_unflattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_mm_directinsert_unflattened +POSTHOOK: Input: default@union_target_mm_directinsert_unflattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817 +PREHOOK: query: create table union_target_acid_nondirectinsert_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@union_target_acid_nondirectinsert_flattened +POSTHOOK: query: create table union_target_acid_nondirectinsert_flattened (val string) partitioned by (dt string) stored as ORC TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@union_target_acid_nondirectinsert_flattened +PREHOOK: query: explain insert into table union_target_acid_nondirectinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_nondirectinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_nondirectinsert_flattened +PREHOOK: Output: default@union_target_acid_nondirectinsert_flattened@dt=20230817 +POSTHOOK: query: explain insert into table union_target_acid_nondirectinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_nondirectinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_nondirectinsert_flattened +POSTHOOK: Output: default@union_target_acid_nondirectinsert_flattened@dt=20230817 +Plan optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +Map 5 <- Union 2 (CONTAINS) +Reducer 3 <- Union 2 (SIMPLE_EDGE) + +Stage-3 + Stats Work{} + Stage-0 + Move Operator + table:{"name:":"default.union_target_acid_nondirectinsert_flattened"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_63] + Select Operator [SEL_62] (rows=1 width=358) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_61] (rows=1 width=264) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)"],keys:KEY._col0 + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] vectorized + File Output Operator [FS_57] + table:{"name:":"default.union_target_acid_nondirectinsert_flattened"} + Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] + <-Map 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_53] + PartitionCols:_col0 + Select Operator [SEL_52] (rows=2 width=88) + Output:["_col0"] + Filter Operator [FIL_51] (rows=2 width=88) + predicate:val is not null + TableScan [TS_3] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + <-Select Operator [SEL_55] (rows=1 width=84) + Output:["_col0"] + Filter Operator [FIL_54] (rows=1 width=268) + predicate:(dt = '20230816') + TableScan [TS_34] (rows=1 width=268) + default@union_target_acid_nondirectinsert_flattened,union_target_acid_nondirectinsert_flattened, ACID table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_60] + PartitionCols:_col0 + Group By Operator [GBY_59] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_58] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Map Join Operator [MAPJOIN_56] + <-Map 5 [CONTAINS] vectorized + File Output Operator [FS_65] + table:{"name:":"default.union_target_acid_nondirectinsert_flattened"} + Select Operator [SEL_64] (rows=2 width=88) + Output:["_col0"] + TableScan [TS_44] (rows=2 width=88) + default@test1,test1, transactional table,Tbl:COMPLETE,Col:COMPLETE,Output:["val"] + Reduce Output Operator [RS_68] + PartitionCols:_col0 + Group By Operator [GBY_67] (rows=1 width=332) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["max(length(val))","avg(COALESCE(length(val),0))","count(1)","count(val)","compute_bit_vector_hll(val)"],keys:dt + Select Operator [SEL_66] (rows=4 width=180) + Output:["val","dt"] + Please refer to the previous Select Operator [SEL_64] + +PREHOOK: query: insert into table union_target_acid_nondirectinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_nondirectinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test1@dt=20230817 +PREHOOK: Input: default@union_target_acid_nondirectinsert_flattened +PREHOOK: Output: default@union_target_acid_nondirectinsert_flattened@dt=20230817 +POSTHOOK: query: insert into table union_target_acid_nondirectinsert_flattened partition (dt='20230817') select ful.* from (select val from union_target_acid_nondirectinsert_flattened where dt='20230816') ful left join (select val from test1 where dt='20230817') inc on ful.val=inc.val union all select test1.val from test1 where dt='20230817' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test1@dt=20230817 +POSTHOOK: Input: default@union_target_acid_nondirectinsert_flattened +POSTHOOK: Output: default@union_target_acid_nondirectinsert_flattened@dt=20230817 +POSTHOOK: Lineage: union_target_acid_nondirectinsert_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_nondirectinsert_flattened)union_target_acid_nondirectinsert_flattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 1 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 ### USER ### ### GROUP ### 699 ### HDFS DATE ### hdfs://### HDFS PATH ### +PREHOOK: query: select * from union_target_acid_nondirectinsert_flattened +PREHOOK: type: QUERY +PREHOOK: Input: default@union_target_acid_nondirectinsert_flattened +PREHOOK: Input: default@union_target_acid_nondirectinsert_flattened@dt=20230817 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from union_target_acid_nondirectinsert_flattened +POSTHOOK: type: QUERY +POSTHOOK: Input: default@union_target_acid_nondirectinsert_flattened +POSTHOOK: Input: default@union_target_acid_nondirectinsert_flattened@dt=20230817 +POSTHOOK: Output: hdfs://### HDFS PATH ### +val1 20230817 +val2 20230817