This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 4efb565  HIVE-25485: Transform selects of literals under a UNION ALL to inline table scan (#2608) (Zoltan Haindrich reviewed by Krisztian Kasa)
4efb565 is described below

commit 4efb565b36ef740fd3a932cfcb07590fc3e93a40
Author: Zoltan Haindrich <k...@rxd.hu>
AuthorDate: Wed Sep 22 11:31:30 2021 +0200

    HIVE-25485: Transform selects of literals under a UNION ALL to inline table scan (#2608) (Zoltan Haindrich reviewed by Krisztian Kasa)
---
 .../hive/jdbc/BaseJdbcWithMiniLlap.java.orig       | 747 ---------------------
 .../java/org/apache/hive/jdbc/TestJdbcDriver2.java |   2 +-
 .../calcite/rules/HiveRelDecorrelator.java         |   2 +-
 .../HiveUnionSimpleSelectsToInlineTableRule.java   | 244 +++++++
 .../calcite/translator/SqlFunctionConverter.java   |   2 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |   4 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |   6 +-
 .../test/queries/clientpositive/union_literals.q   | 103 +++
 .../results/clientpositive/llap/udf_likeall.q.out  |   4 +-
 .../results/clientpositive/llap/udf_likeany.q.out  |   4 +-
 .../clientpositive/llap/udf_sort_array_by.q.out    |   6 +-
 .../clientpositive/llap/union_literals.q.out       | 454 +++++++++++++
 .../clientpositive/llap/vectorized_mapjoin3.q.out  | 229 +++---
 13 files changed, 925 insertions(+), 882 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
deleted file mode 100644
index 4c46db9..0000000
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java.orig
+++ /dev/null
@@ -1,747 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.apache.hive.jdbc; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - -import java.io.File; -import java.math.BigDecimal; -import java.net.URL; -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.sql.Statement; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.llap.FieldDesc; -import org.apache.hadoop.hive.llap.Row; -import org.apache.hadoop.hive.llap.Schema; -import org.apache.hadoop.io.NullWritable; - -import org.apache.hive.jdbc.miniHS2.MiniHS2; -import org.apache.hive.jdbc.miniHS2.MiniHS2.MiniClusterType; -import org.apache.hadoop.hive.common.type.Date; -import org.apache.hadoop.hive.common.type.Timestamp; -import org.apache.hadoop.hive.llap.LlapBaseInputFormat; - -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.Test; -import org.apache.hadoop.mapred.InputFormat; - -/** - * Specialize this base class for different serde's/formats - * {@link #beforeTest(boolean) beforeTest} should be called - * by sub-classes in a {@link org.junit.BeforeClass} initializer - */ -public abstract class BaseJdbcWithMiniLlap { - - private static String dataFileDir; - private static Path kvDataFilePath; - private static Path dataTypesFilePath; - private static Path over10KFilePath; - - protected static MiniHS2 miniHS2 = null; - protected static HiveConf conf = null; - protected static Connection hs2Conn = null; - - // This method should be called by sub-classes in a @BeforeClass initializer - public static MiniHS2 beforeTest(HiveConf inputConf) throws Exception { - conf = inputConf; - Class.forName(MiniHS2.getJdbcDriverName()); - miniHS2 = new MiniHS2(conf, MiniClusterType.LLAP); - dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", ""); - kvDataFilePath = new Path(dataFileDir, "kv1.txt"); - dataTypesFilePath = new Path(dataFileDir, "datatypes.txt"); - over10KFilePath = new Path(dataFileDir, "over10k"); - Map<String, String> confOverlay = new HashMap<String, String>(); - miniHS2.start(confOverlay); - miniHS2.getDFS().getFileSystem().mkdirs(new Path("/apps_staging_dir/anonymous")); - return miniHS2; - } - - static HiveConf defaultConf() throws Exception { - String confDir = "../../data/conf/llap/"; - if (confDir != null && !confDir.isEmpty()) { - HiveConf.setHiveSiteLocation(new URL("file://"+ new File(confDir).toURI().getPath() + "/hive-site.xml")); - System.out.println("Setting hive-site: " + HiveConf.getHiveSiteLocation()); - } - HiveConf defaultConf = new HiveConf(); - defaultConf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); - defaultConf.setBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS, false); - defaultConf.addResource(new URL("file://" + new File(confDir).toURI().getPath() + "/tez-site.xml")); - return defaultConf; - } - - @Before - public void setUp() throws Exception { - hs2Conn = getConnection(miniHS2.getJdbcURL(), 
System.getProperty("user.name"), "bar"); - } - - public static Connection getConnection(String jdbcURL, String user, String pwd) throws SQLException { - Connection conn = DriverManager.getConnection(jdbcURL, user, pwd); - conn.createStatement().execute("set hive.support.concurrency = false"); - return conn; - } - - @After - public void tearDown() throws Exception { - LlapBaseInputFormat.closeAll(); - hs2Conn.close(); - } - - @AfterClass - public static void afterTest() throws Exception { - if (miniHS2.isStarted()) { - miniHS2.stop(); - } - } - - protected void createTestTable(String tableName) throws Exception { - createTestTable(hs2Conn, null, tableName, kvDataFilePath.toString()); - } - - public static void createTestTable(Connection connection, String database, String tableName, String srcFile) throws - Exception { - Statement stmt = connection.createStatement(); - - if (database != null) { - stmt.execute("CREATE DATABASE IF NOT EXISTS " + database); - stmt.execute("USE " + database); - } - - // create table - stmt.execute("DROP TABLE IF EXISTS " + tableName); - stmt.execute("CREATE TABLE " + tableName - + " (under_col INT COMMENT 'the under column', value STRING) COMMENT ' test table'"); - - // load data - stmt.execute("load data local inpath '" + srcFile + "' into table " + tableName); - - ResultSet res = stmt.executeQuery("SELECT * FROM " + tableName); - assertTrue(res.next()); - assertEquals("val_238", res.getString(2)); - res.close(); - stmt.close(); - } - - protected void createDataTypesTable(String tableName) throws Exception { - Statement stmt = hs2Conn.createStatement(); - - // create table - stmt.execute("DROP TABLE IF EXISTS " + tableName); - // tables with various types - stmt.execute("create table " + tableName - + " (c1 int, c2 boolean, c3 double, c4 string," - + " c5 array<int>, c6 map<int,string>, c7 map<string,string>," - + " c8 struct<r:string,s:int,t:double>," - + " c9 tinyint, c10 smallint, c11 float, c12 bigint," - + " c13 array<array<string>>," - + " c14 map<int, map<int,int>>," - + " c15 struct<r:int,s:struct<a:int,b:string>>," - + " c16 array<struct<m:map<string,string>,n:int>>," - + " c17 timestamp, " - + " c18 decimal(16,7), " - + " c19 binary, " - + " c20 date," - + " c21 varchar(20)," - + " c22 char(15)," - + " c23 binary" - + ")"); - stmt.execute("load data local inpath '" - + dataTypesFilePath.toString() + "' into table " + tableName); - stmt.close(); - } - - protected void createOver10KTable(String tableName) throws Exception { - try (Statement stmt = hs2Conn.createStatement()) { - - String createQuery = - "create table " + tableName + " (t tinyint, si smallint, i int, b bigint, f float, d double, bo boolean, " - + "s string, ts timestamp, `dec` decimal(4,2), bin binary) row format delimited fields terminated by '|'"; - - // create table - stmt.execute("DROP TABLE IF EXISTS " + tableName); - stmt.execute(createQuery); - // load data - stmt.execute("load data local inpath '" + over10KFilePath.toString() + "' into table " + tableName); - } - } - - @Test(timeout = 60000) - public void testLlapInputFormatEndToEnd() throws Exception { - createTestTable("testtab1"); - - int rowCount; - - RowCollector rowCollector = new RowCollector(); - String query = "select * from testtab1 where under_col = 0"; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(3, rowCount); - assertArrayEquals(new String[] {"0", "val_0"}, rowCollector.rows.get(0)); - assertArrayEquals(new String[] {"0", "val_0"}, rowCollector.rows.get(1)); - assertArrayEquals(new String[] {"0", 
"val_0"}, rowCollector.rows.get(2)); - - // Try empty rows query - rowCollector.rows.clear(); - query = "select * from testtab1 where true = false"; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(0, rowCount); - } - - @Test(timeout = 300000) - public void testMultipleBatchesOfComplexTypes() throws Exception { - final String tableName = "testMultipleBatchesOfComplexTypes"; - try (Statement stmt = hs2Conn.createStatement()) { - String createQuery = - "create table " + tableName + "(c1 array<struct<f1:string,f2:string>>, " - + "c2 int, " - + "c3 array<array<int>>, " - + "c4 array<struct<f1:array<string>>>) STORED AS ORC"; - - // create table - stmt.execute("DROP TABLE IF EXISTS " + tableName); - stmt.execute(createQuery); - // load data - stmt.execute("INSERT INTO " + tableName + " VALUES " - // value 1 - + "(ARRAY(NAMED_STRUCT('f1','a1', 'f2','a2'), NAMED_STRUCT('f1','a3', 'f2','a4')), " - + "1, ARRAY(ARRAY(1)), ARRAY(NAMED_STRUCT('f1',ARRAY('aa1')))), " - // value 2 - + "(ARRAY(NAMED_STRUCT('f1','b1', 'f2','b2'), NAMED_STRUCT('f1','b3', 'f2','b4')), 2, " - + "ARRAY(ARRAY(2,2), ARRAY(2,2)), " - + "ARRAY(NAMED_STRUCT('f1',ARRAY('aa2','aa2')), NAMED_STRUCT('f1',ARRAY('aa2','aa2')))), " - // value 3 - + "(ARRAY(NAMED_STRUCT('f1','c1', 'f2','c2'), NAMED_STRUCT('f1','c3', 'f2','c4'), " - + "NAMED_STRUCT('f1','c5', 'f2','c6')), 3, " + "ARRAY(ARRAY(3,3,3), ARRAY(3,3,3), ARRAY(3,3,3)), " - + "ARRAY(NAMED_STRUCT('f1',ARRAY('aa3','aa3','aa3')), " - + "NAMED_STRUCT('f1',ARRAY('aa3','aa3', 'aa3')), NAMED_STRUCT('f1',ARRAY('aa3','aa3', 'aa3')))), " - // value 4 - + "(ARRAY(NAMED_STRUCT('f1','d1', 'f2','d2'), NAMED_STRUCT('f1','d3', 'f2','d4')," - + " NAMED_STRUCT('f1','d5', 'f2','d6'), NAMED_STRUCT('f1','d7', 'f2','d8')), 4, " - + "ARRAY(ARRAY(4,4,4,4),ARRAY(4,4,4,4),ARRAY(4,4,4,4),ARRAY(4,4,4,4)), " - + "ARRAY(NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4')), " - + "NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4')), NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4'))," - + " NAMED_STRUCT('f1',ARRAY('aa4','aa4','aa4', 'aa4'))))"); - - // generate 4096 rows from above records - for (int i = 0; i < 10; i++) { - stmt.execute(String.format("insert into %s select * from %s", tableName, tableName)); - } - // validate test table - ResultSet res = stmt.executeQuery("SELECT count(*) FROM " + tableName); - assertTrue(res.next()); - assertEquals(4096, res.getInt(1)); - res.close(); - } - - RowCollector rowCollector = new RowCollector(); - String query = "select * from " + tableName; - int rowCount = processQuery(query, 1, rowCollector); - assertEquals(4096, rowCount); - - /* - * - * validate different rows - * [[[a1, a2], [a3, a4]], 1, [[1]], [[[aa1]]]] - * [[[b1, b2], [b3, b4]], 2, [[2, 2], [2, 2]], [[[aa2, aa2]], [[aa2, aa2]]]] - * [[[c1, c2], [c3, c4], [c5, c6]], 3, [[3, 3, 3], [3, 3, 3], [3, 3, 3]], [[[aa3, aa3, aa3]], [[aa3, aa3, aa3]], [[aa3, aa3, aa3]]]] - * [[[d1, d2], [d3, d4], [d5, d6], [d7, d8]], 4, [[4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4]], [[[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]]]] - * - */ - rowCollector.rows.clear(); - query = "select * from " + tableName + " where c2=1 limit 1"; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(1, rowCount); - final String[] expected1 = - { "[[a1, a2], [a3, a4]]", - "1", - "[[1]]", - "[[[aa1]]]" - }; - assertArrayEquals(expected1, rowCollector.rows.get(0)); - - rowCollector.rows.clear(); - query = "select * from " + tableName + " where c2=2 limit 1"; - rowCount 
= processQuery(query, 1, rowCollector); - assertEquals(1, rowCount); - final String[] expected2 = - { "[[b1, b2], [b3, b4]]", - "2", - "[[2, 2], [2, 2]]", - "[[[aa2, aa2]], [[aa2, aa2]]]" - }; - assertArrayEquals(expected2, rowCollector.rows.get(0)); - - rowCollector.rows.clear(); - query = "select * from " + tableName + " where c2=3 limit 1"; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(1, rowCount); - final String[] expected3 = - { "[[c1, c2], [c3, c4], [c5, c6]]", - "3", - "[[3, 3, 3], [3, 3, 3], [3, 3, 3]]", - "[[[aa3, aa3, aa3]], [[aa3, aa3, aa3]], [[aa3, aa3, aa3]]]" - }; - assertArrayEquals(expected3, rowCollector.rows.get(0)); - - rowCollector.rows.clear(); - query = "select * from " + tableName + " where c2=4 limit 1"; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(1, rowCount); - final String[] expected4 = - { "[[d1, d2], [d3, d4], [d5, d6], [d7, d8]]", - "4", - "[[4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4], [4, 4, 4, 4]]", - "[[[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]], [[aa4, aa4, aa4, aa4]]]" - }; - assertArrayEquals(expected4, rowCollector.rows.get(0)); - - } - - @Test(timeout = 300000) - public void testLlapInputFormatEndToEndWithMultipleBatches() throws Exception { - String tableName = "over10k_table"; - - createOver10KTable(tableName); - - int rowCount; - - // Try with more than one batch - RowCollector rowCollector = new RowCollector(); - String query = "select * from " + tableName; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(9999, rowCount); - - // Try with less than one batch - rowCollector.rows.clear(); - query = "select * from " + tableName + " where s = 'rachel brown'"; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(17, rowCount); - - // Try empty rows query - rowCollector.rows.clear(); - query = "select * from " + tableName + " where false"; - rowCount = processQuery(query, 1, rowCollector); - assertEquals(0, rowCount); - } - - - @Test(timeout = 60000) - public void testNonAsciiStrings() throws Exception { - createTestTable("testtab_nonascii"); - - RowCollector rowCollector = new RowCollector(); - String nonAscii = "À côté du garçon"; - String query = "select value, '" + nonAscii + "' from testtab_nonascii where under_col=0"; - int rowCount = processQuery(query, 1, rowCollector); - assertEquals(3, rowCount); - - assertArrayEquals(new String[] {"val_0", nonAscii}, rowCollector.rows.get(0)); - assertArrayEquals(new String[] {"val_0", nonAscii}, rowCollector.rows.get(1)); - assertArrayEquals(new String[] {"val_0", nonAscii}, rowCollector.rows.get(2)); - } - - @Test(timeout = 60000) - public void testEscapedStrings() throws Exception { - createTestTable("testtab1"); - - RowCollector rowCollector = new RowCollector(); - String expectedVal1 = "'a',\"b\",\\c\\"; - String expectedVal2 = "multi\nline"; - String query = "select value, '\\'a\\',\"b\",\\\\c\\\\', 'multi\\nline' from testtab1 where under_col=0"; - int rowCount = processQuery(query, 1, rowCollector); - assertEquals(3, rowCount); - - assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(0)); - assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(1)); - assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(2)); - } - - @Test(timeout = 60000) - public void testDataTypes() throws Exception { - createDataTypesTable("datatypes"); - RowCollector2 rowCollector = new RowCollector2(); - String query = 
"select * from datatypes"; - int rowCount = processQuery(query, 1, rowCollector); - assertEquals(3, rowCount); - - // Verify schema - String[][] colNameTypes = new String[][] { - {"datatypes.c1", "int"}, - {"datatypes.c2", "boolean"}, - {"datatypes.c3", "double"}, - {"datatypes.c4", "string"}, - {"datatypes.c5", "array<int>"}, - {"datatypes.c6", "map<int,string>"}, - {"datatypes.c7", "map<string,string>"}, - {"datatypes.c8", "struct<r:string,s:int,t:double>"}, - {"datatypes.c9", "tinyint"}, - {"datatypes.c10", "smallint"}, - {"datatypes.c11", "float"}, - {"datatypes.c12", "bigint"}, - {"datatypes.c13", "array<array<string>>"}, - {"datatypes.c14", "map<int,map<int,int>>"}, - {"datatypes.c15", "struct<r:int,s:struct<a:int,b:string>>"}, - {"datatypes.c16", "array<struct<m:map<string,string>,n:int>>"}, - {"datatypes.c17", "timestamp"}, - {"datatypes.c18", "decimal(16,7)"}, - {"datatypes.c19", "binary"}, - {"datatypes.c20", "date"}, - {"datatypes.c21", "varchar(20)"}, - {"datatypes.c22", "char(15)"}, - {"datatypes.c23", "binary"}, - }; - FieldDesc fieldDesc; - assertEquals(23, rowCollector.numColumns); - for (int idx = 0; idx < rowCollector.numColumns; ++idx) { - fieldDesc = rowCollector.schema.getColumns().get(idx); - assertEquals("ColName idx=" + idx, colNameTypes[idx][0], fieldDesc.getName()); - assertEquals("ColType idx=" + idx, colNameTypes[idx][1], fieldDesc.getTypeInfo().getTypeName()); - } - - // First row is all nulls - Object[] rowValues = rowCollector.rows.get(0); - for (int idx = 0; idx < rowCollector.numColumns; ++idx) { - assertEquals("idx=" + idx, null, rowValues[idx]); - } - - // Second Row - rowValues = rowCollector.rows.get(1); - assertEquals(Integer.valueOf(-1), rowValues[0]); - assertEquals(Boolean.FALSE, rowValues[1]); - assertEquals(Double.valueOf(-1.1d), rowValues[2]); - assertEquals("", rowValues[3]); - - List<?> c5Value = (List<?>) rowValues[4]; - assertEquals(0, c5Value.size()); - - Map<?,?> c6Value = (Map<?,?>) rowValues[5]; - assertEquals(0, c6Value.size()); - - Map<?,?> c7Value = (Map<?,?>) rowValues[6]; - assertEquals(0, c7Value.size()); - - List<?> c8Value = (List<?>) rowValues[7]; - assertEquals(null, c8Value.get(0)); - assertEquals(null, c8Value.get(1)); - assertEquals(null, c8Value.get(2)); - - assertEquals(Byte.valueOf((byte) -1), rowValues[8]); - assertEquals(Short.valueOf((short) -1), rowValues[9]); - assertEquals(Float.valueOf(-1.0f), rowValues[10]); - assertEquals(Long.valueOf(-1l), rowValues[11]); - - List<?> c13Value = (List<?>) rowValues[12]; - assertEquals(0, c13Value.size()); - - Map<?,?> c14Value = (Map<?,?>) rowValues[13]; - assertEquals(0, c14Value.size()); - - List<?> c15Value = (List<?>) rowValues[14]; - assertEquals(null, c15Value.get(0)); - assertEquals(null, c15Value.get(1)); - - List<?> c16Value = (List<?>) rowValues[15]; - assertEquals(0, c16Value.size()); - - assertEquals(null, rowValues[16]); - assertEquals(null, rowValues[17]); - assertEquals(null, rowValues[18]); - assertEquals(null, rowValues[19]); - assertEquals(null, rowValues[20]); - assertEquals(null, rowValues[21]); - assertEquals(null, rowValues[22]); - - // Third row - rowValues = rowCollector.rows.get(2); - assertEquals(Integer.valueOf(1), rowValues[0]); - assertEquals(Boolean.TRUE, rowValues[1]); - assertEquals(Double.valueOf(1.1d), rowValues[2]); - assertEquals("1", rowValues[3]); - - c5Value = (List<?>) rowValues[4]; - assertEquals(2, c5Value.size()); - assertEquals(Integer.valueOf(1), c5Value.get(0)); - assertEquals(Integer.valueOf(2), c5Value.get(1)); - - c6Value = 
(Map<?,?>) rowValues[5]; - assertEquals(2, c6Value.size()); - assertEquals("x", c6Value.get(Integer.valueOf(1))); - assertEquals("y", c6Value.get(Integer.valueOf(2))); - - c7Value = (Map<?,?>) rowValues[6]; - assertEquals(1, c7Value.size()); - assertEquals("v", c7Value.get("k")); - - c8Value = (List<?>) rowValues[7]; - assertEquals("a", c8Value.get(0)); - assertEquals(Integer.valueOf(9), c8Value.get(1)); - assertEquals(Double.valueOf(2.2d), c8Value.get(2)); - - assertEquals(Byte.valueOf((byte) 1), rowValues[8]); - assertEquals(Short.valueOf((short) 1), rowValues[9]); - assertEquals(Float.valueOf(1.0f), rowValues[10]); - assertEquals(Long.valueOf(1l), rowValues[11]); - - c13Value = (List<?>) rowValues[12]; - assertEquals(2, c13Value.size()); - List<?> listVal = (List<?>) c13Value.get(0); - assertEquals("a", listVal.get(0)); - assertEquals("b", listVal.get(1)); - listVal = (List<?>) c13Value.get(1); - assertEquals("c", listVal.get(0)); - assertEquals("d", listVal.get(1)); - - c14Value = (Map<?,?>) rowValues[13]; - assertEquals(2, c14Value.size()); - Map<?,?> mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(1)); - assertEquals(2, mapVal.size()); - assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11))); - assertEquals(Integer.valueOf(14), mapVal.get(Integer.valueOf(13))); - mapVal = (Map<?,?>) c14Value.get(Integer.valueOf(2)); - assertEquals(1, mapVal.size()); - assertEquals(Integer.valueOf(22), mapVal.get(Integer.valueOf(21))); - - c15Value = (List<?>) rowValues[14]; - assertEquals(Integer.valueOf(1), c15Value.get(0)); - listVal = (List<?>) c15Value.get(1); - assertEquals(2, listVal.size()); - assertEquals(Integer.valueOf(2), listVal.get(0)); - assertEquals("x", listVal.get(1)); - - c16Value = (List<?>) rowValues[15]; - assertEquals(2, c16Value.size()); - listVal = (List<?>) c16Value.get(0); - assertEquals(2, listVal.size()); - mapVal = (Map<?,?>) listVal.get(0); - assertEquals(0, mapVal.size()); - assertEquals(Integer.valueOf(1), listVal.get(1)); - listVal = (List<?>) c16Value.get(1); - mapVal = (Map<?,?>) listVal.get(0); - assertEquals(2, mapVal.size()); - assertEquals("b", mapVal.get("a")); - assertEquals("d", mapVal.get("c")); - assertEquals(Integer.valueOf(2), listVal.get(1)); - - assertEquals(Timestamp.valueOf("2012-04-22 09:00:00.123456789"), rowValues[16]); - assertEquals(new BigDecimal("123456789.123456"), rowValues[17]); - assertArrayEquals("abcd".getBytes("UTF-8"), (byte[]) rowValues[18]); - assertEquals(Date.valueOf("2013-01-01"), rowValues[19]); - assertEquals("abc123", rowValues[20]); - assertEquals("abc123 ", rowValues[21]); - assertArrayEquals("X'01FF'".getBytes("UTF-8"), (byte[]) rowValues[22]); - } - - - @Test(timeout = 60000) - public void testComplexQuery() throws Exception { - createTestTable("testtab1"); - - RowCollector rowCollector = new RowCollector(); - String query = "select value, count(*) from testtab1 where under_col=0 group by value"; - int rowCount = processQuery(query, 1, rowCollector); - assertEquals(1, rowCount); - - assertArrayEquals(new String[] {"val_0", "3"}, rowCollector.rows.get(0)); - } - - protected interface RowProcessor { - void process(Row row); - } - - protected static class RowCollector implements RowProcessor { - ArrayList<String[]> rows = new ArrayList<String[]>(); - Schema schema = null; - int numColumns = 0; - - public void process(Row row) { - if (schema == null) { - schema = row.getSchema(); - numColumns = schema.getColumns().size(); - } - - String[] arr = new String[numColumns]; - for (int idx = 0; idx < numColumns; ++idx) { - 
Object val = row.getValue(idx); - arr[idx] = (val == null ? null : val.toString()); - } - rows.add(arr); - } - } - - // Save the actual values from each row as opposed to the String representation. - protected static class RowCollector2 implements RowProcessor { - ArrayList<Object[]> rows = new ArrayList<Object[]>(); - Schema schema = null; - int numColumns = 0; - - public void process(Row row) { - if (schema == null) { - schema = row.getSchema(); - numColumns = schema.getColumns().size(); - } - - Object[] arr = new Object[numColumns]; - for (int idx = 0; idx < numColumns; ++idx) { - arr[idx] = row.getValue(idx); - } - rows.add(arr); - } - } - - protected int processQuery(String query, int numSplits, RowProcessor rowProcessor) throws Exception { - return processQuery(null, query, numSplits, rowProcessor); - } - - protected abstract InputFormat<NullWritable, Row> getInputFormat(); - - protected int processQuery(String currentDatabase, String query, int numSplits, RowProcessor rowProcessor) - throws Exception { - String url = miniHS2.getJdbcURL(); - String user = System.getProperty("user.name"); - String pwd = user; - String handleId = UUID.randomUUID().toString(); - - InputFormat<NullWritable, Row> inputFormat = getInputFormat(); - - // Get splits - JobConf job = new JobConf(conf); - job.set(LlapBaseInputFormat.URL_KEY, url); - job.set(LlapBaseInputFormat.USER_KEY, user); - job.set(LlapBaseInputFormat.PWD_KEY, pwd); - job.set(LlapBaseInputFormat.QUERY_KEY, query); - job.set(LlapBaseInputFormat.HANDLE_ID, handleId); - if (currentDatabase != null) { - job.set(LlapBaseInputFormat.DB_KEY, currentDatabase); - } - - InputSplit[] splits = inputFormat.getSplits(job, numSplits); - - // Fetch rows from splits - int rowCount = 0; - for (InputSplit split : splits) { - System.out.println("Processing split " + split.getLocations()); - - RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null); - Row row = reader.createValue(); - while (reader.next(NullWritable.get(), row)) { - rowProcessor.process(row); - ++rowCount; - } - //In arrow-mode this will throw exception unless all buffers have been released - //See org.apache.hadoop.hive.llap.LlapArrowBatchRecordReader - reader.close(); - } - LlapBaseInputFormat.close(handleId); - - return rowCount; - } - - /** - * Test CLI kill command of a query that is running. - * We spawn 2 threads - one running the query and - * the other attempting to cancel. - * We're using a dummy udf to simulate a query, - * that runs for a sufficiently long time. 
- * @throws Exception - */ - @Test - public void testKillQuery() throws Exception { - String tableName = "testtab1"; - createTestTable(tableName); - Connection con = hs2Conn; - Connection con2 = getConnection(miniHS2.getJdbcURL(), System.getProperty("user.name"), "bar"); - - String udfName = TestJdbcWithMiniHS2.SleepMsUDF.class.getName(); - Statement stmt1 = con.createStatement(); - Statement stmt2 = con2.createStatement(); - stmt1.execute("create temporary function sleepMsUDF as '" + udfName + "'"); - stmt1.close(); - final Statement stmt = con.createStatement(); - - ExceptionHolder tExecuteHolder = new ExceptionHolder(); - ExceptionHolder tKillHolder = new ExceptionHolder(); - - // Thread executing the query - Thread tExecute = new Thread(new Runnable() { - @Override - public void run() { - try { - System.out.println("Executing query: "); - // The test table has 500 rows, so total query time should be ~ 500*500ms - stmt.executeQuery("select sleepMsUDF(t1.under_col, 100), t1.under_col, t2.under_col " + - "from " + tableName + " t1 join " + tableName + " t2 on t1.under_col = t2.under_col"); - fail("Expecting SQLException"); - } catch (SQLException e) { - tExecuteHolder.throwable = e; - } - } - }); - // Thread killing the query - Thread tKill = new Thread(new Runnable() { - @Override - public void run() { - try { - Thread.sleep(2000); - String queryId = ((HiveStatement) stmt).getQueryId(); - System.out.println("Killing query: " + queryId); - - stmt2.execute("kill query '" + queryId + "'"); - stmt2.close(); - } catch (Exception e) { - tKillHolder.throwable = e; - } - } - }); - - tExecute.start(); - tKill.start(); - tExecute.join(); - tKill.join(); - stmt.close(); - con2.close(); - - assertNotNull("tExecute", tExecuteHolder.throwable); - assertNull("tCancel", tKillHolder.throwable); - } - - private static class ExceptionHolder { - Throwable throwable; - } -} - diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java index bf941c5..b2583c0 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java @@ -3176,7 +3176,7 @@ public class TestJdbcDriver2 { stmt.execute("SET hive.resultset.use.unique.column.names=true"); ResultSet rs = stmt.executeQuery("select 1 UNION ALL select 2"); ResultSetMetaData metaData = rs.getMetaData(); - assertEquals("_c0", metaData.getColumnLabel(1)); + assertEquals("col1", metaData.getColumnLabel(1)); assertTrue("There's no . 
for the UNION column name", !metaData.getColumnLabel(1).contains("."));
     stmt.close();
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index 61da9f4..fd54c0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -1692,7 +1692,7 @@ public final class HiveRelDecorrelator implements ReflectiveVisitor {
     return ret.succeed();
   }
 
-  private static RelNode stripHep(RelNode rel) {
+  static RelNode stripHep(RelNode rel) {
     if (rel instanceof HepRelVertex) {
       HepRelVertex hepRelVertex = (HepRelVertex) rel;
       rel = hepRelVertex.getCurrentRel();
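The visibility change above (private to package-private) lets the new rule added below, which lives in the same rules package, reuse stripHep. As background, inside a HepPlanner run the children of a matched node are HepRelVertex placeholders and must be unwrapped before instanceof checks; the following standalone restatement of that idiom is illustrative only (the class name is not part of the patch):

    import org.apache.calcite.plan.hep.HepRelVertex;
    import org.apache.calcite.rel.RelNode;

    final class StripHepSketch {
      // During a HepPlanner run, the inputs of a matched RelNode are
      // HepRelVertex placeholders; unwrap to the current rel before
      // inspecting them with instanceof.
      static RelNode stripHep(RelNode rel) {
        return rel instanceof HepRelVertex ? ((HepRelVertex) rel).getCurrentRel() : rel;
      }
    }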
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionSimpleSelectsToInlineTableRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionSimpleSelectsToInlineTableRule.java
new file mode 100644
index 0000000..c5f316d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveUnionSimpleSelectsToInlineTableRule.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelRecordType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
+import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.collect.ImmutableList;
+
+/**
+ * Transforms SELECTS of literals under UNION ALL into inline table scans.
+ *
+ * This rule processes plain projects and inline tables below UNION ALL nodes.
+ *
+ * <pre>
+ * SELECT 1
+ * UNION ALL
+ * SELECT 2
+ * UNION ALL
+ * [...]
+ * </pre>
+ *
+ * <pre>
+ * HiveUnion(all=true)
+ *   HiveProject(_o__c0=[1])
+ *     HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+ *   HiveProject(_o__c0=[2])
+ *     HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+ *   [...]
+ * </pre>
+ *
+ * will be transformed into
+ * <pre>
+ * HiveUnion(all=true)
+ *   HiveProject(EXPR$0=[$0])
+ *     HiveTableFunctionScan(invocation=[inline(ARRAY(ROW(1), ROW(2)))], rowType=[RecordType(INTEGER EXPR$0)])
+ *       HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+ *   [...]
+ * </pre>
+ *
+ *
+ */
+public class HiveUnionSimpleSelectsToInlineTableRule extends RelOptRule {
+
+  protected static final Logger LOG = LoggerFactory.getLogger(HiveUnionSimpleSelectsToInlineTableRule.class);
+
+  private RelNode dummyTable;
+
+  public HiveUnionSimpleSelectsToInlineTableRule(RelNode dummyTable) {
+    super(operand(HiveUnion.class, any()));
+    this.dummyTable = dummyTable;
+  }
+
+  static class RowStorage extends HashMap<RelRecordType, List<RexNode>> {
+
+    private static final long serialVersionUID = 1L;
+
+    public void addRow(RexNode row) {
+      RelRecordType type = (RelRecordType) row.getType();
+
+      List<RexNode> e = get(type);
+      if (e == null) {
+        put(type, e = new ArrayList<RexNode>());
+      }
+      e.add(row);
+    }
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    RexBuilder rexBuilder = call.builder().getRexBuilder();
+    final HiveUnion union = call.rel(0);
+    if (!union.all) {
+      return;
+    }
+    List<RelNode> inputs = new ArrayList<RelNode>();
+    List<Project> projects = new ArrayList<>();
+    List<HiveTableFunctionScan> inlineTables = new ArrayList<>();
+
+    for (RelNode input : union.getInputs()) {
+      input = HiveRelDecorrelator.stripHep(input);
+
+      if (isPlainProject(input)) {
+        projects.add((Project) input);
+        continue;
+      }
+
+      if (isInlineTableOperand(input)) {
+        inlineTables.add((HiveTableFunctionScan) input);
+        continue;
+      }
+      inputs.add(input);
+    }
+
+    if (projects.size() + inlineTables.size() <= 1) {
+      // nothing to do
+      return;
+    }
+
+    RowStorage newRows = new RowStorage();
+    for (HiveTableFunctionScan rel : inlineTables) {
+      // inline(array(row1,row2,...))
+      RexCall rex = (RexCall) ((RexCall) rel.getCall()).operands.get(0);
+      for (RexNode row : rex.operands) {
+        if (!(row.getType() instanceof RelRecordType)) {
+          return;
+        }
+        newRows.addRow(row);
+      }
+    }
+
+    for (Project proj : projects) {
+      RexNode row = rexBuilder.makeCall(SqlStdOperatorTable.ROW, proj.getProjects());
+      if (!(row.getType() instanceof RelRecordType)) {
+        return;
+      }
+      newRows.addRow(row);
+    }
+
+    if (newRows.keySet().size() + inputs.size() == union.getInputs().size()) {
+      // nothing to do
+      return;
+    }
+
+    if (dummyTable == null) {
+      LOG.warn("Unexpected; rule would match - but dummyTable is not available");
+      return;
+    }
+
+    for (RelRecordType type : newRows.keySet()) {
+      List<RexNode> rows = newRows.get(type);
+
+      RelDataType arrayType = rexBuilder.getTypeFactory().createArrayType(type, -1);
+      try {
+        SqlOperator inlineFn =
+            SqlFunctionConverter.getCalciteFn("inline", Collections.singletonList(arrayType), type, true, false);
+        SqlOperator arrayFn =
+            SqlFunctionConverter.getCalciteFn("array", Collections.nCopies(rows.size(), type), arrayType, true, false);
+
+        RexNode expr = rexBuilder.makeCall(arrayFn, rows);
+        expr = rexBuilder.makeCall(inlineFn, expr);
+
+        RelNode newInlineTable = buildTableFunctionScan(expr, union.getCluster());
+
+        inputs.add(newInlineTable);
+
+      } catch (CalciteSemanticException e) {
+        LOG.debug("Conversion failed with exception", e);
+        return;
+      }
+    }
+
+    if (inputs.size() > 1) {
+      HiveUnion newUnion = (HiveUnion) union.copy(union.getTraitSet(), inputs, true);
+      call.transformTo(newUnion);
+    } else {
+      call.transformTo(inputs.get(0));
+    }
+  }
+
+  private boolean isPlainProject(RelNode input) {
+    input = HiveRelDecorrelator.stripHep(input);
+    if (!(input instanceof Project)) {
+      return false;
+    }
+    if (input.getInputs().size() == 0) {
+      return true;
+    }
+    return isDummyTableScan(input.getInput(0));
+  }
+
+  private boolean isInlineTableOperand(RelNode input) {
+    input = HiveRelDecorrelator.stripHep(input);
+    if (!(input instanceof HiveTableFunctionScan)) {
+      return false;
+    }
+    if (input.getInputs().size() == 0) {
+      return true;
+    }
+    return isDummyTableScan(input.getInput(0));
+  }
+
+  private boolean isDummyTableScan(RelNode input) {
+    input = HiveRelDecorrelator.stripHep(input);
+    if (!(input instanceof HiveTableScan)) {
+      return false;
+    }
+    HiveTableScan ts = (HiveTableScan) input;
+    Table table = ((RelOptHiveTable) ts.getTable()).getHiveTableMD();
+    if (!SemanticAnalyzer.DUMMY_DATABASE.equals(table.getDbName())) {
+      return false;
+    }
+    return true;
+  }
+
+  private RelNode buildTableFunctionScan(RexNode expr, RelOptCluster cluster)
+      throws CalciteSemanticException {
+
+    return HiveTableFunctionScan.create(cluster, TraitsUtil.getDefaultTraitSet(cluster),
+        ImmutableList.of(dummyTable), expr, null, expr.getType(), null);
+
+  }
+}
\ No newline at end of file
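In summary, onMatch collects every literal-only branch as a ROW expression, buckets the rows by their exact record type via RowStorage, and replaces each bucket with a single inline(array(...)) table function scan over the dummy table, so N trivial union branches collapse into one scan. The sketch below restates the shape of the generated expression; it is illustrative only (the class and method names are hypothetical) and uses Calcite's standard ROW and ARRAY constructors, whereas the rule above resolves Hive's own "array" and "inline" operators through SqlFunctionConverter.getCalciteFn:

    import java.math.BigDecimal;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.calcite.rex.RexBuilder;
    import org.apache.calcite.rex.RexNode;
    import org.apache.calcite.sql.fun.SqlStdOperatorTable;

    final class InlineExpressionSketch {
      // Builds ARRAY(ROW(1), ROW(2), ...) for a list of integer literals;
      // the rule wraps the equivalent expression in Hive's inline() operator.
      static RexNode arrayOfRows(RexBuilder rexBuilder, List<Integer> literals) {
        List<RexNode> rows = new ArrayList<>();
        for (Integer literal : literals) {
          // ROW(<literal>): one literal-only UNION ALL branch becomes one row.
          rows.add(rexBuilder.makeCall(SqlStdOperatorTable.ROW,
              rexBuilder.makeExactLiteral(BigDecimal.valueOf(literal))));
        }
        // All rows share one record type, so they can live in a single array.
        return rexBuilder.makeCall(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, rows);
      }
    }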
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 56fd6f5..5b147b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -563,7 +563,7 @@ public class SqlFunctionConverter {
   }
 
   public static SqlOperator getCalciteFn(String hiveUdfName,
-      ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType,
+      List<RelDataType> calciteArgTypes, RelDataType calciteRetType,
       boolean deterministic, boolean runtimeConstant)
       throws CalciteSemanticException {
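The getCalciteFn parameter is widened from ImmutableList to List so that callers such as the new rule can pass Collections.singletonList(...) and Collections.nCopies(...) results directly. The CalcitePlanner hunk that follows registers the rule in the pre-stats optimization program; as a minimal sketch of how such a rule is typically driven by Calcite's HepPlanner (illustrative names only; Hive's generatePartialProgram wraps this machinery and supplies the actual planner state):

    import org.apache.calcite.plan.hep.HepMatchOrder;
    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgram;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;
    import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionSimpleSelectsToInlineTableRule;

    final class RuleDriverSketch {
      // basePlan and dummyTableScan stand in for the state that
      // CalcitePlannerAction actually carries (see the next hunk).
      static RelNode apply(RelNode basePlan, RelNode dummyTableScan) {
        HepProgram program = new HepProgramBuilder()
            .addMatchOrder(HepMatchOrder.DEPTH_FIRST)
            .addRuleInstance(new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan))
            .build();
        HepPlanner planner = new HepPlanner(program);
        planner.setRoot(basePlan);
        return planner.findBestExp();
      }
    }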
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 55b2fdd..472b3d7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -252,6 +252,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortPullUpConstants
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionSimpleSelectsToInlineTableRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
@@ -1608,6 +1609,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
     LinkedHashMap<RelNode, RowResolver> relToHiveRR = new LinkedHashMap<RelNode, RowResolver>();
     LinkedHashMap<RelNode, ImmutableMap<String, Integer>> relToHiveColNameCalcitePosMap = new LinkedHashMap<RelNode, ImmutableMap<String, Integer>>();
     private final StatsSource statsSource;
+    private RelNode dummyTableScan;
 
     protected CalcitePlannerAction(
         Map<String, PrunedPartitionList> partitionCache,
@@ -2210,6 +2212,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       // 1. Run other optimizations that do not need stats
       generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST,
           ProjectRemoveRule.Config.DEFAULT.toRule(), HiveUnionMergeRule.INSTANCE,
+          new HiveUnionSimpleSelectsToInlineTableRule(dummyTableScan),
           HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveJoinCommuteRule.INSTANCE);
@@ -5067,6 +5070,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       qb.addAlias(DUMMY_TABLE);
       qb.setTabAlias(DUMMY_TABLE, DUMMY_TABLE);
       RelNode op = genTableLogicalPlan(DUMMY_TABLE, qb);
+      dummyTableScan = op;
       aliasToRel.put(DUMMY_TABLE, op);
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 25a2731..909853e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -11884,8 +11884,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     opParseCtx.get(operator).setRowResolver(newRR);
   }
 
+  Path dummyPath;
   protected Table getDummyTable() throws SemanticException {
-    Path dummyPath = createDummyFile();
+    if (dummyPath == null) {
+      dummyPath = createDummyFile();
+    }
+
     Table desc = new Table(DUMMY_DATABASE, DUMMY_TABLE);
     desc.getTTable().getSd().setLocation(dummyPath.toString());
     desc.getTTable().getSd().getSerdeInfo().setSerializationLib(NullStructSerDe.class.getName());
diff --git a/ql/src/test/queries/clientpositive/union_literals.q b/ql/src/test/queries/clientpositive/union_literals.q
new file mode 100644
index 0000000..dcecfb3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_literals.q
@@ -0,0 +1,103 @@
+
+
+explain
+SELECT * FROM (
+    VALUES(1, '1'),
+    (2, 'orange'),
+    (5, 'yellow'),
+    (10, 'green'),
+    (11, 'blue'),
+    (12, 'indigo'),
+    (20, 'violet'))
+    AS Colors
+;
+
+explain
+SELECT * FROM (
+    VALUES(1, '1'),
+    (2, 'orange'),
+    (5, 'yellow'),
+    (10, 'green'),
+    (11, 'blue'),
+    (12, 'indigo'),
+    (20, 'violet'))
+    AS Colors
+union all
+  select 2,'2'
+union all
+  select 2,'2'
+;
+
+SELECT * FROM (
+    VALUES(1, '1'),
+    (2, 'orange'),
+    (5, 'yellow'))
+    as colors1
+UNION ALL
+SELECT * FROM (
+    VALUES(10, 'green'),
+    (11, 'blue'),
+    (12, 'indigo'),
+    (20, 'violet'))
+    AS Colors
+union all
+  select 2,'2'
+union all
+  select 2,'2';
+
+
+explain
+  select 1, 1
+union all
+  select 2, 2
+union all
+  select 3, 3
+;
+
+  select 1, 1
+union all
+  select 2, 2
+union all
+  select 3, 3
+;
+
+
+create table t (a string, b string);
+insert into t values(9,9);
+
+explain cbo
+  select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+;
+
+explain cbo
+  select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+;
+
+
+
+explain
+  select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+union all
+  select cast(3 as integer)
+;
+
+  select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+union all
+  select cast(3 as integer)
+;
+
+  select cast(a as integer) from t;
diff --git a/ql/src/test/results/clientpositive/llap/udf_likeall.q.out b/ql/src/test/results/clientpositive/llap/udf_likeall.q.out
index 7a089c9..3dbd6fd 100644
--- a/ql/src/test/results/clientpositive/llap/udf_likeall.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_likeall.q.out
@@ -71,8 +71,8 @@ POSTHOOK: type: CREATETABLE_AS_SELECT
 POSTHOOK: Input: _dummy_database@_dummy_table
 POSTHOOK: 
Output: database:default POSTHOOK: Output: default@like_all_table -POSTHOOK: Lineage: like_all_table.company EXPRESSION [] -POSTHOOK: Lineage: like_all_table.pat EXPRESSION [] +POSTHOOK: Lineage: like_all_table.company SCRIPT [] +POSTHOOK: Lineage: like_all_table.pat SCRIPT [] PREHOOK: query: select company from like_all_table where company like all ('%oo%','%go%') PREHOOK: type: QUERY PREHOOK: Input: default@like_all_table diff --git a/ql/src/test/results/clientpositive/llap/udf_likeany.q.out b/ql/src/test/results/clientpositive/llap/udf_likeany.q.out index fc36686..d5bb45a 100644 --- a/ql/src/test/results/clientpositive/llap/udf_likeany.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_likeany.q.out @@ -71,8 +71,8 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: database:default POSTHOOK: Output: default@like_any_table -POSTHOOK: Lineage: like_any_table.company EXPRESSION [] -POSTHOOK: Lineage: like_any_table.pat EXPRESSION [] +POSTHOOK: Lineage: like_any_table.company SCRIPT [] +POSTHOOK: Lineage: like_any_table.pat SCRIPT [] PREHOOK: query: select company from like_any_table where company like any ('%oo%','%in','fa%') PREHOOK: type: QUERY PREHOOK: Input: default@like_any_table diff --git a/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out b/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out index 4eda558..be80abb 100644 --- a/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_sort_array_by.q.out @@ -101,9 +101,9 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: database:default POSTHOOK: Output: default@sort_array_by_table -POSTHOOK: Lineage: sort_array_by_table.company EXPRESSION [] -POSTHOOK: Lineage: sort_array_by_table.country EXPRESSION [] -POSTHOOK: Lineage: sort_array_by_table.employee EXPRESSION [] +POSTHOOK: Lineage: sort_array_by_table.company SCRIPT [] +POSTHOOK: Lineage: sort_array_by_table.country SCRIPT [] +POSTHOOK: Lineage: sort_array_by_table.employee SCRIPT [] PREHOOK: query: select company,country,sort_array_by(employee,'salary') as single_field_sort from sort_array_by_table PREHOOK: type: QUERY PREHOOK: Input: default@sort_array_by_table diff --git a/ql/src/test/results/clientpositive/llap/union_literals.q.out b/ql/src/test/results/clientpositive/llap/union_literals.q.out new file mode 100644 index 0000000..107cddc --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/union_literals.q.out @@ -0,0 +1,454 @@ +PREHOOK: query: explain +SELECT * FROM ( + VALUES(1, '1'), + (2, 'orange'), + (5, 'yellow'), + (10, 'green'), + (11, 'blue'), + (12, 'indigo'), + (20, 'violet')) + AS Colors +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM ( + VALUES(1, '1'), + (2, 'orange'), + (5, 'yellow'), + (10, 'green'), + (11, 'blue'), + (12, 'indigo'), + (20, 'violet')) + AS Colors +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array(const struct(1,'1'),const struct(2,'orange'),const struct(5,'yellow'),const struct(10,'green'),const struct(11,'blue'),const struct(12,'indigo'),const struct(20,'violet')) (type: 
array<struct<col1:int,col2:string>>) + outputColumnNames: _col0 + UDTF Operator + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: explain +SELECT * FROM ( + VALUES(1, '1'), + (2, 'orange'), + (5, 'yellow'), + (10, 'green'), + (11, 'blue'), + (12, 'indigo'), + (20, 'violet')) + AS Colors +union all + select 2,'2' +union all + select 2,'2' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: explain +SELECT * FROM ( + VALUES(1, '1'), + (2, 'orange'), + (5, 'yellow'), + (10, 'green'), + (11, 'blue'), + (12, 'indigo'), + (20, 'violet')) + AS Colors +union all + select 2,'2' +union all + select 2,'2' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Union 2 (CONTAINS) + Map 3 <- Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(2,'2'),const struct(2,'2')) (type: array<struct<col1:int,col2:string>>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: array(const struct(1,'1'),const struct(2,'orange'),const struct(5,'yellow'),const struct(10,'green'),const struct(11,'blue'),const struct(12,'indigo'),const struct(20,'violet')) (type: array<struct<col1:int,col2:string>>) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * FROM ( + VALUES(1, '1'), + (2, 'orange'), + (5, 'yellow')) + as colors1 +UNION ALL +SELECT * FROM ( + VALUES(10, 'green'), + (11, 'blue'), + (12, 'indigo'), + (20, 'violet')) + AS Colors +union all + select 2,'2' +union all + select 2,'2' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + VALUES(1, '1'), + (2, 'orange'), + (5, 'yellow')) + as colors1 +UNION ALL +SELECT * FROM ( + VALUES(10, 'green'), + (11, 'blue'), + (12, 'indigo'), + (20, 'violet')) + AS Colors +union all + select 2,'2' +union all + select 2,'2' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +2 2 +2 2 +1 1 +2 orange +5 yellow +10 green +11 blue +12 indigo +20 violet +PREHOOK: query: explain + select 1, 1 +union all + select 2, 2 +union all + select 3, 3 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: explain + select 1, 1 +union all + select 2, 2 +union all + select 3, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Select Operator + expressions: array(const struct(1,1),const struct(2,2),const struct(3,3)) (type: array<struct<col1:int,col2:int>>) + outputColumnNames: _col0 + UDTF Operator + function name: inline + Select Operator + expressions: col1 (type: int), col2 (type: int) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: select 1, 1 +union all + select 2, 2 +union all + select 3, 3 +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 1, 1 +union all + select 2, 2 +union all + select 3, 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +1 1 +2 2 +3 3 +PREHOOK: query: create table t (a string, b string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (a string, b string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values(9,9) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values(9,9) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.a SCRIPT [] +POSTHOOK: Lineage: t.b SCRIPT [] +PREHOOK: query: explain cbo + select cast(a as integer) from t +union all + select cast(1 as integer) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: explain cbo + select cast(a as integer) from t +union all + select cast(1 as integer) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@t +#### A masked pattern was here #### +CBO PLAN: +HiveUnion(all=[true]) + HiveProject(a=[CAST($0):INTEGER]) + HiveTableScan(table=[[default, t]], table:alias=[t]) + HiveProject(_o__c0=[1]) + HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table]) + +PREHOOK: query: explain cbo + select 
cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+ select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+CBO PLAN:
+HiveUnion(all=[true])
+  HiveProject(a=[CAST($0):INTEGER])
+    HiveTableScan(table=[[default, t]], table:alias=[t])
+  HiveProject(EXPR$0=[$0])
+    HiveTableFunctionScan(invocation=[inline(ARRAY(ROW(1), ROW(2)))], rowType=[RecordType(INTEGER EXPR$0)])
+      HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+
+PREHOOK: query: explain
+ select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+union all
+  select cast(3 as integer)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+ select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+union all
+  select cast(3 as integer)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Union 2 (CONTAINS)
+        Map 3 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t
+                  Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: UDFToInteger(a) (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: _dummy_table
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: array(const struct(1),const struct(2),const struct(3)) (type: array<struct<col1:int>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                    UDTF Operator
+                      Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                      function name: inline
+                      Select Operator
+                        expressions: col1 (type: int)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Union 2 
+            Vertex: Union 2
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+union all
+  select cast(3 as integer)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select cast(a as integer) from t
+union all
+  select cast(1 as integer)
+union all
+  select cast(2 as integer)
+union all
+  select cast(3 as integer)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+9
+1
+2
+3
+PREHOOK: query: select cast(a as integer) from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select cast(a as integer) from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+9
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
index 15109b9..8ad8ddd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
@@ -111,7 +111,7 @@ POSTHOOK: Input: default@table_19
 POSTHOOK: Input: default@table_6
 #### A masked pattern was here ####
 418.9
-Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_0]] in Stage 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[55][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: SELECT
    t1.t_id,
    null
@@ -813,7 +813,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[56][tables = [$hdt$_0]] in Stage 'Reducer 3' is a cross product
+Warning: Map Join MAPJOIN[52][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT
    t1.t_id,
@@ -853,47 +853,124 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Union 2 (CONTAINS)
-        Map 4 <- Union 2 (CONTAINS)
-        Reducer 3 <- Map 5 (XPROD_EDGE), Reducer 6 (BROADCAST_EDGE), Union 2 (XPROD_EDGE)
-        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+        Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: _dummy_table
-                  Row Limit Per Split: 1
-                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  alias: t2
+                  filterExpr: (t_id is not null and f_id is not null) (type: boolean)
+                  Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+                  TableScan Vectorization:
+                      native: true
+                      vectorizationSchemaColumns: [0:t_id:string, 1:f_id:string, 2:type:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 4:ROW__IS__DELETED:boolean]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string))
+                    predicate: (t_id is not null and f_id is not null) (type: boolean)
+                    Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
-                        Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: llap
-            LLAP IO: no inputs
+                      expressions: t_id (type: string), f_id (type: string)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumnNums: [0, 1]
+                      Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 
+                          1 
+                        Map Join Vectorization:
+                            bigTableRetainColumnNums: [0, 1]
+                            bigTableValueColumns: 0:string, 1:string
+                            className: VectorMapJoinInnerBigOnlyMultiKeyOperator
+                            native: true
+                            nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                            nonOuterSmallTableKeyMapping: []
+                            projectedOutput: 0:string, 1:string
+                            hashTableImplementationType: OPTIMIZED
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 10 Data size: 4250 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Inner Join 0 to 1
+                          keys:
+                            0 _col0 (type: string), _col1 (type: string)
+                            1 _col0 (type: string), _col1 (type: string)
+                          Map Join Vectorization:
+                              bigTableKeyColumns: 0:string, 1:string
+                              bigTableRetainColumnNums: []
+                              className: VectorMapJoinInnerBigOnlyMultiKeyOperator
+                              native: true
+                              nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+                              nonOuterSmallTableKeyMapping: [0]
+                              projectedOutput: 0:string
+                              hashTableImplementationType: OPTIMIZED
+                          outputColumnNames: _col3
+                          input vertices:
+                            1 Map 3
+                          Statistics: Num rows: 11 Data size: 4675 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col3 (type: string), null (type: void)
+                            outputColumnNames: _col0, _col1
+                            Select Vectorization:
+                                className: VectorSelectOperator
+                                native: true
+                                projectedOutputColumnNums: [0, 5]
+                                selectExpressions: ConstantVectorExpression(val null) -> 5:void
+                            Statistics: Num rows: 11 Data size: 4675 Basic stats: COMPLETE Column stats: NONE
+                            File Output Operator
+                              compressed: false
+                              File Sink Vectorization:
+                                  className: VectorFileSinkOperator
+                                  native: false
+                              Statistics: Num rows: 11 Data size: 4675 Basic stats: COMPLETE Column stats: NONE
+                              table:
+                                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
             Map Vectorization:
-                enabled: false
+                enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                enabledConditionsNotMet: Could not enable vectorization due to partition column names size 1 is greater than the number of table column names size 0 IS false
-                inputFileFormats: org.apache.hadoop.hive.ql.io.NullRowsInputFormat
-        Map 4 
+                inputFormatFeatureSupport: [DECIMAL_64]
+                featureSupportInUse: [DECIMAL_64]
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1]
+                    dataColumns: t_id:string, f_id:string, type:string
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [void]
+        Map 2 
             Map Operator Tree:
                 TableScan
                   alias: _dummy_table
                   Row Limit Per Split: 1
                   Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: array(const struct('TEST-1'),const struct('TEST-2')) (type: array<struct<col1:string>>)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+                    UDTF Operator
+                      Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+                      function name: inline
                       Reduce Output Operator
                         null sort order: 
                         sort order: 
-                        Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
             Map Vectorization:
@@ -901,11 +978,11 @@ STAGE PLANS:
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                 enabledConditionsNotMet: Could not enable vectorization due to partition column names size 1 is greater than the number of table column names size 0 IS false
                 inputFileFormats: org.apache.hadoop.hive.ql.io.NullRowsInputFormat
-        Map 5 
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  filterExpr: (((type = 'TEST') and t_id is not null and f_id is not null) or (t_id is not null and f_id is not null)) (type: boolean)
+                  filterExpr: ((type = 'TEST') and t_id is not null and f_id is not null) (type: boolean)
                   Statistics: Num rows: 10 Data size: 5520 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
@@ -936,31 +1013,6 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    Filter Vectorization:
-                        className: VectorFilterOperator
-                        native: true
-                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:string), SelectColumnIsNotNull(col 1:string))
-                    predicate: (t_id is not null and f_id is not null) (type: boolean)
-                    Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: t_id (type: string), f_id (type: string)
-                      outputColumnNames: _col0, _col1
-                      Select Vectorization:
-                          className: VectorSelectOperator
-                          native: true
-                          projectedOutputColumnNums: [0, 1]
-                      Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        null sort order: 
-                        sort order: 
-                        Reduce Sink Vectorization:
-                            className: VectorReduceSinkEmptyKeyOperator
-                            native: true
-                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                            valueColumns: 0:string, 1:string
-                        Statistics: Num rows: 10 Data size: 3680 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col1 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -978,77 +1030,6 @@ STAGE PLANS:
                     dataColumns: t_id:string, f_id:string, type:string
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 20 Data size: 7540 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: string), _col1 (type: string)
-                    1 _col0 (type: string), _col1 (type: string)
-                  outputColumnNames: _col3
-                  input vertices:
-                    1 Reducer 6
-                  Statistics: Num rows: 22 Data size: 8294 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col3 (type: string), null (type: void)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 22 Data size: 8294 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 22 Data size: 8294 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            MergeJoin Vectorization:
-                enabled: false
-                enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
-        Reducer 6 
-            Execution mode: vectorized, llap
-            Reduce Vectorization:
-                enabled: true
-                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                reduceColumnNullOrder: zz
-                reduceColumnSortOrder: ++
-                allNative: true
-                usesVectorUDFAdaptor: false
-                vectorized: true
-                rowBatchContext:
-                    dataColumnCount: 2
-                    dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:string
-                    partitionColumnCount: 0
-                    scratchColumnTypeNames: []
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
-                outputColumnNames: _col0, _col1
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [0, 1]
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  null sort order: zz
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Reduce Sink Vectorization:
-                      className: VectorReduceSinkMultiKeyOperator
-                      keyColumns: 0:string, 1:string
-                      native: true
-                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  Statistics: Num rows: 5 Data size: 2760 Basic stats: COMPLETE Column stats: NONE
-        Union 2 
-            Vertex: Union 2
 
   Stage: Stage-0
     Fetch Operator