[ https://issues.apache.org/jira/browse/HIVE-22808?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Krisztian Kasa updated HIVE-22808: ---------------------------------- Status: Open (was: Patch Available) > HiveRelFieldTrimmer does not handle HiveTableFunctionScan > --------------------------------------------------------- > > Key: HIVE-22808 > URL: https://issues.apache.org/jira/browse/HIVE-22808 > Project: Hive > Issue Type: Bug > Components: Query Planning > Reporter: Krisztian Kasa > Assignee: Krisztian Kasa > Priority: Major > Attachments: HIVE-22808.1.patch, HIVE-22808.2.patch, > HIVE-22808.2.patch, HIVE-22808.3.patch > > > *Repro* > {code:java} > CREATE TABLE table_16 ( > timestamp_col_19 timestamp, > timestamp_col_29 timestamp, > int_col_27 int, > int_col_39 int, > boolean_col_18 boolean, > varchar0045_col_23 varchar(45) > ); > CREATE TABLE table_7 ( > int_col_10 int, > bigint_col_3 bigint > ); > CREATE TABLE table_10 ( > boolean_col_8 boolean, > boolean_col_16 boolean, > timestamp_col_5 timestamp, > timestamp_col_15 timestamp, > timestamp_col_30 timestamp, > decimal3825_col_26 decimal(38, 25), > smallint_col_9 smallint, > int_col_18 int > ); > explain cbo > SELECT > DISTINCT COALESCE(a4.timestamp_col_15, IF(a4.boolean_col_16, > a4.timestamp_col_30, a4.timestamp_col_5)) AS timestamp_col > FROM table_7 a3 > RIGHT JOIN table_10 a4 > WHERE (a3.bigint_col_3) >= (a4.int_col_18) > INTERSECT ALL > SELECT > COALESCE(LEAST( > COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS > TIMESTAMP)), > COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS > TIMESTAMP)) > ), > GREATEST(COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS > TIMESTAMP)), > COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS > TIMESTAMP))) > ) AS timestamp_col > FROM table_16 a1 > GROUP BY COALESCE(LEAST( > COALESCE(a1.timestamp_col_19, CAST('2010-03-29 00:00:00' AS > TIMESTAMP)), > COALESCE(a1.timestamp_col_29, CAST('2014-08-16 00:00:00' AS > TIMESTAMP)) > ), > GREATEST( > COALESCE(a1.timestamp_col_19, CAST('2013-07-01 00:00:00' AS > TIMESTAMP)), > COALESCE(a1.timestamp_col_29, CAST('2028-06-18 00:00:00' AS > TIMESTAMP))) > ); > {code} > CBO Plan contains unnecessary columns or all columns from a table in > projections like: > {code:java} > HiveProject(int_col_10=[$0], bigint_col_3=[$1], > BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], > CAST=[CAST($4):RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)]) > {code} > *Cause* > The plan contains a HiveTableFunctionScan operator: > {code:java} > HiveTableFunctionScan(invocation=[replicate_rows($0, $1)], > rowType=[RecordType(BIGINT $f0, TIMESTAMP(9) $f1)]) > {code} > HiveTableFunctionScan is not handled by HiveRelFieldTrimmer nor > RelFieldTrimmer which suppose to remove unused columns in the > CalcitePlanner.applyPreJoinOrderingTransforms(...) phase. The whole subtree > rooted from HiveTableFunctionScan is ignored. > Whole plan: > {code:java} > CBO PLAN: > HiveProject($f0=[$1]) > HiveTableFunctionScan(invocation=[replicate_rows($0, $1)], > rowType=[RecordType(BIGINT $f0, TIMESTAMP(9) $f1)]) > HiveProject($f0=[$2], $f1=[$0]) > HiveFilter(condition=[=($1, 2)]) > HiveAggregate(group=[{0}], agg#0=[count($1)], agg#1=[min($1)]) > HiveProject($f0=[$0], $f1=[$1]) > HiveUnion(all=[true]) > HiveProject($f0=[$0], $f1=[$1]) > HiveAggregate(group=[{0}], agg#0=[count()]) > HiveProject($f0=[$0]) > HiveAggregate(group=[{0}]) > HiveProject($f0=[CASE(IS NOT NULL($7), $7, if($5, $8, > $6))]) > HiveJoin(condition=[>=($1, $13)], joinType=[inner], > algorithm=[none], cost=[not available]) > HiveProject(int_col_10=[$0], bigint_col_3=[$1], > BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], > CAST=[CAST($4):RecordType(BIGINT writeid, INTEGER bucketid, BIGINT rowid)]) > HiveFilter(condition=[IS NOT NULL($1)]) > HiveTableScan(table=[[default, table_7]], > table:alias=[a3]) > HiveProject(boolean_col_16=[$0], > timestamp_col_5=[$1], timestamp_col_15=[$2], timestamp_col_30=[$3], > int_col_18=[$4], BLOCK__OFFSET__INSIDE__FILE=[$5], INPUT__FILE__NAME=[$6], > ROW__ID=[$7], CAST=[CAST($4):BIGINT]) > HiveFilter(condition=[IS NOT > NULL(CAST($4):BIGINT)]) > HiveTableScan(table=[[default, table_10]], > table:alias=[a4]) > HiveProject($f0=[$0], $f1=[$1]) > HiveAggregate(group=[{0}], agg#0=[count()]) > HiveProject($f0=[$0]) > HiveAggregate(group=[{0}]) > HiveProject($f0=[CASE(IS NOT NULL(least(CASE(IS NOT > NULL($0), $0, 2010-03-29 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, > 2014-08-16 00:00:00:TIMESTAMP(9)))), least(CASE(IS NOT NULL($0), $0, > 2010-03-29 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2014-08-16 > 00:00:00:TIMESTAMP(9))), greatest(CASE(IS NOT NULL($0), $0, 2013-07-01 > 00:00:00:TIMESTAMP(9)), CASE(IS NOT NULL($1), $1, 2028-06-18 > 00:00:00:TIMESTAMP(9))))]) > HiveTableScan(table=[[default, table_16]], > table:alias=[a1]) > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)