http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test index 067698a..617eb3a 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-propagation.test @@ -19,6 +19,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional.widetable_250_cols a] partitions=1/1 files=1 size=28.69KB predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 = 110 + row-size=1.21KB cardinality=unavailable ==== # Test multiple forward propagation select * from functional.widetable_250_cols a @@ -30,6 +31,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional.widetable_250_cols a] partitions=1/1 files=1 size=28.69KB predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 = -385 + row-size=1.21KB cardinality=unavailable ==== # Test multiple forward propagation select * from functional.widetable_250_cols a @@ -41,6 +43,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional.widetable_250_cols a] partitions=1/1 files=1 size=28.69KB predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 = -495 + row-size=1.21KB cardinality=unavailable ==== # Test multiple forward propagation, and a reversed propagation # (which fails as we can't rewrite 55 = a.int_col4 / 10) @@ -53,6 +56,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional.widetable_250_cols a] partitions=1/1 files=1 size=28.69KB predicates: a.int_col1 = 10, a.int_col2 = 11, a.int_col3 = 55, a.int_col4 / 10 = 55 + row-size=1.21KB cardinality=unavailable ==== # Another impossibility (a.int_col3 = a.int_col2 * 5 = a.int_col2 * -7) select * from functional.widetable_250_cols a @@ -72,6 +76,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 00:EMPTYSET ==== @@ -84,6 +89,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 00:EMPTYSET ==== @@ -96,6 +102,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [functional.alltypes a] partitions=24/24 files=24 size=478.45KB predicates: CAST(a.int_col AS STRING) = 'abc', CAST(int_col AS STRING) > 'xyz' + row-size=89B cardinality=231 ==== # Implicit casts are considered for propagation select * from functional.alltypes a @@ -115,17 +122,21 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 02:SELECT | predicates: int_col = 12, int_col > 1 +| row-size=4B cardinality=0 | 01:AGGREGATE [FINALIZE] | group by: int_col | limit: 10 +| row-size=4B cardinality=10 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB predicates: int_col = 10 + row-size=4B cardinality=730 ==== # Many constant predicates removed select count(*) from @@ -137,10 +148,12 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB predicates: int_col = 10 + row-size=4B cardinality=730 ==== # All true predicates elided select count(*) from @@ -151,9 +164,11 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # Many redundant / duplicate predicates select count(*) from @@ -204,10 +219,12 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB predicates: int_col = 10 + row-size=4B cardinality=730 ==== # Non-trivial expr substitution (const false) select count(*) from @@ -219,6 +236,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 00:EMPTYSET ==== @@ -233,10 +251,12 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB predicates: int_col = 10 + row-size=4B cardinality=730 ==== # Non-trivial expr substitution (non-constant) select count(*) from @@ -249,10 +269,12 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB predicates: int_col = 10, TRUE OR 10 + random() * functional.alltypes.tinyint_col = 100 + row-size=5B cardinality=231 ==== # Collection predicates within HDFS scan nodes get optimized select 1 @@ -262,26 +284,35 @@ where l_partkey < l_suppkey and c.c_nationkey = 10 and o_orderkey = 4 and l_supp PLAN-ROOT SINK | 01:SUBPLAN +| row-size=50B cardinality=600.00K | |--08:NESTED LOOP JOIN [CROSS JOIN] +| | row-size=50B cardinality=100 | | | |--02:SINGULAR ROW SRC +| | row-size=14B cardinality=1 | | | 04:SUBPLAN +| | row-size=36B cardinality=100 | | | |--07:NESTED LOOP JOIN [CROSS JOIN] +| | | row-size=36B cardinality=10 | | | | | |--05:SINGULAR ROW SRC +| | | row-size=20B cardinality=1 | | | | | 06:UNNEST [o.o_lineitems] +| | row-size=0B cardinality=10 | | | 03:UNNEST [c.c_orders o] +| row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=292.36MB + partitions=1/1 files=4 size=288.99MB predicates: c.c_nationkey = 10, !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderkey = 4 predicates on o_lineitems: l_partkey < 10, l_suppkey = 10 + row-size=14B cardinality=6.00K ==== # Nested predicates also get propagated select 1 @@ -292,27 +323,36 @@ where l_partkey < l_suppkey and c.c_nationkey = 10 and o_orderkey = o_shippriori PLAN-ROOT SINK | 01:SUBPLAN +| row-size=54B cardinality=600.00K | |--08:NESTED LOOP JOIN [INNER JOIN] | | join predicates: o_shippriority = c_nationkey +| | row-size=54B cardinality=100 | | | |--02:SINGULAR ROW SRC +| | row-size=14B cardinality=1 | | | 04:SUBPLAN +| | row-size=40B cardinality=100 | | | |--07:NESTED LOOP JOIN [CROSS JOIN] +| | | row-size=40B cardinality=10 | | | | | |--05:SINGULAR ROW SRC +| | | row-size=24B cardinality=1 | | | | | 06:UNNEST [o.o_lineitems] +| | row-size=0B cardinality=10 | | | 03:UNNEST [c.c_orders o] +| row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=292.36MB + partitions=1/1 files=4 size=288.99MB predicates: c.c_nationkey = 10, !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o.o_orderkey = 10, o.o_shippriority = 10 predicates on o_lineitems: l_partkey < 10, l_suppkey = 10 + row-size=14B cardinality=6.00K ==== # Using IS NULL select count(*) from functional.alltypes where id = 10 and bool_col is null @@ -322,6 +362,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 00:EMPTYSET ==== @@ -332,6 +373,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 00:EMPTYSET ==== @@ -343,10 +385,12 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB predicates: bool_col IS NULL, functional.alltypes.id IS NULL, id > 0, functional.alltypes.bool_col > 0, id = bool_col + row-size=5B cardinality=730 ==== # = NULL and > select count(*) from functional.alltypes where id > 0 and bool_col = null @@ -356,6 +400,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 00:EMPTYSET ====
http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/constant.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant.test index 3c03cd5..5cfa415 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/constant.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant.test @@ -4,9 +4,11 @@ PLAN-ROOT SINK | 00:UNION constant-operands=1 + row-size=2B cardinality=1 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 00:UNION constant-operands=1 + row-size=2B cardinality=1 ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test index ce4dbd7..3368cd7 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test @@ -10,6 +10,7 @@ PLAN-ROOT SINK 00:SCAN DATA SOURCE [functional.alltypes_datasource] data source predicates: tinyint_col < 256 predicates: float_col != 0, CAST(int_col AS BIGINT) < 10 + row-size=112B cardinality=500 ==== # The first four predicates are in a form that can be offered to the data source # and the first and third will be accepted (it accepts every other conjunct). @@ -29,6 +30,7 @@ PLAN-ROOT SINK 00:SCAN DATA SOURCE [functional.alltypes_datasource] data source predicates: int_col < 10, string_col != 'Foo' predicates: double_col < 5, NOT bool_col = TRUE, NOT double_col = 5.0, string_col != 'Bar' + row-size=112B cardinality=500 ==== # The 3rd predicate is not in a form that can be offered to the data source so # the 4th will be offered and accepted instead. @@ -43,6 +45,7 @@ PLAN-ROOT SINK 00:SCAN DATA SOURCE [functional.alltypes_datasource] data source predicates: int_col < 10, bool_col != FALSE predicates: double_col > 5, string_col IN ('Foo', 'Bar') + row-size=112B cardinality=500 ==== # Tests that all predicates from the On-clause are applied (IMPALA-805) # and that slot equivalences are enforced at lowest possible plan node @@ -60,12 +63,15 @@ PLAN-ROOT SINK | 02:HASH JOIN [INNER JOIN] | hash predicates: a.id = b.id +| row-size=35B cardinality=500 | |--01:SCAN DATA SOURCE [functional.alltypes_datasource b] |--predicates: b.id = b.int_col, b.id = b.bigint_col +| row-size=0B cardinality=500 | 00:SCAN DATA SOURCE [functional.alltypes_datasource a] predicates: a.id = a.int_col, a.id = a.tinyint_col, a.int_col = a.bigint_col, a.tinyint_col = a.smallint_col + row-size=0B cardinality=500 ==== # Tests that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM all can be offered to the # data source. @@ -82,6 +88,7 @@ PLAN-ROOT SINK 00:SCAN DATA SOURCE [functional.alltypes_datasource] data source predicates: id IS NOT DISTINCT FROM 1, tinyint_col IS DISTINCT FROM 2, int_col IS NOT DISTINCT FROM 4 predicates: bigint_col IS NOT DISTINCT FROM 5, bool_col IS NOT DISTINCT FROM TRUE, smallint_col IS DISTINCT FROM 3 + row-size=112B cardinality=500 ==== # EmptySet datasource select * from functional.alltypes_datasource @@ -96,7 +103,6 @@ PLAN-ROOT SINK | 00:EMPTYSET ==== ----- QUERY # IMPALA-5602: If a query contains predicates that are all pushed to the datasource and # there is a limit, then the query should not incorrectly run with 'small query' # optimization. @@ -110,4 +116,5 @@ PLAN-ROOT SINK 00:SCAN DATA SOURCE [functional.alltypes_datasource] data source predicates: id = 1 limit: 15 + row-size=112B cardinality=15 ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test index 74c7e5f..ce495b3 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/ddl.test @@ -5,12 +5,14 @@ WRITE TO HDFS [default.t, OVERWRITE=false] | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false] | partitions=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ==== create table t as select distinct a.int_col, a.string_col from functional.alltypes a @@ -22,41 +24,54 @@ WRITE TO HDFS [default.t, OVERWRITE=false] | 03:AGGREGATE [FINALIZE] | group by: a.int_col, a.string_col +| row-size=17B cardinality=13 | 02:HASH JOIN [INNER JOIN] | hash predicates: a.id = b.id | runtime filters: RF000 <- b.id +| row-size=25B cardinality=13 | |--01:SCAN HDFS [functional.alltypessmall b] +| partition predicates: b.month = 2 | partitions=1/4 files=1 size=1.58KB +| row-size=4B cardinality=25 | 00:SCAN HDFS [functional.alltypes a] + partition predicates: a.year = 2009 partitions=12/24 files=12 size=238.68KB runtime filters: RF000 -> a.id + row-size=21B cardinality=3.65K ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false] | partitions=1 | 06:AGGREGATE [FINALIZE] | group by: a.int_col, a.string_col +| row-size=17B cardinality=13 | 05:EXCHANGE [HASH(a.int_col,a.string_col)] | 03:AGGREGATE [STREAMING] | group by: a.int_col, a.string_col +| row-size=17B cardinality=13 | 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: a.id = b.id | runtime filters: RF000 <- b.id +| row-size=25B cardinality=13 | |--04:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [functional.alltypessmall b] +| partition predicates: b.month = 2 | partitions=1/4 files=1 size=1.58KB +| row-size=4B cardinality=25 | 00:SCAN HDFS [functional.alltypes a] + partition predicates: a.year = 2009 partitions=12/24 files=12 size=238.68KB runtime filters: RF000 -> a.id + row-size=21B cardinality=3.65K ==== # CTAS with a view that has a limit clause (IMPALA-1411) create table t as @@ -68,6 +83,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false] 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB limit: 1 + row-size=0B cardinality=1 ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false] | partitions=1 @@ -78,6 +94,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false] 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB limit: 1 + row-size=0B cardinality=1 ==== # CTAS with multiple nested inline views that have a limit clause (IMPALA-1411) create table t as @@ -91,6 +108,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false] 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B limit: 1 + row-size=4B cardinality=1 ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false] | partitions=1 @@ -101,6 +119,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false] 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B limit: 1 + row-size=4B cardinality=1 ==== # CTAS with a select statement that has a limit and offset clause (IMPALA-1411) @@ -112,9 +131,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false] | 01:TOP-N [LIMIT=1 OFFSET=5] | order by: id ASC +| row-size=89B cardinality=1 | 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false] | partitions=1 @@ -126,9 +147,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false] | 01:TOP-N [LIMIT=6] | order by: id ASC +| row-size=89B cardinality=6 | 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 ==== # CTAS with an inline view that has a limit and offset clause (IMPALA-1411) create table t as @@ -140,9 +163,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false] | 01:TOP-N [LIMIT=2 OFFSET=5] | order by: id ASC +| row-size=8B cardinality=2 | 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B + row-size=8B cardinality=8 ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false] | partitions=1 @@ -154,9 +179,11 @@ WRITE TO HDFS [default.t, OVERWRITE=false] | 01:TOP-N [LIMIT=7] | order by: id ASC +| row-size=8B cardinality=7 | 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B + row-size=8B cardinality=8 ==== # CTAS with sort columns create table t sort by (int_col, bool_col) as @@ -167,18 +194,22 @@ WRITE TO HDFS [default.t, OVERWRITE=false] | 01:SORT | order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| row-size=89B cardinality=7.30K | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false] | partitions=1 | 01:SORT | order by: int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| row-size=89B cardinality=7.30K | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ==== # CTAS with partitions and sort columns create table t partitioned by (year, month) sort by (int_col, bool_col) as @@ -189,20 +220,24 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] | 01:SORT | order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| row-size=89B cardinality=7.30K | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(year,month)] | partitions=24 | 02:SORT | order by: year ASC NULLS LAST, month ASC NULLS LAST, int_col ASC NULLS LAST, bool_col ASC NULLS LAST +| row-size=89B cardinality=7.30K | 01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)] | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ==== # IMPALA-4167: if no (no)shuffle hint is given for CTAS into partitioned HDFS table, then # Impala is free to decide whether to add an exchange node or not. In this example, the @@ -220,6 +255,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(functional.alltypes.y | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ==== # IMPALA-4167: non-shuffled CTAS into partitioned table has no exchange node before write. # Note that plan hint tests for CTAS are minimal by design, as this logic is covered well @@ -234,6 +270,7 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(functional.alltypes.y | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ==== # CTAS with more complex select query create table t partitioned by (c_nationkey) sort by (c_custkey) as @@ -247,52 +284,63 @@ WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)] | 04:SORT | order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST +| row-size=18B cardinality=228.68K | 03:AGGREGATE [FINALIZE] | output: max(o_totalprice) | group by: c_custkey, c_nationkey +| row-size=18B cardinality=228.68K | 02:HASH JOIN [INNER JOIN] | hash predicates: o_custkey = c_custkey | runtime filters: RF000 <- c_custkey +| row-size=26B cardinality=228.68K | |--01:SCAN HDFS [tpch.customer] | partitions=1/1 files=1 size=23.08MB | predicates: c_nationkey < 10 +| row-size=10B cardinality=15.00K | 00:SCAN HDFS [tpch.orders] partitions=1/1 files=1 size=162.56MB runtime filters: RF000 -> o_custkey + row-size=16B cardinality=1.50M ---- DISTRIBUTEDPLAN WRITE TO HDFS [default.t, OVERWRITE=false, PARTITION-KEYS=(c_nationkey)] | partitions=25 | 08:SORT | order by: c_nationkey ASC NULLS LAST, c_custkey ASC NULLS LAST +| row-size=18B cardinality=228.68K | 07:EXCHANGE [HASH(c_nationkey)] | 06:AGGREGATE [FINALIZE] | output: max:merge(o_totalprice) | group by: c_custkey, c_nationkey +| row-size=18B cardinality=228.68K | 05:EXCHANGE [HASH(c_custkey,c_nationkey)] | 03:AGGREGATE [STREAMING] | output: max(o_totalprice) | group by: c_custkey, c_nationkey +| row-size=18B cardinality=228.68K | 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: o_custkey = c_custkey | runtime filters: RF000 <- c_custkey +| row-size=26B cardinality=228.68K | |--04:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch.customer] | partitions=1/1 files=1 size=23.08MB | predicates: c_nationkey < 10 +| row-size=10B cardinality=15.00K | 00:SCAN HDFS [tpch.orders] partitions=1/1 files=1 size=162.56MB runtime filters: RF000 -> o_custkey + row-size=16B cardinality=1.50M ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test index 8735f97..3ff9e66 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-broadcast.test @@ -9,15 +9,18 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: x.a = y.a | runtime filters: RF000 <- y.a +| row-size=48B cardinality=unavailable | |--03:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [functional.tinytable y] | partitions=1/1 files=1 size=38B +| row-size=24B cardinality=unavailable | 00:SCAN HDFS [functional.tinytable x] partitions=1/1 files=1 size=38B runtime filters: RF000 -> x.a + row-size=24B cardinality=unavailable ==== # Left join input has an unknown cardinality. select /* +straight_join */ * from @@ -30,15 +33,18 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: x.a = y.string_col | runtime filters: RF000 <- y.string_col +| row-size=113B cardinality=unavailable | |--03:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [functional.alltypes y] -| partitions=24/24 files=24 size=469.90KB +| partitions=24/24 files=24 size=478.45KB +| row-size=89B cardinality=7.30K | 00:SCAN HDFS [functional.tinytable x] partitions=1/1 files=1 size=38B runtime filters: RF000 -> x.a + row-size=24B cardinality=unavailable ==== # Right join input has an unknown cardinality. select /* +straight_join */ * from @@ -51,13 +57,16 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: x.string_col = y.a | runtime filters: RF000 <- y.a +| row-size=113B cardinality=7.30K | |--03:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [functional.tinytable y] | partitions=1/1 files=1 size=38B +| row-size=24B cardinality=unavailable | 00:SCAN HDFS [functional.alltypes x] - partitions=24/24 files=24 size=469.90KB + partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> x.string_col + row-size=89B cardinality=7.30K ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test index 59e60c9..7065371 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/default-join-distr-mode-shuffle.test @@ -9,17 +9,20 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: x.a = y.a | runtime filters: RF000 <- y.a +| row-size=48B cardinality=unavailable | |--04:EXCHANGE [HASH(y.a)] | | | 01:SCAN HDFS [functional.tinytable y] | partitions=1/1 files=1 size=38B +| row-size=24B cardinality=unavailable | 03:EXCHANGE [HASH(x.a)] | 00:SCAN HDFS [functional.tinytable x] partitions=1/1 files=1 size=38B runtime filters: RF000 -> x.a + row-size=24B cardinality=unavailable ==== # Left join input has an unknown cardinality. select /* +straight_join */ * from @@ -32,17 +35,20 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: x.a = y.string_col | runtime filters: RF000 <- y.string_col +| row-size=113B cardinality=unavailable | |--04:EXCHANGE [HASH(y.string_col)] | | | 01:SCAN HDFS [functional.alltypes y] -| partitions=24/24 files=24 size=469.90KB +| partitions=24/24 files=24 size=478.45KB +| row-size=89B cardinality=7.30K | 03:EXCHANGE [HASH(x.a)] | 00:SCAN HDFS [functional.tinytable x] partitions=1/1 files=1 size=38B runtime filters: RF000 -> x.a + row-size=24B cardinality=unavailable ==== # Right join input has an unknown cardinality. select /* +straight_join */ * from @@ -55,15 +61,18 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN, PARTITIONED] | hash predicates: x.string_col = y.a | runtime filters: RF000 <- y.a +| row-size=113B cardinality=7.30K | |--04:EXCHANGE [HASH(y.a)] | | | 01:SCAN HDFS [functional.tinytable y] | partitions=1/1 files=1 size=38B +| row-size=24B cardinality=unavailable | 03:EXCHANGE [HASH(x.string_col)] | 00:SCAN HDFS [functional.alltypes x] - partitions=24/24 files=24 size=469.90KB + partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> x.string_col + row-size=89B cardinality=7.30K ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test index 3987410..3d2702b 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/disable-codegen.test @@ -9,14 +9,17 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | output: count:merge(*) +| row-size=8B cardinality=1 | 02:EXCHANGE [UNPARTITIONED] | 01:AGGREGATE | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # Rows per node is > 3000: codegen should be enabled. select count(*) from functional.alltypesagg @@ -28,14 +31,17 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | output: count:merge(*) +| row-size=8B cardinality=1 | 02:EXCHANGE [UNPARTITIONED] | 01:AGGREGATE | output: count(*) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=0B cardinality=11.00K ==== # No stats on functional_parquet: codegen should be disabled. select count(*) from functional_parquet.alltypes @@ -49,14 +55,17 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | output: count:merge(*) +| row-size=8B cardinality=1 | 02:EXCHANGE [UNPARTITIONED] | 01:AGGREGATE | output: sum_init_zero(functional_parquet.alltypes.parquet-stats: num_rows) +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=24/24 files=24 size=188.29KB + partitions=24/24 files=24 size=189.28KB + row-size=8B cardinality=unavailable ==== # > 3000 rows returned to coordinator: codegen should be enabled select * from functional_parquet.alltypes @@ -71,7 +80,8 @@ PLAN-ROOT SINK 01:EXCHANGE [UNPARTITIONED] | 00:SCAN HDFS [functional_parquet.alltypes] - partitions=24/24 files=24 size=188.29KB + partitions=24/24 files=24 size=189.28KB + row-size=80B cardinality=unavailable ==== # Optimisation is enabled for join producing < 3000 rows select count(*) @@ -86,24 +96,29 @@ PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(*) +| row-size=8B cardinality=1 | 05:EXCHANGE [UNPARTITIONED] | 03:AGGREGATE | output: count(*) +| row-size=8B cardinality=1 | 02:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: t1.id = t2.id | runtime filters: RF000 <- t2.id +| row-size=8B cardinality=8 | |--04:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [functional.alltypestiny t2] | partitions=4/4 files=4 size=460B +| row-size=4B cardinality=8 | 00:SCAN HDFS [functional.alltypes t1] partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> t1.id + row-size=4B cardinality=7.30K ==== # Optimisation is disabled by cross join producing > 3000 rows select count(*) from functional.alltypes t1, functional.alltypes t2 @@ -115,21 +130,26 @@ PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(*) +| row-size=8B cardinality=1 | 05:EXCHANGE [UNPARTITIONED] | 03:AGGREGATE | output: count(*) +| row-size=8B cardinality=1 | 02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=0B cardinality=53.29M | |--04:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [functional.alltypes t2] | partitions=24/24 files=24 size=478.45KB +| row-size=0B cardinality=7.30K | 00:SCAN HDFS [functional.alltypes t1] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # Optimisation is enabled for union producing < 3000 rows select count(*) from ( @@ -145,20 +165,25 @@ PLAN-ROOT SINK | 05:AGGREGATE [FINALIZE] | output: count:merge(*) +| row-size=8B cardinality=1 | 04:EXCHANGE [UNPARTITIONED] | 03:AGGREGATE | output: count(*) +| row-size=8B cardinality=1 | 00:UNION | pass-through-operands: all +| row-size=0B cardinality=7.31K | |--02:SCAN HDFS [functional.alltypestiny] | partitions=4/4 files=4 size=460B +| row-size=0B cardinality=8 | 01:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # Optimisation is disabled by union producing > 3000 rows select count(*) from ( @@ -173,20 +198,25 @@ PLAN-ROOT SINK | 05:AGGREGATE [FINALIZE] | output: count:merge(*) +| row-size=8B cardinality=1 | 04:EXCHANGE [UNPARTITIONED] | 03:AGGREGATE | output: count(*) +| row-size=8B cardinality=1 | 00:UNION | pass-through-operands: all +| row-size=0B cardinality=14.60K | |--02:SCAN HDFS [functional.alltypes] | partitions=24/24 files=24 size=478.45KB +| row-size=0B cardinality=7.30K | 01:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # Scan with limit on large table: the number of rows scanned is bounded, # codegen should be disabled @@ -201,6 +231,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: sum(tpch.lineitem.l_discount) +| row-size=16B cardinality=1 | 02:EXCHANGE [UNPARTITIONED] | limit: 1000 @@ -208,6 +239,7 @@ PLAN-ROOT SINK 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB limit: 1000 + row-size=8B cardinality=1.00K ==== # Scan with limit and predicates on large table: any number of rows could be scanned: # codegen should be enabled @@ -221,6 +253,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: sum(tpch.lineitem.l_discount) +| row-size=16B cardinality=1 | 02:EXCHANGE [UNPARTITIONED] | limit: 1000 @@ -229,4 +262,5 @@ PLAN-ROOT SINK partitions=1/1 files=1 size=718.94MB predicates: l_orderkey > 100 limit: 1000 + row-size=16B cardinality=1.00K ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test b/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test index 07726c9..ac2b6e4 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/disable-preaggregations.test @@ -9,15 +9,18 @@ PLAN-ROOT SINK 03:AGGREGATE [FINALIZE] | output: count:merge(*) | group by: tinyint_col +| row-size=9B cardinality=9 | 02:EXCHANGE [HASH(tinyint_col)] | 01:AGGREGATE | output: count(*) | group by: tinyint_col +| row-size=9B cardinality=9 | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=1B cardinality=11.00K ==== select count(distinct id) from functional.alltypesagg @@ -26,20 +29,25 @@ PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(id) +| row-size=8B cardinality=1 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(id) +| row-size=8B cardinality=1 | 04:AGGREGATE | group by: id +| row-size=4B cardinality=10.28K | 03:EXCHANGE [HASH(id)] | 01:AGGREGATE | group by: id +| row-size=4B cardinality=10.28K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=4B cardinality=11.00K ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test b/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test index b895ad0..cb8a4e0 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/distinct-estimate.test @@ -5,22 +5,27 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: distinctpc(l_orderkey) +| row-size=8B cardinality=1 | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + row-size=8B cardinality=6.00M ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | output: distinctpc:merge(l_orderkey) +| row-size=8B cardinality=1 | 02:EXCHANGE [UNPARTITIONED] | 01:AGGREGATE | output: distinctpc(l_orderkey) +| row-size=8B cardinality=1 | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + row-size=8B cardinality=6.00M ==== # Distinct estimate with distinct select count(distinct l_orderkey), distinctpc(l_orderkey) from tpch.lineitem @@ -29,34 +34,42 @@ PLAN-ROOT SINK | 02:AGGREGATE [FINALIZE] | output: count(l_orderkey), distinctpc:merge(l_orderkey) +| row-size=16B cardinality=1 | 01:AGGREGATE | output: distinctpc(l_orderkey) | group by: l_orderkey +| row-size=16B cardinality=1.56M | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + row-size=8B cardinality=6.00M ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(l_orderkey), distinctpc:merge(l_orderkey) +| row-size=16B cardinality=1 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(l_orderkey), distinctpc:merge(l_orderkey) +| row-size=16B cardinality=1 | 04:AGGREGATE | output: distinctpc:merge(l_orderkey) | group by: l_orderkey +| row-size=16B cardinality=1.56M | 03:EXCHANGE [HASH(l_orderkey)] | 01:AGGREGATE [STREAMING] | output: distinctpc(l_orderkey) | group by: l_orderkey +| row-size=16B cardinality=1.56M | 00:SCAN HDFS [tpch.lineitem] partitions=1/1 files=1 size=718.94MB + row-size=8B cardinality=6.00M ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test b/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test index 6e66e30..3e671da 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/distinct.test @@ -6,9 +6,11 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip +| row-size=24B cardinality=0 | 00:SCAN HDFS [functional.testtbl] partitions=1/1 files=0 size=0B + row-size=24B cardinality=0 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -16,14 +18,17 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip +| row-size=24B cardinality=0 | 02:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)] | 01:AGGREGATE [STREAMING] | group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip +| row-size=24B cardinality=0 | 00:SCAN HDFS [functional.testtbl] partitions=1/1 files=0 size=0B + row-size=24B cardinality=0 ==== # distinct w/ explicit select list select distinct id, zip @@ -33,9 +38,11 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | group by: id, zip +| row-size=12B cardinality=0 | 00:SCAN HDFS [functional.testtbl] partitions=1/1 files=0 size=0B + row-size=12B cardinality=0 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -43,14 +50,17 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | group by: id, zip +| row-size=12B cardinality=0 | 02:EXCHANGE [HASH(id,zip)] | 01:AGGREGATE [STREAMING] | group by: id, zip +| row-size=12B cardinality=0 | 00:SCAN HDFS [functional.testtbl] partitions=1/1 files=0 size=0B + row-size=12B cardinality=0 ==== # count(distinct) select count(distinct id, zip) @@ -60,33 +70,41 @@ PLAN-ROOT SINK | 02:AGGREGATE [FINALIZE] | output: count(if(id IS NULL, NULL, zip)) +| row-size=8B cardinality=0 | 01:AGGREGATE | group by: id, zip +| row-size=12B cardinality=0 | 00:SCAN HDFS [functional.testtbl] partitions=1/1 files=0 size=0B + row-size=12B cardinality=0 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(if(id IS NULL, NULL, zip)) +| row-size=8B cardinality=0 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(if(id IS NULL, NULL, zip)) +| row-size=8B cardinality=0 | 04:AGGREGATE | group by: id, zip +| row-size=12B cardinality=0 | 03:EXCHANGE [HASH(id,zip)] | 01:AGGREGATE [STREAMING] | group by: id, zip +| row-size=12B cardinality=0 | 00:SCAN HDFS [functional.testtbl] partitions=1/1 files=0 size=0B + row-size=12B cardinality=0 ==== # count(distinct) w/ grouping select tinyint_col, count(distinct int_col, bigint_col) @@ -98,12 +116,15 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: count(if(int_col IS NULL, NULL, bigint_col)) | group by: tinyint_col +| row-size=9B cardinality=9 | 01:AGGREGATE | group by: tinyint_col, int_col, bigint_col +| row-size=13B cardinality=11.00K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=13B cardinality=11.00K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -112,23 +133,28 @@ PLAN-ROOT SINK 06:AGGREGATE [FINALIZE] | output: count:merge(if(int_col IS NULL, NULL, bigint_col)) | group by: tinyint_col +| row-size=9B cardinality=9 | 05:EXCHANGE [HASH(tinyint_col)] | 02:AGGREGATE [STREAMING] | output: count(if(int_col IS NULL, NULL, bigint_col)) | group by: tinyint_col +| row-size=9B cardinality=9 | 04:AGGREGATE | group by: tinyint_col, int_col, bigint_col +| row-size=13B cardinality=11.00K | 03:EXCHANGE [HASH(tinyint_col,int_col,bigint_col)] | 01:AGGREGATE [STREAMING] | group by: tinyint_col, int_col, bigint_col +| row-size=13B cardinality=11.00K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=13B cardinality=11.00K ==== # count(distinct) and sum(distinct) w/ grouping select tinyint_col, count(distinct int_col), sum(distinct int_col) @@ -140,12 +166,15 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: count(int_col), sum(int_col) | group by: tinyint_col +| row-size=17B cardinality=9 | 01:AGGREGATE | group by: tinyint_col, int_col +| row-size=5B cardinality=8.61K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=5B cardinality=11.00K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -154,23 +183,28 @@ PLAN-ROOT SINK 06:AGGREGATE [FINALIZE] | output: count:merge(int_col), sum:merge(int_col) | group by: tinyint_col +| row-size=17B cardinality=9 | 05:EXCHANGE [HASH(tinyint_col)] | 02:AGGREGATE [STREAMING] | output: count(int_col), sum(int_col) | group by: tinyint_col +| row-size=17B cardinality=9 | 04:AGGREGATE | group by: tinyint_col, int_col +| row-size=5B cardinality=8.61K | 03:EXCHANGE [HASH(tinyint_col,int_col)] | 01:AGGREGATE [STREAMING] | group by: tinyint_col, int_col +| row-size=5B cardinality=8.61K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=5B cardinality=11.00K ==== # sum(distinct) w/o grouping select sum(distinct int_col) @@ -180,33 +214,41 @@ PLAN-ROOT SINK | 02:AGGREGATE [FINALIZE] | output: sum(int_col) +| row-size=8B cardinality=1 | 01:AGGREGATE | group by: int_col +| row-size=4B cardinality=957 | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=4B cardinality=11.00K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: sum:merge(int_col) +| row-size=8B cardinality=1 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: sum(int_col) +| row-size=8B cardinality=1 | 04:AGGREGATE | group by: int_col +| row-size=4B cardinality=957 | 03:EXCHANGE [HASH(int_col)] | 01:AGGREGATE [STREAMING] | group by: int_col +| row-size=4B cardinality=957 | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=4B cardinality=11.00K ==== # count(distinct) and sum(distinct) w/ grouping; distinct in min() and max() # is ignored @@ -219,13 +261,16 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: count(int_col), min:merge(smallint_col), max:merge(string_col) | group by: tinyint_col +| row-size=23B cardinality=9 | 01:AGGREGATE | output: min(smallint_col), max(string_col) | group by: tinyint_col, int_col +| row-size=19B cardinality=8.61K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=22B cardinality=11.00K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -234,25 +279,30 @@ PLAN-ROOT SINK 06:AGGREGATE [FINALIZE] | output: count:merge(int_col), min:merge(smallint_col), max:merge(string_col) | group by: tinyint_col +| row-size=23B cardinality=9 | 05:EXCHANGE [HASH(tinyint_col)] | 02:AGGREGATE [STREAMING] | output: count(int_col), min:merge(smallint_col), max:merge(string_col) | group by: tinyint_col +| row-size=23B cardinality=9 | 04:AGGREGATE | output: min:merge(smallint_col), max:merge(string_col) | group by: tinyint_col, int_col +| row-size=19B cardinality=8.61K | 03:EXCHANGE [HASH(tinyint_col,int_col)] | 01:AGGREGATE [STREAMING] | output: min(smallint_col), max(string_col) | group by: tinyint_col, int_col +| row-size=19B cardinality=8.61K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=22B cardinality=11.00K ==== # aggregate fns with and without distinct select tinyint_col, count(distinct int_col), count(*), sum(distinct int_col), @@ -264,13 +314,16 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: count(int_col), sum(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) | group by: tinyint_col +| row-size=43B cardinality=9 | 01:AGGREGATE | output: count(*), sum(int_col), min(smallint_col), max(bigint_col) | group by: tinyint_col, int_col +| row-size=31B cardinality=8.61K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=15B cardinality=11.00K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | @@ -279,25 +332,30 @@ PLAN-ROOT SINK 06:AGGREGATE [FINALIZE] | output: count:merge(int_col), sum:merge(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) | group by: tinyint_col +| row-size=43B cardinality=9 | 05:EXCHANGE [HASH(tinyint_col)] | 02:AGGREGATE [STREAMING] | output: count(int_col), sum(int_col), count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) | group by: tinyint_col +| row-size=43B cardinality=9 | 04:AGGREGATE | output: count:merge(*), sum:merge(int_col), min:merge(smallint_col), max:merge(bigint_col) | group by: tinyint_col, int_col +| row-size=31B cardinality=8.61K | 03:EXCHANGE [HASH(tinyint_col,int_col)] | 01:AGGREGATE [STREAMING] | output: count(*), sum(int_col), min(smallint_col), max(bigint_col) | group by: tinyint_col, int_col +| row-size=31B cardinality=8.61K | 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB + row-size=15B cardinality=11.00K ==== # test join on inline views containing distinct aggregates to make sure # the aggregation info reports the correct tuple ids (from the 2nd phase @@ -310,69 +368,87 @@ PLAN-ROOT SINK | 06:HASH JOIN [INNER JOIN] | hash predicates: count(int_col) = count(bigint_col) +| row-size=16B cardinality=1 | |--05:AGGREGATE [FINALIZE] | | output: count(bigint_col) +| | row-size=8B cardinality=1 | | | 04:AGGREGATE | | group by: bigint_col +| | row-size=8B cardinality=2 | | | 03:SCAN HDFS [functional.alltypestiny] | partitions=4/4 files=4 size=460B +| row-size=8B cardinality=8 | 02:AGGREGATE [FINALIZE] | output: count(int_col) +| row-size=8B cardinality=1 | 01:AGGREGATE | group by: int_col +| row-size=4B cardinality=2 | 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B + row-size=4B cardinality=8 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:HASH JOIN [INNER JOIN, BROADCAST] | hash predicates: count(int_col) = count(bigint_col) +| row-size=16B cardinality=1 | |--15:EXCHANGE [UNPARTITIONED] | | | 14:AGGREGATE [FINALIZE] | | output: count:merge(bigint_col) +| | row-size=8B cardinality=1 | | | 13:EXCHANGE [UNPARTITIONED] | | | 05:AGGREGATE | | output: count(bigint_col) +| | row-size=8B cardinality=1 | | | 12:AGGREGATE | | group by: bigint_col +| | row-size=8B cardinality=2 | | | 11:EXCHANGE [HASH(bigint_col)] | | | 04:AGGREGATE [STREAMING] | | group by: bigint_col +| | row-size=8B cardinality=2 | | | 03:SCAN HDFS [functional.alltypestiny] | partitions=4/4 files=4 size=460B +| row-size=8B cardinality=8 | 10:AGGREGATE [FINALIZE] | output: count:merge(int_col) +| row-size=8B cardinality=1 | 09:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(int_col) +| row-size=8B cardinality=1 | 08:AGGREGATE | group by: int_col +| row-size=4B cardinality=2 | 07:EXCHANGE [HASH(int_col)] | 01:AGGREGATE [STREAMING] | group by: int_col +| row-size=4B cardinality=2 | 00:SCAN HDFS [functional.alltypestiny] partitions=4/4 files=4 size=460B + row-size=4B cardinality=8 ==== # Test placement of having predicate into 2nd phase merge agg for # distinct + non-distinct aggregates without group by (IMPALA-845). @@ -388,37 +464,45 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: count(tinyint_col), count:merge(bigint_col) | having: zeroifnull(count(bigint_col)) > 0 +| row-size=16B cardinality=0 | 01:AGGREGATE | output: count(bigint_col) | group by: tinyint_col +| row-size=9B cardinality=10 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=9B cardinality=7.30K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(tinyint_col), count:merge(bigint_col) | having: zeroifnull(count(bigint_col)) > 0 +| row-size=16B cardinality=0 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(tinyint_col), count:merge(bigint_col) +| row-size=16B cardinality=0 | 04:AGGREGATE | output: count:merge(bigint_col) | group by: tinyint_col +| row-size=9B cardinality=10 | 03:EXCHANGE [HASH(tinyint_col)] | 01:AGGREGATE [STREAMING] | output: count(bigint_col) | group by: tinyint_col +| row-size=9B cardinality=10 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=9B cardinality=7.30K ==== # test slot materialization on a distinct agg inside an inline view # triggered by a predicate in an outer query block (IMPALA-861) @@ -431,34 +515,42 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: count(1) | having: count(1) IS NOT NULL +| row-size=8B cardinality=0 | 01:AGGREGATE | group by: 1 +| row-size=1B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(1) | having: count(1) IS NOT NULL +| row-size=8B cardinality=0 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(1) +| row-size=8B cardinality=0 | 04:AGGREGATE | group by: 1 +| row-size=1B cardinality=1 | 03:EXCHANGE [HASH(1)] | 01:AGGREGATE [STREAMING] | group by: 1 +| row-size=1B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # test slot materialization on a distinct agg inside an inline view # triggered by a predicate in an outer query block (IMPALA-861) @@ -471,37 +563,45 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: count(1), count:merge(*) | having: count(1) > 0, zeroifnull(count(*)) > 1, count(1) + zeroifnull(count(*)) > 10 +| row-size=16B cardinality=0 | 01:AGGREGATE | output: count(*) | group by: 1 +| row-size=9B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(1), count:merge(*) | having: count(1) > 0, zeroifnull(count(*)) > 1, count(1) + zeroifnull(count(*)) > 10 +| row-size=16B cardinality=0 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(1), count:merge(*) +| row-size=16B cardinality=0 | 04:AGGREGATE | output: count:merge(*) | group by: 1 +| row-size=9B cardinality=1 | 03:EXCHANGE [HASH(1)] | 01:AGGREGATE [STREAMING] | output: count(*) | group by: 1 +| row-size=9B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # IMPALA-2266: Test non-grouping distinct aggregation inside an inline view. select * from (select count(distinct int_col) cd from functional.alltypes) v @@ -510,22 +610,27 @@ PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(int_col) +| row-size=8B cardinality=1 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(int_col) +| row-size=8B cardinality=1 | 04:AGGREGATE | group by: int_col +| row-size=4B cardinality=10 | 03:EXCHANGE [HASH(int_col)] | 01:AGGREGATE [STREAMING] | group by: int_col +| row-size=4B cardinality=10 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=4B cardinality=7.30K ==== # IMPALA-2266: Test grouping distinct aggregation inside an inline view. select * from (select count(distinct int_col) cd from functional.alltypes group by bool_col) v @@ -537,23 +642,28 @@ PLAN-ROOT SINK 06:AGGREGATE [FINALIZE] | output: count:merge(int_col) | group by: bool_col +| row-size=9B cardinality=2 | 05:EXCHANGE [HASH(bool_col)] | 02:AGGREGATE [STREAMING] | output: count(int_col) | group by: bool_col +| row-size=9B cardinality=2 | 04:AGGREGATE | group by: bool_col, int_col +| row-size=5B cardinality=20 | 03:EXCHANGE [HASH(bool_col,int_col)] | 01:AGGREGATE [STREAMING] | group by: bool_col, int_col +| row-size=5B cardinality=20 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=5B cardinality=7.30K ==== # IMPALA-4042: count(distinct NULL) fails on a view select count(distinct null) from functional.alltypes_view @@ -562,22 +672,27 @@ PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: count:merge(NULL) +| row-size=8B cardinality=1 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: count(NULL) +| row-size=8B cardinality=1 | 04:AGGREGATE | group by: NULL +| row-size=1B cardinality=1 | 03:EXCHANGE [HASH(NULL)] | 01:AGGREGATE [STREAMING] | group by: NULL +| row-size=1B cardinality=1 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== # Query block with a single distinct and multiple non-distinct aggs simplifies to a # non-grouping aggregation plan. @@ -592,35 +707,43 @@ PLAN-ROOT SINK 02:AGGREGATE [FINALIZE] | output: min:merge(string_col), max:merge(string_col) | having: min(string_col) < '9', min(string_col) < max(string_col) +| row-size=24B cardinality=0 | 01:AGGREGATE | output: min(string_col), max(string_col) | group by: smallint_col +| row-size=26B cardinality=10 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=15B cardinality=7.30K ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | 06:AGGREGATE [FINALIZE] | output: min:merge(string_col), max:merge(string_col) | having: min(string_col) < '9', min(string_col) < max(string_col) +| row-size=24B cardinality=0 | 05:EXCHANGE [UNPARTITIONED] | 02:AGGREGATE | output: min:merge(string_col), max:merge(string_col) +| row-size=24B cardinality=0 | 04:AGGREGATE | output: min:merge(string_col), max:merge(string_col) | group by: smallint_col +| row-size=26B cardinality=10 | 03:EXCHANGE [HASH(smallint_col)] | 01:AGGREGATE [STREAMING] | output: min(string_col), max(string_col) | group by: smallint_col +| row-size=26B cardinality=10 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB + row-size=15B cardinality=7.30K ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/empty.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test index 43d1fcf..964b7f9 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/empty.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/empty.test @@ -47,6 +47,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(int_col), avg(double_col), count(*) +| row-size=24B cardinality=0 | 00:EMPTYSET ==== @@ -61,6 +62,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 00:EMPTYSET ==== @@ -93,12 +95,14 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN] | hash predicates: f.id = t1.id | runtime filters: RF000 <- t1.id +| row-size=8B cardinality=0 | |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypessmall f] partitions=4/4 files=4 size=6.32KB runtime filters: RF000 -> f.id + row-size=4B cardinality=100 ==== # Constant conjunct causes union operand to be dropped. select * from functional.alltypessmall @@ -111,12 +115,15 @@ PLAN-ROOT SINK | 00:UNION | pass-through-operands: all +| row-size=89B cardinality=108 | |--02:SCAN HDFS [functional.alltypestiny] | partitions=4/4 files=4 size=460B +| row-size=89B cardinality=8 | 01:SCAN HDFS [functional.alltypessmall] partitions=4/4 files=4 size=6.32KB + row-size=89B cardinality=100 ==== # Constant conjunct turns union into an empty-set node. select * @@ -133,11 +140,13 @@ PLAN-ROOT SINK | 02:HASH JOIN [FULL OUTER JOIN] | hash predicates: a.id = id +| row-size=178B cardinality=7.30K | |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypes a] partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ==== # Constant conjunct in the ON-clause of an outer join is # assigned to the join. @@ -151,12 +160,15 @@ PLAN-ROOT SINK 02:HASH JOIN [LEFT OUTER JOIN] | hash predicates: a.id = b.id | other join predicates: FALSE +| row-size=178B cardinality=100 | |--01:SCAN HDFS [functional.alltypestiny b] | partitions=4/4 files=4 size=460B +| row-size=89B cardinality=8 | 00:SCAN HDFS [functional.alltypessmall a] partitions=4/4 files=4 size=6.32KB + row-size=89B cardinality=100 ==== # Constant conjunct in the ON-clause of an outer join is # assigned to the join. @@ -171,13 +183,16 @@ PLAN-ROOT SINK | hash predicates: a.id = b.id | other join predicates: FALSE | runtime filters: RF000 <- b.id +| row-size=178B cardinality=9 | |--01:SCAN HDFS [functional.alltypestiny b] | partitions=4/4 files=4 size=460B +| row-size=89B cardinality=8 | 00:SCAN HDFS [functional.alltypessmall a] partitions=4/4 files=4 size=6.32KB runtime filters: RF000 -> a.id + row-size=89B cardinality=100 ==== # Constant conjunct in the ON-clause of an outer join is # assigned to the join. @@ -191,12 +206,15 @@ PLAN-ROOT SINK 02:HASH JOIN [FULL OUTER JOIN] | hash predicates: a.id = b.id | other join predicates: NULL +| row-size=178B cardinality=108 | |--01:SCAN HDFS [functional.alltypestiny b] | partitions=4/4 files=4 size=460B +| row-size=89B cardinality=8 | 00:SCAN HDFS [functional.alltypessmall a] partitions=4/4 files=4 size=6.32KB + row-size=89B cardinality=100 ==== # Limit 0 turns query block into an empty-set node. select t1.id, t2.id @@ -234,12 +252,14 @@ PLAN-ROOT SINK 02:HASH JOIN [INNER JOIN] | hash predicates: f.id = t1.id | runtime filters: RF000 <- t1.id +| row-size=8B cardinality=0 | |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypessmall f] partitions=4/4 files=4 size=6.32KB runtime filters: RF000 -> f.id + row-size=4B cardinality=100 ==== # Limit 0 causes union operand to be dropped. select * from functional.alltypessmall @@ -252,12 +272,15 @@ PLAN-ROOT SINK | 00:UNION | pass-through-operands: all +| row-size=89B cardinality=108 | |--02:SCAN HDFS [functional.alltypestiny] | partitions=4/4 files=4 size=460B +| row-size=89B cardinality=8 | 01:SCAN HDFS [functional.alltypessmall] partitions=4/4 files=4 size=6.32KB + row-size=89B cardinality=100 ==== # Limit 0 causes empty-set union. select * from functional.alltypessmall @@ -282,12 +305,15 @@ select int_col from functional.alltypesagg PLAN-ROOT SINK | 00:UNION +| row-size=8B cardinality=11.00K | |--03:SCAN HDFS [functional.alltypesagg] | partitions=11/11 files=11 size=814.73KB +| row-size=4B cardinality=11.00K | 02:AGGREGATE [FINALIZE] | output: count(1) +| row-size=8B cardinality=0 | 01:EMPTYSET ==== @@ -307,6 +333,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month) | 01:SORT | order by: year ASC NULLS LAST, month ASC NULLS LAST +| row-size=89B cardinality=0 | 00:EMPTYSET ==== @@ -320,6 +347,7 @@ WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month) | 01:SORT | order by: year ASC NULLS LAST, month ASC NULLS LAST +| row-size=89B cardinality=0 | 00:EMPTYSET ==== @@ -343,6 +371,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: sum(id), count(int_col) +| row-size=16B cardinality=0 | 00:EMPTYSET ==== @@ -358,6 +387,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: sum(id + int_col) +| row-size=8B cardinality=0 | 00:EMPTYSET ==== @@ -373,9 +403,11 @@ PLAN-ROOT SINK | 02:AGGREGATE [FINALIZE] | output: count(T1.int_col) +| row-size=8B cardinality=0 | 01:AGGREGATE | group by: int_col +| row-size=4B cardinality=0 | 00:EMPTYSET ==== @@ -396,11 +428,14 @@ PLAN-ROOT SINK | 02:UNION | pass-through-operands: all +| row-size=2B cardinality=0 | 01:AGGREGATE [FINALIZE] | group by: lead(-496, 81, NULL) OVER(...) +| row-size=2B cardinality=0 | 00:UNION + row-size=2B cardinality=0 ==== # IMPALA-2088: Test empty union operands with analytic functions. select lead(-496, 81) over (order by t1.double_col desc, t1.id asc) @@ -421,14 +456,18 @@ PLAN-ROOT SINK 02:UNION | constant-operands=1 | pass-through-operands: 01 +| row-size=2B cardinality=9 | |--03:SCAN HDFS [functional.alltypestiny] | partitions=4/4 files=4 size=460B +| row-size=8B cardinality=8 | 01:AGGREGATE [FINALIZE] | group by: lead(-496, 81, NULL) OVER(...) +| row-size=2B cardinality=0 | 00:UNION + row-size=2B cardinality=0 ==== # IMPALA-2216: Make sure the final output exprs are substituted, even # if the resulting plan is an EmptySetNode. @@ -471,42 +510,55 @@ where c_custkey < 10 PLAN-ROOT SINK | 01:SUBPLAN +| row-size=48B cardinality=15.00K | |--16:NESTED LOOP JOIN [LEFT OUTER JOIN] +| | row-size=48B cardinality=1 | | | |--12:AGGREGATE [FINALIZE] | | | output: count(*) +| | | row-size=8B cardinality=1 | | | | | 08:SUBPLAN +| | | row-size=0B cardinality=10 | | | | | |--11:NESTED LOOP JOIN [RIGHT OUTER JOIN] +| | | | row-size=0B cardinality=1 | | | | | | | |--09:SINGULAR ROW SRC +| | | | row-size=12B cardinality=1 | | | | | | | 10:EMPTYSET | | | | | 07:UNNEST [c.c_orders o] +| | row-size=0B cardinality=10 | | | 15:NESTED LOOP JOIN [LEFT OUTER JOIN] +| | row-size=40B cardinality=1 | | | |--06:EMPTYSET | | | 14:NESTED LOOP JOIN [LEFT OUTER JOIN] +| | row-size=36B cardinality=1 | | | |--05:EMPTYSET | | | 13:NESTED LOOP JOIN [RIGHT OUTER JOIN] +| | row-size=28B cardinality=1 | | | |--02:SINGULAR ROW SRC +| | row-size=56B cardinality=1 | | | 04:AGGREGATE [FINALIZE] | | output: count(*) +| | row-size=8B cardinality=0 | | | 03:EMPTYSET | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=292.36MB + partitions=1/1 files=4 size=288.99MB predicates: c_custkey < 10 + row-size=56B cardinality=15.00K ==== # IMPALA-2539: Test empty union operands containing relative table refs. select c_custkey, o_orderkey @@ -524,23 +576,31 @@ where c_custkey = 1 PLAN-ROOT SINK | 01:SUBPLAN +| row-size=28B cardinality=10 | |--07:NESTED LOOP JOIN [CROSS JOIN] +| | row-size=28B cardinality=10 | | | |--02:SINGULAR ROW SRC +| | row-size=44B cardinality=1 | | | 06:UNION +| | row-size=8B cardinality=10 | | | 05:AGGREGATE [FINALIZE] | | group by: o_orderkey +| | row-size=8B cardinality=10 | | | 03:UNION +| | row-size=8B cardinality=10 | | | 04:UNNEST [c.c_orders o1] +| row-size=8B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=292.36MB + partitions=1/1 files=4 size=288.99MB predicates: c_custkey = 1 + row-size=44B cardinality=1 ==== # IMPALA-2215: Having clause without aggregation. select 1 from (select 1) v having 1 > 1 @@ -564,11 +624,14 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | output: count(*) +| row-size=8B cardinality=0 | 02:NESTED LOOP JOIN [CROSS JOIN] +| row-size=1B cardinality=0 | |--01:EMPTYSET | 00:SCAN HDFS [functional.alltypes x] partitions=24/24 files=24 size=478.45KB + row-size=0B cardinality=7.30K ==== http://git-wip-us.apache.org/repos/asf/impala/blob/a7ea86b7/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test index 8b71f11..5383858 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test @@ -14,7 +14,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: ss_customer_sk = c_customer_sk | runtime filters: RF000[bloom] <- c_customer_sk | mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0 -| tuple-ids=0,1 row-size=319B cardinality=529700 +| tuple-ids=0,1 row-size=319B cardinality=529.70K | in pipelines: 00(GETNEXT), 01(OPEN) | |--01:SCAN HDFS [tpcds.customer] @@ -25,7 +25,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=100000 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=219B cardinality=16667 +| tuple-ids=1 row-size=219B cardinality=16.67K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds.store_sales] @@ -37,7 +37,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=100B cardinality=2880404 + tuple-ids=0 row-size=100B cardinality=2.88M in pipelines: 00(GETNEXT) ==== # Single-column FK/PK join detection on left outer join. The join cardinality @@ -57,7 +57,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: ss_customer_sk = c_customer_sk | other predicates: c_salutation = 'Mrs.' | mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0 -| tuple-ids=0,1N row-size=319B cardinality=2880404 +| tuple-ids=0,1N row-size=319B cardinality=2.88M | in pipelines: 00(GETNEXT), 01(OPEN) | |--01:SCAN HDFS [tpcds.customer] @@ -68,7 +68,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=100000 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=219B cardinality=16667 +| tuple-ids=1 row-size=219B cardinality=16.67K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds.store_sales] @@ -79,7 +79,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=100B cardinality=2880404 + tuple-ids=0 row-size=100B cardinality=2.88M in pipelines: 00(GETNEXT) ==== # Single-column FK/PK join detection on right outer join. The join cardinality @@ -99,7 +99,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: ss_customer_sk = c_customer_sk | runtime filters: RF000[bloom] <- c_customer_sk | mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0 -| tuple-ids=0N,1 row-size=319B cardinality=529700 +| tuple-ids=0N,1 row-size=319B cardinality=529.70K | in pipelines: 00(GETNEXT), 01(OPEN) | |--01:SCAN HDFS [tpcds.customer] @@ -110,7 +110,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=100000 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=219B cardinality=16667 +| tuple-ids=1 row-size=219B cardinality=16.67K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds.store_sales] @@ -122,7 +122,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=100B cardinality=2880404 + tuple-ids=0 row-size=100B cardinality=2.88M in pipelines: 00(GETNEXT) ==== # Multi-column FK/PK join detection @@ -141,7 +141,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: ss_item_sk = sr_item_sk, ss_ticket_number = sr_ticket_number | runtime filters: RF000[bloom] <- sr_item_sk, RF001[bloom] <- sr_ticket_number | mem-estimate=4.75MB mem-reservation=4.75MB spill-buffer=256.00KB thread-reservation=0 -| tuple-ids=0,1 row-size=188B cardinality=211838 +| tuple-ids=0,1 row-size=188B cardinality=211.84K | in pipelines: 00(GETNEXT), 01(OPEN) | |--01:SCAN HDFS [tpcds.store_returns] @@ -152,7 +152,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=287514 | mem-estimate=80.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=88B cardinality=28751 +| tuple-ids=1 row-size=88B cardinality=28.75K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds.store_sales] @@ -164,7 +164,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=100B cardinality=2880404 + tuple-ids=0 row-size=100B cardinality=2.88M in pipelines: 00(GETNEXT) ==== # Many-to-many join detection. @@ -182,7 +182,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: none | runtime filters: RF000[bloom] <- ws_sold_time_sk | mem-estimate=108.67MB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0 -| tuple-ids=0,1 row-size=244B cardinality=44136418 +| tuple-ids=0,1 row-size=244B cardinality=44.14M | in pipelines: 00(GETNEXT), 01(OPEN) | |--01:SCAN HDFS [tpcds.web_sales] @@ -192,7 +192,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=657377 | mem-estimate=160.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=144B cardinality=719384 +| tuple-ids=1 row-size=144B cardinality=719.38K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds.store_sales] @@ -204,7 +204,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=100B cardinality=2880404 + tuple-ids=0 row-size=100B cardinality=2.88M in pipelines: 00(GETNEXT) ==== # PK/PK join is detected as FK/PK. @@ -223,7 +223,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: b.d_date_sk = a.d_date_sk | runtime filters: RF000[bloom] <- a.d_date_sk | mem-estimate=17.00MB mem-reservation=17.00MB spill-buffer=1.00MB thread-reservation=0 -| tuple-ids=1,0 row-size=510B cardinality=36525 +| tuple-ids=1,0 row-size=510B cardinality=36.52K | in pipelines: 01(GETNEXT), 00(OPEN) | |--00:SCAN HDFS [tpcds.date_dim a] @@ -234,7 +234,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=73049 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=0 row-size=255B cardinality=36525 +| tuple-ids=0 row-size=255B cardinality=36.52K | in pipelines: 00(GETNEXT) | 01:SCAN HDFS [tpcds.date_dim b] @@ -245,7 +245,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=73049 mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=1 row-size=255B cardinality=73049 + tuple-ids=1 row-size=255B cardinality=73.05K in pipelines: 01(GETNEXT) ==== # Single query with various join types combined. @@ -268,7 +268,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: none | runtime filters: RF000[bloom] <- c_current_addr_sk | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 -| tuple-ids=1,0,3,4,2 row-size=60B cardinality=19358 +| tuple-ids=1,0,3,4,2 row-size=60B cardinality=19.36K | in pipelines: 01(GETNEXT), 02(OPEN) | |--02:SCAN HDFS [tpcds.customer] @@ -278,7 +278,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=100000 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=2 row-size=4B cardinality=100000 +| tuple-ids=2 row-size=4B cardinality=100.00K | in pipelines: 02(GETNEXT) | 07:HASH JOIN [INNER JOIN] @@ -286,7 +286,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: sr_returned_date_sk = d2.d_date_sk | runtime filters: RF002[bloom] <- d2.d_date_sk | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 -| tuple-ids=1,0,3,4 row-size=56B cardinality=8131 +| tuple-ids=1,0,3,4 row-size=56B cardinality=8.13K | in pipelines: 01(GETNEXT), 04(OPEN) | |--04:SCAN HDFS [tpcds.date_dim d2] @@ -296,7 +296,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=73049 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=4 row-size=4B cardinality=73049 +| tuple-ids=4 row-size=4B cardinality=73.05K | in pipelines: 04(GETNEXT) | 06:HASH JOIN [INNER JOIN] @@ -304,7 +304,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: sr_item_sk = ss_item_sk, sr_ticket_number = ss_ticket_number | runtime filters: RF004[bloom] <- ss_item_sk, RF005[bloom] <- ss_ticket_number | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 -| tuple-ids=1,0,3 row-size=52B cardinality=8131 +| tuple-ids=1,0,3 row-size=52B cardinality=8.13K | in pipelines: 01(GETNEXT), 00(OPEN) | |--05:HASH JOIN [INNER JOIN] @@ -312,7 +312,7 @@ PLAN-ROOT SINK | | fk/pk conjuncts: ss_sold_date_sk = d1.d_date_sk | | runtime filters: RF008[bloom] <- d1.d_date_sk | | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 -| | tuple-ids=0,3 row-size=32B cardinality=11055 +| | tuple-ids=0,3 row-size=32B cardinality=11.05K | | in pipelines: 00(GETNEXT), 03(OPEN) | | | |--03:SCAN HDFS [tpcds.date_dim d1] @@ -335,7 +335,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=130093 | mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=0 row-size=24B cardinality=2880404 +| tuple-ids=0 row-size=24B cardinality=2.88M | in pipelines: 00(GETNEXT) | 01:SCAN HDFS [tpcds.store_returns] @@ -346,7 +346,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=287514 mem-estimate=80.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=1 row-size=20B cardinality=287514 + tuple-ids=1 row-size=20B cardinality=287.51K in pipelines: 01(GETNEXT) ==== # Assumed FK/PK join becasue of non-trivial equi-join exprs. @@ -364,7 +364,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: assumed fk/pk | runtime filters: RF000[bloom] <- c_customer_sk / 100 | mem-estimate=34.00MB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0 -| tuple-ids=0,1 row-size=319B cardinality=2880404 +| tuple-ids=0,1 row-size=319B cardinality=2.88M | in pipelines: 00(GETNEXT), 01(OPEN) | |--01:SCAN HDFS [tpcds.customer] @@ -374,7 +374,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=100000 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=219B cardinality=100000 +| tuple-ids=1 row-size=219B cardinality=100.00K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds.store_sales] @@ -386,7 +386,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=100B cardinality=2880404 + tuple-ids=0 row-size=100B cardinality=2.88M in pipelines: 00(GETNEXT) ==== # Assumed FK/PK join due to missing stats on the rhs. Join cardinality is equal to @@ -405,7 +405,7 @@ PLAN-ROOT SINK | fk/pk conjuncts: assumed fk/pk | runtime filters: RF000[bloom] <- c_customer_sk | mem-estimate=2.00GB mem-reservation=34.00MB spill-buffer=2.00MB thread-reservation=0 -| tuple-ids=0,1 row-size=8B cardinality=2880404 +| tuple-ids=0,1 row-size=8B cardinality=2.88M | in pipelines: 00(GETNEXT), 01(OPEN) | |--01:SCAN HDFS [tpcds_seq_snap.customer] @@ -427,7 +427,7 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=4B cardinality=2880404 + tuple-ids=0 row-size=4B cardinality=2.88M in pipelines: 00(GETNEXT) ==== # Assumed FK/PK join due to missing stats on the lhs. Join cardinality is unknown. @@ -455,7 +455,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=100000 | mem-estimate=48.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=4B cardinality=100000 +| tuple-ids=1 row-size=4B cardinality=100.00K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds_seq_snap.store_sales] @@ -487,13 +487,13 @@ PLAN-ROOT SINK | fk/pk conjuncts: none | runtime filters: RF000[bloom] <- ws_sold_time_sk | mem-estimate=1.94MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 -| tuple-ids=0,2 row-size=104B cardinality=2440073 +| tuple-ids=0,2 row-size=104B cardinality=2.44M | in pipelines: 00(GETNEXT), 02(OPEN) | |--02:AGGREGATE [FINALIZE] | | group by: ws_sold_time_sk | | mem-estimate=10.00MB mem-reservation=1.94MB spill-buffer=64.00KB thread-reservation=0 -| | tuple-ids=2 row-size=4B cardinality=39771 +| | tuple-ids=2 row-size=4B cardinality=39.77K | | in pipelines: 02(GETNEXT), 01(OPEN) | | | 01:SCAN HDFS [tpcds.web_sales] @@ -503,7 +503,7 @@ PLAN-ROOT SINK | columns: all | extrapolated-rows=disabled max-scan-range-rows=657377 | mem-estimate=160.00MB mem-reservation=8.00MB thread-reservation=1 -| tuple-ids=1 row-size=4B cardinality=719384 +| tuple-ids=1 row-size=4B cardinality=719.38K | in pipelines: 01(GETNEXT) | 00:SCAN HDFS [tpcds.store_sales] @@ -515,6 +515,6 @@ PLAN-ROOT SINK columns: all extrapolated-rows=disabled max-scan-range-rows=130093 mem-estimate=128.00MB mem-reservation=8.00MB thread-reservation=1 - tuple-ids=0 row-size=100B cardinality=2880404 + tuple-ids=0 row-size=100B cardinality=2.88M in pipelines: 00(GETNEXT) ====