This is an automated email from the ASF dual-hosted git repository. alamb pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new f3941b207e feat: implement QUALIFY clause (#16933) f3941b207e is described below commit f3941b207eeaa7768d840e17c32fa61f3b6fca71 Author: Huaijin <haohuai...@gmail.com> AuthorDate: Sat Aug 16 04:50:58 2025 +0800 feat: implement QUALIFY clause (#16933) * feat: implement QUALIFY clause * add document --------- Co-authored-by: Andrew Lamb <and...@nerdnetworks.org> --- datafusion/sql/src/select.rs | 72 +++++- datafusion/sql/tests/sql_integration.rs | 48 +++- datafusion/sqllogictest/test_files/qualify.slt | 301 +++++++++++++++++++++++++ docs/source/user-guide/sql/select.md | 9 + 4 files changed, 420 insertions(+), 10 deletions(-) diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs index b50fbf6812..c7ccda7232 100644 --- a/datafusion/sql/src/select.rs +++ b/datafusion/sql/src/select.rs @@ -66,9 +66,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> { if !select.lateral_views.is_empty() { return not_impl_err!("LATERAL VIEWS"); } - if select.qualify.is_some() { - return not_impl_err!("QUALIFY"); - } + if select.top.is_some() { return not_impl_err!("TOP"); } @@ -148,6 +146,33 @@ impl<S: ContextProvider> SqlToRel<'_, S> { }) .transpose()?; + // Optionally the QUALIFY expression. + let qualify_expr_opt = select + .qualify + .map::<Result<Expr>, _>(|qualify_expr| { + let qualify_expr = self.sql_expr_to_logical_expr( + qualify_expr, + &combined_schema, + planner_context, + )?; + // This step "dereferences" any aliases in the QUALIFY clause. + // + // This is how we support queries with QUALIFY expressions that + // refer to aliased columns. + // + // For example: + // + // select row_number() over (PARTITION BY id) as rk from users qualify rk > 1; + // + // are rewritten as, respectively: + // + // select row_number() over (PARTITION BY id) as rk from users qualify row_number() over (PARTITION BY id) > 1; + // + let qualify_expr = resolve_aliases_to_exprs(qualify_expr, &alias_map)?; + normalize_col(qualify_expr, &projected_plan) + }) + .transpose()?; + // The outer expressions we will search through for aggregates. // Aggregates may be sourced from the SELECT list or from the HAVING expression. let aggr_expr_haystack = select_exprs.iter().chain(having_expr_opt.iter()); @@ -225,8 +250,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> { plan }; - // Process window function - let window_func_exprs = find_window_exprs(&select_exprs_post_aggr); + // The outer expressions we will search through for window functions. + // Window functions may be sourced from the SELECT list or from the QUALIFY expression. + let windows_expr_haystack = + select_exprs_post_aggr.iter().chain(qualify_expr_opt.iter()); + // All of the window expressions (deduplicated). + let window_func_exprs = find_window_exprs(windows_expr_haystack); let plan = if window_func_exprs.is_empty() { plan @@ -242,6 +271,39 @@ impl<S: ContextProvider> SqlToRel<'_, S> { plan }; + // Process QUALIFY clause after window functions + // QUALIFY filters the results of window functions, similar to how HAVING filters aggregates + let plan = if let Some(qualify_expr) = qualify_expr_opt { + // Validate that QUALIFY is used with window functions + if window_func_exprs.is_empty() { + return plan_err!( + "QUALIFY clause requires window functions in the SELECT list or QUALIFY clause" + ); + } + + // now attempt to resolve columns and replace with fully-qualified columns + let windows_projection_exprs = window_func_exprs + .iter() + .map(|expr| resolve_columns(expr, &plan)) + .collect::<Result<Vec<Expr>>>()?; + + // Rewrite the qualify expression to reference columns from the window plan + let qualify_expr_post_window = + rebase_expr(&qualify_expr, &windows_projection_exprs, &plan)?; + + // Validate that the qualify expression can be resolved from the window plan schema + self.validate_schema_satisfies_exprs( + plan.schema(), + std::slice::from_ref(&qualify_expr_post_window), + )?; + + LogicalPlanBuilder::from(plan) + .filter(qualify_expr_post_window)? + .build()? + } else { + plan + }; + // Try processing unnest expression or do the final projection let plan = self.try_process_unnest(plan, select_exprs_post_aggr)?; diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index fe9ae340ce..fcd5115ae8 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -43,7 +43,7 @@ use datafusion_functions_aggregate::{ }; use datafusion_functions_aggregate::{average::avg_udaf, grouping::grouping_udaf}; use datafusion_functions_nested::make_array::make_array_udf; -use datafusion_functions_window::rank::rank_udwf; +use datafusion_functions_window::{rank::rank_udwf, row_number::row_number_udwf}; use insta::{allow_duplicates, assert_snapshot}; use rstest::rstest; use sqlparser::dialect::{Dialect, GenericDialect, HiveDialect, MySqlDialect}; @@ -3298,6 +3298,7 @@ fn logical_plan_with_dialect_and_options( .with_aggregate_function(max_udaf()) .with_aggregate_function(grouping_udaf()) .with_window_function(rank_udwf()) + .with_window_function(row_number_udwf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); let context = MockContextProvider { state }; @@ -4186,6 +4187,47 @@ fn test_select_distinct_order_by() { ); } +#[test] +fn test_select_qualify_basic() { + let sql = "SELECT person.id, ROW_NUMBER() OVER (PARTITION BY person.age ORDER BY person.id) as rn FROM person QUALIFY rn = 1"; + let plan = logical_plan(sql).unwrap(); + assert_snapshot!( + plan, + @r#" +Projection: person.id, row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn + Filter: row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW = Int64(1) + WindowAggr: windowExpr=[[row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] + TableScan: person +"# + ); +} + +#[test] +fn test_select_qualify_without_window_function() { + let sql = "SELECT person.id FROM person QUALIFY person.id > 1"; + let err = logical_plan(sql).unwrap_err(); + assert_eq!( + err.strip_backtrace(), + "Error during planning: QUALIFY clause requires window functions in the SELECT list or QUALIFY clause" + ); +} + +#[test] +fn test_select_qualify_complex_condition() { + let sql = "SELECT person.id, person.age, ROW_NUMBER() OVER (PARTITION BY person.age ORDER BY person.id) as rn, RANK() OVER (ORDER BY person.salary) as rank FROM person QUALIFY rn <= 2 AND rank <= 5"; + let plan = logical_plan(sql).unwrap(); + assert_snapshot!( + plan, + @r#" +Projection: person.id, person.age, row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rn, rank() ORDER BY [person.salary ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS rank + Filter: row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(2) AND rank() ORDER BY [person.salary ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(5) + WindowAggr: windowExpr=[[rank() ORDER BY [person.salary ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] + WindowAggr: windowExpr=[[row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] + TableScan: person +"# + ); +} + #[rstest] #[case::select_cluster_by_unsupported( "SELECT customer_name, sum(order_total) as total_order_amount FROM orders CLUSTER BY customer_name", @@ -4195,10 +4237,6 @@ fn test_select_distinct_order_by() { "SELECT id, number FROM person LATERAL VIEW explode(numbers) exploded_table AS number", "This feature is not implemented: LATERAL VIEWS" )] -#[case::select_qualify_unsupported( - "SELECT i, p, o FROM person QUALIFY ROW_NUMBER() OVER (PARTITION BY p ORDER BY o) = 1", - "This feature is not implemented: QUALIFY" -)] #[case::select_top_unsupported( "SELECT TOP (5) * FROM person", "This feature is not implemented: TOP" diff --git a/datafusion/sqllogictest/test_files/qualify.slt b/datafusion/sqllogictest/test_files/qualify.slt new file mode 100644 index 0000000000..4771fd5297 --- /dev/null +++ b/datafusion/sqllogictest/test_files/qualify.slt @@ -0,0 +1,301 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +########## +## QUALIFY Clause Tests +########## + +# Create test data +statement ok +CREATE TABLE users ( + id INT, + name VARCHAR, + age INT, + salary DECIMAL(10,2), + dept VARCHAR +) AS VALUES +(1, 'Alice', 25, 50000.00, 'Engineering'), +(2, 'Bob', 30, 60000.00, 'Engineering'), +(3, 'Charlie', 25, 55000.00, 'Engineering'), +(4, 'Diana', 35, 70000.00, 'Marketing'), +(5, 'Eve', 30, 65000.00, 'Marketing'), +(6, 'Frank', 25, 52000.00, 'Engineering'), +(7, 'Grace', 35, 75000.00, 'Marketing'), +(8, 'Henry', 30, 62000.00, 'Engineering'); + +# Basic QUALIFY with ROW_NUMBER +query ITI +SELECT id, name, ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as rn +FROM users +QUALIFY rn = 1 +ORDER BY dept, id; +---- +8 Henry 1 +7 Grace 1 + +# QUALIFY with RANK +query ITI +SELECT id, name, RANK() OVER (ORDER BY salary DESC) as rank +FROM users +QUALIFY rank <= 3 +ORDER BY rank, id; +---- +7 Grace 1 +4 Diana 2 +5 Eve 3 + +# QUALIFY with DENSE_RANK +query ITI +SELECT id, name, DENSE_RANK() OVER (PARTITION BY dept ORDER BY age) as dense_rank +FROM users +QUALIFY dense_rank <= 2 +ORDER BY dept, dense_rank, id; +---- +1 Alice 1 +3 Charlie 1 +6 Frank 1 +2 Bob 2 +8 Henry 2 +5 Eve 1 +4 Diana 2 +7 Grace 2 + +# QUALIFY with complex condition +query ITII +SELECT id, name, ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as rn, + RANK() OVER (ORDER BY age) as age_rank +FROM users +QUALIFY rn <= 2 AND age_rank <= 5 +ORDER BY dept, rn, id; +---- +8 Henry 1 4 +2 Bob 2 4 + +# QUALIFY with LAG function +query ITRR +SELECT id, name, salary, LAG(salary) OVER (PARTITION BY dept ORDER BY id) as prev_salary +FROM users +QUALIFY prev_salary IS NOT NULL AND salary > prev_salary +ORDER BY dept, id; +---- +2 Bob 60000 50000 +8 Henry 62000 52000 +7 Grace 75000 65000 + +# QUALIFY with LEAD function +query ITRR +SELECT id, name, salary, LEAD(salary) OVER (PARTITION BY dept ORDER BY id) as next_salary +FROM users +QUALIFY next_salary IS NOT NULL AND salary < next_salary +ORDER BY dept, id; +---- +1 Alice 50000 60000 +6 Frank 52000 62000 +5 Eve 65000 75000 + +# QUALIFY with NTILE +query ITI +SELECT id, name, NTILE(3) OVER (PARTITION BY dept ORDER BY salary DESC) as tile +FROM users +QUALIFY tile = 1 +ORDER BY dept, id; +---- +2 Bob 1 +8 Henry 1 +7 Grace 1 + +# QUALIFY with PERCENT_RANK +query ITR +SELECT id, name, PERCENT_RANK() OVER (PARTITION BY dept ORDER BY salary) as pct_rank +FROM users +QUALIFY pct_rank >= 0.5 +ORDER BY dept, pct_rank, id; +---- +3 Charlie 0.5 +2 Bob 0.75 +8 Henry 1 +4 Diana 0.5 +7 Grace 1 + +# QUALIFY with CUME_DIST +query ITR +SELECT id, name, CUME_DIST() OVER (PARTITION BY dept ORDER BY age) as cume_dist +FROM users +QUALIFY cume_dist >= 0.75 +ORDER BY dept, cume_dist, id; +---- +2 Bob 1 +8 Henry 1 +4 Diana 1 +7 Grace 1 + +# QUALIFY with multiple window functions +query ITIII +SELECT id, name, + ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as rn, + RANK() OVER (ORDER BY age) as age_rank, + DENSE_RANK() OVER (PARTITION BY dept ORDER BY age) as dept_age_rank +FROM users +QUALIFY rn <= 2 AND age_rank <= 4 AND dept_age_rank <= 2 +ORDER BY dept, rn, id; +---- +8 Henry 1 4 2 +2 Bob 2 4 2 + +# QUALIFY with arithmetic expressions +query ITRI +SELECT id, name, salary, + ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as rn +FROM users +QUALIFY rn = 1 AND salary > 60000 +ORDER BY dept, id; +---- +8 Henry 62000 1 +7 Grace 75000 1 + +# QUALIFY with string functions +query ITI +SELECT id, name, + ROW_NUMBER() OVER (PARTITION BY dept ORDER BY name) as rn +FROM users +QUALIFY rn = 1 +ORDER BY dept, id; +---- +1 Alice 1 +4 Diana 1 + +# window function with aggregate function +query ITI +SELECT id, name, COUNT(*) OVER (PARTITION BY dept) as cnt +FROM users +QUALIFY cnt > 4 +ORDER BY dept, id; +---- +1 Alice 5 +2 Bob 5 +3 Charlie 5 +6 Frank 5 +8 Henry 5 + +# QUALIFY with HAVING +query TR +SELECT dept, AVG(salary) OVER (PARTITION BY dept) as r +FROM users +WHERE salary > 5000 +GROUP BY dept, salary +HAVING SUM(salary) > 20000 +QUALIFY r > 60000 +---- +Marketing 70000 +Marketing 70000 +Marketing 70000 + +# Error: QUALIFY without window functions +query error +SELECT id, name FROM users QUALIFY id > 1; + +# Window function in QUALIFY +query IT +SELECT id, name FROM users QUALIFY COUNT(*) OVER () > 1 ORDER BY id; +---- +1 Alice +2 Bob +3 Charlie +4 Diana +5 Eve +6 Frank +7 Grace +8 Henry + +# verify the logical plan and physical plan +query TT +EXPLAIN SELECT id, name FROM users QUALIFY COUNT(*) OVER () > 1 ORDER BY id; +---- +logical_plan +01)Sort: users.id ASC NULLS LAST +02)--Projection: users.id, users.name +03)----Filter: count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING > Int64(1) +04)------WindowAggr: windowExpr=[[count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +05)--------TableScan: users projection=[id, name] +physical_plan +01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----FilterExec: count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 > 1, projection=[id@0, name@1] +04)------WindowAggExec: wdw=[count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] +05)--------DataSourceExec: partitions=1, partition_sizes=[1] + +# plan row_number() +query TT +explain select row_number() over (PARTITION BY dept) as rk from users qualify rk > 1; +---- +logical_plan +01)Projection: row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rk +02)--Filter: row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING > UInt64(1) +03)----Projection: row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +04)------WindowAggr: windowExpr=[[row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +05)--------TableScan: users projection=[dept] +physical_plan +01)ProjectionExec: expr=[row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@0 as rk] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----FilterExec: row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@0 > 1 +04)------ProjectionExec: expr=[row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] +05)--------BoundedWindowAggExec: wdw=[row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: "row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING], mode=[Sorted] +06)----------SortExec: expr=[dept@0 ASC NULLS LAST], preserve_partitioning=[false] +07)------------DataSourceExec: partitions=1, partition_sizes=[1] + +# plan with window function and group by +query TT +EXPLAIN SELECT dept, AVG(salary) OVER (PARTITION BY dept) as r +FROM users +WHERE salary > 5000 +GROUP BY dept, salary +HAVING SUM(salary) > 20000 +QUALIFY r > 60000 +---- +logical_plan +01)Projection: users.dept, avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS r +02)--Filter: avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING > Decimal128(Some(60000000000),14,6) +03)----Projection: users.dept, avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +04)------WindowAggr: windowExpr=[[avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] +05)--------Projection: users.dept, users.salary +06)----------Filter: sum(users.salary) > Decimal128(Some(2000000),20,2) +07)------------Aggregate: groupBy=[[users.dept, users.salary]], aggr=[[sum(users.salary)]] +08)--------------Filter: users.salary > Decimal128(Some(500000),10,2) +09)----------------TableScan: users projection=[salary, dept] +physical_plan +01)ProjectionExec: expr=[dept@0 as dept, avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as r] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----FilterExec: avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 > Some(60000000000),14,6 +04)------ProjectionExec: expr=[dept@0 as dept, avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING] +05)--------WindowAggExec: wdw=[avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Decimal128(14, 6), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] +06)----------SortExec: expr=[dept@0 ASC NULLS LAST], preserve_partitioning=[true] +07)------------CoalesceBatchesExec: target_batch_size=8192 +08)--------------RepartitionExec: partitioning=Hash([dept@0], 4), input_partitions=4 +09)----------------CoalesceBatchesExec: target_batch_size=8192 +10)------------------FilterExec: sum(users.salary)@2 > Some(2000000),20,2, projection=[dept@0, salary@1] +11)--------------------AggregateExec: mode=FinalPartitioned, gby=[dept@0 as dept, salary@1 as salary], aggr=[sum(users.salary)] +12)----------------------CoalesceBatchesExec: target_batch_size=8192 +13)------------------------RepartitionExec: partitioning=Hash([dept@0, salary@1], 4), input_partitions=4 +14)--------------------------AggregateExec: mode=Partial, gby=[dept@1 as dept, salary@0 as salary], aggr=[sum(users.salary)] +15)----------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +16)------------------------------CoalesceBatchesExec: target_batch_size=8192 +17)--------------------------------FilterExec: salary@0 > Some(500000),10,2 +18)----------------------------------DataSourceExec: partitions=1, partition_sizes=[1] + +# Clean up +statement ok +DROP TABLE users; diff --git a/docs/source/user-guide/sql/select.md b/docs/source/user-guide/sql/select.md index 84aac431a6..39163cf492 100644 --- a/docs/source/user-guide/sql/select.md +++ b/docs/source/user-guide/sql/select.md @@ -35,6 +35,7 @@ DataFusion supports the following syntax for queries: [ [WHERE](#where-clause) condition ] <br/> [ [GROUP BY](#group-by-clause) grouping_element [, ...] ] <br/> [ [HAVING](#having-clause) condition] <br/> +[ [QUALIFY](#qualify-clause) condition] <br/> [ [UNION](#union-clause) [ ALL | select ] <br/> [ [ORDER BY](#order-by-clause) expression [ ASC | DESC ][, ...] ] <br/> [ [LIMIT](#limit-clause) count ] <br/> @@ -261,6 +262,14 @@ Example: SELECT a, b, MAX(c) FROM table GROUP BY a, b HAVING MAX(c) > 10 ``` +## QUALIFY clause + +Example: + +```sql +SELECT ROW_NUMBER() OVER (PARTITION BY region) AS rk FROM table QUALIFY rk > 1; +``` + ## UNION clause Example: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org