(datafusion) branch main updated: feat: implement QUALIFY clause (#16933)

alamb Fri, 15 Aug 2025 13:52:06 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new f3941b207e feat: implement QUALIFY clause (#16933)
f3941b207e is described below

commit f3941b207eeaa7768d840e17c32fa61f3b6fca71
Author: Huaijin <haohuai...@gmail.com>
AuthorDate: Sat Aug 16 04:50:58 2025 +0800

    feat: implement QUALIFY clause (#16933)
    
    * feat: implement QUALIFY clause
    
    * add document
    
    ---------
    
    Co-authored-by: Andrew Lamb <and...@nerdnetworks.org>
---
 datafusion/sql/src/select.rs                   |  72 +++++-
 datafusion/sql/tests/sql_integration.rs        |  48 +++-
 datafusion/sqllogictest/test_files/qualify.slt | 301 +++++++++++++++++++++++++
 docs/source/user-guide/sql/select.md           |   9 +
 4 files changed, 420 insertions(+), 10 deletions(-)

diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index b50fbf6812..c7ccda7232 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -66,9 +66,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
         if !select.lateral_views.is_empty() {
             return not_impl_err!("LATERAL VIEWS");
         }
-        if select.qualify.is_some() {
-            return not_impl_err!("QUALIFY");
-        }
+
         if select.top.is_some() {
             return not_impl_err!("TOP");
         }
@@ -148,6 +146,33 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             })
             .transpose()?;
 
+        // Optionally the QUALIFY expression.
+        let qualify_expr_opt = select
+            .qualify
+            .map::<Result<Expr>, _>(|qualify_expr| {
+                let qualify_expr = self.sql_expr_to_logical_expr(
+                    qualify_expr,
+                    &combined_schema,
+                    planner_context,
+                )?;
+                // This step "dereferences" any aliases in the QUALIFY clause.
+                //
+                // This is how we support queries with QUALIFY expressions that
+                // refer to aliased columns.
+                //
+                // For example:
+                //
+                //   select row_number() over (PARTITION BY id) as rk from 
users qualify rk > 1;
+                //
+                // are rewritten as, respectively:
+                //
+                //   select row_number() over (PARTITION BY id) as rk from 
users qualify row_number() over (PARTITION BY id) > 1;
+                //
+                let qualify_expr = resolve_aliases_to_exprs(qualify_expr, 
&alias_map)?;
+                normalize_col(qualify_expr, &projected_plan)
+            })
+            .transpose()?;
+
         // The outer expressions we will search through for aggregates.
         // Aggregates may be sourced from the SELECT list or from the HAVING 
expression.
         let aggr_expr_haystack = 
select_exprs.iter().chain(having_expr_opt.iter());
@@ -225,8 +250,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             plan
         };
 
-        // Process window function
-        let window_func_exprs = find_window_exprs(&select_exprs_post_aggr);
+        // The outer expressions we will search through for window functions.
+        // Window functions may be sourced from the SELECT list or from the 
QUALIFY expression.
+        let windows_expr_haystack =
+            select_exprs_post_aggr.iter().chain(qualify_expr_opt.iter());
+        // All of the window expressions (deduplicated).
+        let window_func_exprs = find_window_exprs(windows_expr_haystack);
 
         let plan = if window_func_exprs.is_empty() {
             plan
@@ -242,6 +271,39 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             plan
         };
 
+        // Process QUALIFY clause after window functions
+        // QUALIFY filters the results of window functions, similar to how 
HAVING filters aggregates
+        let plan = if let Some(qualify_expr) = qualify_expr_opt {
+            // Validate that QUALIFY is used with window functions
+            if window_func_exprs.is_empty() {
+                return plan_err!(
+                    "QUALIFY clause requires window functions in the SELECT 
list or QUALIFY clause"
+                );
+            }
+
+            // now attempt to resolve columns and replace with fully-qualified 
columns
+            let windows_projection_exprs = window_func_exprs
+                .iter()
+                .map(|expr| resolve_columns(expr, &plan))
+                .collect::<Result<Vec<Expr>>>()?;
+
+            // Rewrite the qualify expression to reference columns from the 
window plan
+            let qualify_expr_post_window =
+                rebase_expr(&qualify_expr, &windows_projection_exprs, &plan)?;
+
+            // Validate that the qualify expression can be resolved from the 
window plan schema
+            self.validate_schema_satisfies_exprs(
+                plan.schema(),
+                std::slice::from_ref(&qualify_expr_post_window),
+            )?;
+
+            LogicalPlanBuilder::from(plan)
+                .filter(qualify_expr_post_window)?
+                .build()?
+        } else {
+            plan
+        };
+
         // Try processing unnest expression or do the final projection
         let plan = self.try_process_unnest(plan, select_exprs_post_aggr)?;
 
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index fe9ae340ce..fcd5115ae8 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -43,7 +43,7 @@ use datafusion_functions_aggregate::{
 };
 use datafusion_functions_aggregate::{average::avg_udaf, 
grouping::grouping_udaf};
 use datafusion_functions_nested::make_array::make_array_udf;
-use datafusion_functions_window::rank::rank_udwf;
+use datafusion_functions_window::{rank::rank_udwf, 
row_number::row_number_udwf};
 use insta::{allow_duplicates, assert_snapshot};
 use rstest::rstest;
 use sqlparser::dialect::{Dialect, GenericDialect, HiveDialect, MySqlDialect};
@@ -3298,6 +3298,7 @@ fn logical_plan_with_dialect_and_options(
         .with_aggregate_function(max_udaf())
         .with_aggregate_function(grouping_udaf())
         .with_window_function(rank_udwf())
+        .with_window_function(row_number_udwf())
         .with_expr_planner(Arc::new(CoreFunctionPlanner::default()));
 
     let context = MockContextProvider { state };
@@ -4186,6 +4187,47 @@ fn test_select_distinct_order_by() {
     );
 }
 
+#[test]
+fn test_select_qualify_basic() {
+    let sql = "SELECT person.id, ROW_NUMBER() OVER (PARTITION BY person.age 
ORDER BY person.id) as rn FROM person QUALIFY rn = 1";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r#"
+Projection: person.id, row_number() PARTITION BY [person.age] ORDER BY 
[person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS 
rn
+  Filter: row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS 
LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW = Int64(1)
+    WindowAggr: windowExpr=[[row_number() PARTITION BY [person.age] ORDER BY 
[person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+      TableScan: person
+"#
+    );
+}
+
+#[test]
+fn test_select_qualify_without_window_function() {
+    let sql = "SELECT person.id FROM person QUALIFY person.id > 1";
+    let err = logical_plan(sql).unwrap_err();
+    assert_eq!(
+        err.strip_backtrace(),
+        "Error during planning: QUALIFY clause requires window functions in 
the SELECT list or QUALIFY clause"
+    );
+}
+
+#[test]
+fn test_select_qualify_complex_condition() {
+    let sql = "SELECT person.id, person.age, ROW_NUMBER() OVER (PARTITION BY 
person.age ORDER BY person.id) as rn, RANK() OVER (ORDER BY person.salary) as 
rank FROM person QUALIFY rn <= 2 AND rank <= 5";
+    let plan = logical_plan(sql).unwrap();
+    assert_snapshot!(
+        plan,
+        @r#"
+Projection: person.id, person.age, row_number() PARTITION BY [person.age] 
ORDER BY [person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW AS rn, rank() ORDER BY [person.salary ASC NULLS LAST] RANGE BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW AS rank
+  Filter: row_number() PARTITION BY [person.age] ORDER BY [person.id ASC NULLS 
LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW <= Int64(2) AND rank() 
ORDER BY [person.salary ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW <= Int64(5)
+    WindowAggr: windowExpr=[[rank() ORDER BY [person.salary ASC NULLS LAST] 
RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+      WindowAggr: windowExpr=[[row_number() PARTITION BY [person.age] ORDER BY 
[person.id ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+        TableScan: person
+"#
+    );
+}
+
 #[rstest]
 #[case::select_cluster_by_unsupported(
     "SELECT customer_name, sum(order_total) as total_order_amount FROM orders 
CLUSTER BY customer_name",
@@ -4195,10 +4237,6 @@ fn test_select_distinct_order_by() {
     "SELECT id, number FROM person LATERAL VIEW explode(numbers) 
exploded_table AS number",
     "This feature is not implemented: LATERAL VIEWS"
 )]
-#[case::select_qualify_unsupported(
-    "SELECT i, p, o FROM person QUALIFY ROW_NUMBER() OVER (PARTITION BY p 
ORDER BY o) = 1",
-    "This feature is not implemented: QUALIFY"
-)]
 #[case::select_top_unsupported(
     "SELECT TOP (5) * FROM person",
     "This feature is not implemented: TOP"
diff --git a/datafusion/sqllogictest/test_files/qualify.slt 
b/datafusion/sqllogictest/test_files/qualify.slt
new file mode 100644
index 0000000000..4771fd5297
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/qualify.slt
@@ -0,0 +1,301 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## QUALIFY Clause Tests
+##########
+
+# Create test data
+statement ok
+CREATE TABLE users (
+  id INT,
+  name VARCHAR,
+  age INT,
+  salary DECIMAL(10,2),
+  dept VARCHAR
+) AS VALUES
+(1, 'Alice', 25, 50000.00, 'Engineering'),
+(2, 'Bob', 30, 60000.00, 'Engineering'),
+(3, 'Charlie', 25, 55000.00, 'Engineering'),
+(4, 'Diana', 35, 70000.00, 'Marketing'),
+(5, 'Eve', 30, 65000.00, 'Marketing'),
+(6, 'Frank', 25, 52000.00, 'Engineering'),
+(7, 'Grace', 35, 75000.00, 'Marketing'),
+(8, 'Henry', 30, 62000.00, 'Engineering');
+
+# Basic QUALIFY with ROW_NUMBER
+query ITI
+SELECT id, name, ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as 
rn 
+FROM users 
+QUALIFY rn = 1
+ORDER BY dept, id;
+----
+8 Henry 1
+7 Grace 1
+
+# QUALIFY with RANK
+query ITI
+SELECT id, name, RANK() OVER (ORDER BY salary DESC) as rank 
+FROM users 
+QUALIFY rank <= 3
+ORDER BY rank, id;
+----
+7 Grace 1
+4 Diana 2
+5 Eve 3
+
+# QUALIFY with DENSE_RANK
+query ITI
+SELECT id, name, DENSE_RANK() OVER (PARTITION BY dept ORDER BY age) as 
dense_rank 
+FROM users 
+QUALIFY dense_rank <= 2
+ORDER BY dept, dense_rank, id;
+----
+1 Alice 1
+3 Charlie 1
+6 Frank 1
+2 Bob 2
+8 Henry 2
+5 Eve 1
+4 Diana 2
+7 Grace 2
+
+# QUALIFY with complex condition
+query ITII
+SELECT id, name, ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as 
rn,
+       RANK() OVER (ORDER BY age) as age_rank
+FROM users 
+QUALIFY rn <= 2 AND age_rank <= 5
+ORDER BY dept, rn, id;
+----
+8 Henry 1 4
+2 Bob 2 4
+
+# QUALIFY with LAG function
+query ITRR
+SELECT id, name, salary, LAG(salary) OVER (PARTITION BY dept ORDER BY id) as 
prev_salary
+FROM users 
+QUALIFY prev_salary IS NOT NULL AND salary > prev_salary
+ORDER BY dept, id;
+----
+2 Bob 60000 50000
+8 Henry 62000 52000
+7 Grace 75000 65000
+
+# QUALIFY with LEAD function
+query ITRR
+SELECT id, name, salary, LEAD(salary) OVER (PARTITION BY dept ORDER BY id) as 
next_salary
+FROM users 
+QUALIFY next_salary IS NOT NULL AND salary < next_salary
+ORDER BY dept, id;
+----
+1 Alice 50000 60000
+6 Frank 52000 62000
+5 Eve 65000 75000
+
+# QUALIFY with NTILE
+query ITI
+SELECT id, name, NTILE(3) OVER (PARTITION BY dept ORDER BY salary DESC) as tile
+FROM users 
+QUALIFY tile = 1
+ORDER BY dept, id;
+----
+2 Bob 1
+8 Henry 1
+7 Grace 1
+
+# QUALIFY with PERCENT_RANK
+query ITR
+SELECT id, name, PERCENT_RANK() OVER (PARTITION BY dept ORDER BY salary) as 
pct_rank
+FROM users 
+QUALIFY pct_rank >= 0.5
+ORDER BY dept, pct_rank, id;
+----
+3 Charlie 0.5
+2 Bob 0.75
+8 Henry 1
+4 Diana 0.5
+7 Grace 1
+
+# QUALIFY with CUME_DIST
+query ITR
+SELECT id, name, CUME_DIST() OVER (PARTITION BY dept ORDER BY age) as cume_dist
+FROM users 
+QUALIFY cume_dist >= 0.75
+ORDER BY dept, cume_dist, id;
+----
+2 Bob 1
+8 Henry 1
+4 Diana 1
+7 Grace 1
+
+# QUALIFY with multiple window functions
+query ITIII
+SELECT id, name, 
+       ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as rn,
+       RANK() OVER (ORDER BY age) as age_rank,
+       DENSE_RANK() OVER (PARTITION BY dept ORDER BY age) as dept_age_rank
+FROM users 
+QUALIFY rn <= 2 AND age_rank <= 4 AND dept_age_rank <= 2
+ORDER BY dept, rn, id;
+----
+8 Henry 1 4 2
+2 Bob 2 4 2
+
+# QUALIFY with arithmetic expressions
+query ITRI
+SELECT id, name, salary, 
+       ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) as rn
+FROM users 
+QUALIFY rn = 1 AND salary > 60000
+ORDER BY dept, id;
+----
+8 Henry 62000 1
+7 Grace 75000 1
+
+# QUALIFY with string functions
+query ITI
+SELECT id, name, 
+       ROW_NUMBER() OVER (PARTITION BY dept ORDER BY name) as rn
+FROM users 
+QUALIFY rn = 1
+ORDER BY dept, id;
+----
+1 Alice 1
+4 Diana 1
+
+# window function with aggregate function
+query ITI
+SELECT id, name, COUNT(*) OVER (PARTITION BY dept) as cnt
+FROM users 
+QUALIFY cnt > 4
+ORDER BY dept, id;
+----
+1 Alice 5
+2 Bob 5
+3 Charlie 5
+6 Frank 5
+8 Henry 5
+
+# QUALIFY with HAVING
+query TR
+SELECT dept, AVG(salary) OVER (PARTITION BY dept) as r
+FROM users
+WHERE salary > 5000
+GROUP BY dept, salary
+HAVING SUM(salary) > 20000
+QUALIFY r > 60000 
+----
+Marketing 70000
+Marketing 70000
+Marketing 70000
+
+# Error: QUALIFY without window functions
+query error
+SELECT id, name FROM users QUALIFY id > 1;
+
+# Window function in QUALIFY
+query IT
+SELECT id, name FROM users QUALIFY COUNT(*) OVER () > 1 ORDER BY id;
+----
+1 Alice
+2 Bob
+3 Charlie
+4 Diana
+5 Eve
+6 Frank
+7 Grace
+8 Henry
+
+# verify the logical plan and physical plan
+query TT
+EXPLAIN SELECT id, name FROM users QUALIFY COUNT(*) OVER () > 1 ORDER BY id;
+----
+logical_plan
+01)Sort: users.id ASC NULLS LAST
+02)--Projection: users.id, users.name
+03)----Filter: count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED 
FOLLOWING > Int64(1)
+04)------WindowAggr: windowExpr=[[count(Int64(1)) ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING]]
+05)--------TableScan: users projection=[id, name]
+physical_plan
+01)SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false]
+02)--CoalesceBatchesExec: target_batch_size=8192
+03)----FilterExec: count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING@2 > 1, projection=[id@0, name@1]
+04)------WindowAggExec: wdw=[count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING 
AND UNBOUNDED FOLLOWING: Ok(Field { name: "count(Int64(1)) ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: 
false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame 
{ units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: 
Following(UInt64(NULL)), is_causal: false }]
+05)--------DataSourceExec: partitions=1, partition_sizes=[1]
+
+# plan row_number()
+query TT
+explain select row_number() over (PARTITION BY dept) as rk from users qualify 
rk > 1;
+----
+logical_plan
+01)Projection: row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING AS rk
+02)--Filter: row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING > UInt64(1)
+03)----Projection: row_number() PARTITION BY [users.dept] ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+04)------WindowAggr: windowExpr=[[row_number() PARTITION BY [users.dept] ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+05)--------TableScan: users projection=[dept]
+physical_plan
+01)ProjectionExec: expr=[row_number() PARTITION BY [users.dept] ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@0 as rk]
+02)--CoalesceBatchesExec: target_batch_size=8192
+03)----FilterExec: row_number() PARTITION BY [users.dept] ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@0 > 1
+04)------ProjectionExec: expr=[row_number() PARTITION BY [users.dept] ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as row_number() PARTITION 
BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]
+05)--------BoundedWindowAggExec: wdw=[row_number() PARTITION BY [users.dept] 
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Field { name: 
"row_number() PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND 
UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING 
AND UNBOUNDED FOLLOWING], mode=[Sorted]
+06)----------SortExec: expr=[dept@0 ASC NULLS LAST], 
preserve_partitioning=[false]
+07)------------DataSourceExec: partitions=1, partition_sizes=[1]
+
+# plan with window function and group by
+query TT
+EXPLAIN SELECT dept, AVG(salary) OVER (PARTITION BY dept) as r
+FROM users
+WHERE salary > 5000
+GROUP BY dept, salary
+HAVING SUM(salary) > 20000
+QUALIFY r > 60000
+----
+logical_plan
+01)Projection: users.dept, avg(users.salary) PARTITION BY [users.dept] ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS r
+02)--Filter: avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING > Decimal128(Some(60000000000),14,6)
+03)----Projection: users.dept, avg(users.salary) PARTITION BY [users.dept] 
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+04)------WindowAggr: windowExpr=[[avg(users.salary) PARTITION BY [users.dept] 
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+05)--------Projection: users.dept, users.salary
+06)----------Filter: sum(users.salary) > Decimal128(Some(2000000),20,2)
+07)------------Aggregate: groupBy=[[users.dept, users.salary]], 
aggr=[[sum(users.salary)]]
+08)--------------Filter: users.salary > Decimal128(Some(500000),10,2)
+09)----------------TableScan: users projection=[salary, dept]
+physical_plan
+01)ProjectionExec: expr=[dept@0 as dept, avg(users.salary) PARTITION BY 
[users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as r]
+02)--CoalesceBatchesExec: target_batch_size=8192
+03)----FilterExec: avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 > Some(60000000000),14,6
+04)------ProjectionExec: expr=[dept@0 as dept, avg(users.salary) PARTITION BY 
[users.dept] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as 
avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING 
AND UNBOUNDED FOLLOWING]
+05)--------WindowAggExec: wdw=[avg(users.salary) PARTITION BY [users.dept] 
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: 
"avg(users.salary) PARTITION BY [users.dept] ROWS BETWEEN UNBOUNDED PRECEDING 
AND UNBOUNDED FOLLOWING", data_type: Decimal128(14, 6), nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { 
units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: 
Following(UInt64(NULL)), is_causal: false }]
+06)----------SortExec: expr=[dept@0 ASC NULLS LAST], 
preserve_partitioning=[true]
+07)------------CoalesceBatchesExec: target_batch_size=8192
+08)--------------RepartitionExec: partitioning=Hash([dept@0], 4), 
input_partitions=4
+09)----------------CoalesceBatchesExec: target_batch_size=8192
+10)------------------FilterExec: sum(users.salary)@2 > Some(2000000),20,2, 
projection=[dept@0, salary@1]
+11)--------------------AggregateExec: mode=FinalPartitioned, gby=[dept@0 as 
dept, salary@1 as salary], aggr=[sum(users.salary)]
+12)----------------------CoalesceBatchesExec: target_batch_size=8192
+13)------------------------RepartitionExec: partitioning=Hash([dept@0, 
salary@1], 4), input_partitions=4
+14)--------------------------AggregateExec: mode=Partial, gby=[dept@1 as dept, 
salary@0 as salary], aggr=[sum(users.salary)]
+15)----------------------------RepartitionExec: 
partitioning=RoundRobinBatch(4), input_partitions=1
+16)------------------------------CoalesceBatchesExec: target_batch_size=8192
+17)--------------------------------FilterExec: salary@0 > Some(500000),10,2
+18)----------------------------------DataSourceExec: partitions=1, 
partition_sizes=[1]
+
+# Clean up
+statement ok
+DROP TABLE users; 
diff --git a/docs/source/user-guide/sql/select.md 
b/docs/source/user-guide/sql/select.md
index 84aac431a6..39163cf492 100644
--- a/docs/source/user-guide/sql/select.md
+++ b/docs/source/user-guide/sql/select.md
@@ -35,6 +35,7 @@ DataFusion supports the following syntax for queries:
 [ [WHERE](#where-clause) condition ] <br/>
 [ [GROUP BY](#group-by-clause) grouping_element [, ...] ] <br/>
 [ [HAVING](#having-clause) condition] <br/>
+[ [QUALIFY](#qualify-clause) condition] <br/>
 [ [UNION](#union-clause) [ ALL | select ] <br/>
 [ [ORDER BY](#order-by-clause) expression [ ASC | DESC ][, ...] ] <br/>
 [ [LIMIT](#limit-clause) count ] <br/>
@@ -261,6 +262,14 @@ Example:
 SELECT a, b, MAX(c) FROM table GROUP BY a, b HAVING MAX(c) > 10
 ```
 
+## QUALIFY clause
+
+Example:
+
+```sql
+SELECT ROW_NUMBER() OVER (PARTITION BY region) AS rk FROM table QUALIFY rk > 1;
+```
+
 ## UNION clause
 
 Example:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

(datafusion) branch main updated: feat: implement QUALIFY clause (#16933)

Reply via email to