This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 15045a8849 Introduce `Expr::is_volatile()`, adjust
`TreeNode::exists()` (#10191)
15045a8849 is described below
commit 15045a8849e80327de68b1ec59e4a0644f05582c
Author: Peter Toth <[email protected]>
AuthorDate: Tue Apr 23 20:08:31 2024 +0200
Introduce `Expr::is_volatile()`, adjust `TreeNode::exists()` (#10191)
---
datafusion/common/src/tree_node.rs | 7 +++----
datafusion/expr/src/expr.rs | 20 ++++++++++----------
datafusion/optimizer/src/common_subexpr_eliminate.rs | 5 ++---
datafusion/optimizer/src/push_down_filter.rs | 5 +----
datafusion/optimizer/src/utils.rs | 16 ----------------
5 files changed, 16 insertions(+), 37 deletions(-)
diff --git a/datafusion/common/src/tree_node.rs
b/datafusion/common/src/tree_node.rs
index 7003f5ac7f..43026f3a92 100644
--- a/datafusion/common/src/tree_node.rs
+++ b/datafusion/common/src/tree_node.rs
@@ -405,18 +405,17 @@ pub trait TreeNode: Sized {
/// Returns true if `f` returns true for any node in the tree.
///
/// Stops recursion as soon as a matching node is found
- fn exists<F: FnMut(&Self) -> bool>(&self, mut f: F) -> bool {
+ fn exists<F: FnMut(&Self) -> Result<bool>>(&self, mut f: F) ->
Result<bool> {
let mut found = false;
self.apply(|n| {
- Ok(if f(n) {
+ Ok(if f(n)? {
found = true;
TreeNodeRecursion::Stop
} else {
TreeNodeRecursion::Continue
})
})
- .unwrap();
- found
+ .map(|_| found)
}
/// Low-level API used to implement other APIs.
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 6f76936806..ea2cfeafe6 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1271,7 +1271,16 @@ impl Expr {
/// Return true when the expression contains out reference(correlated)
expressions.
pub fn contains_outer(&self) -> bool {
- self.exists(|expr| matches!(expr, Expr::OuterReferenceColumn { .. }))
+ self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { ..
})))
+ .unwrap()
+ }
+
+ /// Returns true if the expression is volatile, i.e. whether it can return
different
+ /// results when evaluated multiple times with the same input.
+ pub fn is_volatile(&self) -> Result<bool> {
+ self.exists(|expr| {
+ Ok(matches!(expr, Expr::ScalarFunction(func) if
func.func_def.is_volatile()?))
+ })
}
/// Recursively find all [`Expr::Placeholder`] expressions, and
@@ -1931,15 +1940,6 @@ fn create_names(exprs: &[Expr]) -> Result<String> {
.join(", "))
}
-/// Whether the given expression is volatile, i.e. whether it can return
different results
-/// when evaluated multiple times with the same input.
-pub fn is_volatile(expr: &Expr) -> Result<bool> {
- match expr {
- Expr::ScalarFunction(func) => func.func_def.is_volatile(),
- _ => Ok(false),
- }
-}
-
#[cfg(test)]
mod test {
use crate::expr::Cast;
diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs
b/datafusion/optimizer/src/common_subexpr_eliminate.rs
index b859dda9d5..081d9c2505 100644
--- a/datafusion/optimizer/src/common_subexpr_eliminate.rs
+++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs
@@ -21,7 +21,6 @@ use std::collections::hash_map::Entry;
use std::collections::{BTreeSet, HashMap};
use std::sync::Arc;
-use crate::utils::is_volatile_expression;
use crate::{utils, OptimizerConfig, OptimizerRule};
use arrow::datatypes::{DataType, Field};
@@ -661,7 +660,7 @@ impl TreeNodeVisitor for ExprIdentifierVisitor<'_> {
fn f_down(&mut self, expr: &Expr) -> Result<TreeNodeRecursion> {
// related to https://github.com/apache/datafusion/issues/8814
// If the expr contain volatile expression or is a short-circuit
expression, skip it.
- if expr.short_circuits() || is_volatile_expression(expr)? {
+ if expr.short_circuits() || expr.is_volatile()? {
self.visit_stack
.push(VisitRecord::JumpMark(self.node_count));
return Ok(TreeNodeRecursion::Jump); // go to f_up
@@ -717,7 +716,7 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_> {
// The `CommonSubexprRewriter` relies on `ExprIdentifierVisitor` to
generate
// the `id_array`, which records the expr's identifier used to rewrite
expr. So if we
// skip an expr in `ExprIdentifierVisitor`, we should skip it here,
too.
- if expr.short_circuits() || is_volatile_expression(&expr)? {
+ if expr.short_circuits() || expr.is_volatile()? {
return Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump));
}
diff --git a/datafusion/optimizer/src/push_down_filter.rs
b/datafusion/optimizer/src/push_down_filter.rs
index 950932f479..e1561ad9d6 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -18,7 +18,6 @@ use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use crate::optimizer::ApplyOrder;
-use crate::utils::is_volatile_expression;
use crate::{OptimizerConfig, OptimizerRule};
use datafusion_common::tree_node::{
@@ -705,9 +704,7 @@ impl OptimizerRule for PushDownFilter {
(qualified_name(qualifier, field.name()), expr)
})
- .partition(|(_, value)| {
- is_volatile_expression(value).unwrap_or(true)
- });
+ .partition(|(_, value)|
value.is_volatile().unwrap_or(true));
let mut push_predicates = vec![];
let mut keep_predicates = vec![];
diff --git a/datafusion/optimizer/src/utils.rs
b/datafusion/optimizer/src/utils.rs
index aad89be7db..1c20501da5 100644
--- a/datafusion/optimizer/src/utils.rs
+++ b/datafusion/optimizer/src/utils.rs
@@ -21,9 +21,7 @@ use std::collections::{BTreeSet, HashMap};
use crate::{OptimizerConfig, OptimizerRule};
-use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
use datafusion_common::{Column, DFSchema, DFSchemaRef, Result};
-use datafusion_expr::expr::is_volatile;
use datafusion_expr::expr_rewriter::replace_col;
use datafusion_expr::utils as expr_utils;
use datafusion_expr::{logical_plan::LogicalPlan, Expr, Operator};
@@ -97,20 +95,6 @@ pub fn log_plan(description: &str, plan: &LogicalPlan) {
trace!("{description}::\n{}\n", plan.display_indent_schema());
}
-/// check whether the expression is volatile predicates
-pub(crate) fn is_volatile_expression(e: &Expr) -> Result<bool> {
- let mut is_volatile_expr = false;
- e.apply(|expr| {
- Ok(if is_volatile(expr)? {
- is_volatile_expr = true;
- TreeNodeRecursion::Stop
- } else {
- TreeNodeRecursion::Continue
- })
- })?;
- Ok(is_volatile_expr)
-}
-
/// Splits a conjunctive [`Expr`] such as `A AND B AND C` => `[A, B, C]`
///
/// See [`split_conjunction_owned`] for more details and an example.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]