This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 360175aec4 Add operator section to user guide, Add `std::ops`
operations to `prelude`, and add `not()` expr_fn (#7732)
360175aec4 is described below
commit 360175aec4f5dfa43f6d1ab2f4665a4bd397ae83
Author: Chih Wang <[email protected]>
AuthorDate: Wed Oct 18 04:16:40 2023 +0800
Add operator section to user guide, Add `std::ops` operations to `prelude`,
and add `not()` expr_fn (#7732)
* Add logical tests of Expr and logical not function
* Re-export std::ops::* in prelude
* Update the Expression API documentation, mainly covering boolean, bitwise,
comparison, and arithmetic expressions
---
datafusion/core/src/prelude.rs | 5 ++
datafusion/expr/src/expr.rs | 36 +++++++++
datafusion/expr/src/expr_fn.rs | 6 ++
datafusion/expr/src/operator.rs | 1 +
docs/source/conf.py | 2 +-
docs/source/user-guide/expressions.md | 144 ++++++++++++++++++++--------------
6 files changed, 136 insertions(+), 58 deletions(-)
diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs
index 3782feca19..7689468e5d 100644
--- a/datafusion/core/src/prelude.rs
+++ b/datafusion/core/src/prelude.rs
@@ -38,3 +38,8 @@ pub use datafusion_expr::{
logical_plan::{JoinType, Partitioning},
Expr,
};
+
+pub use std::ops::Not;
+pub use std::ops::{Add, Div, Mul, Neg, Rem, Sub};
+pub use std::ops::{BitAnd, BitOr, BitXor};
+pub use std::ops::{Shl, Shr};
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 0b166107fb..239a318850 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1702,4 +1702,40 @@ mod test {
Ok(())
}
+
+ #[test]
+ fn test_logical_ops() {
+ assert_eq!(
+ format!("{}", lit(1u32).eq(lit(2u32))),
+ "UInt32(1) = UInt32(2)"
+ );
+ assert_eq!(
+ format!("{}", lit(1u32).not_eq(lit(2u32))),
+ "UInt32(1) != UInt32(2)"
+ );
+ assert_eq!(
+ format!("{}", lit(1u32).gt(lit(2u32))),
+ "UInt32(1) > UInt32(2)"
+ );
+ assert_eq!(
+ format!("{}", lit(1u32).gt_eq(lit(2u32))),
+ "UInt32(1) >= UInt32(2)"
+ );
+ assert_eq!(
+ format!("{}", lit(1u32).lt(lit(2u32))),
+ "UInt32(1) < UInt32(2)"
+ );
+ assert_eq!(
+ format!("{}", lit(1u32).lt_eq(lit(2u32))),
+ "UInt32(1) <= UInt32(2)"
+ );
+ assert_eq!(
+ format!("{}", lit(1u32).and(lit(2u32))),
+ "UInt32(1) AND UInt32(2)"
+ );
+ assert_eq!(
+ format!("{}", lit(1u32).or(lit(2u32))),
+ "UInt32(1) OR UInt32(2)"
+ );
+ }
}
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index 79a43c2353..5368a2d8a2 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -31,6 +31,7 @@ use crate::{
};
use arrow::datatypes::DataType;
use datafusion_common::{Column, Result};
+use std::ops::Not;
use std::sync::Arc;
/// Create a column expression based on a qualified or unqualified column
name. Will
@@ -121,6 +122,11 @@ pub fn or(left: Expr, right: Expr) -> Expr {
))
}
+/// Return a new expression with a logical NOT
+pub fn not(expr: Expr) -> Expr {
+ expr.not()
+}
+
/// Create an expression to represent the min() aggregate function
pub fn min(expr: Expr) -> Expr {
Expr::AggregateFunction(AggregateFunction::new(
diff --git a/datafusion/expr/src/operator.rs b/datafusion/expr/src/operator.rs
index 112e29082d..1790f14789 100644
--- a/datafusion/expr/src/operator.rs
+++ b/datafusion/expr/src/operator.rs
@@ -363,6 +363,7 @@ impl ops::Neg for Expr {
}
}
+/// Support `NOT <expr>` fluent style
impl Not for Expr {
type Output = Self;
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 9aa84d49bc..3fa6c6091d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -118,4 +118,4 @@ html_sidebars = {
myst_heading_anchors = 3
# enable nice rendering of checkboxes for the task lists
-myst_enable_extensions = [ "tasklist"]
+myst_enable_extensions = ["colon_fence", "deflist", "tasklist"]
diff --git a/docs/source/user-guide/expressions.md
b/docs/source/user-guide/expressions.md
index dbd8c814b4..28104dbfd4 100644
--- a/docs/source/user-guide/expressions.md
+++ b/docs/source/user-guide/expressions.md
@@ -22,60 +22,99 @@
DataFrame methods such as `select` and `filter` accept one or more logical
expressions and there are many functions
available for creating logical expressions. These are documented below.
-Expressions can be chained together using a fluent-style API:
+:::{tip}
+Most functions and methods may receive and return an `Expr`, which can be
chained together using a fluent-style API:
```rust
// create the expression `(a > 6) AND (b < 7)`
col("a").gt(lit(6)).and(col("b").lt(lit(7)))
```
+:::
+
## Identifiers
-| Function | Notes |
-| -------- | -------------------------------------------- |
-| col | Reference a column in a dataframe `col("a")` |
+| Syntax | Description |
+| ---------- | -------------------------------------------- |
+| col(ident) | Reference a column in a dataframe `col("a")` |
+
+:::{note}
+ident
+: A type which implement `Into<Column>` trait
+:::
## Literal Values
-| Function | Notes |
-| -------- | -------------------------------------------------- |
-| lit | Literal value such as `lit(123)` or `lit("hello")` |
+| Syntax | Description |
+| ---------- | -------------------------------------------------- |
+| lit(value) | Literal value such as `lit(123)` or `lit("hello")` |
+
+:::{note}
+value
+: A type which implement `Literal`
+:::
## Boolean Expressions
-| Function | Notes |
-| -------- | ----------------------------------------- |
-| and | `and(expr1, expr2)` or `expr1.and(expr2)` |
-| or | `or(expr1, expr2)` or `expr1.or(expr2)` |
-| not | `not(expr)` or `expr.not()` |
+| Syntax | Description |
+| ------------------- | ----------- |
+| and(x, y), x.and(y) | Logical AND |
+| or(x, y), x.or(y) | Logical OR |
+| !x, not(x), x.not() | Logical NOT |
+
+:::{note}
+`!` is a bitwise or logical complement operator in Rust, but it only works as
a logical NOT in expression API.
+:::
+
+:::{note}
+Since `&&` and `||` are existed as logical operators in Rust, but those are
not overloadable and not works with expression API.
+:::
-## Bitwise expressions
+## Bitwise Expressions
-| Function | Notes
|
-| ------------------- |
------------------------------------------------------------------------- |
-| bitwise_and | `bitwise_and(expr1, expr2)` or
`expr1.bitwise_and(expr2)` |
-| bitwise_or | `bitwise_or(expr1, expr2)` or
`expr1.bitwise_or(expr2)` |
-| bitwise_xor | `bitwise_xor(expr1, expr2)` or
`expr1.bitwise_xor(expr2)` |
-| bitwise_shift_right | `bitwise_shift_right(expr1, expr2)` or
`expr1.bitwise_shift_right(expr2)` |
-| bitwise_shift_left | `bitwise_shift_left(expr1, expr2)` or
`expr1.bitwise_shift_left(expr2)` |
+| Syntax | Description |
+| ------------------------------------------- | ----------- |
+| x & y, bitwise_and(x, y), x.bitand(y) | AND |
+| x \| y, bitwise_or(x, y), x.bitor(y) | OR |
+| x ^ y, bitwise_xor(x, y), x.bitxor(y) | XOR |
+| x << y, bitwise_shift_left(x, y), x.shl(y) | Left shift |
+| x >> y, bitwise_shift_right(x, y), x.shr(y) | Right shift |
## Comparison Expressions
-| Function | Notes |
-| -------- | --------------------- |
-| eq | `expr1.eq(expr2)` |
-| gt | `expr1.gt(expr2)` |
-| gt_eq | `expr1.gt_eq(expr2)` |
-| lt | `expr1.lt(expr2)` |
-| lt_eq | `expr1.lt_eq(expr2)` |
-| not_eq | `expr1.not_eq(expr2)` |
+| Syntax | Description |
+| ----------- | --------------------- |
+| x.eq(y) | Equal |
+| x.not_eq(y) | Not Equal |
+| x.gt(y) | Greater Than |
+| x.gt_eq(y) | Greater Than or Equal |
+| x.lt(y) | Less Than |
+| x.lt_eq(y) | Less Than or Equal |
+
+:::{note}
+Comparison operators (`<`, `<=`, `==`, `>=`, `>`) could be overloaded by the
`PartialOrd` and `PartialEq` trait in Rust,
+but these operators always return a `bool` which makes them not work with the
expression API.
+:::
+
+## Arithmetic Expressions
+
+| Syntax | Description |
+| ---------------- | -------------- |
+| x + y, x.add(y) | Addition |
+| x - y, x.sub(y) | Subtraction |
+| x \* y, x.mul(y) | Multiplication |
+| x / y, x.div(y) | Division |
+| x % y, x.rem(y) | Remainder |
+| -x, x.neg() | Negation |
+
+:::{note}
+In Rust, the keyword `mod` is reserved and cannot be used as an identifier.
+To avoid any conflicts and ensure code completion works smoothly, we use
`mod_` instead.
+:::
## Math Functions
-In addition to the math functions listed here, some Rust operators are
implemented for expressions, allowing
-expressions such as `col("a") + col("b")` to be used.
-
-| Function | Notes |
+| Syntax | Description |
| --------------------- | ------------------------------------------------- |
| abs(x) | absolute value |
| acos(x) | inverse cosine |
@@ -114,11 +153,10 @@ expressions such as `col("a") + col("b")` to be used.
| tanh(x) | hyperbolic tangent |
| trunc(x) | truncate toward zero |
-### Math functions usage notes:
-
+:::{note}
Unlike to some databases the math functions in Datafusion works the same way
as Rust math functions, avoiding failing on corner cases e.g
-```
+```sql
❯ select log(-1), log(0), sqrt(-1);
+----------------+---------------+-----------------+
| log(Int64(-1)) | log(Int64(0)) | sqrt(Int64(-1)) |
@@ -127,27 +165,19 @@ Unlike to some databases the math functions in Datafusion
works the same way as
+----------------+---------------+-----------------+
```
-## Bitwise Operators
-
-| Operator | Notes |
-| -------- | ----------------------------------------------- |
-| & | Bitwise AND => `(expr1 & expr2)` |
-| | | Bitwise OR => <code>(expr1 | expr2)</code> |
-| # | Bitwise XOR => `(expr1 # expr2)` |
-| << | Bitwise left shift => `(expr1 << expr2)` |
-| >> | Bitwise right shift => `(expr1 << expr2)` |
+:::
## Conditional Expressions
-| Function | Notes
[...]
-| -------- |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[...]
-| coalesce | Returns the first of its arguments that is not null. Null is
returned only if all arguments are null. It is often used to substitute a
default value for null values when data is retrieved for display.
[...]
-| case | CASE expression. The expression may chain multiple `when`
expressions and end with an `end` or `otherwise` expression. Example:</br>
<pre><code>case(col("a") % lit(3))</br> .when(lit(0),
lit("A"))</br> .when(lit(1),
lit("B"))</br> .when(lit(2),
lit("C"))</br> .end()</code></pre>or, end with
`otherwise` to match any other conditions:
<pre><code>case(col("b").gt(lit(100)))</br> &nb [...]
-| nullif | Returns a null value if `value1` equals `value2`; otherwise it
returns `value1`. This can be used to perform the inverse operation of the
`coalesce` expression.
[...]
+| Syntax
| Description
[...]
+|
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[...]
+| coalesce([value, ...])
| Returns the first of its arguments that is not
null. Null is returned only if all arguments are null. It is often used to
substitute a default value for null values when data is retrieved for display.
[...]
+|
case(expr)</br> .when(expr)</br> .end(),</br>case(expr)</br> .when(expr)</br> .otherwise(expr)
| CASE expression. The expression may chain multiple `when` expressions and
end with an `end` or `otherwise` expression. Example:</br>
<pre><code>case(col("a") % lit(3))</br> .when(lit(0),
lit("A"))</br> .when(lit(1),
lit("B"))</br> . [...]
+| nullif(value1, value2)
| Returns a null value if `value1` equals
`value2`; otherwise it returns `value1`. This can be used to perform the
inverse operation of the `coalesce` expression.
[...]
## String Expressions
-| Function | Notes
|
+| Syntax | Description
|
| ---------------------------------------------- |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| ascii(character) | Returns a numeric
representation of the character (`character`). Example: `ascii('a') -> 97`
|
| bit_length(text) | Returns the length of the
string (`text`) in bits. Example: `bit_length('spider') -> 48`
|
@@ -182,7 +212,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
## Array Expressions
-| Function | Notes
|
+| Syntax | Description
|
| ------------------------------------- |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
| array_append(array, element) | Appends an element to the end of an
array. `array_append([1, 2, 3], 4) -> [1, 2, 3, 4]`
|
| array_concat(array[, ..., array_n]) | Concatenates arrays.
`array_concat([1, 2, 3], [4, 5, 6]) -> [1, 2, 3, 4, 5, 6]`
|
@@ -213,14 +243,14 @@ Unlike to some databases the math functions in Datafusion
works the same way as
## Regular Expressions
-| Function | Notes
|
+| Syntax | Description
|
| -------------- |
----------------------------------------------------------------------------- |
| regexp_match | Matches a regular expression against a string and returns
matched substrings. |
| regexp_replace | Replaces strings that match a regular expression
|
## Temporal Expressions
-| Function | Notes
|
+| Syntax | Description
|
| -------------------- |
------------------------------------------------------ |
| date_part | Extracts a subfield from the date.
|
| date_trunc | Truncates the date to a specified level of precision.
|
@@ -233,7 +263,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
## Other Expressions
-| Function | Notes
|
+| Syntax | Description
|
| ---------------------------- |
----------------------------------------------------------------------------------------------------------
|
| array([value1, ...]) | Returns an array of fixed size with each
argument (`[value1, ...]`) on it. |
| in_list(expr, list, negated) | Returns `true` if (`expr`) belongs or not
belongs (`negated`) to a list (`list`), otherwise returns false. |
@@ -246,7 +276,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
## Aggregate Functions
-| Function | Notes
|
+| Syntax |
Description
|
| ----------------------------------------------------------------- |
---------------------------------------------------------------------------------------
|
| avg(expr) |
Сalculates the average value for `expr`.
|
| approx_distinct(expr) |
Calculates an approximate count of the number of distinct values for `expr`.
|
@@ -270,7 +300,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
## Subquery Expressions
-| Function | Notes
|
+| Syntax | Description
|
| --------------- |
---------------------------------------------------------------------------------------------
|
| exists | Creates an `EXISTS` subquery expression
|
| in_subquery | `df1.filter(in_subquery(col("foo"), df2))?` is the
equivalent of the SQL `WHERE foo IN <df2>` |
@@ -280,7 +310,7 @@ Unlike to some databases the math functions in Datafusion
works the same way as
## User-Defined Function Expressions
-| Function | Notes
|
+| Syntax | Description
|
| ----------- |
------------------------------------------------------------------------- |
| create_udf | Creates a new UDF with a specific signature and specific
return type. |
| create_udaf | Creates a new UDAF with a specific signature, state type and
return type. |