This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new bc2aa007 Support arbitrary literal in BETWEEN operator (#2567)
bc2aa007 is described below

commit bc2aa00721652658b9b042b7a7d837bb7b547261
Author: jtuglu1 <[email protected]>
AuthorDate: Sun Oct 5 12:29:03 2025 -0700

    Support arbitrary literal in BETWEEN operator (#2567)
    
    <!--
    Thanks for opening a pull request!
    -->
    
    <!-- In the case this PR will resolve an issue, please replace
    ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
    <!-- Closes #${GITHUB_ISSUE_ID} -->
    
    # Rationale for this change
    
    Want to support calling BETWEEN for any valid column types, not just
    numeric columns. This extends support for filter expressions like
    `date_col BETWEEN '2025-01-01' AND '2025-01-02'`.
    
    The `test_invalid_between` test was removed in favor of letting the type
    checks happen at evaluation time (when the literals are attempted to be
    cast to the corresponding column's type for comparison).
    
    ## Are these changes tested?
    
    Yes, tested locally applying filters to tables.
    
    ## Are there any user-facing changes?
    
    Yes, `BETWEEN` operator signature has changed, it nows supports all
    comparable column types.
    
    <!-- In the case of user-facing changes, please add the changelog label.
    -->
---
 mkdocs/docs/row-filter-syntax.md |  4 +++-
 pyiceberg/expressions/parser.py  |  3 +--
 tests/expressions/test_parser.py | 26 ++++++++++++++------------
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/mkdocs/docs/row-filter-syntax.md b/mkdocs/docs/row-filter-syntax.md
index ce3b46c0..bffb97c2 100644
--- a/mkdocs/docs/row-filter-syntax.md
+++ b/mkdocs/docs/row-filter-syntax.md
@@ -102,11 +102,13 @@ column NOT LIKE 'prefix%'
 
 ## BETWEEN
 
-The BETWEEN operator filters a numeric value against an inclusive range, e.g. 
`a between 1 and 2` is equivalent to `a >= 1 and a <= 2`.
+The BETWEEN operator filters a column against an inclusive range of two 
comparable literals, e.g. `a between 1 and 2` is equivalent to `a >= 1 and a <= 
2`.
 
 ```sql
 column BETWEEN 1 AND 2
 column BETWEEN 1.0 AND 2.0
+column BETWEEN '2025-01-01' AND '2025-01-02'
+column BETWEEN '2025-01-01T00:00:00.000000' AND '2025-01-02T12:00:00.000000'
 ```
 
 ## Logical Operations
diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index 19663638..60846791 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -107,7 +107,6 @@ boolean = one_of(["true", "false"], 
caseless=True).set_results_name("boolean")
 string = sgl_quoted_string.set_results_name("raw_quoted_string")
 decimal = common.real().set_results_name("decimal")
 integer = common.signed_integer().set_results_name("integer")
-number = common.number().set_results_name("number")
 literal = Group(string | decimal | integer | 
boolean).set_results_name("literal")
 literal_set = Group(
     DelimitedList(string) | DelimitedList(decimal) | DelimitedList(integer) | 
DelimitedList(boolean)
@@ -151,7 +150,7 @@ comparison_op = one_of(["<", "<=", ">", ">=", "=", "==", 
"!=", "<>"], caseless=T
 left_ref = column + comparison_op + literal
 right_ref = literal + comparison_op + column
 comparison = left_ref | right_ref
-between = column + BETWEEN + number + AND + number
+between = column + BETWEEN + literal + AND + literal
 
 
 @between.set_parse_action
diff --git a/tests/expressions/test_parser.py b/tests/expressions/test_parser.py
index 152ac03e..28d7cf11 100644
--- a/tests/expressions/test_parser.py
+++ b/tests/expressions/test_parser.py
@@ -42,7 +42,7 @@ from pyiceberg.expressions import (
     Reference,
     StartsWith,
 )
-from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral
+from pyiceberg.expressions.literals import DecimalLiteral, LongLiteral, literal
 
 
 def test_always_true() -> None:
@@ -241,7 +241,8 @@ def test_quoted_column_with_spaces() -> None:
     assert EqualTo("Foo Bar", "data") == parser.parse("\"Foo Bar\" = 'data'")
 
 
-def test_valid_between() -> None:
+def test_valid_between_with_numerics() -> None:
+    # numerics
     assert And(
         left=GreaterThanOrEqual(Reference(name="foo"), LongLiteral(1)),
         right=LessThanOrEqual(Reference(name="foo"), LongLiteral(3)),
@@ -254,16 +255,17 @@ def test_valid_between() -> None:
         left=GreaterThanOrEqual(Reference(name="foo"), 
DecimalLiteral(Decimal(1.0))),
         right=LessThanOrEqual(Reference(name="foo"), 
DecimalLiteral(Decimal(4.0))),
     ) == parser.parse("foo between 1.0 and 4.0")
-    assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and 
foo <= 3")
 
+    # dates
+    assert And(
+        left=GreaterThanOrEqual(Reference(name="foo"), literal("2025-05-10")),
+        right=LessThanOrEqual(Reference(name="foo"), literal("2025-05-12")),
+    ) == parser.parse("foo between '2025-05-10' and '2025-05-12'")
 
-def test_invalid_between() -> None:
-    # boolean
-    with pytest.raises(ParseException) as exc_info:
-        parser.parse("foo between true and false")
-    assert "Expected number, found 'true'" in str(exc_info)
+    # timestamps
+    assert And(
+        left=GreaterThanOrEqual(Reference(name="foo"), 
literal("2025-01-01T00:00:00.000000")),
+        right=LessThanOrEqual(Reference(name="foo"), 
literal("2025-01-10T12:00:00.000000")),
+    ) == parser.parse("foo between '2025-01-01T00:00:00.000000' and 
'2025-01-10T12:00:00.000000'")
 
-    # string
-    with pytest.raises(ParseException) as exc_info:
-        parser.parse("foo between 'a' and 'b'")
-    assert 'Expected number, found "\'"' in str(exc_info)
+    assert parser.parse("foo between 1 and 3") == parser.parse("1 <= foo and 
foo <= 3")

Reply via email to