Re: [PR] Update like statements to reflect sql behaviors [iceberg-python]

via GitHub Sat, 21 Oct 2023 15:27:52 -0700


Fokko commented on code in PR #91:
URL: https://github.com/apache/iceberg-python/pull/91#discussion_r1367803748



##########
tests/expressions/test_parser.py:
##########
@@ -168,12 +168,30 @@ def test_multiple_and_or() -> None:
     ) == parser.parse("foo is not null and foo < 5 or (foo > 10 and foo < 100 
and bar is null)")
 
 
+def test_like_equality() -> None:
+    assert EqualTo("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert EqualTo("foo", "data%") == parser.parse("foo LIKE 'data\\%'")
+
+
 def test_starts_with() -> None:
-    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data%'")
+    assert StartsWith("foo", "some % data") == parser.parse("foo LIKE 'some 
\\% data%'")
+    assert StartsWith("foo", "some data%") == parser.parse("foo LIKE 'some 
data\\%%'")
+
+
+def test_invalid_likes() -> None:
+    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'" "foo LIKE 
'%data'"]
+
+    for statement in invalid_statements:
+        with pytest.raises(ValueError) as exc_info:
+            parser.parse(statement)
+
+        assert "LIKE expressions only supports wildcard, '%', at the end of a 
string" in str(exc_info)

Review Comment:
   Nice one, this was one of my concerns 👍 



##########
tests/expressions/test_parser.py:
##########
@@ -168,12 +168,30 @@ def test_multiple_and_or() -> None:
     ) == parser.parse("foo is not null and foo < 5 or (foo > 10 and foo < 100 
and bar is null)")
 
 
+def test_like_equality() -> None:
+    assert EqualTo("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert EqualTo("foo", "data%") == parser.parse("foo LIKE 'data\\%'")
+
+
 def test_starts_with() -> None:
-    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data%'")
+    assert StartsWith("foo", "some % data") == parser.parse("foo LIKE 'some 
\\% data%'")
+    assert StartsWith("foo", "some data%") == parser.parse("foo LIKE 'some 
data\\%%'")
+
+
+def test_invalid_likes() -> None:
+    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'" "foo LIKE 
'%data'"]
+
+    for statement in invalid_statements:
+        with pytest.raises(ValueError) as exc_info:
+            parser.parse(statement)
+
+        assert "LIKE expressions only supports wildcard, '%', at the end of a 
string" in str(exc_info)

Review Comment:
   Nice one, this was one of my concerns 👍 



##########
pyiceberg/expressions/parser.py:
##########
@@ -217,12 +219,25 @@ def _(result: ParseResults) -> BooleanExpression:
 
 @starts_with.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
-    return StartsWith(result.column, result.raw_quoted_string)
+    return _evaluate_like_statement(result)
 
 
 @not_starts_with.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
-    return NotStartsWith(result.column, result.raw_quoted_string)
+    return _evaluate_like_statement(result).__invert__()

Review Comment:
   The `__function__` of not used explicitly in Python. For example, instead of 
`__str__()`, it is more pythonic to use `str(obj)`.
   
   ```suggestion
       return ~_evaluate_like_statement(result)
   ```



##########
tests/expressions/test_parser.py:
##########
@@ -168,12 +168,30 @@ def test_multiple_and_or() -> None:
     ) == parser.parse("foo is not null and foo < 5 or (foo > 10 and foo < 100 
and bar is null)")
 
 
+def test_like_equality() -> None:
+    assert EqualTo("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert EqualTo("foo", "data%") == parser.parse("foo LIKE 'data\\%'")
+
+
 def test_starts_with() -> None:
-    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data'")
+    assert StartsWith("foo", "data") == parser.parse("foo LIKE 'data%'")
+    assert StartsWith("foo", "some % data") == parser.parse("foo LIKE 'some 
\\% data%'")
+    assert StartsWith("foo", "some data%") == parser.parse("foo LIKE 'some 
data\\%%'")
+
+
+def test_invalid_likes() -> None:
+    invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'" "foo LIKE 
'%data'"]

Review Comment:
   ```suggestion
       invalid_statements = ["foo LIKE '%data%'", "foo LIKE 'da%ta'", "foo LIKE 
'%data'"]
   ```



##########
pyiceberg/expressions/parser.py:
##########
@@ -78,6 +78,8 @@
 identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
 column = DelimitedList(identifier, delim=".", 
combine=False).set_results_name("column")
 
+like_regex = r'(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)'

Review Comment:
   My first choice would not be a regex, but it works well 👍 



##########
pyiceberg/expressions/parser.py:
##########
@@ -78,6 +78,8 @@
 identifier = Word(alphas, alphanums + "_$").set_results_name("identifier")
 column = DelimitedList(identifier, delim=".", 
combine=False).set_results_name("column")
 
+like_regex = r'(?P<valid_wildcard>(?<!\\)%$)|(?P<invalid_wildcard>(?<!\\)%)'

Review Comment:
   My first choice would not be a regex, but it works well 👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Update like statements to reflect sql behaviors [iceberg-python]

Reply via email to