rdblue commented on code in PR #6128:
URL: https://github.com/apache/iceberg/pull/6128#discussion_r1022181916
##########
python/tests/test_transforms.py:
##########
@@ -506,3 +532,345 @@ def test_datetime_transform_str(transform, transform_str):
)
def test_datetime_transform_repr(transform, transform_repr):
assert repr(transform) == transform_repr
+
+
[email protected]
+def bound_reference_str() -> BoundReference:
+ return BoundReference(field=NestedField(1, "field", StringType(),
required=False), accessor=Accessor(position=0, inner=None))
+
+
[email protected]
+def bound_reference_date() -> BoundReference:
+ return BoundReference(field=NestedField(1, "field", DateType(),
required=False), accessor=Accessor(position=0, inner=None))
+
+
[email protected]
+def bound_reference_timestamp() -> BoundReference:
+ return BoundReference(
+ field=NestedField(1, "field", TimestampType(), required=False),
accessor=Accessor(position=0, inner=None)
+ )
+
+
[email protected]
+def bound_reference_decimal() -> BoundReference:
+ return BoundReference(
+ field=NestedField(1, "field", DecimalType(8, 2), required=False),
accessor=Accessor(position=0, inner=None)
+ )
+
+
[email protected]
+def bound_reference_long() -> BoundReference:
+ return BoundReference(
+ field=NestedField(1, "field", DecimalType(8, 2), required=False),
accessor=Accessor(position=0, inner=None)
+ )
+
+
+def test_projection_bucket_unary(bound_reference_str: BoundReference) -> None:
+ assert BucketTransform(2).project("name",
BoundNotNull(term=bound_reference_str)) == NotNull(term=Reference(name="name"))
+
+
+def test_projection_bucket_literal(bound_reference_str: BoundReference) ->
None:
+ assert BucketTransform(2).project("name",
BoundEqualTo(term=bound_reference_str, literal=literal("data"))) == EqualTo(
+ term="name", literal=literal(1)
+ )
+
+
+def test_projection_bucket_set_same_bucket(bound_reference_str:
BoundReference) -> None:
+ assert BucketTransform(2).project(
+ "name", BoundIn(term=bound_reference_str, literals={literal("hello"),
literal("world")})
+ ) == EqualTo(term="name", literal=literal(1))
+
+
+def test_projection_bucket_set_in(bound_reference_str: BoundReference) -> None:
+ assert BucketTransform(3).project(
+ "name", BoundIn(term=bound_reference_str, literals={literal("hello"),
literal("world")})
+ ) == In(term="name", literals={literal(1), literal(2)})
+
+
+def test_projection_bucket_set_not_in(bound_reference_str: BoundReference) ->
None:
+ assert (
+ BucketTransform(3).project("name",
BoundNotIn(term=bound_reference_str, literals={literal("hello"),
literal("world")}))
+ is None
+ )
+
+
+def test_projection_year_unary(bound_reference_date: BoundReference) -> None:
+ assert YearTransform().project("name",
BoundNotNull(term=bound_reference_date)) == NotNull(term="name")
+
+
+def test_projection_year_literal(bound_reference_date: BoundReference) -> None:
+ assert YearTransform().project("name",
BoundEqualTo(term=bound_reference_date, literal=DateLiteral(1925))) == EqualTo(
+ term="name", literal=literal(5)
+ )
+
+
+def test_projection_year_set_same_year(bound_reference_date: BoundReference)
-> None:
+ assert YearTransform().project(
+ "name", BoundIn(term=bound_reference_date,
literals={DateLiteral(1925), DateLiteral(1926)})
+ ) == EqualTo(term="name", literal=literal(5))
+
+
+def test_projection_year_set_in(bound_reference_date: BoundReference) -> None:
+ assert YearTransform().project(
+ "name", BoundIn(term=bound_reference_date,
literals={DateLiteral(1925), DateLiteral(2925)})
+ ) == In(term="name", literals={literal(8), literal(5)})
+
+
+def test_projection_year_set_not_in(bound_reference_date: BoundReference) ->
None:
+ assert (
+ YearTransform().project("name", BoundNotIn(term=bound_reference_date,
literals={DateLiteral(1925), DateLiteral(2925)}))
+ is None
+ )
+
+
+def test_projection_month_unary(bound_reference_date: BoundReference) -> None:
+ assert MonthTransform().project("name",
BoundNotNull(term=bound_reference_date)) == NotNull(term="name")
+
+
+def test_projection_month_literal(bound_reference_date: BoundReference) ->
None:
+ assert MonthTransform().project("name",
BoundEqualTo(term=bound_reference_date, literal=DateLiteral(1925))) == EqualTo(
+ term="name", literal=literal(63)
+ )
+
+
+def test_projection_month_set_same_month(bound_reference_date: BoundReference)
-> None:
+ assert MonthTransform().project(
+ "name", BoundIn(term=bound_reference_date,
literals={DateLiteral(1925), DateLiteral(1926)})
+ ) == EqualTo(term="name", literal=literal(63))
+
+
+def test_projection_month_set_in(bound_reference_date: BoundReference) -> None:
+ assert MonthTransform().project(
+ "name", BoundIn(term=bound_reference_date,
literals={DateLiteral(1925), DateLiteral(2925)})
+ ) == In(term="name", literals={literal(96), literal(63)})
+
+
+def test_projection_day_month_not_in(bound_reference_date: BoundReference) ->
None:
+ assert (
+ MonthTransform().project("name", BoundNotIn(term=bound_reference_date,
literals={DateLiteral(1925), DateLiteral(2925)}))
+ is None
+ )
+
+
+def test_projection_day_unary(bound_reference_timestamp) -> None:
+ assert DayTransform().project("name",
BoundNotNull(term=bound_reference_timestamp)) == NotNull(term="name")
+
+
+def test_projection_day_literal(bound_reference_timestamp) -> None:
+ assert DayTransform().project(
+ "name", BoundEqualTo(term=bound_reference_timestamp,
literal=TimestampLiteral(1667696874000))
+ ) == EqualTo(term="name", literal=literal(19))
+
+
+def test_projection_day_set_same_day(bound_reference_timestamp) -> None:
+ assert DayTransform().project(
+ "name",
+ BoundIn(term=bound_reference_timestamp,
literals={TimestampLiteral(1667696874001), TimestampLiteral(1667696874000)}),
+ ) == EqualTo(term="name", literal=literal(19))
+
+
+def test_projection_day_set_in(bound_reference_timestamp) -> None:
+ assert DayTransform().project(
+ "name",
+ BoundIn(term=bound_reference_timestamp,
literals={TimestampLiteral(1667696874001), TimestampLiteral(1567696874000)}),
+ ) == In(term="name", literals={literal(18), literal(19)})
+
+
+def test_projection_day_set_not_in(bound_reference_timestamp) -> None:
+ assert (
+ DayTransform().project(
+ "name",
+ BoundNotIn(term=bound_reference_timestamp,
literals={TimestampLiteral(1567696874), TimestampLiteral(1667696874)}),
+ )
+ is None
+ )
+
+
+def test_projection_day_human(bound_reference_date: BoundReference) -> None:
+ date_literal = literal(date(2018, 1, 1))
+ assert DayTransform().project("dt",
BoundEqualTo(term=bound_reference_date, literal=date_literal)) == EqualTo(
+ term="dt", literal=literal(17532)
+ ) # == 2018, 1, 1
+
+ assert DayTransform().project("dt",
BoundLessThanOrEqual(term=bound_reference_date, literal=date_literal)) ==
LessThanOrEqual(
+ term="dt", literal=literal(17532)
+ ) # <= 2018, 1, 1
+
+ assert DayTransform().project("dt",
BoundLessThan(term=bound_reference_date, literal=date_literal)) ==
LessThanOrEqual(
+ term="dt", literal=literal(17531)
+ ) # <= 2017, 12, 31
+
+ assert DayTransform().project(
+ "dt", BoundGreaterThanOrEqual(term=bound_reference_date,
literal=date_literal)
+ ) == GreaterThanOrEqual(
+ term="dt", literal=literal(17532)
+ ) # >= 2018, 1, 1
+
+ assert DayTransform().project("dt",
BoundGreaterThan(term=bound_reference_date, literal=date_literal)) ==
GreaterThanOrEqual(
+ term="dt", literal=literal(17533)
+ ) # >= 2018, 1, 2
+
+
+def test_projection_hour_unary(bound_reference_timestamp) -> None:
+ assert HourTransform().project("name",
BoundNotNull(term=bound_reference_timestamp)) == NotNull(term="name")
+
+
+def test_projection_hour_literal(bound_reference_timestamp) -> None:
+ assert HourTransform().project(
+ "name", BoundEqualTo(term=bound_reference_timestamp,
literal=TimestampLiteral(1667696874000))
Review Comment:
Minor: these tests use millisecond-precision timestamps (1667696874000 read as
milliseconds is Sun Nov 06 2022 01:07:54), but the actual representation is in
microseconds. That's why the resulting hour ordinal is small: read as
microseconds, the actual date/time is Tue Jan 20 1970 07:14:56. It would be
better to always use micros, even when supplying constants, so that future
readers aren't confused.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]