rdblue commented on code in PR #6714:
URL: https://github.com/apache/iceberg/pull/6714#discussion_r1110160350
##########
python/tests/expressions/test_visitors.py:
##########
@@ -1486,3 +1492,211 @@ def test_dnf_to_dask(table_schema_simple: Schema) ->
None:
),
)
assert expression_to_plain_format(expr) == [[("foo", ">", "hello")],
[("bar", "in", {1, 2, 3}), ("baz", "==", True)]]
+
+
+@pytest.fixture
+def schema_data_file() -> Schema:
+ return Schema(
+ NestedField(1, "all_nan", DoubleType(), required=True),
+ NestedField(2, "max_nan", DoubleType(), required=True),
+ NestedField(3, "min_max_nan", FloatType(), required=False),
+ NestedField(4, "all_nan_null_bounds", DoubleType(), required=True),
+ NestedField(5, "some_nan_correct_bounds", FloatType(), required=False),
+ )
+
+
+@pytest.fixture
+def data_file() -> DataFile:
+ return DataFile(
+ file_path="file.avro",
+ file_format=FileFormat.PARQUET,
+ partition={},
+ record_count=50,
+ file_size_in_bytes=3,
+ column_sizes={
+ 1: 10,
+ 2: 10,
+ 3: 10,
+ 4: 10,
+ 5: 10,
+ },
+ value_counts={
+ 1: 10,
+ 2: 10,
+ 3: 10,
+ 4: 10,
+ 5: 10,
+ },
+ null_value_counts={
+ 1: 0,
+ 2: 0,
+ 3: 0,
+ 4: 0,
+ 5: 0,
+ },
+ nan_value_counts={1: 10, 4: 10, 5: 5},
+ lower_bounds={
+ 1: to_bytes(DoubleType(), float("nan")),
+ 2: to_bytes(DoubleType(), 7),
+ 3: to_bytes(FloatType(), float("nan")),
+ 5: to_bytes(FloatType(), 7),
+ },
+ upper_bounds={
+ 1: to_bytes(DoubleType(), float("nan")),
+ 2: to_bytes(DoubleType(), float("nan")),
+ 3: to_bytes(FloatType(), float("nan")),
+ 5: to_bytes(FloatType(), 22),
+ },
+ )
+
+
+def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file: Schema, data_file: DataFile) -> None:
Review Comment:
Minor, but I think it makes sense to keep tests like these in separate
files. That way they're easier to find and run. There's not much of a downside
to having tests spread across files, too.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]