alamb commented on code in PR #12503:
URL: https://github.com/apache/datafusion/pull/12503#discussion_r1764905210
##########
datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs:
##########
@@ -264,8 +264,12 @@ impl PruningStatistics for BloomFilterStatistics {
.iter()
.map(|value| {
match value {
- ScalarValue::Utf8(Some(v)) => sbbf.check(&v.as_str()),
- ScalarValue::Binary(Some(v)) => sbbf.check(v),
+ ScalarValue::Utf8(Some(v)) |
ScalarValue::Utf8View(Some(v)) => {
+ sbbf.check(&v.as_str())
+ }
+ ScalarValue::Binary(Some(v)) |
ScalarValue::BinaryView(Some(v)) => {
+ sbbf.check(v)
+ }
Review Comment:
I double checked that the tests cover this code like this:
```diff
---
a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
+++
b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs
@@ -264,9 +264,12 @@ impl PruningStatistics for BloomFilterStatistics {
.iter()
.map(|value| {
match value {
- ScalarValue::Utf8(Some(v)) |
ScalarValue::Utf8View(Some(v)) => {
+ ScalarValue::Utf8(Some(v)) => {
sbbf.check(&v.as_str())
}
+ ScalarValue::Utf8View(Some(v)) => {
+ panic!("String view bloom filter not implemented
yet");
+ }
ScalarValue::Binary(Some(v)) |
ScalarValue::BinaryView(Some(v)) => {
sbbf.check(v)
}
@@ -1439,6 +1442,7 @@ mod tests {
}
}
+
```
```
cargo test -p datafusion -- row_group_filter
running 21 tests
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_missing_stats
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_simple_expr
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_sql_in
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_file_schema
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_simple_expr
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type2
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type3
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_null_expr
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_partial_expr
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type4
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_decimal_type5
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::row_group_pruning_predicate_eq_null_expr
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_2_values
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_value
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_or_not_eq
... ok
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view
... FAILED
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view
... FAILED
test
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_without_bloom_filter
... ok
failures:
----
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view
stdout ----
thread
'datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view'
panicked at
datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs:271:25:
String bloom filter not implemented yet
----
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view
stdout ----
thread
'datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view'
panicked at
datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs:271:25:
String bloom filter not implemented yet
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
failures:
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_multiple_expr_view
datasource::physical_plan::parquet::row_group_filter::tests::test_row_group_bloom_filter_pruning_predicate_with_exists_3_values_view
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]