alamb commented on code in PR #4742:
URL: https://github.com/apache/arrow-datafusion/pull/4742#discussion_r1059755285


##########
datafusion/core/src/physical_plan/file_format/parquet/page_filter.rs:
##########
@@ -381,15 +381,28 @@ macro_rules! get_min_max_values_for_page_index {
                     vec.iter().map(|x| x.$func().cloned()),
                 )))
             }
-            Index::BYTE_ARRAY(index) => {
-                let vec = &index.indexes;
-                let array: StringArray = vec
-                    .iter()
-                    .map(|x| x.$func())
-                    .map(|x| x.and_then(|x| std::str::from_utf8(x).ok()))
-                    .collect();
-                Some(Arc::new(array))
-            }
+            Index::BYTE_ARRAY(index) => match $self.target_type {

Review Comment:
   I wonder if this code path is covered by tests — is there a test that
exercises page filtering on Decimal values?



##########
datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs:
##########
@@ -627,6 +638,56 @@ mod tests {
         );
 
         // TODO: BYTE_ARRAY support read decimal from parquet, after the 
20.0.0 arrow-rs release
+        // BYTE_ARRAY: c1 = decimal128(100000, 28, 3), the c1 is decimal(18,2)
+        // the type of parquet is decimal(18,2)
+        let schema =
+            Schema::new(vec![Field::new("c1", DataType::Decimal128(18, 2), 
false)]);
+        // cast the type of c1 to decimal(28,3)
+        let left = cast(col("c1"), DataType::Decimal128(28, 3));
+        let expr = left.eq(lit(ScalarValue::Decimal128(Some(100000), 28, 3)));
+        let schema_descr = get_test_schema_descr(vec![(
+            "c1",
+            PhysicalType::BYTE_ARRAY,
+            Some(LogicalType::Decimal {
+                scale: 2,
+                precision: 18,
+            }),
+            Some(18),
+            Some(2),
+            Some(16),
+        )]);
+        let pruning_predicate =
+            PruningPredicate::try_new(expr, Arc::new(schema)).unwrap();
+        // we must use the big-endian when encode the i128 to bytes or vec[u8].
+        let rgm1 = get_row_group_meta_data(
+            &schema_descr,
+            vec![ParquetStatistics::byte_array(
+                // 5.00
+                Some(ByteArray::from(500i128.to_be_bytes().to_vec())),
+                // 80.00
+                Some(ByteArray::from(8000i128.to_be_bytes().to_vec())),
+                None,
+                0,
+                false,
+            )],
+        );
+        let rgm2 = get_row_group_meta_data(

Review Comment:
   I wonder if it is worth adding a test for row group metadata that has nulls
for the min and/or max statistics?



##########
datafusion/core/src/physical_plan/file_format/parquet/row_groups.rs:
##########
@@ -627,6 +638,56 @@ mod tests {
         );
 
         // TODO: BYTE_ARRAY support read decimal from parquet, after the 
20.0.0 arrow-rs release

Review Comment:
   This TODO comment can probably be removed as well, since the feature it
refers to has landed.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to