mbrobbel commented on code in PR #8438:
URL: https://github.com/apache/arrow-rs/pull/8438#discussion_r2377953399


##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -809,17 +800,11 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: 
usize) -> Variant<'_, '
             let date = Date32Type::to_naive_date(value);
             Variant::from(date)
         }
-        DataType::FixedSizeBinary(binary_len) => {
+        // 16-byte FixedSizeBinary is alway corresponds to a UUID; all other 
sizes are illegal.

Review Comment:
   ```suggestion
           // 16-byte FixedSizeBinary always corresponds to a UUID; all other 
sizes are illegal.
   ```



##########
parquet-variant-compute/src/variant_array.rs:
##########
@@ -891,28 +864,116 @@ fn typed_value_to_variant(typed_value: &ArrayRef, index: 
usize) -> Variant<'_, '
 ///
 /// So cast them to get the right type.
 fn cast_to_binary_view_arrays(array: &dyn Array) -> Result<ArrayRef, 
ArrowError> {
-    let new_type = rewrite_to_view_types(array.data_type());
-    cast(array, &new_type)
+    let new_type = canonicalize_and_verify_data_type(array.data_type())?;
+    cast(array, new_type.as_ref())
+}
+
+/// Validates whether a given arrow decimal is a valid variant decimal
+///
+/// NOTE: By a strict reading of the "decimal table" in the [shredding spec], 
each decimal type
+/// should have a lower bound on precision as well as an upper bound (i.e. 
Decimal16 with precision
+/// 5 is invalid because Decimal4 can cover it). But the variant shredding 
integration tests
+/// specifically expect such cases to succeed, so we only enforce the upper 
bound here.
+///
+/// [shredding spec]: 
https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#encoding-types
+fn is_valid_variant_decimal(p: &u8, s: &i8, max_precision: u8) -> bool {
+    *p <= max_precision && (0..=*p as i8).contains(s)
 }
 
 /// replaces all instances of Binary with BinaryView in a DataType
-fn rewrite_to_view_types(data_type: &DataType) -> DataType {
-    match data_type {
-        DataType::Binary => DataType::BinaryView,
-        DataType::List(field) => DataType::List(rewrite_field_type(field)),
-        DataType::Struct(fields) => {
-            DataType::Struct(fields.iter().map(rewrite_field_type).collect())
-        }
-        _ => data_type.clone(),
+fn canonicalize_and_verify_data_type(

Review Comment:
   I think this comment should be updated?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to