scovich commented on code in PR #8233:
URL: https://github.com/apache/arrow-rs/pull/8233#discussion_r2307773304


##########
parquet-variant-compute/src/cast_to_variant.rs:
##########
@@ -46,72 +46,81 @@ use parquet_variant::{
     Variant, VariantBuilder, VariantDecimal16, VariantDecimal4, 
VariantDecimal8,
 };
 
-fn convert_timestamp(
+fn convert_timestamp_with_options(
     time_unit: &TimeUnit,
     time_zone: &Option<Arc<str>>,
     input: &dyn Array,
     builder: &mut VariantArrayBuilder,
-) {
+    strict: bool,
+) -> Result<(), ArrowError> {
     let native_datetimes: Vec<Option<NaiveDateTime>> = match time_unit {
         arrow_schema::TimeUnit::Second => {
             let ts_array = input
                 .as_any()
                 .downcast_ref::<TimestampSecondArray>()
                 .expect("Array is not TimestampSecondArray");
-
-            ts_array
-                .iter()
-                .map(|x| x.map(|y| timestamp_s_to_datetime(y).unwrap()))
-                .collect()
+            timestamp_to_variant_timestamp!(ts_array, timestamp_s_to_datetime, 
"seconds", strict)
         }
         arrow_schema::TimeUnit::Millisecond => {
             let ts_array = input
                 .as_any()
                 .downcast_ref::<TimestampMillisecondArray>()
                 .expect("Array is not TimestampMillisecondArray");
-
-            ts_array
-                .iter()
-                .map(|x| x.map(|y| timestamp_ms_to_datetime(y).unwrap()))
-                .collect()
+            timestamp_to_variant_timestamp!(
+                ts_array,
+                timestamp_ms_to_datetime,
+                "milliseconds",
+                strict
+            )
         }
         arrow_schema::TimeUnit::Microsecond => {
             let ts_array = input
                 .as_any()
                 .downcast_ref::<TimestampMicrosecondArray>()
                 .expect("Array is not TimestampMicrosecondArray");
-            ts_array
-                .iter()
-                .map(|x| x.map(|y| timestamp_us_to_datetime(y).unwrap()))
-                .collect()
+            timestamp_to_variant_timestamp!(
+                ts_array,
+                timestamp_us_to_datetime,
+                "microseconds",
+                strict
+            )
         }
         arrow_schema::TimeUnit::Nanosecond => {
             let ts_array = input
                 .as_any()
                 .downcast_ref::<TimestampNanosecondArray>()
                 .expect("Array is not TimestampNanosecondArray");
-            ts_array
-                .iter()
-                .map(|x| x.map(|y| timestamp_ns_to_datetime(y).unwrap()))
-                .collect()
+            timestamp_to_variant_timestamp!(
+                ts_array,
+                timestamp_ns_to_datetime,
+                "nanoseconds",
+                strict
+            )
         }
     };
 
-    for x in native_datetimes {
+    for (i, x) in native_datetimes.iter().enumerate() {
         match x {
             Some(ndt) => {
                 if time_zone.is_none() {
-                    builder.append_variant(ndt.into());
+                    builder.append_variant((*ndt).into());
                 } else {
-                    let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(&ndt);
+                    let utc_dt: DateTime<Utc> = Utc.from_utc_datetime(ndt);
                     builder.append_variant(utc_dt.into());
                 }
             }
             None => {
+                if strict && input.is_valid(i) {
+                    return Err(ArrowError::ComputeError(format!(
+                        "Failed to convert timestamp at index {}: invalid 
timestamp value",
+                        i
+                    )));
+                }
                 builder.append_null();

Review Comment:
   nit: just adding an extra match arm with a guard might be easier to read?
   ```patch
    }
   +None if strict && input.is_valid(i) {
   +    return Err(...);
   +}
    None => {
   ```



##########
parquet-variant-compute/src/cast_to_variant.rs:
##########
@@ -143,7 +152,14 @@ fn convert_timestamp(
 /// `1970-01-01T00:00:01.234567890Z`
 /// will be truncated to
 /// `1970-01-01T00:00:01.234567Z`
-pub fn cast_to_variant(input: &dyn Array) -> Result<VariantArray, ArrowError> {
+///
+/// # Arguments
+/// * `input` - The array to convert to VariantArray
+/// * `strict` - If true, return error on conversion failure. If false, insert 
null for failed conversions.
+pub fn cast_to_variant_with_options(
+    input: &dyn Array,
+    strict: bool,

Review Comment:
   Is there any possibility/risk that we may need additional options in the 
future? If so, it might be better to pass a struct? Or maybe we just deal with 
that if/when it comes?



##########
parquet-variant-compute/src/cast_to_variant.rs:
##########
@@ -545,6 +563,14 @@ pub fn cast_to_variant(input: &dyn Array) -> 
Result<VariantArray, ArrowError> {
     Ok(builder.build())
 }
 
+/// Convert an array to a `VariantArray` with strict mode enabled (panics on 
conversion failures).
+///
+/// This function provides backward compatibility. For non-panicking behavior,
+/// use `cast_to_variant_with_options` with `strict = false`.

Review Comment:
   I don't think passing `false` panics either? Doesn't the `strict` flag just 
decide whether a failed cast becomes NULL vs. error?



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -123,3 +158,30 @@ macro_rules! decimal_to_variant_decimal {
     }};
 }
 pub(crate) use decimal_to_variant_decimal;
+
+/// Convert a timestamp value to a `VariantTimestamp`
+macro_rules! timestamp_to_variant_timestamp {
+    ($ts_array:expr, $converter:expr, $unit_name:expr, $strict:expr) => {
+        if $strict {
+            let mut result = Vec::with_capacity($ts_array.len());
+            for x in $ts_array.iter() {
+                match x {
+                    Some(y) => match $converter(y) {
+                        Some(dt) => result.push(Some(dt)),
+                        None => {
+                            return Err(ArrowError::ComputeError(format!(
+                                "Invalid timestamp {} value",
+                                $unit_name
+                            )))
+                        }
+                    },
+                    None => result.push(None),
+                }
+            }
+            result

Review Comment:
   Would this work?
   ```suggestion
               let error = || ArrowError::ComputeError(format!(
                   "Invalid timestamp {} value",
                   $unit_name
               ));
               let converter = |x| $converter(x).ok_or_else(error);
               let iter = $ts_array.iter().map(|x| 
x.map(converter).transpose());
               iter.collect::<Result<Vec<_>, ArrowErrors>()?
   ```



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -32,6 +33,31 @@ macro_rules! non_generic_conversion_array {
             $builder.append_variant(Variant::from(cast_value));
         }
     }};
+    ($array:expr, $cast_fn:expr, $builder:expr, $strict:expr) => {{
+        let array = $array;
+        for i in 0..array.len() {
+            if array.is_null(i) {
+                $builder.append_null();
+                continue;
+            }
+            match $cast_fn(array.value(i)) {
+                Some(cast_value) => {
+                    $builder.append_variant(Variant::from(cast_value));
+                }
+                None => {
+                    if $strict {
+                        return Err(ArrowError::ComputeError(format!(
+                            "Failed to convert value at index {}: conversion 
failed",
+                            i
+                        )));
+                    } else {
+                        $builder.append_null();
+                    }
+                }

Review Comment:
   similar to above
   ```suggestion
                   Some(cast_value) => 
$builder.append_variant(Variant::from(cast_value)),
                   None if $strict => { 
                       return Err(ArrowError::ComputeError(format!(
                           "Failed to convert value at index {i}: conversion 
failed",
                       )));
                   }
                   None => $builder.append_null(),
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to