sdf-jkl commented on code in PR #10157:
URL: https://github.com/apache/arrow-rs/pull/10157#discussion_r3481940632


##########
parquet-variant/src/variant.rs:
##########
@@ -1005,104 +831,78 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v1 = Variant::from(123i64);
     /// assert_eq!(v1.as_int64(), Some(123i64));
     ///
-    /// // or from boolean variant
-    /// let v2 = Variant::BooleanFalse;
-    /// assert_eq!(v2.as_int64(), Some(0));
-    ///
     /// // but not a variant that cannot be cast into an integer
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_int64(), None);
+    /// let v2 = Variant::from("hello!");
+    /// assert_eq!(v2.as_int64(), None);
     /// ```
     pub fn as_int64(&self) -> Option<i64> {
-        self.as_num()
+        match *self {
+            Variant::Int64(i) => Some(i),
+            _ => None,
+        }
     }
 
     /// Converts this variant to a `u8` if possible.
     ///
-    /// Returns `Some(u8)` for boolean and numeric variants(integers, 
floating-point,
-    /// and decimals with scale 0) that fit in `u8` range
+    /// Returns `Some(u8)` for int8 variants that fit in `u8`,
     /// `None` for other variants or values that would overflow.
     ///
     /// # Examples
     ///
     /// ```
-    ///  use parquet_variant::{Variant, VariantDecimal4};
+    ///  use parquet_variant::Variant;
     ///
     ///  // you can read an int64 variant into an u8

Review Comment:
   ```suggestion
       ///  // you can read an int8 variant into an u8
   ```



##########
parquet-variant/src/variant.rs:
##########
@@ -1005,104 +831,78 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v1 = Variant::from(123i64);
     /// assert_eq!(v1.as_int64(), Some(123i64));
     ///
-    /// // or from boolean variant
-    /// let v2 = Variant::BooleanFalse;
-    /// assert_eq!(v2.as_int64(), Some(0));
-    ///
     /// // but not a variant that cannot be cast into an integer
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_int64(), None);
+    /// let v2 = Variant::from("hello!");
+    /// assert_eq!(v2.as_int64(), None);
     /// ```
     pub fn as_int64(&self) -> Option<i64> {
-        self.as_num()
+        match *self {
+            Variant::Int64(i) => Some(i),
+            _ => None,
+        }
     }
 
     /// Converts this variant to a `u8` if possible.
     ///
-    /// Returns `Some(u8)` for boolean and numeric variants(integers, 
floating-point,
-    /// and decimals with scale 0) that fit in `u8` range
+    /// Returns `Some(u8)` for int8 variants that fit in `u8`,
     /// `None` for other variants or values that would overflow.
     ///
     /// # Examples
     ///
     /// ```
-    ///  use parquet_variant::{Variant, VariantDecimal4};
+    ///  use parquet_variant::Variant;
     ///
     ///  // you can read an int64 variant into an u8
-    ///  let v1 = Variant::from(123i64);
+    ///  let v1 = Variant::from(123i8);
     ///  assert_eq!(v1.as_u8(), Some(123u8));
     ///
-    ///  // or a Decimal4 with scale 0 into u8
-    ///  let d = VariantDecimal4::try_new(26, 0).unwrap();
-    ///  let v2 = Variant::from(d);
-    ///  assert_eq!(v2.as_u8(), Some(26u8));
-    ///
-    ///  // or a variant that decimal with scale not equal to zero
-    ///  let d = VariantDecimal4::try_new(123, 2).unwrap();
-    ///  let v3 = Variant::from(d);
-    ///  assert_eq!(v3.as_u8(), Some(1));
-    ///
-    /// // or from boolean variant
-    /// let v4 = Variant::BooleanFalse;
-    /// assert_eq!(v4.as_u8(), Some(0));
-    ///
     ///  // but not a variant that can't fit into the range
-    ///  let v5 = Variant::from(-1);
-    ///  assert_eq!(v5.as_u8(), None);
+    ///  let v2 = Variant::from(-1);
+    ///  assert_eq!(v2.as_u8(), None);
     ///
-    ///  // or not a variant that cannot be cast into an integer
-    ///  let v6 = Variant::from("hello!");
-    ///  assert_eq!(v6.as_u8(), None);
+    ///  // or not an int8 variant
+    ///  let v3 = Variant::from(123i64);
+    ///  assert_eq!(v3.as_u8(), None);
     /// ```
     pub fn as_u8(&self) -> Option<u8> {
-        self.as_num()
+        match *self {
+            Variant::Int8(i) => i.try_into().ok(),
+            _ => None,
+        }
     }
 
     /// Converts this variant to an `u16` if possible.
     ///
-    /// Returns `Some(u16)` for boolean and numeric variants(integers, 
floating-point,
-    /// and decimals with scale 0) that fit in `u16` range
+    /// Returns `Some(u16)` for int16 variants that fit in `u16`,
     /// `None` for other variants or values that would overflow.
     ///
     /// # Examples
     ///
     /// ```
-    ///  use parquet_variant::{Variant, VariantDecimal4};
+    ///  use parquet_variant::Variant;
     ///
     ///  // you can read an int64 variant into an u16

Review Comment:
   same as above:
   ```suggestion
       ///  // you can read an int16 variant into an u16
   ```



##########
parquet-variant-compute/src/type_conversion.rs:
##########
@@ -287,6 +444,79 @@ where
     }
 }
 
+/// Return the unscaled integer representation for Arrow decimal type `O` from 
a `Variant`.
+///
+/// This function is unlike `variant_to_unscaled_decim`, it would never 
rescale the decimal value,
+/// and only return the unscaled integer representation for the specific 
decimal variants.
+pub(crate) fn shred_variant_to_unscaled_decimal<O>(variant: &Variant<'_, '_>) 
-> Option<O::Native>

Review Comment:
   The function should check that Variant's scale matches the column scale. 
Otherwise Decimal(123, scale 3) shredded into a Decimal(.., scale 5) column 
would become .00123 instead of .123.
   
   The question to rescale or not is also debatable.
   
   I digged into spark and currently they [rescale 
decimals](https://github.com/apache/spark/blob/c8695495569e4058a758b111ebc942fc4906494c/common/variant/src/main/java/org/apache/spark/types/variant/VariantShreddingWriter.java#L47-L50)
 and even cast to integers and vice versa:
   ```
    // If true, we will shred decimals to a different scale or to integers, as 
long as they are
    // numerically equivalent. Similarly, integers will be allowed to shred to 
decimals.
   ```
   More resources:
   - The rescale toggle they current set to `True` by default - 
   
https://github.com/apache/spark/blob/c8695495569e4058a758b111ebc942fc4906494c/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala#L699-L700
   - sparks decimal rescale logic - 
https://github.com/apache/spark/blob/c8695495569e4058a758b111ebc942fc4906494c/common/variant/src/main/java/org/apache/spark/types/variant/VariantShreddingWriter.java#L199-L216



##########
parquet-variant/src/variant.rs:
##########
@@ -1005,104 +831,78 @@ impl<'m, 'v> Variant<'m, 'v> {
     /// let v1 = Variant::from(123i64);
     /// assert_eq!(v1.as_int64(), Some(123i64));
     ///
-    /// // or from boolean variant
-    /// let v2 = Variant::BooleanFalse;
-    /// assert_eq!(v2.as_int64(), Some(0));
-    ///
     /// // but not a variant that cannot be cast into an integer
-    /// let v3 = Variant::from("hello!");
-    /// assert_eq!(v3.as_int64(), None);
+    /// let v2 = Variant::from("hello!");
+    /// assert_eq!(v2.as_int64(), None);
     /// ```
     pub fn as_int64(&self) -> Option<i64> {
-        self.as_num()
+        match *self {
+            Variant::Int64(i) => Some(i),
+            _ => None,
+        }
     }
 
     /// Converts this variant to a `u8` if possible.
     ///
-    /// Returns `Some(u8)` for boolean and numeric variants(integers, 
floating-point,
-    /// and decimals with scale 0) that fit in `u8` range
+    /// Returns `Some(u8)` for int8 variants that fit in `u8`,
     /// `None` for other variants or values that would overflow.

Review Comment:
   ```suggestion
       /// Returns `Some(u8)` for a non-negative int8 variant, `None` otherwise.
   ```
   
   Same case for `as_u*` below



##########
parquet-variant/src/variant.rs:
##########
@@ -1111,40 +911,28 @@ impl<'m, 'v> Variant<'m, 'v> {
     ///  use parquet_variant::{Variant, VariantDecimal8};
     ///
     ///  // you can read an int64 variant into an u32

Review Comment:
   ```suggestion
       ///  // you can read an int32 variant into an u32
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to