friendlymatthew commented on code in PR #8891:
URL: https://github.com/apache/arrow-rs/pull/8891#discussion_r2593587272
##########
arrow-schema/src/fields.rs:
##########
@@ -339,19 +339,212 @@ impl UnionFields {
///
/// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
///
+ /// # Errors
+ ///
+ /// This function returns an error if:
+ /// - Any type_id is negative
+ /// - Any type_id appears more than once (duplicate type ids)
+ /// - The number of type_ids doesn't match the number of fields
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use arrow_schema::{DataType, Field, UnionFields};
+ /// // Create a new UnionFields with type id mapping
+ /// // 1 -> DataType::UInt8
+ /// // 3 -> DataType::Utf8
+ /// let result = UnionFields::try_new(
+ /// vec![1, 3],
+ /// vec![
+ /// Field::new("field1", DataType::UInt8, false),
+ /// Field::new("field3", DataType::Utf8, false),
+ /// ],
+ /// );
+ /// assert!(result.is_ok());
+ ///
+ /// // This will fail due to duplicate type ids
+ /// let result = UnionFields::try_new(
+ /// vec![1, 1],
+ /// vec![
+ /// Field::new("field1", DataType::UInt8, false),
+ /// Field::new("field2", DataType::Utf8, false),
+ /// ],
+ /// );
+ /// assert!(result.is_err());
+ /// ```
+ pub fn try_new<F, T>(type_ids: T, fields: F) -> Result<Self, ArrowError>
+ where
+ F: IntoIterator,
+ F::Item: Into<FieldRef>,
+ T: IntoIterator<Item = i8>,
+ {
+ let mut type_ids_iter = type_ids.into_iter();
+ let mut fields_iter = fields.into_iter().map(Into::into);
+
+ // Bitset of the type ids accepted so far: bit `n` is set once id `n` has
+ // been seen. Ids are checked to be non-negative `i8` values (0..=127)
+ // before the shift below, so 128 bits cover every possible id.
+ let mut seen_type_ids = 0u128;
+
+ let mut out = Vec::new();
+
+ // Advance both iterators in lockstep so a length mismatch in either
+ // direction is detected and reported as an error.
+ loop {
+ match (type_ids_iter.next(), fields_iter.next()) {
+ // Both iterators ended together: every id was paired with a field.
+ (None, None) => return Ok(Self(out.into())),
+ (Some(type_id), Some(field)) => {
+ // check type id is non-negative
+ if type_id < 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "type ids must be non-negative: {type_id}"
+ )));
+ }
+
+ // check type id uniqueness
+ let mask = 1_u128 << type_id;
+ if (seen_type_ids & mask) != 0 {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "duplicate type id: {type_id}"
+ )));
+ }
+
+ seen_type_ids |= mask;
+
+ out.push((type_id, field));
+ }
+ (None, Some(_)) => {
+ return Err(ArrowError::InvalidArgumentError(
+ "fields iterator has more elements than type_ids iterator".to_string(),
+ ));
+ }
+ (Some(_), None) => {
+ return Err(ArrowError::InvalidArgumentError(
+ "type_ids iterator has more elements than fields iterator".to_string(),
+ ));
+ }
+ }
+ }
+ }
+
+ /// Create a new [`UnionFields`] from a collection of fields with
automatically
+ /// assigned type IDs starting from 0.
+ ///
+ /// The type IDs are assigned in increasing order: 0, 1, 2, 3, etc.
+ ///
+ /// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
+ ///
+ /// # Panics
+ ///
+ /// Panics if the number of fields exceeds 127 (the maximum value for i8
type IDs).
+ ///
+ /// If you want to avoid panics, use [`UnionFields::try_from_fields`]
instead, which
+ /// returns a `Result`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use arrow_schema::{DataType, Field, UnionFields};
+ /// // Create a new UnionFields with automatic type id assignment
+ /// // 0 -> DataType::UInt8
+ /// // 1 -> DataType::Utf8
+ /// let union_fields = UnionFields::from_fields(vec![
+ /// Field::new("field1", DataType::UInt8, false),
+ /// Field::new("field2", DataType::Utf8, false),
+ /// ]);
+ /// assert_eq!(union_fields.len(), 2);
+ /// ```
+ pub fn from_fields<F>(fields: F) -> Self
+ where
+ F: IntoIterator,
+ F::Item: Into<FieldRef>,
+ {
+ fields
+ .into_iter()
+ .enumerate()
+ .map(|(i, field)| {
+ let id = i8::try_from(i).expect("UnionFields cannot contain
more than 128 fields");
+
+ (id, field.into())
+ })
+ .collect()
+ }
+
+ /// Create a new [`UnionFields`] from a collection of fields with automatically
+ /// assigned type IDs starting from 0.
+ ///
+ /// The type IDs are assigned in increasing order: 0, 1, 2, 3, etc.
+ ///
+ /// This is the non-panicking version of [`UnionFields::from_fields`].
+ ///
+ /// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
+ ///
+ /// # Errors
+ ///
+ /// Returns an error if the number of fields exceeds 127 (the maximum value for i8 type IDs).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use arrow_schema::{DataType, Field, UnionFields};
+ /// // Create a new UnionFields with automatic type id assignment
+ /// // 0 -> DataType::UInt8
+ /// // 1 -> DataType::Utf8
+ /// let result = UnionFields::try_from_fields(vec![
+ /// Field::new("field1", DataType::UInt8, false),
+ /// Field::new("field2", DataType::Utf8, false),
+ /// ]);
+ /// assert!(result.is_ok());
+ /// assert_eq!(result.unwrap().len(), 2);
+ ///
+ /// // This will fail with too many fields
+ /// let many_fields: Vec<_> = (0..200)
+ /// .map(|i| Field::new(format!("field{}", i), DataType::Int32, false))
+ /// .collect();
+ /// let result = UnionFields::try_from_fields(many_fields);
+ /// assert!(result.is_err());
+ /// ```
+ pub fn try_from_fields<F>(fields: F) -> Result<Self, ArrowError>
+ where
+ F: IntoIterator,
+ F::Item: Into<FieldRef>,
+ {
Review Comment:
cc @kylebarron
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]