alamb commented on code in PR #8891:
URL: https://github.com/apache/arrow-rs/pull/8891#discussion_r2586174495


##########
arrow-schema/src/fields.rs:
##########
@@ -339,19 +339,166 @@ impl UnionFields {
     ///
     /// See <https://arrow.apache.org/docs/format/Columnar.html#union-layout>
     ///
+    /// # Errors
+    ///
+    /// This function returns an error if:
+    /// - Any type_id appears more than once (duplicate type ids)
+    /// - Any field appears more than once (duplicate fields)

Review Comment:
   I am not sure the arrow spec requires unique field names for union. I 
couldn't find any reference to such a restriction in 
https://arrow.apache.org/docs/format/Columnar.html#union-layout
   
   Since I think previous versions of arrow-rs allow duplicate fields I don't 
think it is ok to start erroring on them in this release.



##########
arrow-schema/src/fields.rs:
##########
@@ -568,4 +716,150 @@ mod tests {
         let r = fields.try_filter_leaves(|_, _| 
Err(ArrowError::SchemaError("error".to_string())));
         assert!(r.is_err());
     }
+
+    #[test]
+    fn test_union_fields_try_new_valid() {
+        let res = UnionFields::try_new(
+            vec![1, 6, 7],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f6", DataType::Utf8, false),
+                Field::new("f7", DataType::Int32, true),
+            ],
+        );
+        assert!(res.is_ok());
+        let union_fields = res.unwrap();
+        assert_eq!(union_fields.len(), 3);
+        assert_eq!(
+            union_fields.iter().map(|(id, _)| id).collect::<Vec<_>>(),
+            vec![1, 6, 7]
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_new_empty() {
+        let res = UnionFields::try_new(Vec::<i8>::new(), Vec::<Field>::new());
+        assert!(res.is_ok());
+        assert!(res.unwrap().is_empty());
+    }
+
+    #[test]
+    fn test_union_fields_try_new_duplicate_type_id() {
+        let res = UnionFields::try_new(
+            vec![1, 1],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f2", DataType::Utf8, false),
+            ],
+        );
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("duplicate type id: 1")
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_new_duplicate_field() {
+        let field = Field::new("field", DataType::UInt8, false);
+        let res = UnionFields::try_new(vec![1, 2], vec![field.clone(), field]);
+        assert!(res.is_err());
+        assert!(res.unwrap_err().to_string().contains("duplicate field"));
+    }
+
+    #[test]
+    fn test_union_fields_try_new_more_type_ids() {
+        let res = UnionFields::try_new(
+            vec![1, 2, 3],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f2", DataType::Utf8, false),
+            ],
+        );
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("type_ids iterator has more elements")
+        );
+    }
+
+    #[test]
+    fn test_union_fields_try_new_more_fields() {
+        let res = UnionFields::try_new(
+            vec![1, 2],
+            vec![
+                Field::new("f1", DataType::UInt8, false),
+                Field::new("f2", DataType::Utf8, false),
+                Field::new("f3", DataType::Int32, true),
+            ],
+        );
+        assert!(res.is_err());
+        assert!(
+            res.unwrap_err()
+                .to_string()
+                .contains("fields iterator has more elements")
+        );
+    }
+
+    #[test]

Review Comment:
   very nice tests



##########
arrow-ipc/src/convert.rs:
##########
@@ -490,8 +490,9 @@ pub(crate) fn get_data_type(field: crate::Field, 
may_be_dictionary: bool) -> Dat
             };
 
             let fields = match union.typeIds() {
-                None => UnionFields::new(0_i8..fields.len() as i8, fields),
-                Some(ids) => UnionFields::new(ids.iter().map(|i| i as i8), 
fields),
+                None => UnionFields::from_fields(fields),

Review Comment:
   this is a nice API 👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to