friendlymatthew commented on code in PR #7833: URL: https://github.com/apache/arrow-rs/pull/7833#discussion_r2183782296
########## parquet-variant/src/builder.rs: ########## @@ -1499,4 +1562,143 @@ mod tests { let valid_result = valid_obj.finish(); assert!(valid_result.is_ok()); } + + #[test] + fn test_sorted_dictionary() { + // check if variant metadatabuilders are equivalent from different ways of constructing them + let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"].into_iter()); + + let mut variant2 = { + let mut builder = VariantBuilder::new(); + + builder.add_field_name("b"); + builder.add_field_name("c"); + builder.add_field_name("d"); + + builder + }; + + assert_eq!( + variant1.metadata_builder.field_names, + variant2.metadata_builder.field_names + ); + + // check metadata builders say it's sorted + assert!(variant1.metadata_builder.is_sorted); + assert!(variant2.metadata_builder.is_sorted); + + { + // test the bad case and break the sort order + variant2.add_field_name("a"); + assert!(!variant2.metadata_builder.is_sorted); + + // per the spec, make sure the variant will fail to build if only metadata is provided + let (m, v) = variant2.finish(); + let res = Variant::try_new(&m, &v); + assert!(res.is_err()); + + // since it is not sorted, make sure the metadata says so + let header = VariantMetadata::try_new(&m).unwrap(); + assert!(!header.is_sorted()); + } + + // write out variant1 and make sure the sorted flag is properly encoded + variant1.append_value(false); + + let (m, v) = variant1.finish(); + let res = Variant::try_new(&m, &v); + assert!(res.is_ok()); + + let header = VariantMetadata::try_new(&m).unwrap(); + assert!(header.is_sorted()); + } + + #[test] + fn test_object_sorted_dictionary() { + // predefine the list of field names + let mut variant1 = VariantBuilder::new().with_field_names(["a", "b", "c"].into_iter()); + let mut obj = variant1.new_object(); + + obj.insert("c", true); + obj.insert("a", false); + obj.insert("b", ()); + + // verify the field ids are correctly + let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>(); + assert_eq!(field_ids_by_insert_order, vec![2, 0, 1]); + + // add a field name that wasn't pre-defined but doesn't break the sort order + obj.insert("d", 2); + obj.finish().unwrap(); + + let (metadata, value) = variant1.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); + + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert!(metadata.is_sorted()); + + // verify object is sorted by field name order + let object = variant.as_object().unwrap(); + let field_names = object + .iter() + .map(|(field_name, _)| field_name) + .collect::<Vec<_>>(); + + assert_eq!(field_names, vec!["a", "b", "c", "d"]); + } + + #[test] + fn test_object_not_sorted_dictionary() { + // predefine the list of field names + let mut variant1 = VariantBuilder::new().with_field_names(["b", "c", "d"].into_iter()); + let mut obj = variant1.new_object(); + + obj.insert("c", true); + obj.insert("d", false); + obj.insert("b", ()); + + // verify the field ids are correctly + let field_ids_by_insert_order = obj.fields.iter().map(|(&id, _)| id).collect::<Vec<_>>(); + assert_eq!(field_ids_by_insert_order, vec![1, 2, 0]); + + // add a field name that wasn't pre-defined but breaks the sort order + obj.insert("a", 2); + obj.finish().unwrap(); + + let (metadata, value) = variant1.finish(); + let variant = Variant::try_new(&metadata, &value).unwrap(); + + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert!(!metadata.is_sorted()); + + // verify object field names are sorted by field name order + let object = variant.as_object().unwrap(); + let field_names = object + .iter() + .map(|(field_name, _)| field_name) + .collect::<Vec<_>>(); + + assert_eq!(field_names, vec!["a", "b", "c", "d"]); + } + + #[test] + fn test_building_sorted_dictionary() { + let mut builder = VariantBuilder::new(); + assert!(!builder.metadata_builder.is_sorted); + assert_eq!(builder.metadata_builder.num_field_names(), 0); + + builder.add_field_name("a"); + + assert!(builder.metadata_builder.is_sorted); + assert_eq!(builder.metadata_builder.num_field_names(), 1); + + let builder = builder.with_field_names(["b", "c", "d"].into_iter()); + + assert!(builder.metadata_builder.is_sorted); + assert_eq!(builder.metadata_builder.num_field_names(), 4); + + let builder = builder.with_field_names(["z", "y"].into_iter()); + assert!(!builder.metadata_builder.is_sorted); + assert_eq!(builder.metadata_builder.num_field_names(), 6); + } Review Comment: Some additional assertions that test the constructor methods -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org