alamb commented on code in PR #7783: URL: https://github.com/apache/arrow-rs/pull/7783#discussion_r2183494895
########## parquet-variant/tests/test_json_to_variant.rs: ########## @@ -0,0 +1,552 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Manually tests if parsing JSON strings to Variants returns the expected results. + +use arrow_schema::ArrowError; +use parquet_variant::{ + json_to_variant, variant_to_json_string, ShortString, Variant, VariantBuilder, + VariantDecimal16, VariantDecimal4, VariantDecimal8, +}; + +struct JsonToVariantTest<'a> { + json: &'a str, + expected: Variant<'a, 'a>, +} + +impl<'a> JsonToVariantTest<'a> { + fn run(self) -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + json_to_variant(self.json, &mut variant_builder)?; + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + assert_eq!(variant, self.expected); + Ok(()) + } +} + +#[test] +fn test_json_to_variant_null() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "null", + expected: Variant::Null, + } + .run() +} + +#[test] +fn test_json_to_variant_boolean_true() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "true", + expected: Variant::BooleanTrue, + } + .run() +} + +#[test] +fn test_json_to_variant_boolean_false() -> Result<(), 
ArrowError> { + JsonToVariantTest { + json: "false", + expected: Variant::BooleanFalse, + } + .run() +} + +#[test] +fn test_json_to_variant_int8_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: " 127 ", + expected: Variant::Int8(127), + } + .run() +} + +#[test] +fn test_json_to_variant_int8_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: " -128 ", + expected: Variant::Int8(-128), + } + .run() +} + +#[test] +fn test_json_to_variant_int16() -> Result<(), ArrowError> { + JsonToVariantTest { + json: " 27134 ", + expected: Variant::Int16(27134), + } + .run() +} + +#[test] +fn test_json_to_variant_int32() -> Result<(), ArrowError> { + JsonToVariantTest { + json: " -32767431 ", + expected: Variant::Int32(-32767431), + } + .run() +} + +#[test] +fn test_json_to_variant_int64() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "92842754201389", + expected: Variant::Int64(92842754201389), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal4_basic() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "1.23", + expected: Variant::from(VariantDecimal4::try_new(123, 2)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal4_large_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "99999999.9", + expected: Variant::from(VariantDecimal4::try_new(999999999, 1)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal4_large_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-99999999.9", + expected: Variant::from(VariantDecimal4::try_new(-999999999, 1)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal4_small_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.999999999", + expected: Variant::from(VariantDecimal4::try_new(999999999, 9)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal4_tiny_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.000000001", 
+ expected: Variant::from(VariantDecimal4::try_new(1, 9)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal4_small_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-0.999999999", + expected: Variant::from(VariantDecimal4::try_new(-999999999, 9)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal8_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "999999999.0", + expected: Variant::from(VariantDecimal8::try_new(9999999990, 1)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal8_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-999999999.0", + expected: Variant::from(VariantDecimal8::try_new(-9999999990, 1)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal8_high_precision() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.999999999999999999", + expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 18)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal8_large_with_scale() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "9999999999999999.99", + expected: Variant::from(VariantDecimal8::try_new(999999999999999999, 2)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal8_large_negative_with_scale() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-9999999999999999.99", + expected: Variant::from(VariantDecimal8::try_new(-999999999999999999, 2)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal16_large_integer() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "9999999999999999999", // integer larger than i64 + expected: Variant::from(VariantDecimal16::try_new(9999999999999999999, 0)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal16_high_precision() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.9999999999999999999", + expected: 
Variant::from(VariantDecimal16::try_new(9999999999999999999, 19)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal16_max_value() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "79228162514264337593543950335", // 2 ^ 96 - 1 + expected: Variant::from(VariantDecimal16::try_new(79228162514264337593543950335, 0)?), + } + .run() +} + +#[ignore] +#[test] +fn test_json_to_variant_decimal16_max_scale() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "7.9228162514264337593543950335", // using scale higher than this falls into double + // since the max scale is 28. + expected: Variant::from(VariantDecimal16::try_new( + 79228162514264337593543950335, + 28, + )?), + } + .run() +} + +#[test] +fn test_json_to_variant_double_precision() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "0.79228162514264337593543950335", + expected: Variant::Double(0.792_281_625_142_643_4_f64), + } + .run() +} + +#[test] +fn test_json_to_variant_double_scientific_positive() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "15e-1", + expected: Variant::Double(15e-1f64), + } + .run() +} + +#[test] +fn test_json_to_variant_double_scientific_negative() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "-15e-1", + expected: Variant::Double(-15e-1f64), + } + .run() +} + +#[test] +fn test_json_to_variant_short_string() -> Result<(), ArrowError> { + JsonToVariantTest { + json: "\"harsh\"", + expected: Variant::ShortString(ShortString::try_new("harsh")?), + } + .run() +} + +#[test] +fn test_json_to_variant_short_string_max_length() -> Result<(), ArrowError> { + JsonToVariantTest { + json: &format!("\"{}\"", "a".repeat(63)), + expected: Variant::ShortString(ShortString::try_new(&"a".repeat(63))?), + } + .run() +} + +#[test] +fn test_json_to_variant_long_string() -> Result<(), ArrowError> { + JsonToVariantTest { + json: &format!("\"{}\"", "a".repeat(64)), + expected: Variant::String(&"a".repeat(64)), + } + .run() +} + +#[test] +fn 
test_json_to_variant_very_long_string() -> Result<(), ArrowError> { + JsonToVariantTest { + json: &format!("\"{}\"", "b".repeat(100000)), + expected: Variant::String(&"b".repeat(100000)), + } + .run() +} + +#[test] +fn test_json_to_variant_array_simple() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + list_builder.append_value(Variant::Int8(127)); + list_builder.append_value(Variant::Int16(128)); + list_builder.append_value(Variant::Int32(-32767431)); + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + JsonToVariantTest { + json: "[127, 128, -32767431]", + expected: variant, + } + .run() +} + +#[test] +fn test_json_to_variant_array_with_object() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + let mut object_builder_inner = list_builder.new_object(); + object_builder_inner.insert("age", Variant::Int8(32)); + object_builder_inner.finish().unwrap(); + list_builder.append_value(Variant::Int16(128)); + list_builder.append_value(Variant::BooleanFalse); + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + JsonToVariantTest { + json: "[{\"age\": 32}, 128, false]", + expected: variant, + } + .run() +} + +#[test] +fn test_json_to_variant_array_large_u16_offset() -> Result<(), ArrowError> { + // u16 offset - 128 i8's + 1 "true" = 257 bytes + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + for _ in 0..128 { + list_builder.append_value(Variant::Int8(1)); + } + list_builder.append_value(Variant::BooleanTrue); + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + + JsonToVariantTest { + json: &format!("[{} true]", 
"1, ".repeat(128)), + expected: variant, + } + .run() +} + +#[test] +fn test_json_to_variant_array_nested_large() -> Result<(), ArrowError> { + // verify u24, and large_size + let mut variant_builder = VariantBuilder::new(); + let mut list_builder = variant_builder.new_list(); + for _ in 0..256 { + let mut list_builder_inner = list_builder.new_list(); + for _ in 0..255 { + list_builder_inner.append_value(Variant::Null); + } + list_builder_inner.finish(); + } + list_builder.finish(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + let intermediate = format!("[{}]", vec!["null"; 255].join(", ")); + let json = format!("[{}]", vec![intermediate; 256].join(", ")); + JsonToVariantTest { + json: json.as_str(), + expected: variant, + } + .run() +} + +#[test] +fn test_json_to_variant_object_simple() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut object_builder = variant_builder.new_object(); + object_builder.insert("a", Variant::Int8(3)); + object_builder.insert("b", Variant::Int8(2)); + object_builder.finish().unwrap(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + JsonToVariantTest { + json: "{\"b\": 2, \"a\": 1, \"a\": 3}", + expected: variant, + } + .run() +} + +#[test] +fn test_json_to_variant_object_complex() -> Result<(), ArrowError> { + let mut variant_builder = VariantBuilder::new(); + let mut object_builder = variant_builder.new_object(); + let mut inner_list_builder = object_builder.new_list("booleans"); + inner_list_builder.append_value(Variant::BooleanTrue); + inner_list_builder.append_value(Variant::BooleanFalse); + inner_list_builder.finish(); + object_builder.insert("null", Variant::Null); + let mut inner_list_builder = object_builder.new_list("numbers"); + inner_list_builder.append_value(Variant::Int8(4)); + inner_list_builder.append_value(Variant::Double(-3e0)); + 
inner_list_builder.append_value(Variant::Double(1001e-3)); + inner_list_builder.finish(); + object_builder.finish().unwrap(); + let (metadata, value) = variant_builder.finish(); + let variant = Variant::try_new(&metadata, &value)?; + JsonToVariantTest { + json: "{\"numbers\": [4, -3e0, 1001e-3], \"null\": null, \"booleans\": [true, false]}", + expected: variant, + } + .run() +} + +#[test] +fn test_json_to_variant_object_very_large() -> Result<(), ArrowError> { Review Comment: I wasn't able to make it faster with a small effort, but I did briefly look at some profiling and it is spending a very large amount of time validating the offsets in the variant. That is likely something we can improve on over time. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org