alamb commented on code in PR #8325:
URL: https://github.com/apache/arrow-rs/pull/8325#discussion_r2343905650
##########
parquet-variant/src/variant/metadata.rs:
##########
@@ -309,6 +310,15 @@ impl<'m> VariantMetadata<'m> {
self.header.offset_size.unpack_u32(bytes, i)
}
+ /// Returns the total size, in bytes, of the metadata.
+ ///
+ /// Note this value may be smaller than what was passed to [`Self::new`] or
+ /// [`Self::try_new`] if the input was larger than necessary to encode the
+ /// metadata dictionary.
+ pub fn size(&self) -> usize {
Review Comment:
I needed to expose this information because the variant metadata and variant
data are appended together in a single .bin file in the test cases
##########
parquet/tests/variant_integration.rs:
##########
@@ -21,1233 +21,486 @@
//! Variant values from .variant.bin files, reads Parquet files, converts
StructArray
//! to VariantArray, and verifies that extracted values match expected results.
//!
-//! Based on the parquet-testing PR:
https://github.com/apache/parquet-testing/pull/90/files
-//! Inspired by the arrow-go implementation:
https://github.com/apache/arrow-go/pull/455/files
+//! Inspired by the arrow-go implementation:
<https://github.com/apache/arrow-go/pull/455/files>
-// These tests require the arrow feature
-#![cfg(feature = "arrow")]
-
-use arrow_array::{Array, StructArray};
+use arrow::util::test_util::parquet_test_data;
+use arrow_array::{Array, ArrayRef};
+use arrow_cast::cast;
+use arrow_schema::{DataType, Fields};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
-use std::{
- env,
- error::Error,
- fs,
- path::{Path, PathBuf},
-};
+use parquet_variant::{Variant, VariantMetadata};
+use parquet_variant_compute::VariantArray;
+use serde::Deserialize;
+use std::path::Path;
+use std::sync::{Arc, LazyLock};
+use std::{fs, path::PathBuf};
+
+type Result<T> = std::result::Result<T, String>;
+
+/// Creates a test function for a given case number
+///
+/// Note the index is zero-based, while the case number is one-based
+macro_rules! variant_test_case {
+ ($case_num:literal) => {
+ paste::paste! {
+ #[test]
+ fn [<test_variant_integration_case_ $case_num>]() {
+ all_cases()[$case_num - 1].run()
+ }
+ }
+ };
+
+ // Generates an error test case, where the expected result is an error
message
+ ($case_num:literal, $expected_error:literal) => {
+ paste::paste! {
+ #[test]
+ #[should_panic(expected = $expected_error)]
+ fn [<test_variant_integration_case_ $case_num>]() {
+ all_cases()[$case_num - 1].run()
+ }
+ }
+ };
+}
-/// Test case definition structure matching the format from cases.json
-#[derive(Debug, Clone)]
+// Generate test functions for each case
Review Comment:
I rewrote this file so the tests use the existing Rust test harness rather
than our own. It does require some redundancy, but it means the normal Rust
test execution tools work with it
##########
parquet/tests/variant_integration.rs:
##########
@@ -21,1233 +21,486 @@
//! Variant values from .variant.bin files, reads Parquet files, converts
StructArray
//! to VariantArray, and verifies that extracted values match expected results.
//!
-//! Based on the parquet-testing PR:
https://github.com/apache/parquet-testing/pull/90/files
-//! Inspired by the arrow-go implementation:
https://github.com/apache/arrow-go/pull/455/files
+//! Inspired by the arrow-go implementation:
<https://github.com/apache/arrow-go/pull/455/files>
-// These tests require the arrow feature
-#![cfg(feature = "arrow")]
-
-use arrow_array::{Array, StructArray};
+use arrow::util::test_util::parquet_test_data;
+use arrow_array::{Array, ArrayRef};
+use arrow_cast::cast;
+use arrow_schema::{DataType, Fields};
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
-use std::{
- env,
- error::Error,
- fs,
- path::{Path, PathBuf},
-};
+use parquet_variant::{Variant, VariantMetadata};
+use parquet_variant_compute::VariantArray;
+use serde::Deserialize;
+use std::path::Path;
+use std::sync::{Arc, LazyLock};
+use std::{fs, path::PathBuf};
+
+type Result<T> = std::result::Result<T, String>;
+
+/// Creates a test function for a given case number
+///
+/// Note the index is zero-based, while the case number is one-based
+macro_rules! variant_test_case {
+ ($case_num:literal) => {
+ paste::paste! {
+ #[test]
+ fn [<test_variant_integration_case_ $case_num>]() {
+ all_cases()[$case_num - 1].run()
+ }
+ }
+ };
+
+ // Generates an error test case, where the expected result is an error
message
+ ($case_num:literal, $expected_error:literal) => {
+ paste::paste! {
+ #[test]
+ #[should_panic(expected = $expected_error)]
+ fn [<test_variant_integration_case_ $case_num>]() {
+ all_cases()[$case_num - 1].run()
+ }
+ }
+ };
+}
-/// Test case definition structure matching the format from cases.json
-#[derive(Debug, Clone)]
+// Generate test functions for each case
+// Notes
+// - case 3 is empty in cases.json for some reason
+// - cases 40, 42, 87, 127 and 128 are expected to fail always (they include
invalid variants)
+// - the remaining cases are expected to (eventually) pass
+
+variant_test_case!(1, "Unsupported typed_value type: List(");
Review Comment:
I am quite pleased by how many cases pass, actually
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]