This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git

The following commit(s) were added to refs/heads/main by this push:
     new 6080d4d  test: add test cases for different table setup (#36)
6080d4d is described below

commit 6080d4d43515f73ba08b9fb92dbead562418c96f
Author: Shiyan Xu <2701446+xushi...@users.noreply.github.com>
AuthorDate: Thu Jul 4 18:54:25 2024 -0500

    test: add test cases for different table setup (#36)

    - run test cases against datafusion APIs
      - v6_nonpartitioned
      - v6_simplekeygen_hivestyle_no_metafields
      - v6_simplekeygen_nonhivestyle
      - v6_timebasedkeygen_nonhivestyle
    - organize test data and improve test utils
---
 .licenserc.yaml                                    |   2 +-
 Cargo.toml                                         |   2 +
 crates/core/src/storage/mod.rs                     |  14 +--
 crates/core/src/table/mod.rs                       |   8 +-
 crates/core/src/table/timeline.rs                  |   7 +-
 crates/core/{fixtures => tests/data}/a.parquet     | Bin
 .../{fixtures => tests/data}/leaf_dir/.gitkeep     |   0
 .../.hoodie/hoodie.properties                      |   0
 .../commits_stub/.hoodie/20240402123035233.commit  |   0
 .../.hoodie/20240402123035233.commit.requested     |   0
 .../.hoodie/20240402123035233.inflight             |   0
 .../commits_stub/.hoodie/20240402144910683.commit  |   0
 .../.hoodie/20240402144910683.commit.requested     |   0
 .../.hoodie/20240402144910683.inflight             |   0
 .../commits_stub/.hoodie/hoodie.properties         |   0
 .../data}/timeline/commits_stub/a.parquet          |   0
 .../data}/timeline/commits_stub/part1/b.parquet    |   0
 .../timeline/commits_stub/part2/part22/c.parquet   |   0
 .../commits_stub/part3/part32/part33/d.parquet     |   0
 crates/datafusion/Cargo.toml                       |   4 +-
 crates/datafusion/src/lib.rs                       | 136 +++++++++++----------
 crates/tests/Cargo.toml                            |  13 ++
 ....datagen.sql => v6_complexkeygen_hivestyle.sql} |   0
 ...rtitioned.datagen.sql => v6_nonpartitioned.sql} |   0
 ...=> v6_simplekeygen_hivestyle_no_metafields.sql} |   0
 ...atagen.sql => v6_simplekeygen_nonhivestyle.sql} |   0
 crates/tests/src/lib.rs                            |  58 +++++----
 crates/tests/src/utils.rs                          |  39 ++++++
 28 files changed, 178 insertions(+), 105 deletions(-)

diff --git a/.licenserc.yaml b/.licenserc.yaml
index 2ec3964..719fa0b 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -23,7 +23,7 @@ header:
   paths-ignore:
     - 'LICENSE'
     - 'NOTICE'
-    - '**/fixtures/**'
+    - '**/data/**'

   comment: on-failure
diff --git a/Cargo.toml b/Cargo.toml
index 82f0383..db333da 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -60,6 +60,8 @@ serde_json = "1"
 anyhow = { version = "1.0.86" }
 bytes = { version = "1" }
 chrono = { version = "=0.4.34", default-features = false, features = ["clock"] }
+strum = { version = "0.26.3", features = ["derive"] }
+strum_macros = "0.26.4"
 tracing = { version = "0.1", features = ["log"] }
 regex = { version = "1" }
 url = { version = "2" }
diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs
index 98e1b2d..42cb147 100644
--- a/crates/core/src/storage/mod.rs
+++ b/crates/core/src/storage/mod.rs
@@ -213,7 +213,7 @@ mod tests {
     #[tokio::test]
     async fn storage_list_dirs() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -235,7 +235,7 @@ mod tests {
     #[tokio::test]
     async fn storage_list_dirs_as_paths() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -258,7 +258,7 @@ mod tests {
     #[tokio::test]
     async fn storage_list_files() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -317,7 +317,7 @@ mod tests {
     #[tokio::test]
     async fn use_storage_to_get_leaf_dirs() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -331,7 +331,7 @@ mod tests {
     #[tokio::test]
     async fn use_storage_to_get_leaf_dirs_for_leaf_dir() {
         let base_url =
-            Url::from_directory_path(canonicalize(Path::new("fixtures/leaf_dir")).unwrap())
+            Url::from_directory_path(canonicalize(Path::new("tests/data/leaf_dir")).unwrap())
                 .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
         let leaf_dirs = get_leaf_dirs(&storage, None).await.unwrap();
@@ -345,7 +345,7 @@ mod tests {
     #[tokio::test]
     async fn storage_get_file_info() {
         let base_url =
-            Url::from_directory_path(canonicalize(Path::new("fixtures")).unwrap()).unwrap();
+            Url::from_directory_path(canonicalize(Path::new("tests/data")).unwrap()).unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
         let file_info = storage.get_file_info("a.parquet").await.unwrap();
         assert_eq!(file_info.name, "a.parquet");
@@ -359,7 +359,7 @@ mod tests {
     #[tokio::test]
     async fn storage_get_parquet_file_data() {
         let base_url =
-            Url::from_directory_path(canonicalize(Path::new("fixtures")).unwrap()).unwrap();
+            Url::from_directory_path(canonicalize(Path::new("tests/data")).unwrap()).unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
         let file_data = storage.get_parquet_file_data("a.parquet").await.unwrap();
         assert_eq!(file_data.num_rows(), 5);
diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs
index f3c9365..849f0e0 100644
--- a/crates/core/src/table/mod.rs
+++ b/crates/core/src/table/mod.rs
@@ -57,7 +57,7 @@ impl Table {

         let props = Self::load_properties(base_url.clone(), storage_options.clone())
             .await
-            .context("Failed to create a table")?;
+            .context("Failed to load table properties")?;
         let props = Arc::new(props);

         let timeline = Timeline::new(base_url.clone(), storage_options.clone(), props.clone())
@@ -412,8 +412,10 @@ mod tests {

     #[tokio::test]
     async fn hudi_table_get_table_metadata() {
-        let base_path =
-            canonicalize(Path::new("fixtures/table_metadata/sample_table_properties")).unwrap();
+        let base_path = canonicalize(Path::new(
+            "tests/data/table_metadata/sample_table_properties",
+        ))
+        .unwrap();
         let table = Table::new(base_path.to_str().unwrap(), HashMap::new())
             .await
             .unwrap();
diff --git a/crates/core/src/table/timeline.rs b/crates/core/src/table/timeline.rs
index 3a68387..51601a4 100644
--- a/crates/core/src/table/timeline.rs
+++ b/crates/core/src/table/timeline.rs
@@ -215,9 +215,10 @@ mod tests {

     #[tokio::test]
     async fn init_commits_timeline() {
-        let base_url =
-            Url::from_file_path(canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap())
-                .unwrap();
+        let base_url = Url::from_file_path(
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
+        )
+        .unwrap();
         let timeline = Timeline::new(
             Arc::new(base_url),
             Arc::new(HashMap::new()),
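All of the storage test updates above share one setup pattern: canonicalize a fixture directory under the relocated tests/data root, wrap it in a file URL, and build a Storage over it. Pulled out on its own, the pattern looks roughly like this (a minimal sketch using only calls visible in this diff; `Storage` and `get_leaf_dirs` live in the enclosing crates/core/src/storage module, and the closing assertion is an assumption about the stub's layout):

    use std::collections::HashMap;
    use std::fs::canonicalize;
    use std::path::Path;
    use std::sync::Arc;

    use url::Url;

    #[tokio::test]
    async fn example_fixture_setup() {
        // Resolve the relocated fixture directory and point a file:// URL at it.
        let base_url = Url::from_directory_path(
            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
        )
        .unwrap();
        // Build a Storage with no extra options, as every test above does.
        let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
        // Each test then exercises a single API, e.g. collecting leaf directories.
        let leaf_dirs = get_leaf_dirs(&storage, None).await.unwrap();
        assert!(!leaf_dirs.is_empty());
    }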
diff --git a/crates/core/fixtures/a.parquet b/crates/core/tests/data/a.parquet
similarity index 100%
rename from crates/core/fixtures/a.parquet
rename to crates/core/tests/data/a.parquet
diff --git a/crates/core/fixtures/leaf_dir/.gitkeep b/crates/core/tests/data/leaf_dir/.gitkeep
similarity index 100%
rename from crates/core/fixtures/leaf_dir/.gitkeep
rename to crates/core/tests/data/leaf_dir/.gitkeep
diff --git a/crates/core/fixtures/table_metadata/sample_table_properties/.hoodie/hoodie.properties b/crates/core/tests/data/table_metadata/sample_table_properties/.hoodie/hoodie.properties
similarity index 100%
rename from crates/core/fixtures/table_metadata/sample_table_properties/.hoodie/hoodie.properties
rename to crates/core/tests/data/table_metadata/sample_table_properties/.hoodie/hoodie.properties
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit.requested b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit.requested
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit.requested
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit.requested
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.inflight b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.inflight
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.inflight
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.inflight
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit.requested b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit.requested
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit.requested
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit.requested
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.inflight b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.inflight
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.inflight
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.inflight
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/hoodie.properties b/crates/core/tests/data/timeline/commits_stub/.hoodie/hoodie.properties
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/hoodie.properties
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/hoodie.properties
diff --git a/crates/core/fixtures/timeline/commits_stub/a.parquet b/crates/core/tests/data/timeline/commits_stub/a.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/a.parquet
rename to crates/core/tests/data/timeline/commits_stub/a.parquet
diff --git a/crates/core/fixtures/timeline/commits_stub/part1/b.parquet b/crates/core/tests/data/timeline/commits_stub/part1/b.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/part1/b.parquet
rename to crates/core/tests/data/timeline/commits_stub/part1/b.parquet
diff --git a/crates/core/fixtures/timeline/commits_stub/part2/part22/c.parquet b/crates/core/tests/data/timeline/commits_stub/part2/part22/c.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/part2/part22/c.parquet
rename to crates/core/tests/data/timeline/commits_stub/part2/part22/c.parquet
diff --git a/crates/core/fixtures/timeline/commits_stub/part3/part32/part33/d.parquet b/crates/core/tests/data/timeline/commits_stub/part3/part32/part33/d.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/part3/part32/part33/d.parquet
rename to crates/core/tests/data/timeline/commits_stub/part3/part32/part33/d.parquet
diff --git a/crates/datafusion/Cargo.toml b/crates/datafusion/Cargo.toml
index dd9f3d6..623657e 100644
--- a/crates/datafusion/Cargo.toml
+++ b/crates/datafusion/Cargo.toml
@@ -24,7 +24,6 @@ rust-version.workspace = true

 [dependencies]
 hudi-core = { path = "../core" }
-hudi-tests = { path = "../tests" }
 # arrow
 arrow = { workspace = true }
 arrow-arith = { workspace = true }
@@ -67,3 +66,6 @@ chrono = { workspace = true, default-features = false, features = ["clock"] }
 regex = { workspace = true }
 uuid = { workspace = true, features = ["serde", "v4"] }
 url = { workspace = true }
+
+[dev-dependencies]
+hudi-tests = { path = "../tests" }
diff --git a/crates/datafusion/src/lib.rs b/crates/datafusion/src/lib.rs
index 99f1c7e..3b0a73a 100644
--- a/crates/datafusion/src/lib.rs
+++ b/crates/datafusion/src/lib.rs
@@ -133,92 +133,102 @@ mod tests {
     use std::collections::HashMap;
     use std::sync::Arc;

-    use arrow_array::{Array, Int32Array, RecordBatch, StringArray};
     use datafusion::prelude::{SessionConfig, SessionContext};
-    use datafusion_common::{DataFusionError, Result, ScalarValue};
-
-    use hudi_core::config::HudiConfig;
-    use hudi_tests::TestTable;
+    use datafusion_common::ScalarValue;
+
+    use hudi_core::config::HudiConfig::ReadInputPartitions;
+    use hudi_tests::utils::get_bool_column;
+    use hudi_tests::TestTable::{
+        V6Nonpartitioned, V6SimplekeygenHivestyleNoMetafields, V6SimplekeygenNonhivestyle,
+        V6TimebasedkeygenNonhivestyle,
+    };
+    use hudi_tests::{utils, TestTable};
+    use utils::{get_i32_column, get_str_column};
+    use TestTable::V6ComplexkeygenHivestyle;

     use crate::HudiDataSource;

-    #[tokio::test]
-    async fn datafusion_read_hudi_table() -> Result<(), DataFusionError> {
+    async fn prepare_session_context(
+        test_table: &TestTable,
+        options: &[(String, String)],
+    ) -> SessionContext {
         let config = SessionConfig::new().set(
             "datafusion.sql_parser.enable_ident_normalization",
             ScalarValue::from(false),
         );
         let ctx = SessionContext::new_with_config(config);
-        let base_url = TestTable::V6ComplexkeygenHivestyle.url();
-        let hudi = HudiDataSource::new(
-            base_url.as_str(),
-            HashMap::from([(
-                HudiConfig::ReadInputPartitions.as_ref().to_string(),
-                "2".to_string(),
-            )]),
-        )
-        .await?;
-        ctx.register_table("hudi_table_complexkeygen", Arc::new(hudi))?;
-        let sql = r#"
-        SELECT _hoodie_file_name, id, name, structField.field2
-        FROM hudi_table_complexkeygen WHERE id % 2 = 0
-        AND structField.field2 > 30 ORDER BY name LIMIT 10"#;
-
-        // verify plan
-        let explaining_df = ctx.sql(sql).await?.explain(false, true).unwrap();
-        let explaining_rb = explaining_df.collect().await?;
+        let base_url = test_table.url();
+        let options = HashMap::from_iter(options.iter().cloned().collect::<HashMap<_, _>>());
+        let hudi = HudiDataSource::new(base_url.as_str(), options)
+            .await
+            .unwrap();
+        ctx.register_table(test_table.as_ref(), Arc::new(hudi))
+            .unwrap();
+        ctx
+    }
+
+    async fn verify_plan(
+        ctx: &SessionContext,
+        sql: &str,
+        table_name: &str,
+        planned_input_partitioned: &i32,
+    ) {
+        let explaining_df = ctx.sql(sql).await.unwrap().explain(false, true).unwrap();
+        let explaining_rb = explaining_df.collect().await.unwrap();
         let explaining_rb = explaining_rb.first().unwrap();
         let plan = get_str_column(explaining_rb, "plan").join("");
         let plan_lines: Vec<&str> = plan.lines().map(str::trim).collect();
         assert!(plan_lines[2].starts_with("SortExec: TopK(fetch=10)"));
-        assert!(plan_lines[3].starts_with("ProjectionExec: expr=[_hoodie_file_name@0 as _hoodie_file_name, id@1 as id, name@2 as name, get_field(structField@3, field2) as hudi_table_complexkeygen.structField[field2]]"));
+        assert!(plan_lines[3].starts_with(&format!(
+            "ProjectionExec: expr=[id@0 as id, name@1 as name, isActive@2 as isActive, \
+            get_field(structField@3, field2) as {}.structField[field2]]",
+            table_name
+        )));
         assert!(plan_lines[5].starts_with(
-            "FilterExec: CAST(id@1 AS Int64) % 2 = 0 AND get_field(structField@3, field2) > 30"
+            "FilterExec: CAST(id@0 AS Int64) % 2 = 0 AND get_field(structField@3, field2) > 30"
         ));
-        assert!(plan_lines[6].contains("input_partitions=2"));
+        assert!(plan_lines[6].contains(&format!("input_partitions={}", planned_input_partitioned)));
+    }

-        // verify data
-        let df = ctx.sql(sql).await?;
-        let rb = df.collect().await?;
+    async fn verify_data(ctx: &SessionContext, sql: &str, table_name: &str) {
+        let df = ctx.sql(sql).await.unwrap();
+        let rb = df.collect().await.unwrap();
         let rb = rb.first().unwrap();
-        assert_eq!(
-            get_str_column(rb, "_hoodie_file_name"),
-            &[
-                "bb7c3a45-387f-490d-aab2-981c3f1a8ada-0_0-140-198_20240418173213674.parquet",
-                "4668e35e-bff8-4be9-9ff2-e7fb17ecb1a7-0_1-161-224_20240418173235694.parquet"
-            ]
-        );
         assert_eq!(get_i32_column(rb, "id"), &[2, 4]);
         assert_eq!(get_str_column(rb, "name"), &["Bob", "Diana"]);
+        assert_eq!(get_bool_column(rb, "isActive"), &[false, true]);
         assert_eq!(
-            get_i32_column(rb, "hudi_table_complexkeygen.structField[field2]"),
+            get_i32_column(rb, &format!("{}.structField[field2]", table_name)),
             &[40, 50]
         );
-
-        Ok(())
-    }
-
-    fn get_str_column<'a>(record_batch: &'a RecordBatch, name: &str) -> Vec<&'a str> {
-        record_batch
-            .column_by_name(name)
-            .unwrap()
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap()
-            .iter()
-            .map(|s| s.unwrap())
-            .collect::<Vec<_>>()
     }

-    fn get_i32_column(record_batch: &RecordBatch, name: &str) -> Vec<i32> {
-        record_batch
-            .column_by_name(name)
-            .unwrap()
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .unwrap()
-            .iter()
-            .map(|s| s.unwrap())
-            .collect::<Vec<_>>()
+    #[tokio::test]
+    async fn datafusion_read_hudi_table() {
+        for (test_table, planned_input_partitions) in &[
+            (V6ComplexkeygenHivestyle, 2),
+            (V6Nonpartitioned, 1),
+            (V6SimplekeygenNonhivestyle, 2),
+            (V6SimplekeygenHivestyleNoMetafields, 2),
+            (V6TimebasedkeygenNonhivestyle, 2),
+        ] {
+            println!(">>> testing for {}", test_table.as_ref());
+            let ctx = prepare_session_context(
+                test_table,
+                &[(ReadInputPartitions.as_ref().to_string(), "2".to_string())],
+            )
+            .await;
+
+            let sql = format!(
+                r#"
+            SELECT id, name, isActive, structField.field2
+            FROM {} WHERE id % 2 = 0
+            AND structField.field2 > 30 ORDER BY name LIMIT 10"#,
+                test_table.as_ref()
+            );
+
+            verify_plan(&ctx, &sql, test_table.as_ref(), planned_input_partitions).await;
+            verify_data(&ctx, &sql, test_table.as_ref()).await
+        }
     }
 }
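Outside of the test module, the registration flow that prepare_session_context exercises would look roughly like the following (a sketch assembled from the APIs visible in this diff; the hudi_datafusion import path and the table path are assumptions, not taken from the commit):

    use std::collections::HashMap;
    use std::sync::Arc;

    use datafusion::prelude::SessionContext;
    // Assumed import path for the crate under crates/datafusion.
    use hudi_datafusion::HudiDataSource;

    async fn query_hudi_table() -> datafusion_common::Result<()> {
        let ctx = SessionContext::new();
        // Placeholder path to an extracted Hudi table.
        let hudi = HudiDataSource::new("/tmp/v6_nonpartitioned", HashMap::new()).await?;
        // Register the table provider, then query it with plain SQL.
        ctx.register_table("hudi_table", Arc::new(hudi))?;
        let df = ctx.sql("SELECT id, name FROM hudi_table LIMIT 10").await?;
        df.show().await?;
        Ok(())
    }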
diff --git a/crates/tests/Cargo.toml b/crates/tests/Cargo.toml
index b6efe85..df9c3af 100644
--- a/crates/tests/Cargo.toml
+++ b/crates/tests/Cargo.toml
@@ -23,6 +23,19 @@ license.workspace = true
 rust-version.workspace = true

 [dependencies]
+arrow = { workspace = true }
+arrow-arith = { workspace = true }
+arrow-array = { workspace = true, features = ["chrono-tz"] }
+arrow-buffer = { workspace = true }
+arrow-cast = { workspace = true }
+arrow-ipc = { workspace = true }
+arrow-json = { workspace = true }
+arrow-ord = { workspace = true }
+arrow-row = { workspace = true }
+arrow-schema = { workspace = true, features = ["serde"] }
+arrow-select = { workspace = true }
+strum = { workspace = true }
+strum_macros = { workspace = true }
 tempfile = "3.10.1"
 zip-extract = "0.1.3"
 url = { workspace = true }
diff --git a/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql b/crates/tests/data/tables/v6_complexkeygen_hivestyle.sql
similarity index 100%
rename from crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql
rename to crates/tests/data/tables/v6_complexkeygen_hivestyle.sql
diff --git a/crates/tests/data/tables/v6_nonpartitioned.datagen.sql b/crates/tests/data/tables/v6_nonpartitioned.sql
similarity index 100%
rename from crates/tests/data/tables/v6_nonpartitioned.datagen.sql
rename to crates/tests/data/tables/v6_nonpartitioned.sql
diff --git a/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.sql
similarity index 100%
rename from crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql
rename to crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.sql
diff --git a/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.sql
similarity index 100%
rename from crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql
rename to crates/tests/data/tables/v6_simplekeygen_nonhivestyle.sql
diff --git a/crates/tests/src/lib.rs b/crates/tests/src/lib.rs
index f94d5fd..22b2f3e 100644
--- a/crates/tests/src/lib.rs
+++ b/crates/tests/src/lib.rs
@@ -21,6 +21,7 @@ use std::fs;
 use std::io::Cursor;
 use std::path::{Path, PathBuf};

+use strum_macros::{AsRefStr, EnumIter, EnumString};
 use tempfile::tempdir;
 use url::Url;

@@ -29,51 +30,54 @@ pub mod utils;
 pub fn extract_test_table(zip_path: &Path) -> PathBuf {
     let target_dir = tempdir().unwrap().path().to_path_buf();
     let archive = fs::read(zip_path).unwrap();
-    zip_extract::extract(Cursor::new(archive), &target_dir, true).unwrap();
+    zip_extract::extract(Cursor::new(archive), &target_dir, false).unwrap();
     target_dir
 }

+#[derive(Debug, EnumString, AsRefStr, EnumIter)]
+#[strum(serialize_all = "snake_case")]
 pub enum TestTable {
     V6ComplexkeygenHivestyle,
     V6Empty,
     V6Nonpartitioned,
+    V6SimplekeygenHivestyleNoMetafields,
+    V6SimplekeygenNonhivestyle,
+    V6TimebasedkeygenNonhivestyle,
 }

 impl TestTable {
     pub fn zip_path(&self) -> Box<Path> {
         let dir = env!("CARGO_MANIFEST_DIR");
-        let data_path = Path::new(dir).join("data/tables");
-        match self {
-            Self::V6ComplexkeygenHivestyle => data_path
-                .join("v6_complexkeygen_hivestyle.zip")
-                .into_boxed_path(),
-            Self::V6Empty => data_path.join("v6_empty.zip").into_boxed_path(),
-            Self::V6Nonpartitioned => data_path.join("v6_nonpartitioned.zip").into_boxed_path(),
-        }
+        let data_path = Path::new(dir)
+            .join("data/tables")
+            .join(format!("{}.zip", self.as_ref()));
+        data_path.into_boxed_path()
     }

     pub fn path(&self) -> String {
         let zip_path = self.zip_path();
-        match self {
-            Self::V6ComplexkeygenHivestyle => extract_test_table(&zip_path)
-                .join("v6_complexkeygen_hivestyle")
-                .to_str()
-                .unwrap()
-                .to_string(),
-            Self::V6Empty => extract_test_table(&zip_path)
-                .join("v6_empty")
-                .to_str()
-                .unwrap()
-                .to_string(),
-            Self::V6Nonpartitioned => extract_test_table(&zip_path)
-                .join("v6_nonpartitioned")
-                .to_str()
-                .unwrap()
-                .to_string(),
-        }
+        let path_buf = extract_test_table(zip_path.as_ref()).join(self.as_ref());
+        path_buf.to_str().unwrap().to_string()
     }

     pub fn url(&self) -> Url {
-        Url::from_file_path(self.path()).unwrap()
+        let path = self.path();
+        Url::from_file_path(path).unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use strum::IntoEnumIterator;
+
+    use crate::TestTable;
+
+    #[test]
+    fn test_table_zip_file_should_exist() {
+        for t in TestTable::iter() {
+            let path = t.zip_path();
+            assert!(path.exists());
+            assert!(path.is_file());
+        }
     }
 }
diff --git a/crates/tests/src/utils.rs b/crates/tests/src/utils.rs
index 37e9cfb..0f7d44b 100644
--- a/crates/tests/src/utils.rs
+++ b/crates/tests/src/utils.rs
@@ -17,6 +17,9 @@
  * under the License.
  */

+use arrow::record_batch::RecordBatch;
+use arrow_array::{Array, BooleanArray, Int32Array, StringArray};
+
 #[macro_export]
 macro_rules! assert_approx_eq {
     ($a:expr, $b:expr, $delta:expr) => {{
@@ -32,3 +35,39 @@ macro_rules! assert_approx_eq {
         );
     }};
 }
+
+pub fn get_str_column<'a>(record_batch: &'a RecordBatch, name: &str) -> Vec<&'a str> {
+    record_batch
+        .column_by_name(name)
+        .unwrap()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap()
+        .iter()
+        .map(|s| s.unwrap())
+        .collect::<Vec<_>>()
+}
+
+pub fn get_i32_column(record_batch: &RecordBatch, name: &str) -> Vec<i32> {
+    record_batch
+        .column_by_name(name)
+        .unwrap()
+        .as_any()
+        .downcast_ref::<Int32Array>()
+        .unwrap()
+        .iter()
+        .map(|s| s.unwrap())
+        .collect::<Vec<_>>()
+}
+
+pub fn get_bool_column(record_batch: &RecordBatch, name: &str) -> Vec<bool> {
+    record_batch
+        .column_by_name(name)
+        .unwrap()
+        .as_any()
+        .downcast_ref::<BooleanArray>()
+        .unwrap()
+        .iter()
+        .map(|s| s.unwrap())
+        .collect::<Vec<_>>()
+}
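A closing note on the tests crate refactor: with #[strum(serialize_all = "snake_case")], each variant's AsRefStr output matches its fixture file stem, which is what lets zip_path and path collapse from per-variant match arms into single expressions. A minimal sketch of the convention (hypothetical test name; relies only on the derives shown above):

    use strum::IntoEnumIterator;

    use hudi_tests::TestTable;

    #[test]
    fn fixture_naming_convention() {
        // AsRefStr + snake_case: the variant name doubles as the fixture name.
        assert_eq!(
            TestTable::V6ComplexkeygenHivestyle.as_ref(),
            "v6_complexkeygen_hivestyle"
        );
        // Every archive therefore resolves to data/tables/<as_ref()>.zip, and
        // EnumIter makes the whole fixture set enumerable in one loop.
        for t in TestTable::iter() {
            println!("{} -> {}", t.as_ref(), t.zip_path().display());
        }
    }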