This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hudi-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6080d4d  test: add test cases for different table setup (#36)
6080d4d is described below

commit 6080d4d43515f73ba08b9fb92dbead562418c96f
Author: Shiyan Xu <2701446+xushi...@users.noreply.github.com>
AuthorDate: Thu Jul 4 18:54:25 2024 -0500

    test: add test cases for different table setup (#36)
    
    - run test cases against datafusion APIs
      - v6_nonpartitioned
      - v6_simplekeygen_hivestyle_no_metafields
      - v6_simplekeygen_nonhivestyle
      - v6_timebasedkeygen_nonhivestyle
    - organize test data and improve test utils
---
 .licenserc.yaml                                    |   2 +-
 Cargo.toml                                         |   2 +
 crates/core/src/storage/mod.rs                     |  14 +--
 crates/core/src/table/mod.rs                       |   8 +-
 crates/core/src/table/timeline.rs                  |   7 +-
 crates/core/{fixtures => tests/data}/a.parquet     | Bin
 .../{fixtures => tests/data}/leaf_dir/.gitkeep     |   0
 .../.hoodie/hoodie.properties                      |   0
 .../commits_stub/.hoodie/20240402123035233.commit  |   0
 .../.hoodie/20240402123035233.commit.requested     |   0
 .../.hoodie/20240402123035233.inflight             |   0
 .../commits_stub/.hoodie/20240402144910683.commit  |   0
 .../.hoodie/20240402144910683.commit.requested     |   0
 .../.hoodie/20240402144910683.inflight             |   0
 .../commits_stub/.hoodie/hoodie.properties         |   0
 .../data}/timeline/commits_stub/a.parquet          |   0
 .../data}/timeline/commits_stub/part1/b.parquet    |   0
 .../timeline/commits_stub/part2/part22/c.parquet   |   0
 .../commits_stub/part3/part32/part33/d.parquet     |   0
 crates/datafusion/Cargo.toml                       |   4 +-
 crates/datafusion/src/lib.rs                       | 136 +++++++++++----------
 crates/tests/Cargo.toml                            |  13 ++
 ....datagen.sql => v6_complexkeygen_hivestyle.sql} |   0
 ...rtitioned.datagen.sql => v6_nonpartitioned.sql} |   0
 ...=> v6_simplekeygen_hivestyle_no_metafields.sql} |   0
 ...atagen.sql => v6_simplekeygen_nonhivestyle.sql} |   0
 crates/tests/src/lib.rs                            |  58 +++++----
 crates/tests/src/utils.rs                          |  39 ++++++
 28 files changed, 178 insertions(+), 105 deletions(-)

diff --git a/.licenserc.yaml b/.licenserc.yaml
index 2ec3964..719fa0b 100644
--- a/.licenserc.yaml
+++ b/.licenserc.yaml
@@ -23,7 +23,7 @@ header:
   paths-ignore:
     - 'LICENSE'
     - 'NOTICE'
-    - '**/fixtures/**'
+    - '**/data/**'
 
   comment: on-failure
 
diff --git a/Cargo.toml b/Cargo.toml
index 82f0383..db333da 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -60,6 +60,8 @@ serde_json = "1"
 anyhow = { version = "1.0.86" }
 bytes = { version = "1" }
 chrono = { version = "=0.4.34", default-features = false, features = ["clock"] }
+strum = { version = "0.26.3", features = ["derive"] }
+strum_macros = "0.26.4"
 tracing = { version = "0.1", features = ["log"] }
 regex = { version = "1" }
 url = { version = "2" }
diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs
index 98e1b2d..42cb147 100644
--- a/crates/core/src/storage/mod.rs
+++ b/crates/core/src/storage/mod.rs
@@ -213,7 +213,7 @@ mod tests {
     #[tokio::test]
     async fn storage_list_dirs() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -235,7 +235,7 @@ mod tests {
     #[tokio::test]
     async fn storage_list_dirs_as_paths() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -258,7 +258,7 @@ mod tests {
     #[tokio::test]
     async fn storage_list_files() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -317,7 +317,7 @@ mod tests {
     #[tokio::test]
     async fn use_storage_to_get_leaf_dirs() {
         let base_url = Url::from_directory_path(
-            canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap(),
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
         )
         .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
@@ -331,7 +331,7 @@ mod tests {
     #[tokio::test]
     async fn use_storage_to_get_leaf_dirs_for_leaf_dir() {
         let base_url =
-            Url::from_directory_path(canonicalize(Path::new("fixtures/leaf_dir")).unwrap())
+            Url::from_directory_path(canonicalize(Path::new("tests/data/leaf_dir")).unwrap())
                 .unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
         let leaf_dirs = get_leaf_dirs(&storage, None).await.unwrap();
@@ -345,7 +345,7 @@ mod tests {
     #[tokio::test]
     async fn storage_get_file_info() {
         let base_url =
-            Url::from_directory_path(canonicalize(Path::new("fixtures")).unwrap()).unwrap();
+            Url::from_directory_path(canonicalize(Path::new("tests/data")).unwrap()).unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
         let file_info = storage.get_file_info("a.parquet").await.unwrap();
         assert_eq!(file_info.name, "a.parquet");
@@ -359,7 +359,7 @@ mod tests {
     #[tokio::test]
     async fn storage_get_parquet_file_data() {
         let base_url =
-            Url::from_directory_path(canonicalize(Path::new("fixtures")).unwrap()).unwrap();
+            Url::from_directory_path(canonicalize(Path::new("tests/data")).unwrap()).unwrap();
         let storage = Storage::new(Arc::new(base_url), Arc::new(HashMap::new())).unwrap();
         let file_data = storage.get_parquet_file_data("a.parquet").await.unwrap();
         assert_eq!(file_data.num_rows(), 5);
diff --git a/crates/core/src/table/mod.rs b/crates/core/src/table/mod.rs
index f3c9365..849f0e0 100644
--- a/crates/core/src/table/mod.rs
+++ b/crates/core/src/table/mod.rs
@@ -57,7 +57,7 @@ impl Table {
 
         let props = Self::load_properties(base_url.clone(), storage_options.clone())
             .await
-            .context("Failed to create a table")?;
+            .context("Failed to load table properties")?;
 
         let props = Arc::new(props);
         let timeline = Timeline::new(base_url.clone(), storage_options.clone(), props.clone())
@@ -412,8 +412,10 @@ mod tests {
 
     #[tokio::test]
     async fn hudi_table_get_table_metadata() {
-        let base_path =
-            canonicalize(Path::new("fixtures/table_metadata/sample_table_properties")).unwrap();
+        let base_path = canonicalize(Path::new(
+            "tests/data/table_metadata/sample_table_properties",
+        ))
+        .unwrap();
         let table = Table::new(base_path.to_str().unwrap(), HashMap::new())
             .await
             .unwrap();
diff --git a/crates/core/src/table/timeline.rs b/crates/core/src/table/timeline.rs
index 3a68387..51601a4 100644
--- a/crates/core/src/table/timeline.rs
+++ b/crates/core/src/table/timeline.rs
@@ -215,9 +215,10 @@ mod tests {
 
     #[tokio::test]
     async fn init_commits_timeline() {
-        let base_url =
-            Url::from_file_path(canonicalize(Path::new("fixtures/timeline/commits_stub")).unwrap())
-                .unwrap();
+        let base_url = Url::from_file_path(
+            canonicalize(Path::new("tests/data/timeline/commits_stub")).unwrap(),
+        )
+        .unwrap();
         let timeline = Timeline::new(
             Arc::new(base_url),
             Arc::new(HashMap::new()),
diff --git a/crates/core/fixtures/a.parquet b/crates/core/tests/data/a.parquet
similarity index 100%
rename from crates/core/fixtures/a.parquet
rename to crates/core/tests/data/a.parquet
diff --git a/crates/core/fixtures/leaf_dir/.gitkeep b/crates/core/tests/data/leaf_dir/.gitkeep
similarity index 100%
rename from crates/core/fixtures/leaf_dir/.gitkeep
rename to crates/core/tests/data/leaf_dir/.gitkeep
diff --git a/crates/core/fixtures/table_metadata/sample_table_properties/.hoodie/hoodie.properties b/crates/core/tests/data/table_metadata/sample_table_properties/.hoodie/hoodie.properties
similarity index 100%
rename from crates/core/fixtures/table_metadata/sample_table_properties/.hoodie/hoodie.properties
rename to crates/core/tests/data/table_metadata/sample_table_properties/.hoodie/hoodie.properties
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit.requested b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit.requested
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.commit.requested
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.commit.requested
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.inflight b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.inflight
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402123035233.inflight
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402123035233.inflight
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit.requested b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit.requested
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.commit.requested
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.commit.requested
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.inflight b/crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.inflight
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/20240402144910683.inflight
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/20240402144910683.inflight
diff --git a/crates/core/fixtures/timeline/commits_stub/.hoodie/hoodie.properties b/crates/core/tests/data/timeline/commits_stub/.hoodie/hoodie.properties
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/.hoodie/hoodie.properties
rename to crates/core/tests/data/timeline/commits_stub/.hoodie/hoodie.properties
diff --git a/crates/core/fixtures/timeline/commits_stub/a.parquet b/crates/core/tests/data/timeline/commits_stub/a.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/a.parquet
rename to crates/core/tests/data/timeline/commits_stub/a.parquet
diff --git a/crates/core/fixtures/timeline/commits_stub/part1/b.parquet b/crates/core/tests/data/timeline/commits_stub/part1/b.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/part1/b.parquet
rename to crates/core/tests/data/timeline/commits_stub/part1/b.parquet
diff --git a/crates/core/fixtures/timeline/commits_stub/part2/part22/c.parquet b/crates/core/tests/data/timeline/commits_stub/part2/part22/c.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/part2/part22/c.parquet
rename to crates/core/tests/data/timeline/commits_stub/part2/part22/c.parquet
diff --git a/crates/core/fixtures/timeline/commits_stub/part3/part32/part33/d.parquet b/crates/core/tests/data/timeline/commits_stub/part3/part32/part33/d.parquet
similarity index 100%
rename from crates/core/fixtures/timeline/commits_stub/part3/part32/part33/d.parquet
rename to crates/core/tests/data/timeline/commits_stub/part3/part32/part33/d.parquet
diff --git a/crates/datafusion/Cargo.toml b/crates/datafusion/Cargo.toml
index dd9f3d6..623657e 100644
--- a/crates/datafusion/Cargo.toml
+++ b/crates/datafusion/Cargo.toml
@@ -24,7 +24,6 @@ rust-version.workspace = true
 
 [dependencies]
 hudi-core = { path = "../core" }
-hudi-tests = { path = "../tests" }
 # arrow
 arrow = { workspace = true }
 arrow-arith = { workspace = true }
@@ -67,3 +66,6 @@ chrono = { workspace = true, default-features = false, features = ["clock"] }
 regex = { workspace = true }
 uuid = { workspace = true, features = ["serde", "v4"] }
 url = { workspace = true }
+
+[dev-dependencies]
+hudi-tests = { path = "../tests" }
diff --git a/crates/datafusion/src/lib.rs b/crates/datafusion/src/lib.rs
index 99f1c7e..3b0a73a 100644
--- a/crates/datafusion/src/lib.rs
+++ b/crates/datafusion/src/lib.rs
@@ -133,92 +133,102 @@ mod tests {
     use std::collections::HashMap;
     use std::sync::Arc;
 
-    use arrow_array::{Array, Int32Array, RecordBatch, StringArray};
     use datafusion::prelude::{SessionConfig, SessionContext};
-    use datafusion_common::{DataFusionError, Result, ScalarValue};
-
-    use hudi_core::config::HudiConfig;
-    use hudi_tests::TestTable;
+    use datafusion_common::ScalarValue;
+
+    use hudi_core::config::HudiConfig::ReadInputPartitions;
+    use hudi_tests::utils::get_bool_column;
+    use hudi_tests::TestTable::{
+        V6Nonpartitioned, V6SimplekeygenHivestyleNoMetafields, V6SimplekeygenNonhivestyle,
+        V6TimebasedkeygenNonhivestyle,
+    };
+    use hudi_tests::{utils, TestTable};
+    use utils::{get_i32_column, get_str_column};
+    use TestTable::V6ComplexkeygenHivestyle;
 
     use crate::HudiDataSource;
 
-    #[tokio::test]
-    async fn datafusion_read_hudi_table() -> Result<(), DataFusionError> {
+    async fn prepare_session_context(
+        test_table: &TestTable,
+        options: &[(String, String)],
+    ) -> SessionContext {
         let config = SessionConfig::new().set(
             "datafusion.sql_parser.enable_ident_normalization",
             ScalarValue::from(false),
         );
         let ctx = SessionContext::new_with_config(config);
-        let base_url = TestTable::V6ComplexkeygenHivestyle.url();
-        let hudi = HudiDataSource::new(
-            base_url.as_str(),
-            HashMap::from([(
-                HudiConfig::ReadInputPartitions.as_ref().to_string(),
-                "2".to_string(),
-            )]),
-        )
-        .await?;
-        ctx.register_table("hudi_table_complexkeygen", Arc::new(hudi))?;
-        let sql = r#"
-        SELECT _hoodie_file_name, id, name, structField.field2
-        FROM hudi_table_complexkeygen WHERE id % 2 = 0
-        AND structField.field2 > 30 ORDER BY name LIMIT 10"#;
-
-        // verify plan
-        let explaining_df = ctx.sql(sql).await?.explain(false, true).unwrap();
-        let explaining_rb = explaining_df.collect().await?;
+        let base_url = test_table.url();
+        let options = HashMap::from_iter(options.iter().cloned().collect::<HashMap<_, _>>());
+        let hudi = HudiDataSource::new(base_url.as_str(), options)
+            .await
+            .unwrap();
+        ctx.register_table(test_table.as_ref(), Arc::new(hudi))
+            .unwrap();
+        ctx
+    }
+
+    async fn verify_plan(
+        ctx: &SessionContext,
+        sql: &str,
+        table_name: &str,
+        planned_input_partitioned: &i32,
+    ) {
+        let explaining_df = ctx.sql(sql).await.unwrap().explain(false, true).unwrap();
+        let explaining_rb = explaining_df.collect().await.unwrap();
         let explaining_rb = explaining_rb.first().unwrap();
         let plan = get_str_column(explaining_rb, "plan").join("");
         let plan_lines: Vec<&str> = plan.lines().map(str::trim).collect();
         assert!(plan_lines[2].starts_with("SortExec: TopK(fetch=10)"));
-        assert!(plan_lines[3].starts_with("ProjectionExec: expr=[_hoodie_file_name@0 as _hoodie_file_name, id@1 as id, name@2 as name, get_field(structField@3, field2) as hudi_table_complexkeygen.structField[field2]]"));
+        assert!(plan_lines[3].starts_with(&format!(
+            "ProjectionExec: expr=[id@0 as id, name@1 as name, isActive@2 as isActive, \
+            get_field(structField@3, field2) as {}.structField[field2]]",
+            table_name
+        )));
         assert!(plan_lines[5].starts_with(
-            "FilterExec: CAST(id@1 AS Int64) % 2 = 0 AND 
get_field(structField@3, field2) > 30"
+            "FilterExec: CAST(id@0 AS Int64) % 2 = 0 AND 
get_field(structField@3, field2) > 30"
         ));
-        assert!(plan_lines[6].contains("input_partitions=2"));
+        assert!(plan_lines[6].contains(&format!("input_partitions={}", planned_input_partitioned)));
+    }
 
-        // verify data
-        let df = ctx.sql(sql).await?;
-        let rb = df.collect().await?;
+    async fn verify_data(ctx: &SessionContext, sql: &str, table_name: &str) {
+        let df = ctx.sql(sql).await.unwrap();
+        let rb = df.collect().await.unwrap();
         let rb = rb.first().unwrap();
-        assert_eq!(
-            get_str_column(rb, "_hoodie_file_name"),
-            &[
-                "bb7c3a45-387f-490d-aab2-981c3f1a8ada-0_0-140-198_20240418173213674.parquet",
-                "4668e35e-bff8-4be9-9ff2-e7fb17ecb1a7-0_1-161-224_20240418173235694.parquet"
-            ]
-        );
         assert_eq!(get_i32_column(rb, "id"), &[2, 4]);
         assert_eq!(get_str_column(rb, "name"), &["Bob", "Diana"]);
+        assert_eq!(get_bool_column(rb, "isActive"), &[false, true]);
         assert_eq!(
-            get_i32_column(rb, "hudi_table_complexkeygen.structField[field2]"),
+            get_i32_column(rb, &format!("{}.structField[field2]", table_name)),
             &[40, 50]
         );
-
-        Ok(())
-    }
-
-    fn get_str_column<'a>(record_batch: &'a RecordBatch, name: &str) -> Vec<&'a str> {
-        record_batch
-            .column_by_name(name)
-            .unwrap()
-            .as_any()
-            .downcast_ref::<StringArray>()
-            .unwrap()
-            .iter()
-            .map(|s| s.unwrap())
-            .collect::<Vec<_>>()
     }
 
-    fn get_i32_column(record_batch: &RecordBatch, name: &str) -> Vec<i32> {
-        record_batch
-            .column_by_name(name)
-            .unwrap()
-            .as_any()
-            .downcast_ref::<Int32Array>()
-            .unwrap()
-            .iter()
-            .map(|s| s.unwrap())
-            .collect::<Vec<_>>()
+    #[tokio::test]
+    async fn datafusion_read_hudi_table() {
+        for (test_table, planned_input_partitions) in &[
+            (V6ComplexkeygenHivestyle, 2),
+            (V6Nonpartitioned, 1),
+            (V6SimplekeygenNonhivestyle, 2),
+            (V6SimplekeygenHivestyleNoMetafields, 2),
+            (V6TimebasedkeygenNonhivestyle, 2),
+        ] {
+            println!(">>> testing for {}", test_table.as_ref());
+            let ctx = prepare_session_context(
+                test_table,
+                &[(ReadInputPartitions.as_ref().to_string(), "2".to_string())],
+            )
+            .await;
+
+            let sql = format!(
+                r#"
+            SELECT id, name, isActive, structField.field2
+            FROM {} WHERE id % 2 = 0
+            AND structField.field2 > 30 ORDER BY name LIMIT 10"#,
+                test_table.as_ref()
+            );
+
+            verify_plan(&ctx, &sql, test_table.as_ref(), planned_input_partitions).await;
+            verify_data(&ctx, &sql, test_table.as_ref()).await
+        }
     }
 }
diff --git a/crates/tests/Cargo.toml b/crates/tests/Cargo.toml
index b6efe85..df9c3af 100644
--- a/crates/tests/Cargo.toml
+++ b/crates/tests/Cargo.toml
@@ -23,6 +23,19 @@ license.workspace = true
 rust-version.workspace = true
 
 [dependencies]
+arrow = { workspace = true }
+arrow-arith = { workspace = true }
+arrow-array = { workspace = true, features = ["chrono-tz"] }
+arrow-buffer = { workspace = true }
+arrow-cast = { workspace = true }
+arrow-ipc = { workspace = true }
+arrow-json = { workspace = true }
+arrow-ord = { workspace = true }
+arrow-row = { workspace = true }
+arrow-schema = { workspace = true, features = ["serde"] }
+arrow-select = { workspace = true }
+strum = { workspace = true }
+strum_macros = { workspace = true }
 tempfile = "3.10.1"
 zip-extract = "0.1.3"
 url = { workspace = true }
diff --git a/crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql b/crates/tests/data/tables/v6_complexkeygen_hivestyle.sql
similarity index 100%
rename from crates/tests/data/tables/v6_complexkeygen_hivestyle.datagen.sql
rename to crates/tests/data/tables/v6_complexkeygen_hivestyle.sql
diff --git a/crates/tests/data/tables/v6_nonpartitioned.datagen.sql b/crates/tests/data/tables/v6_nonpartitioned.sql
similarity index 100%
rename from crates/tests/data/tables/v6_nonpartitioned.datagen.sql
rename to crates/tests/data/tables/v6_nonpartitioned.sql
diff --git a/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql b/crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.sql
similarity index 100%
rename from crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.datagen.sql
rename to crates/tests/data/tables/v6_simplekeygen_hivestyle_no_metafields.sql
diff --git a/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql b/crates/tests/data/tables/v6_simplekeygen_nonhivestyle.sql
similarity index 100%
rename from crates/tests/data/tables/v6_simplekeygen_nonhivestyle.datagen.sql
rename to crates/tests/data/tables/v6_simplekeygen_nonhivestyle.sql
diff --git a/crates/tests/src/lib.rs b/crates/tests/src/lib.rs
index f94d5fd..22b2f3e 100644
--- a/crates/tests/src/lib.rs
+++ b/crates/tests/src/lib.rs
@@ -21,6 +21,7 @@ use std::fs;
 use std::io::Cursor;
 use std::path::{Path, PathBuf};
 
+use strum_macros::{AsRefStr, EnumIter, EnumString};
 use tempfile::tempdir;
 use url::Url;
 
@@ -29,51 +30,54 @@ pub mod utils;
 pub fn extract_test_table(zip_path: &Path) -> PathBuf {
     let target_dir = tempdir().unwrap().path().to_path_buf();
     let archive = fs::read(zip_path).unwrap();
-    zip_extract::extract(Cursor::new(archive), &target_dir, true).unwrap();
+    zip_extract::extract(Cursor::new(archive), &target_dir, false).unwrap();
     target_dir
 }
 
+#[derive(Debug, EnumString, AsRefStr, EnumIter)]
+#[strum(serialize_all = "snake_case")]
 pub enum TestTable {
     V6ComplexkeygenHivestyle,
     V6Empty,
     V6Nonpartitioned,
+    V6SimplekeygenHivestyleNoMetafields,
+    V6SimplekeygenNonhivestyle,
+    V6TimebasedkeygenNonhivestyle,
 }
 
 impl TestTable {
     pub fn zip_path(&self) -> Box<Path> {
         let dir = env!("CARGO_MANIFEST_DIR");
-        let data_path = Path::new(dir).join("data/tables");
-        match self {
-            Self::V6ComplexkeygenHivestyle => data_path
-                .join("v6_complexkeygen_hivestyle.zip")
-                .into_boxed_path(),
-            Self::V6Empty => data_path.join("v6_empty.zip").into_boxed_path(),
-            Self::V6Nonpartitioned => data_path.join("v6_nonpartitioned.zip").into_boxed_path(),
-        }
+        let data_path = Path::new(dir)
+            .join("data/tables")
+            .join(format!("{}.zip", self.as_ref()));
+        data_path.into_boxed_path()
     }
 
     pub fn path(&self) -> String {
         let zip_path = self.zip_path();
-        match self {
-            Self::V6ComplexkeygenHivestyle => extract_test_table(&zip_path)
-                .join("v6_complexkeygen_hivestyle")
-                .to_str()
-                .unwrap()
-                .to_string(),
-            Self::V6Empty => extract_test_table(&zip_path)
-                .join("v6_empty")
-                .to_str()
-                .unwrap()
-                .to_string(),
-            Self::V6Nonpartitioned => extract_test_table(&zip_path)
-                .join("v6_nonpartitioned")
-                .to_str()
-                .unwrap()
-                .to_string(),
-        }
+        let path_buf = extract_test_table(zip_path.as_ref()).join(self.as_ref());
+        path_buf.to_str().unwrap().to_string()
     }
 
     pub fn url(&self) -> Url {
-        Url::from_file_path(self.path()).unwrap()
+        let path = self.path();
+        Url::from_file_path(path).unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use strum::IntoEnumIterator;
+
+    use crate::TestTable;
+
+    #[test]
+    fn test_table_zip_file_should_exist() {
+        for t in TestTable::iter() {
+            let path = t.zip_path();
+            assert!(path.exists());
+            assert!(path.is_file());
+        }
     }
 }
diff --git a/crates/tests/src/utils.rs b/crates/tests/src/utils.rs
index 37e9cfb..0f7d44b 100644
--- a/crates/tests/src/utils.rs
+++ b/crates/tests/src/utils.rs
@@ -17,6 +17,9 @@
  * under the License.
  */
 
+use arrow::record_batch::RecordBatch;
+use arrow_array::{Array, BooleanArray, Int32Array, StringArray};
+
 #[macro_export]
 macro_rules! assert_approx_eq {
     ($a:expr, $b:expr, $delta:expr) => {{
@@ -32,3 +35,39 @@ macro_rules! assert_approx_eq {
         );
     }};
 }
+
+pub fn get_str_column<'a>(record_batch: &'a RecordBatch, name: &str) -> Vec<&'a str> {
+    record_batch
+        .column_by_name(name)
+        .unwrap()
+        .as_any()
+        .downcast_ref::<StringArray>()
+        .unwrap()
+        .iter()
+        .map(|s| s.unwrap())
+        .collect::<Vec<_>>()
+}
+
+pub fn get_i32_column(record_batch: &RecordBatch, name: &str) -> Vec<i32> {
+    record_batch
+        .column_by_name(name)
+        .unwrap()
+        .as_any()
+        .downcast_ref::<Int32Array>()
+        .unwrap()
+        .iter()
+        .map(|s| s.unwrap())
+        .collect::<Vec<_>>()
+}
+
+pub fn get_bool_column(record_batch: &RecordBatch, name: &str) -> Vec<bool> {
+    record_batch
+        .column_by_name(name)
+        .unwrap()
+        .as_any()
+        .downcast_ref::<BooleanArray>()
+        .unwrap()
+        .iter()
+        .map(|s| s.unwrap())
+        .collect::<Vec<_>>()
+}
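
For reviewers skimming the patch, here is a minimal sketch (not part of the commit) of the flow the reworked DataFusion test follows, written as it would sit inside the crate's own test module. The test function name read_v6_nonpartitioned is made up for illustration; the APIs, table name, and expected row values are taken from the diff above.

    use std::collections::HashMap;
    use std::sync::Arc;

    use datafusion::prelude::{SessionConfig, SessionContext};
    use datafusion_common::ScalarValue;
    use hudi_core::config::HudiConfig::ReadInputPartitions;
    use hudi_tests::utils::{get_i32_column, get_str_column};
    use hudi_tests::TestTable::V6Nonpartitioned;

    use crate::HudiDataSource;

    #[tokio::test]
    async fn read_v6_nonpartitioned() {
        // Keep identifiers case-sensitive so columns like `isActive` resolve as-is.
        let config = SessionConfig::new().set(
            "datafusion.sql_parser.enable_ident_normalization",
            ScalarValue::from(false),
        );
        let ctx = SessionContext::new_with_config(config);

        // TestTable::url() extracts data/tables/v6_nonpartitioned.zip into a temp dir.
        let base_url = V6Nonpartitioned.url();
        let options = HashMap::from([(
            ReadInputPartitions.as_ref().to_string(),
            "2".to_string(),
        )]);
        let hudi = HudiDataSource::new(base_url.as_str(), options).await.unwrap();

        // The strum-derived snake_case name "v6_nonpartitioned" doubles as the SQL table name.
        ctx.register_table(V6Nonpartitioned.as_ref(), Arc::new(hudi))
            .unwrap();

        let sql = "SELECT id, name FROM v6_nonpartitioned \
                   WHERE id % 2 = 0 AND structField.field2 > 30 ORDER BY name LIMIT 10";
        let rb = ctx.sql(sql).await.unwrap().collect().await.unwrap();
        let rb = rb.first().unwrap();
        assert_eq!(get_i32_column(rb, "id"), &[2, 4]);
        assert_eq!(get_str_column(rb, "name"), &["Bob", "Diana"]);
    }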
