This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 283c4b51d3 Document and `scratch` directory for sqllogictest and make
test specific (#7312)
283c4b51d3 is described below
commit 283c4b51d3afab7abf63407e7d2f2a8cb60869a0
Author: Andrew Lamb <[email protected]>
AuthorDate: Thu Aug 24 06:25:03 2023 -0400
Document and `scratch` directory for sqllogictest and make test specific
(#7312)
* Document and `scratch` directory for sqllogictest and make test specific
* Fix name
* Update test paths
* Apply suggestions from code review
Co-authored-by: Metehan Yıldırım
<[email protected]>
---------
Co-authored-by: Metehan Yıldırım
<[email protected]>
---
datafusion/sqllogictest/README.md | 30 ++++++++++++++----
datafusion/sqllogictest/bin/sqllogictests.rs | 17 +++++-----
datafusion/sqllogictest/test_files/copy.slt | 36 +++++++++++-----------
.../sqllogictest/test_files/insert_to_external.slt | 24 +++++++--------
4 files changed, 64 insertions(+), 43 deletions(-)
diff --git a/datafusion/sqllogictest/README.md
b/datafusion/sqllogictest/README.md
index 1f69bb864e..3e94859d35 100644
--- a/datafusion/sqllogictest/README.md
+++ b/datafusion/sqllogictest/README.md
@@ -177,14 +177,32 @@ You can update the tests / generate expected output by
passing the `--complete`
cargo test --test sqllogictests -- ddl --complete
```
-#### sqllogictests
+#### Running tests: `scratchdir`
-sqllogictest is a program originally written for SQLite to verify the
correctness of SQL queries against the SQLite
-engine. The program is engine-agnostic and can parse sqllogictest files
(`.slt`), runs queries against an SQL engine and
-compare the output to the expected output.
+The DataFusion sqllogictest runner automatically creates a directory
+named `test_files/scratch/<filename>`, creating it if needed and
+clearing any file contents if it exists.
-Tests in the `.slt` file are a sequence of query record generally starting
with `CREATE` statements to populate tables
-and then further queries to test the populated data (arrow-datafusion
exception).
+For example, the `test_files/copy.slt` file should use scratch
+directory `test_files/scratch/copy`.
+
+Tests that need to write temporary files should write (only) to this
+directory to ensure they do not interfere with others concurrently
+running tests.
+
+#### `.slt` file format
+
+[`sqllogictest`] was originally written for SQLite to verify the
+correctness of SQL queries against the SQLite engine. The format is
+engine-agnostic: runners parse sqllogictest files (`.slt`), run
+queries against an SQL engine, and compare the output to the expected
+output.
+
+[`sqllogictest`]: https://www.sqlite.org/sqllogictest/doc/trunk/about.wiki
+
+Tests in the `.slt` file are a sequence of query records generally
+starting with `CREATE` statements to populate tables and then further
+queries to test the populated data.
Each `.slt` file runs in its own, isolated `SessionContext`, to make the test
setup explicit and so they can run in
parallel. Thus it is important to keep the tests from having externally visible
side effects (like writing to a global
diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs
b/datafusion/sqllogictest/bin/sqllogictests.rs
index fe38a9d8f8..618e3106c6 100644
--- a/datafusion/sqllogictest/bin/sqllogictests.rs
+++ b/datafusion/sqllogictest/bin/sqllogictests.rs
@@ -55,17 +55,21 @@ pub async fn main() -> Result<()> {
run_tests().await
}
-/// Sets up an empty directory at test_files/scratch
+/// Sets up an empty directory at test_files/scratch/<name>
/// creating it if needed and clearing any file contents if it exists
/// This allows tests for inserting to external tables or copy to
/// to persist data to disk and have consistent state when running
/// a new test
-fn setup_scratch_dir() -> Result<()> {
- let path = std::path::Path::new("test_files/scratch");
+fn setup_scratch_dir(name: &Path) -> Result<()> {
+ // go from copy.slt --> copy
+ let file_stem = name.file_stem().expect("File should have a stem");
+ let path = PathBuf::from("test_files").join("scratch").join(file_stem);
+
+ info!("Creating scratch dir in {path:?}");
if path.exists() {
- fs::remove_dir_all(path)?;
+ fs::remove_dir_all(&path)?;
}
- fs::create_dir(path)?;
+ fs::create_dir_all(&path)?;
Ok(())
}
@@ -73,8 +77,6 @@ async fn run_tests() -> Result<()> {
// Enable logging (e.g. set RUST_LOG=debug to see debug logs)
env_logger::init();
- setup_scratch_dir()?;
-
let options = Options::new();
// Run all tests in parallel, reporting failures at the end
@@ -135,6 +137,7 @@ async fn run_test_file(test_file: TestFile) -> Result<()> {
info!("Skipping: {}", path.display());
return Ok(());
};
+ setup_scratch_dir(&relative_path)?;
let mut runner = sqllogictest::Runner::new(|| async {
Ok(DataFusion::new(
test_ctx.session_ctx().clone(),
diff --git a/datafusion/sqllogictest/test_files/copy.slt
b/datafusion/sqllogictest/test_files/copy.slt
index a44a662ada..d13caa47c1 100644
--- a/datafusion/sqllogictest/test_files/copy.slt
+++ b/datafusion/sqllogictest/test_files/copy.slt
@@ -21,19 +21,19 @@ create table source_table(col1 integer, col2 varchar) as
values (1, 'Foo'), (2,
# Copy to directory as multiple files
query IT
-COPY source_table TO 'test_files/scratch/table' (format parquet,
per_thread_output true);
+COPY source_table TO 'test_files/scratch/copy/table' (format parquet,
per_thread_output true);
----
2
# Error case
query error DataFusion error: Error during planning: Copy To format not
explicitly set and unable to get file extension!
-EXPLAIN COPY source_table to 'test_files/scratch/table'
+EXPLAIN COPY source_table to 'test_files/scratch/copy/table'
query TT
-EXPLAIN COPY source_table to 'test_files/scratch/table' (format parquet,
per_thread_output true)
+EXPLAIN COPY source_table to 'test_files/scratch/copy/table' (format parquet,
per_thread_output true)
----
logical_plan
-CopyTo: format=parquet output_url=test_files/scratch/table
per_thread_output=true options: (format parquet, per_thread_output true)
+CopyTo: format=parquet output_url=test_files/scratch/copy/table
per_thread_output=true options: (format parquet, per_thread_output true)
--TableScan: source_table projection=[col1, col2]
physical_plan
InsertExec: sink=ParquetSink(writer_mode=PutMultipart, file_groups=[])
@@ -41,13 +41,13 @@ InsertExec: sink=ParquetSink(writer_mode=PutMultipart,
file_groups=[])
# Copy more files to directory via query
query IT
-COPY (select * from source_table UNION ALL select * from source_table) to
'test_files/scratch/table' (format parquet, per_thread_output true);
+COPY (select * from source_table UNION ALL select * from source_table) to
'test_files/scratch/copy/table' (format parquet, per_thread_output true);
----
4
# validate multiple parquet file output
statement ok
-CREATE EXTERNAL TABLE validate_parquet STORED AS PARQUET LOCATION
'test_files/scratch/table/';
+CREATE EXTERNAL TABLE validate_parquet STORED AS PARQUET LOCATION
'test_files/scratch/copy/table/';
query IT
select * from validate_parquet;
@@ -61,13 +61,13 @@ select * from validate_parquet;
# Copy from table to single file
query IT
-COPY source_table to 'test_files/scratch/table.parquet';
+COPY source_table to 'test_files/scratch/copy/table.parquet';
----
2
# validate single parquet file output
statement ok
-CREATE EXTERNAL TABLE validate_parquet_single STORED AS PARQUET LOCATION
'test_files/scratch/table.parquet';
+CREATE EXTERNAL TABLE validate_parquet_single STORED AS PARQUET LOCATION
'test_files/scratch/copy/table.parquet';
query IT
select * from validate_parquet_single;
@@ -77,13 +77,13 @@ select * from validate_parquet_single;
# copy from table to folder of csv files
query IT
-COPY source_table to 'test_files/scratch/table_csv' (format csv,
per_thread_output true);
+COPY source_table to 'test_files/scratch/copy/table_csv' (format csv,
per_thread_output true);
----
2
# validate folder of csv files
statement ok
-CREATE EXTERNAL TABLE validate_csv STORED AS csv WITH HEADER ROW LOCATION
'test_files/scratch/table_csv';
+CREATE EXTERNAL TABLE validate_csv STORED AS csv WITH HEADER ROW LOCATION
'test_files/scratch/copy/table_csv';
query IT
select * from validate_csv;
@@ -93,13 +93,13 @@ select * from validate_csv;
# Copy from table to single csv
query IT
-COPY source_table to 'test_files/scratch/table.csv';
+COPY source_table to 'test_files/scratch/copy/table.csv';
----
2
# Validate single csv output
statement ok
-CREATE EXTERNAL TABLE validate_single_csv STORED AS csv WITH HEADER ROW
LOCATION 'test_files/scratch/table.csv';
+CREATE EXTERNAL TABLE validate_single_csv STORED AS csv WITH HEADER ROW
LOCATION 'test_files/scratch/copy/table.csv';
query IT
select * from validate_single_csv;
@@ -109,13 +109,13 @@ select * from validate_single_csv;
# Copy from table to folder of json
query IT
-COPY source_table to 'test_files/scratch/table_json' (format json,
per_thread_output true);
+COPY source_table to 'test_files/scratch/copy/table_json' (format json,
per_thread_output true);
----
2
# Validate json output
statement ok
-CREATE EXTERNAL TABLE validate_json STORED AS json LOCATION
'test_files/scratch/table_json';
+CREATE EXTERNAL TABLE validate_json STORED AS json LOCATION
'test_files/scratch/copy/table_json';
query IT
select * from validate_json;
@@ -125,13 +125,13 @@ select * from validate_json;
# Copy from table to single json file
query IT
-COPY source_table to 'test_files/scratch/table.json';
+COPY source_table to 'test_files/scratch/copy/table.json';
----
2
# Validate single JSON file`
statement ok
-CREATE EXTERNAL TABLE validate_single_json STORED AS json LOCATION
'test_files/scratch/table_json';
+CREATE EXTERNAL TABLE validate_single_json STORED AS json LOCATION
'test_files/scratch/copy/table_json';
query IT
select * from validate_single_json;
@@ -141,13 +141,13 @@ select * from validate_single_json;
# Copy from table with options
query IT
-COPY source_table to 'test_files/scratch/table.json' (row_group_size 55);
+COPY source_table to 'test_files/scratch/copy/table.json' (row_group_size 55);
----
2
# Copy from table with options (and trailing comma)
query IT
-COPY source_table to 'test_files/scratch/table.json' (row_group_size 55,
row_group_limit_bytes 9,);
+COPY source_table to 'test_files/scratch/copy/table.json' (row_group_size 55,
row_group_limit_bytes 9,);
----
2
diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt
b/datafusion/sqllogictest/test_files/insert_to_external.slt
index b2ee5468e7..a29c230a46 100644
--- a/datafusion/sqllogictest/test_files/insert_to_external.slt
+++ b/datafusion/sqllogictest/test_files/insert_to_external.slt
@@ -49,7 +49,7 @@ statement ok
CREATE EXTERNAL TABLE
single_file_test(a bigint, b bigint)
STORED AS csv
-LOCATION 'test_files/scratch/single_csv_table.csv'
+LOCATION 'test_files/scratch/insert_to_external/single_csv_table.csv'
OPTIONS(
create_local_path 'true',
single_file 'true',
@@ -70,7 +70,7 @@ statement ok
CREATE EXTERNAL TABLE
directory_test(a bigint, b bigint)
STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q0'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q0'
OPTIONS(
create_local_path 'true',
);
@@ -87,10 +87,10 @@ select * from directory_test;
3 4
statement ok
-CREATE EXTERNAL TABLE
+CREATE EXTERNAL TABLE
table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q1'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q1'
OPTIONS (create_local_path 'true');
query TT
@@ -153,10 +153,10 @@ drop table table_without_values;
# test_insert_into_as_select_multi_partitioned
statement ok
-CREATE EXTERNAL TABLE
+CREATE EXTERNAL TABLE
table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q2'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q2'
OPTIONS (create_local_path 'true');
query TT
@@ -197,10 +197,10 @@ drop table table_without_values;
# test_insert_into_with_sort
statement ok
-CREATE EXTERNAL TABLE
+CREATE EXTERNAL TABLE
table_without_values(c1 varchar NULL)
STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q3'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q3'
OPTIONS (create_local_path 'true');
# verify that the sort order of the insert query is maintained into the
@@ -237,10 +237,10 @@ drop table table_without_values;
# test insert with column names
statement ok
-CREATE EXTERNAL TABLE
+CREATE EXTERNAL TABLE
table_without_values(id BIGINT, name varchar)
STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q4'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q4'
OPTIONS (create_local_path 'true');
query IT
@@ -276,10 +276,10 @@ drop table table_without_values;
# test insert with non-nullable column
statement ok
-CREATE EXTERNAL TABLE
+CREATE EXTERNAL TABLE
table_without_values(field1 BIGINT NOT NULL, field2 BIGINT NULL)
STORED AS parquet
-LOCATION 'test_files/scratch/external_parquet_table_q5'
+LOCATION 'test_files/scratch/insert_to_external/external_parquet_table_q5'
OPTIONS (create_local_path 'true');
query II