(datafusion) branch main updated: Speedup sqllogictests by running long running tests first (#20576)

github-bot Mon, 02 Mar 2026 11:17:02 -0800

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 02dae77420 Speedup sqllogictests by running long running tests first 
(#20576)
02dae77420 is described below

commit 02dae7742002d6cdad097911a58fcbc031d0f9ad
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Mar 2 12:57:49 2026 -0500

    Speedup sqllogictests by running long running tests first (#20576)
    
    ## Which issue does this PR close?
    - part of  https://github.com/apache/datafusion/issues/20524
    - Follow on to https://github.com/apache/datafusion/pull/20566 from
    @kosiew
    
    ## Rationale for this change
    
    Our sqllogictests harness runs the queries in a single file serially,
    but runs multiple files in parallel
    
    Right now, the runtime of
    ```shell
    cargo test --profile=ci --test sqllogictests
    ```
    
    is dominated by a few long-running tests -- so the sooner they are
    started, the sooner the overall suite finishes
    
    ## What changes are included in this PR?
    
    Building on https://github.com/apache/datafusion/pull/20566 from @kosiew,
    this PR adds a heuristic reordering of the tests so that the longest
    running ones are started first
    
    ## Are these changes tested?
    
    By CI and I ran performance tests manually
    
    ### on main
    On main this takes 8 seconds
    
    ```shell
    andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ cargo test 
--profile=ci --test sqllogictests
        Finished `ci` profile [unoptimized] target(s) in 0.21s
         Running bin/sqllogictests.rs 
(target/ci/deps/sqllogictests-c4e4be8d5c9fd66e)
    Running with 16 test threads (available parallelism: 16)
    Completed 408 test files in 8 seconds
    ```
    
    ## After test split
    After https://github.com/apache/datafusion/pull/20566 it takes 7 seconds
    to complete:
    
    ```shell
    andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ cargo test 
--profile=ci --test sqllogictests
        Finished `ci` profile [unoptimized] target(s) in 0.20s
         Running bin/sqllogictests.rs 
(target/ci/deps/sqllogictests-c4e4be8d5c9fd66e)
    Running with 16 test threads (available parallelism: 16)
    Completed 411 test files in 7 seconds
    ```
    
    ## This PR
    With this PR it takes 5 seconds:
    ```shell
    andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ cargo test 
--profile=ci --test sqllogictests
       Compiling datafusion-sqllogictest v52.1.0 
(/Users/andrewlamb/Software/datafusion/datafusion/sqllogictest)
        Finished `ci` profile [unoptimized] target(s) in 1.92s
         Running bin/sqllogictests.rs 
(target/ci/deps/sqllogictests-c4e4be8d5c9fd66e)
    Running with 16 test threads (available parallelism: 16)
    Completed 411 test files in 5 seconds
    ```
    
    This is actually bounded by the time it takes to run the longest test
    `push_down_filter_regression.slt`:
    ```shell
    andrewlamb@Andrews-MacBook-Pro-3:~/Software/datafusion$ cargo test 
--profile=ci --test sqllogictests -- push_down_filter_regression.slt
        Finished `ci` profile [unoptimized] target(s) in 0.20s
         Running bin/sqllogictests.rs 
(target/ci/deps/sqllogictests-c4e4be8d5c9fd66e)
    Running with 16 test threads (available parallelism: 16)
    Completed 1 test files in 5 seconds
    ```
    
    
    ## Are there any user-facing changes?
    
    <!--
    If there are user-facing changes then we may require documentation to be
    updated before approving the PR.
    -->
    
    <!--
    If there are any breaking changes to public APIs, please add the `api
    change` label.
    -->
---
 datafusion/sqllogictest/bin/sqllogictests.rs | 71 ++++++++++++++++++++++++++--
 1 file changed, 68 insertions(+), 3 deletions(-)

diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs 
b/datafusion/sqllogictest/bin/sqllogictests.rs
index e067f2488d..b5a382ca6a 100644
--- a/datafusion/sqllogictest/bin/sqllogictests.rs
+++ b/datafusion/sqllogictest/bin/sqllogictests.rs
@@ -18,7 +18,9 @@
 use clap::{ColorChoice, Parser, ValueEnum};
 use datafusion::common::instant::Instant;
 use datafusion::common::utils::get_available_parallelism;
-use datafusion::common::{DataFusionError, Result, exec_datafusion_err, 
exec_err};
+use datafusion::common::{
+    DataFusionError, HashMap, Result, exec_datafusion_err, exec_err,
+};
 use datafusion_sqllogictest::{
     CurrentlyExecutingSqlTracker, DataFusion, DataFusionSubstraitRoundTrip, 
Filter,
     TestContext, df_value_validator, read_dir_recursive, setup_scratch_dir,
@@ -47,8 +49,8 @@ use std::fs;
 use std::io::{IsTerminal, stderr, stdout};
 use std::path::{Path, PathBuf};
 use std::str::FromStr;
-use std::sync::Arc;
 use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::{Arc, LazyLock};
 use std::time::Duration;
 
 #[cfg(feature = "postgres")]
@@ -75,6 +77,55 @@ struct FileTiming {
     elapsed: Duration,
 }
 
+/// TEST PRIORITY
+///
+/// Heuristically prioritize some test to run earlier.
+///
+/// Prioritizes test to run earlier if they are known to be long running (as
+/// each test file itself is run sequentially, but multiple test files are run
+/// in parallel.
+///
+/// Tests not listed here will run after the listed tests in an arbitrary 
order.
+///
+/// You can find the top longest running tests by running `--timing-summary` 
mode.
+/// For example
+///
+/// ```shell
+/// $  cargo test --profile=ci  --test sqllogictests -- --timing-summary top
+/// ...
+/// Per-file elapsed summary (deterministic):
+/// 1.    5.375s  push_down_filter_regression.slt
+/// 2.    3.174s  aggregate.slt
+/// 3.    3.158s  imdb.slt
+/// 4.    2.793s  joins.slt
+/// 5.    2.505s  array.slt
+/// 6.    2.265s  aggregate_skip_partial.slt
+/// 7.    2.260s  window.slt
+/// 8.    1.677s  group_by.slt
+/// 9.    0.973s  datetime/timestamps.slt
+/// 10.    0.822s  cte.slt
+/// ```
+static TEST_PRIORITY: LazyLock<HashMap<PathBuf, usize>> = LazyLock::new(|| {
+    [
+        (PathBuf::from("push_down_filter_regression.slt"), 0), // longest 
running, so run first.
+        (PathBuf::from("aggregate.slt"), 1),
+        (PathBuf::from("joins.slt"), 2),
+        (PathBuf::from("imdb.slt"), 3),
+        (PathBuf::from("array.slt"), 4),
+        (PathBuf::from("aggregate_skip_partial.slt"), 5),
+        (PathBuf::from("window.slt"), 6),
+        (PathBuf::from("group_by.slt"), 7),
+        (PathBuf::from("datetime/timestamps.slt"), 8),
+        (PathBuf::from("cte.slt"), 9),
+    ]
+    .into_iter()
+    .collect()
+});
+
+/// Default priority for tests not in the TEST_PRIORITY map. Tests with lower
+/// priority values run first.
+static DEFAULT_PRIORITY: usize = 100;
+
 pub fn main() -> Result<()> {
     tokio::runtime::Builder::new_multi_thread()
         .enable_all()
@@ -851,7 +902,21 @@ fn read_test_files(options: &Options) -> 
Result<Vec<TestFile>> {
         paths.append(&mut sqlite_paths)
     }
 
-    Ok(paths)
+    Ok(sort_tests(paths))
+}
+
+/// Sort the tests heuristically by order of "priority"
+///
+/// Prioritizes test to run earlier if they are known to be long running (as
+/// each test file itself is run sequentially, but multiple test files are run
+/// in parallel.
+fn sort_tests(mut tests: Vec<TestFile>) -> Vec<TestFile> {
+    tests.sort_by_key(|f| {
+        TEST_PRIORITY
+            .get(&f.relative_path)
+            .unwrap_or(&DEFAULT_PRIORITY)
+    });
+    tests
 }
 
 /// Parsed command line options


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion) branch main updated: Speedup sqllogictests by running long running tests first (#20576)

Reply via email to