This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new c8d26ba012 feat: Support Show runtime settings (#18564)
c8d26ba012 is described below

commit c8d26ba012471e6aece9430642d6a8a923bc344c
Author: Alex Huang <[email protected]>
AuthorDate: Sat Nov 22 09:45:56 2025 +0200

    feat: Support Show runtime settings (#18564)
    
    ## Which issue does this PR close?
    
    - Closes #18452
    
    ## Rationale for this change
    
    Previously, the `SHOW` command could not display runtime configuration
    settings such as `datafusion.runtime.memory_limit`,
    `datafusion.runtime.max_temp_directory_size`, etc. When users attempted
    to view these settings, they would receive an error:
    
    Error during planning: 'datafusion.runtime.memory_limit' is not a
    variable which can be viewed with 'SHOW'
    
    This PR enables users to query runtime settings using the `SHOW`
    command, making them accessible in the same way as other DataFusion
    configuration variables.
    
    ## What changes are included in this PR?
    
    1. **Added `config_entries()` method to `RuntimeEnv`** - Returns runtime
    configuration as `ConfigEntry` objects with human-readable values (e.g.,
    "100M", "unlimited"); see the sketch after this list
    
    2. **Exposed getter methods in `DiskManager`** - Added
    `max_temp_directory_size()` and `temp_dir_paths()` to access disk
    manager configuration
    
    3. **Updated `InformationSchema`** - Modified `make_df_settings()` to
    include runtime configuration entries from `RuntimeEnv`
    
    4. **Modified SQL statement validation** - Added check for
    `datafusion.runtime.*` variables to allow SHOW command
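
    For illustration only (not part of this change set), here is a minimal sketch
    of how the new `RuntimeEnv::config_entries()` API from item 1 might be
    consumed. The `print_runtime_settings` helper is hypothetical and a default
    `RuntimeEnv` is assumed:

    ```rust
    use datafusion_common::config::ConfigEntry;
    use datafusion_execution::runtime_env::RuntimeEnv;

    /// Hypothetical helper: print each runtime setting as "key = value".
    fn print_runtime_settings(runtime_env: &RuntimeEnv) {
        // Values are human-readable strings such as "100G", or None when unset
        for ConfigEntry { key, value, .. } in runtime_env.config_entries() {
            println!("{key} = {}", value.unwrap_or_else(|| "NULL".to_string()));
        }
    }

    fn main() {
        print_runtime_settings(&RuntimeEnv::default());
    }
    ```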
    
    
    ## Are these changes tested?
    
    Yes, comprehensive tests have been added:
    
    - **set_variable.slt** - Tests SHOW and SET for all 4 runtime variables
    (memory_limit, max_temp_directory_size, metadata_cache_limit,
    temp_directory)
    - **information_schema.slt** - Verifies runtime variables appear in
    `information_schema.df_settings`
    - Tests cover default values, setting custom values, and querying via
    information_schema (a rough programmatic equivalent is sketched below)
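
    For completeness, a rough programmatic equivalent of these checks (a hedged
    sketch, not part of this PR: it assumes the umbrella `datafusion` crate with a
    Tokio runtime, and that the information schema is enabled, which `SHOW`
    requires):

    ```rust
    use datafusion::error::Result;
    use datafusion::prelude::{SessionConfig, SessionContext};

    #[tokio::main]
    async fn main() -> Result<()> {
        // SHOW statements require the information_schema to be enabled
        let config = SessionConfig::new().with_information_schema(true);
        let ctx = SessionContext::new_with_config(config);

        // Default value, then override it and read it back
        ctx.sql("SHOW datafusion.runtime.memory_limit").await?.show().await?;
        ctx.sql("SET datafusion.runtime.memory_limit = '100M'").await?;
        ctx.sql("SHOW datafusion.runtime.memory_limit").await?.show().await?;
        Ok(())
    }
    ```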
    
    ## Are there any user-facing changes?
    
    Yes, users can now:
    
    **View runtime settings with SHOW**:
       ```sql
       SHOW datafusion.runtime.memory_limit;
       -- Returns: datafusion.runtime.memory_limit | unlimited
       ```
---
 datafusion/catalog/src/information_schema.rs       |  13 ++-
 datafusion/execution/src/disk_manager.rs           |  18 +++
 datafusion/execution/src/runtime_env.rs            | 121 +++++++++++++++++----
 datafusion/sql/src/statement.rs                    |   5 +-
 .../sqllogictest/test_files/information_schema.slt |   8 ++
 .../sqllogictest/test_files/set_variable.slt       |  49 +++++++++
 6 files changed, 190 insertions(+), 24 deletions(-)

diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs
index 526a1f0446..35f07084e3 100644
--- a/datafusion/catalog/src/information_schema.rs
+++ b/datafusion/catalog/src/information_schema.rs
@@ -32,6 +32,7 @@ use datafusion_common::config::{ConfigEntry, ConfigOptions};
 use datafusion_common::error::Result;
 use datafusion_common::types::NativeType;
 use datafusion_common::DataFusionError;
+use datafusion_execution::runtime_env::RuntimeEnv;
 use datafusion_execution::TaskContext;
 use datafusion_expr::{AggregateUDF, ScalarUDF, Signature, TypeSignature, WindowUDF};
 use datafusion_expr::{TableType, Volatility};
@@ -215,11 +216,16 @@ impl InformationSchemaConfig {
     fn make_df_settings(
         &self,
         config_options: &ConfigOptions,
+        runtime_env: &Arc<RuntimeEnv>,
         builder: &mut InformationSchemaDfSettingsBuilder,
     ) {
         for entry in config_options.entries() {
             builder.add_setting(entry);
         }
+        // Add runtime configuration entries
+        for entry in runtime_env.config_entries() {
+            builder.add_setting(entry);
+        }
     }
 
     fn make_routines(
@@ -1060,7 +1066,12 @@ impl PartitionStream for InformationSchemaDfSettings {
             // TODO: Stream this
             futures::stream::once(async move {
                 // create a mem table with the names of tables
-                config.make_df_settings(ctx.session_config().options(), &mut builder);
+                let runtime_env = ctx.runtime_env();
+                config.make_df_settings(
+                    ctx.session_config().options(),
+                    &runtime_env,
+                    &mut builder,
+                );
                 Ok(builder.finish())
             }),
         ))
diff --git a/datafusion/execution/src/disk_manager.rs b/datafusion/execution/src/disk_manager.rs
index b0fdb2d74f..eb7b643a69 100644
--- a/datafusion/execution/src/disk_manager.rs
+++ b/datafusion/execution/src/disk_manager.rs
@@ -246,6 +246,24 @@ impl DiskManager {
         self.used_disk_space.load(Ordering::Relaxed)
     }
 
+    /// Returns the maximum temporary directory size in bytes
+    pub fn max_temp_directory_size(&self) -> u64 {
+        self.max_temp_directory_size
+    }
+
+    /// Returns the temporary directory paths
+    pub fn temp_dir_paths(&self) -> Vec<PathBuf> {
+        self.local_dirs
+            .lock()
+            .as_ref()
+            .map(|dirs| {
+                dirs.iter()
+                    .map(|temp_dir| temp_dir.path().to_path_buf())
+                    .collect()
+            })
+            .unwrap_or_default()
+    }
+
     /// Return true if this disk manager supports creating temporary
     /// files. If this returns false, any call to `create_tmp_file`
     /// will error.
diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs
index d699876008..abcbe897d2 100644
--- a/datafusion/execution/src/runtime_env.rs
+++ b/datafusion/execution/src/runtime_env.rs
@@ -91,6 +91,41 @@ impl Debug for RuntimeEnv {
     }
 }
 
+/// Creates runtime configuration entries with the provided values
+///
+/// This helper function defines the structure and metadata for all runtime configuration
+/// entries to avoid duplication between `RuntimeEnv::config_entries()` and
+/// `RuntimeEnvBuilder::entries()`.
+fn create_runtime_config_entries(
+    memory_limit: Option<String>,
+    max_temp_directory_size: Option<String>,
+    temp_directory: Option<String>,
+    metadata_cache_limit: Option<String>,
+) -> Vec<ConfigEntry> {
+    vec![
+        ConfigEntry {
+            key: "datafusion.runtime.memory_limit".to_string(),
+            value: memory_limit,
+            description: "Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
+        },
+        ConfigEntry {
+            key: "datafusion.runtime.max_temp_directory_size".to_string(),
+            value: max_temp_directory_size,
+            description: "Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
+        },
+        ConfigEntry {
+            key: "datafusion.runtime.temp_directory".to_string(),
+            value: temp_directory,
+            description: "The path to the temporary file directory.",
+        },
+        ConfigEntry {
+            key: "datafusion.runtime.metadata_cache_limit".to_string(),
+            value: metadata_cache_limit,
+            description: "Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
+        }
+    ]
+}
+
 impl RuntimeEnv {
     /// Registers a custom `ObjectStore` to be used with a specific url.
     /// This allows DataFusion to create external tables from urls that do not have
@@ -173,6 +208,64 @@ impl RuntimeEnv {
     ) -> Result<Arc<dyn EncryptionFactory>> {
         self.parquet_encryption_factory_registry.get_factory(id)
     }
+
+    /// Returns the current runtime configuration entries
+    pub fn config_entries(&self) -> Vec<ConfigEntry> {
+        use crate::memory_pool::MemoryLimit;
+
+        /// Convert bytes to a human-readable format
+        fn format_byte_size(size: u64) -> String {
+            const GB: u64 = 1024 * 1024 * 1024;
+            const MB: u64 = 1024 * 1024;
+            const KB: u64 = 1024;
+
+            match size {
+                s if s >= GB => format!("{}G", s / GB),
+                s if s >= MB => format!("{}M", s / MB),
+                s if s >= KB => format!("{}K", s / KB),
+                s => format!("{s}"),
+            }
+        }
+
+        let memory_limit_value = match self.memory_pool.memory_limit() {
+            MemoryLimit::Finite(size) => Some(format_byte_size(
+                size.try_into()
+                    .expect("Memory limit size conversion failed"),
+            )),
+            MemoryLimit::Infinite => Some("unlimited".to_string()),
+            MemoryLimit::Unknown => None,
+        };
+
+        let max_temp_dir_size = self.disk_manager.max_temp_directory_size();
+        let max_temp_dir_value = format_byte_size(max_temp_dir_size);
+
+        let temp_paths = self.disk_manager.temp_dir_paths();
+        let temp_dir_value = if temp_paths.is_empty() {
+            None
+        } else {
+            Some(
+                temp_paths
+                    .iter()
+                    .map(|p| p.display().to_string())
+                    .collect::<Vec<_>>()
+                    .join(","),
+            )
+        };
+
+        let metadata_cache_limit = self.cache_manager.get_metadata_cache_limit();
+        let metadata_cache_value = format_byte_size(
+            metadata_cache_limit
+                .try_into()
+                .expect("Metadata cache size conversion failed"),
+        );
+
+        create_runtime_config_entries(
+            memory_limit_value,
+            Some(max_temp_dir_value),
+            temp_dir_value,
+            Some(metadata_cache_value),
+        )
+    }
 }
 
 impl Default for RuntimeEnv {
@@ -359,28 +452,12 @@ impl RuntimeEnvBuilder {
 
     /// Returns a list of all available runtime configurations with their current values and descriptions
     pub fn entries(&self) -> Vec<ConfigEntry> {
-        vec![
-            ConfigEntry {
-                key: "datafusion.runtime.memory_limit".to_string(),
-                value: None, // Default is system-dependent
-                description: "Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
-            },
-            ConfigEntry {
-                key: "datafusion.runtime.max_temp_directory_size".to_string(),
-                value: Some("100G".to_string()),
-                description: "Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
-            },
-            ConfigEntry {
-                key: "datafusion.runtime.temp_directory".to_string(),
-                value: None, // Default is system-dependent
-                description: "The path to the temporary file directory.",
-            },
-            ConfigEntry {
-                key: "datafusion.runtime.metadata_cache_limit".to_string(),
-                value: Some("50M".to_owned()),
-                description: "Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.",
-            }
-        ]
+        create_runtime_config_entries(
+            None,
+            Some("100G".to_string()),
+            None,
+            Some("50M".to_owned()),
+        )
     }
 
     /// Generate documentation that can be included in the user guide
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 91e11a4b65..e89ddaa349 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -1869,7 +1869,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 .iter()
                 .any(|opt| opt.key == variable);
 
-            if !is_valid_variable {
+            // Check if it's a runtime variable
+            let is_runtime_variable = variable.starts_with("datafusion.runtime.");
+
+            if !is_valid_variable && !is_runtime_variable {
                 return plan_err!(
                     "'{variable}' is not a variable which can be viewed with 'SHOW'"
                 );
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index eba527ed2b..e15163cf6e 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -314,6 +314,10 @@ datafusion.optimizer.repartition_sorts true
 datafusion.optimizer.repartition_windows true
 datafusion.optimizer.skip_failed_rules false
 datafusion.optimizer.top_down_join_key_reordering true
+datafusion.runtime.max_temp_directory_size 100G
+datafusion.runtime.memory_limit unlimited
+datafusion.runtime.metadata_cache_limit 50M
+datafusion.runtime.temp_directory NULL
 datafusion.sql_parser.collect_spans false
 datafusion.sql_parser.default_null_ordering nulls_max
 datafusion.sql_parser.dialect generic
@@ -436,6 +440,10 @@ datafusion.optimizer.repartition_sorts true Should DataFusion execute sorts in a
 datafusion.optimizer.repartition_windows true Should DataFusion repartition data using the partitions keys to execute window functions in parallel using the provided `target_partitions` level
 datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail
 datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
+datafusion.runtime.max_temp_directory_size 100G Maximum temporary file directory size. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.
+datafusion.runtime.memory_limit unlimited Maximum memory limit for query execution. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.
+datafusion.runtime.metadata_cache_limit 50M Maximum memory to use for file metadata cache such as Parquet metadata. Supports suffixes K (kilobytes), M (megabytes), and G (gigabytes). Example: '2G' for 2 gigabytes.
+datafusion.runtime.temp_directory NULL The path to the temporary file directory.
 datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.
 datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
 datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt
index b11a631613..82bd71d72b 100644
--- a/datafusion/sqllogictest/test_files/set_variable.slt
+++ b/datafusion/sqllogictest/test_files/set_variable.slt
@@ -366,3 +366,52 @@ RESET datafusion.execution.batches_size
 # reset invalid variable - extra suffix on valid field
 statement error DataFusion error: Invalid or Unsupported Configuration: Config field is a scalar usize and does not have nested field "bar"
 RESET datafusion.execution.batch_size.bar
+
+############
+## Test runtime configuration variables
+############
+
+# Test SHOW runtime.memory_limit (default value)
+query TT
+SHOW datafusion.runtime.memory_limit
+----
+datafusion.runtime.memory_limit unlimited
+
+# Test SET and SHOW runtime.memory_limit
+statement ok
+SET datafusion.runtime.memory_limit = '100M'
+
+query TT
+SHOW datafusion.runtime.memory_limit
+----
+datafusion.runtime.memory_limit 100M
+
+# Test SET and SHOW runtime.max_temp_directory_size
+statement ok
+SET datafusion.runtime.max_temp_directory_size = '10G'
+
+query TT
+SHOW datafusion.runtime.max_temp_directory_size
+----
+datafusion.runtime.max_temp_directory_size 10G
+
+# Test SET and SHOW runtime.metadata_cache_limit
+statement ok
+SET datafusion.runtime.metadata_cache_limit = '200M'
+
+query TT
+SHOW datafusion.runtime.metadata_cache_limit
+----
+datafusion.runtime.metadata_cache_limit 200M
+
+# Note: runtime.temp_directory shows the actual temp directory path with a unique suffix,
+# so we cannot test the exact value. We verify it exists in information_schema instead.
+
+# Test that all runtime variables appear in information_schema.df_settings
+query T
+SELECT name FROM information_schema.df_settings WHERE name LIKE 'datafusion.runtime.%' ORDER BY name
+----
+datafusion.runtime.max_temp_directory_size
+datafusion.runtime.memory_limit
+datafusion.runtime.metadata_cache_limit
+datafusion.runtime.temp_directory


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
