This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 969fc138dd chore: Format examples in doc strings - spark, sql,
sqllogictest, substrait (#18443)
969fc138dd is described below
commit 969fc138dd7c109f7348b0fd0830239f8819d858
Author: Yu-Chuan Hung <[email protected]>
AuthorDate: Sat Nov 8 06:37:36 2025 +0800
chore: Format examples in doc strings - spark, sql, sqllogictest, substrait
(#18443)
## Which issue does this PR close?
Part of #16915
## Rationale for this change
Format code examples in documentation comments to improve readability
and maintain consistent code style across the codebase. This is part of
a multi-PR effort to format all doc comment examples and eventually
enable CI checks to enforce this formatting.
## What changes are included in this PR?
Run `cargo fmt -p <crate> -- --config format_code_in_doc_comments=true`
for the following crates:
- `datafusion-spark`
- `datafusion-sql`
- `datafusion-sqllogictest`
- `datafusion-substrait`
- `datafusion-cli`
- `datafusion-examples`
## Are these changes tested?
No testing needed - this is purely a formatting change with no
functional modifications.
## Are there any user-facing changes?
No - this only affects documentation formatting.
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
.../examples/advanced_parquet_index.rs | 1 -
.../examples/external_dependency/query-aws-s3.rs | 1 -
datafusion-examples/examples/flight/sql_server.rs | 1 -
datafusion-examples/examples/parquet_index.rs | 1 -
datafusion-examples/examples/sql_query.rs | 1 -
datafusion-examples/examples/thread_pools.rs | 2 +-
datafusion/spark/src/function/bitwise/bit_shift.rs | 3 ---
datafusion/spark/src/function/url/parse_url.rs | 2 --
datafusion/spark/src/lib.rs | 2 +-
datafusion/sql/src/parser.rs | 13 +++++------
datafusion/sql/src/planner.rs | 1 -
datafusion/sql/src/resolve.rs | 12 +++++-----
datafusion/sql/src/unparser/expr.rs | 5 ++--
datafusion/sql/src/unparser/plan.rs | 8 +++++--
datafusion/sql/src/unparser/rewrite.rs | 1 -
datafusion/sql/src/utils.rs | 1 -
datafusion/sql/tests/cases/diagnostic.rs | 10 ++++----
.../src/engines/datafusion_engine/normalize.rs | 1 -
datafusion/substrait/src/lib.rs | 27 +++++++++++++---------
.../logical_plan/consumer/substrait_consumer.rs | 1 -
20 files changed, 44 insertions(+), 50 deletions(-)
diff --git a/datafusion-examples/examples/advanced_parquet_index.rs
b/datafusion-examples/examples/advanced_parquet_index.rs
index 1c560be6d0..371c18de35 100644
--- a/datafusion-examples/examples/advanced_parquet_index.rs
+++ b/datafusion-examples/examples/advanced_parquet_index.rs
@@ -121,7 +121,6 @@ use url::Url;
/// │ ╚═══════════════════╝ │ 1. With cached ParquetMetadata, so
/// └───────────────────────┘ the ParquetSource does not re-read /
/// Parquet File decode the thrift footer
-///
/// ```
///
/// Within a Row Group, Column Chunks store data in DataPages. This example
also
diff --git a/datafusion-examples/examples/external_dependency/query-aws-s3.rs
b/datafusion-examples/examples/external_dependency/query-aws-s3.rs
index da2d7e4879..cd0b4562d5 100644
--- a/datafusion-examples/examples/external_dependency/query-aws-s3.rs
+++ b/datafusion-examples/examples/external_dependency/query-aws-s3.rs
@@ -28,7 +28,6 @@ use url::Url;
///
/// - AWS_ACCESS_KEY_ID
/// - AWS_SECRET_ACCESS_KEY
-///
#[tokio::main]
async fn main() -> Result<()> {
let ctx = SessionContext::new();
diff --git a/datafusion-examples/examples/flight/sql_server.rs
b/datafusion-examples/examples/flight/sql_server.rs
index fc7d0817bd..d86860f9d4 100644
--- a/datafusion-examples/examples/flight/sql_server.rs
+++ b/datafusion-examples/examples/flight/sql_server.rs
@@ -68,7 +68,6 @@ macro_rules! status {
///
/// Based heavily on Ballista's implementation:
https://github.com/apache/datafusion-ballista/blob/main/ballista/scheduler/src/flight_sql.rs
/// and the example in arrow-rs:
https://github.com/apache/arrow-rs/blob/master/arrow-flight/examples/flight_sql_server.rs
-///
pub async fn sql_server() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init();
let addr = "0.0.0.0:50051".parse()?;
diff --git a/datafusion-examples/examples/parquet_index.rs
b/datafusion-examples/examples/parquet_index.rs
index 127c55da98..a1dd1f1ffd 100644
--- a/datafusion-examples/examples/parquet_index.rs
+++ b/datafusion-examples/examples/parquet_index.rs
@@ -99,7 +99,6 @@ use url::Url;
/// Thus some parquet files are │ │
/// "pruned" and thus are not └─────────────┘
/// scanned at all Parquet Files
-///
/// ```
///
/// [`ListingTable`]: datafusion::datasource::listing::ListingTable
diff --git a/datafusion-examples/examples/sql_query.rs
b/datafusion-examples/examples/sql_query.rs
index 0ac203cfb7..4da07d33d0 100644
--- a/datafusion-examples/examples/sql_query.rs
+++ b/datafusion-examples/examples/sql_query.rs
@@ -32,7 +32,6 @@ use std::sync::Arc;
///
/// [`query_memtable`]: a simple query against a [`MemTable`]
/// [`query_parquet`]: a simple query against a directory with multiple
Parquet files
-///
#[tokio::main]
async fn main() -> Result<()> {
query_memtable().await?;
diff --git a/datafusion-examples/examples/thread_pools.rs
b/datafusion-examples/examples/thread_pools.rs
index bba56b2932..9842cccfbf 100644
--- a/datafusion-examples/examples/thread_pools.rs
+++ b/datafusion-examples/examples/thread_pools.rs
@@ -342,7 +342,7 @@ impl CpuRuntime {
/// message such as:
///
/// ```text
- ///A Tokio 1.x context was found, but IO is disabled.
+ /// A Tokio 1.x context was found, but IO is disabled.
/// ```
pub fn handle(&self) -> &Handle {
&self.handle
diff --git a/datafusion/spark/src/function/bitwise/bit_shift.rs
b/datafusion/spark/src/function/bitwise/bit_shift.rs
index bb645b7660..68911b0492 100644
--- a/datafusion/spark/src/function/bitwise/bit_shift.rs
+++ b/datafusion/spark/src/function/bitwise/bit_shift.rs
@@ -42,7 +42,6 @@ use crate::function::error_utils::{
///
/// # Returns
/// A new array with the shifted values.
-///
fn shift_left<T: ArrowPrimitiveType>(
value: &PrimitiveArray<T>,
shift: &PrimitiveArray<Int32Type>,
@@ -71,7 +70,6 @@ where
///
/// # Returns
/// A new array with the shifted values.
-///
fn shift_right<T: ArrowPrimitiveType>(
value: &PrimitiveArray<T>,
shift: &PrimitiveArray<Int32Type>,
@@ -132,7 +130,6 @@ impl UShr<i32> for i64 {
///
/// # Returns
/// A new array with the shifted values.
-///
fn shift_right_unsigned<T: ArrowPrimitiveType>(
value: &PrimitiveArray<T>,
shift: &PrimitiveArray<Int32Type>,
diff --git a/datafusion/spark/src/function/url/parse_url.rs
b/datafusion/spark/src/function/url/parse_url.rs
index d93c260b4f..a8afa1d963 100644
--- a/datafusion/spark/src/function/url/parse_url.rs
+++ b/datafusion/spark/src/function/url/parse_url.rs
@@ -80,7 +80,6 @@ impl ParseUrl {
/// * `Ok(Some(String))` - The extracted URL component as a string
/// * `Ok(None)` - If the requested component doesn't exist or is empty
/// * `Err(DataFusionError)` - If the URL is malformed and cannot be parsed
- ///
fn parse(value: &str, part: &str, key: Option<&str>) ->
Result<Option<String>> {
let url: std::result::Result<Url, ParseError> = Url::parse(value);
if let Err(ParseError::RelativeUrlWithoutBase) = url {
@@ -168,7 +167,6 @@ impl ScalarUDFImpl for ParseUrl {
/// - A string array with extracted URL components
/// - `None` values where extraction failed or component doesn't exist
/// - The output array type (StringArray or LargeStringArray) is determined by
input types
-///
fn spark_parse_url(args: &[ArrayRef]) -> Result<ArrayRef> {
spark_handled_parse_url(args, |x| x)
}
diff --git a/datafusion/spark/src/lib.rs b/datafusion/spark/src/lib.rs
index 4d45f3c482..5b1fa06cb2 100644
--- a/datafusion/spark/src/lib.rs
+++ b/datafusion/spark/src/lib.rs
@@ -88,7 +88,7 @@
//! use datafusion_spark::expr_fn::sha2;
//! // Create the expression `sha2(my_data, 256)`
//! let expr = sha2(col("my_data"), lit(256));
-//!```
+//! ```
//!
//![`Expr`]: datafusion_expr::Expr
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 1f1ef2a672..99d7467e1b 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -58,7 +58,7 @@ fn parse_file_type(s: &str) -> Result<String,
DataFusionError> {
/// Syntax:
/// ```sql
/// EXPLAIN <ANALYZE> <VERBOSE> [FORMAT format] statement
-///```
+/// ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExplainStatement {
/// `EXPLAIN ANALYZE ..`
@@ -320,8 +320,7 @@ const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
/// # use datafusion_sql::parser::DFParserBuilder;
/// # use datafusion_common::Result;
/// # fn test() -> Result<()> {
-/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2")
-/// .build()?;
+/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 +
2").build()?;
/// // parse the SQL into DFStatements
/// let statements = parser.parse_statements()?;
/// assert_eq!(statements.len(), 2);
@@ -336,13 +335,13 @@ const DEFAULT_DIALECT: GenericDialect = GenericDialect {};
/// # use datafusion_sql::sqlparser::dialect::MySqlDialect;
/// # use datafusion_sql::sqlparser::ast::Expr;
/// # fn test() -> Result<()> {
-/// let dialect = MySqlDialect{}; // Parse using MySQL dialect
+/// let dialect = MySqlDialect {}; // Parse using MySQL dialect
/// let mut parser = DFParserBuilder::new("1 + 2")
-/// .with_dialect(&dialect)
-/// .build()?;
+/// .with_dialect(&dialect)
+/// .build()?;
/// // parse 1+2 into an sqlparser::ast::Expr
/// let res = parser.parse_expr()?;
-/// assert!(matches!(res.expr, Expr::BinaryOp {..}));
+/// assert!(matches!(res.expr, Expr::BinaryOp { .. }));
/// # Ok(())
/// # }
/// ```
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 7bac033767..eb1e711eb4 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -251,7 +251,6 @@ impl IdentNormalizer {
/// This helps resolve scoping issues of CTEs.
/// By using cloning, a subquery can inherit CTEs from the outer query
/// and can also define its own private CTEs without affecting the outer query.
-///
#[derive(Debug, Clone)]
pub struct PlannerContext {
/// Data types for numbered parameters ($1, $2, etc), if supplied
diff --git a/datafusion/sql/src/resolve.rs b/datafusion/sql/src/resolve.rs
index 9e909f66fa..db5ddd5115 100644
--- a/datafusion/sql/src/resolve.rs
+++ b/datafusion/sql/src/resolve.rs
@@ -175,14 +175,14 @@ fn visit_statement(statement: &DFStatement, visitor: &mut
RelationVisitor) {
/// ## Example with CTEs
///
/// ```
-/// # use datafusion_sql::parser::DFParser;
+/// # use datafusion_sql::parser::DFParser;
/// # use datafusion_sql::resolve::resolve_table_references;
-/// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;";
-/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
-/// let (table_refs, ctes) = resolve_table_references(&statement,
true).unwrap();
+/// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;";
+/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
+/// let (table_refs, ctes) = resolve_table_references(&statement,
true).unwrap();
/// assert_eq!(table_refs.len(), 0);
-/// assert_eq!(ctes.len(), 1);
-/// assert_eq!(ctes[0].to_string(), "my_cte");
+/// assert_eq!(ctes.len(), 1);
+/// assert_eq!(ctes[0].to_string(), "my_cte");
/// ```
pub fn resolve_table_references(
statement: &crate::parser::Statement,
diff --git a/datafusion/sql/src/unparser/expr.rs
b/datafusion/sql/src/unparser/expr.rs
index 97f2b58bf8..8dc3092e9c 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -70,9 +70,8 @@ use sqlparser::tokenizer::Span;
/// use datafusion_expr::{col, lit};
/// use datafusion_sql::unparser::expr_to_sql;
/// let expr = col("a").gt(lit(4)); // form an expression `a > 4`
-/// let sql = expr_to_sql(&expr).unwrap(); // convert to ast::Expr
-/// // use the Display impl to convert to SQL text
-/// assert_eq!(sql.to_string(), "(a > 4)")
+/// let sql = expr_to_sql(&expr).unwrap(); // convert to ast::Expr, using
+/// assert_eq!(sql.to_string(), "(a > 4)"); // use Display impl for SQL text
/// ```
///
/// [`SqlToRel::sql_to_expr`]: crate::planner::SqlToRel::sql_to_expr
diff --git a/datafusion/sql/src/unparser/plan.rs
b/datafusion/sql/src/unparser/plan.rs
index e7535338b7..68b42ba05a 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -81,9 +81,13 @@ use std::{sync::Arc, vec};
/// .unwrap()
/// .build()
/// .unwrap();
-/// let sql = plan_to_sql(&plan).unwrap(); // convert to AST
+/// // convert to AST
+/// let sql = plan_to_sql(&plan).unwrap();
/// // use the Display impl to convert to SQL text
-/// assert_eq!(sql.to_string(), "SELECT \"table\".id, \"table\".\"value\" FROM
\"table\"")
+/// assert_eq!(
+/// sql.to_string(),
+/// "SELECT \"table\".id, \"table\".\"value\" FROM \"table\""
+/// )
/// ```
///
/// [`SqlToRel::sql_statement_to_plan`]:
crate::planner::SqlToRel::sql_statement_to_plan
diff --git a/datafusion/sql/src/unparser/rewrite.rs
b/datafusion/sql/src/unparser/rewrite.rs
index c961f1d6f1..1b6c3433f7 100644
--- a/datafusion/sql/src/unparser/rewrite.rs
+++ b/datafusion/sql/src/unparser/rewrite.rs
@@ -119,7 +119,6 @@ fn rewrite_sort_expr_for_union(exprs: Vec<SortExpr>) ->
Result<Vec<SortExpr>> {
/// Projection: table.column1, table.column2
/// Window: window_function
/// TableScan: table
-///
pub(super) fn rewrite_qualify(plan: LogicalPlan) -> Result<LogicalPlan> {
let transformed_plan = plan.transform_up(|plan| match plan {
// Check if the filter's input is a Window plan
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 3c86d2d049..042ee53730 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -531,7 +531,6 @@ impl TreeNodeRewriter for RecursiveUnnestRewriter<'_> {
/// / /
/// column2
/// ```
- ///
fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
if let Expr::Unnest(ref traversing_unnest) = expr {
if traversing_unnest == self.top_most_unnest.as_ref().unwrap() {
diff --git a/datafusion/sql/tests/cases/diagnostic.rs
b/datafusion/sql/tests/cases/diagnostic.rs
index 8648dffb50..7ae839851d 100644
--- a/datafusion/sql/tests/cases/diagnostic.rs
+++ b/datafusion/sql/tests/cases/diagnostic.rs
@@ -69,10 +69,12 @@ fn do_query(sql: &'static str) -> Diagnostic {
/// ## Example
///
/// ```rust
-/// let spans = get_spans("SELECT /*whole+left*/speed/*left*/ +
/*right*/10/*right+whole*/ FROM cars");
-/// // whole is ^^^^^^^^^^^^^^^^^^^^^^^^^^^
-/// // left is ^^^^^
-/// // right is ^^
+/// let spans = get_spans(
+/// "SELECT /*whole+left*/speed/*left*/ + /*right*/10/*right+whole*/ FROM
cars",
+/// // whole is ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+/// // left is ^^^^^
+/// // right is ^^
+/// );
/// dbg!(&spans["whole"]);
/// dbg!(&spans["left"]);
/// dbg!(&spans["right"]);
diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
index 87108b6742..cb6410d857 100644
--- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
+++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
@@ -185,7 +185,6 @@ macro_rules! get_row_value {
/// [NULL Values and empty strings]:
https://duckdb.org/dev/sqllogictest/result_verification#null-values-and-empty-strings
///
/// Floating numbers are rounded to have a consistent representation with the
Postgres runner.
-///
pub fn cell_to_string(col: &ArrayRef, row: usize, is_spark_path: bool) ->
Result<String> {
if !col.is_valid(row) {
// represent any null value with the string "NULL"
diff --git a/datafusion/substrait/src/lib.rs b/datafusion/substrait/src/lib.rs
index 9a4f44e81d..8bc31569f2 100644
--- a/datafusion/substrait/src/lib.rs
+++ b/datafusion/substrait/src/lib.rs
@@ -66,19 +66,24 @@
//! # use datafusion::arrow::array::{Int32Array, RecordBatch};
//! # use datafusion_substrait::logical_plan;
//! // Create a plan that scans table 't'
-//! let ctx = SessionContext::new();
-//! let batch = RecordBatch::try_from_iter(vec![("x",
Arc::new(Int32Array::from(vec![42])) as _)])?;
-//! ctx.register_batch("t", batch)?;
-//! let df = ctx.sql("SELECT x from t").await?;
-//! let plan = df.into_optimized_plan()?;
+//! let ctx = SessionContext::new();
+//! let batch = RecordBatch::try_from_iter(vec![(
+//! "x",
+//! Arc::new(Int32Array::from(vec![42])) as _,
+//! )])?;
+//! ctx.register_batch("t", batch)?;
+//! let df = ctx.sql("SELECT x from t").await?;
+//! let plan = df.into_optimized_plan()?;
//!
-//! // Convert the plan into a substrait (protobuf) Plan
-//! let substrait_plan = logical_plan::producer::to_substrait_plan(&plan,
&ctx.state())?;
+//! // Convert the plan into a substrait (protobuf) Plan
+//! let substrait_plan = logical_plan::producer::to_substrait_plan(&plan,
&ctx.state())?;
//!
-//! // Receive a substrait protobuf from somewhere, and turn it into a
LogicalPlan
-//! let logical_round_trip =
logical_plan::consumer::from_substrait_plan(&ctx.state(),
&substrait_plan).await?;
-//! let logical_round_trip = ctx.state().optimize(&logical_round_trip)?;
-//! assert_eq!(format!("{:?}", plan), format!("{:?}", logical_round_trip));
+//! // Receive a substrait protobuf from somewhere, and turn it into a
LogicalPlan
+//! let logical_round_trip =
+//! logical_plan::consumer::from_substrait_plan(&ctx.state(),
&substrait_plan)
+//! .await?;
+//! let logical_round_trip = ctx.state().optimize(&logical_round_trip)?;
+//! assert_eq!(format!("{:?}", plan), format!("{:?}", logical_round_trip));
//! # Ok(())
//! # }
//! ```
diff --git
a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
index 5392dd77b5..c734b9eb7a 100644
--- a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
@@ -150,7 +150,6 @@ use substrait::proto::{
/// }
/// }
/// ```
-///
pub trait SubstraitConsumer: Send + Sync + Sized {
async fn resolve_table_ref(
&self,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]