This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 418e6932a2 Publish built docs triggered by 6ab4d216b768c9327982e59376a62a29c69ca436
418e6932a2 is described below
commit 418e6932a2c412cc01905d8505297d1af1234181
Author: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
AuthorDate: Sun Nov 9 07:16:06 2025 +0000
Publish built docs triggered by 6ab4d216b768c9327982e59376a62a29c69ca436
---
_sources/library-user-guide/upgrading.md.txt | 89 +++++++++++++++++++++++++++-
library-user-guide/upgrading.html | 83 +++++++++++++++++++++++++-
searchindex.js | 2 +-
3 files changed, 169 insertions(+), 5 deletions(-)
diff --git a/_sources/library-user-guide/upgrading.md.txt b/_sources/library-user-guide/upgrading.md.txt
index 0b227000f7..f08e2c383a 100644
--- a/_sources/library-user-guide/upgrading.md.txt
+++ b/_sources/library-user-guide/upgrading.md.txt
@@ -150,7 +150,7 @@ let projection_exprs = config.projection_exprs;
The `FileScanConfigBuilder::with_projection()` method has been deprecated in
favor of `with_projection_indices()`:
```diff
-let config = FileScanConfigBuilder::new(url, schema, file_source)
+let config = FileScanConfigBuilder::new(url, file_source)
- .with_projection(Some(vec![0, 2, 3]))
+ .with_projection_indices(Some(vec![0, 2, 3]))
.build();
@@ -190,6 +190,91 @@ TIMEZONE = '+00:00';
This change was made to better support using the default timezone in scalar
UDF functions such as
`now`, `current_date`, `current_time`, and `to_timestamp` among others.
+### Refactoring of `FileSource` constructors and `FileScanConfigBuilder` to accept schemas upfront
+
+The way schemas are passed to file sources and scan configurations has been significantly refactored. File sources now require the schema (including partition columns) to be provided at construction time, and `FileScanConfigBuilder` no longer takes a separate schema parameter.
+
+**Who is affected:**
+
+- Users who create `FileScanConfig` or file sources (`ParquetSource`, `CsvSource`, `JsonSource`, `AvroSource`) directly
+- Users who maintain custom `FileFormat` implementations
+
+**Key changes:**
+
+1. **FileSource constructors now require TableSchema**: All built-in file sources now take the schema in their constructor:
+
+ ```diff
+ - let source = ParquetSource::default();
+ + let source = ParquetSource::new(table_schema);
+ ```
+
+2. **FileScanConfigBuilder no longer takes schema as a parameter**: The schema is now passed via the FileSource:
+
+ ```diff
+ - FileScanConfigBuilder::new(url, schema, source)
+ + FileScanConfigBuilder::new(url, source)
+ ```
+
+3. **Partition columns are now part of TableSchema**: The `with_table_partition_cols()` method has been removed from `FileScanConfigBuilder`. Partition columns are now passed as part of the `TableSchema` to the FileSource constructor:
+
+ ```diff
+ + let table_schema = TableSchema::new(
+ + file_schema,
+ + vec![Arc::new(Field::new("date", DataType::Utf8, false))],
+ + );
+ + let source = ParquetSource::new(table_schema);
+ let config = FileScanConfigBuilder::new(url, source)
+ -   .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)])
+ .with_file(partitioned_file)
+ .build();
+ ```
+
+4. **FileFormat::file_source() now takes TableSchema parameter**: Custom `FileFormat` implementations must be updated:
+ ```diff
+ impl FileFormat for MyFileFormat {
+ - fn file_source(&self) -> Arc<dyn FileSource> {
+ + fn file_source(&self, table_schema: TableSchema) -> Arc<dyn FileSource> {
+ - Arc::new(MyFileSource::default())
+ + Arc::new(MyFileSource::new(table_schema))
+ }
+ }
+ ```
+
+**Migration examples:**
+
+For Parquet files:
+
+```diff
+- let source = Arc::new(ParquetSource::default());
+- let config = FileScanConfigBuilder::new(url, schema, source)
++ let table_schema = TableSchema::new(schema, vec![]);
++ let source = Arc::new(ParquetSource::new(table_schema));
++ let config = FileScanConfigBuilder::new(url, source)
+ .with_file(partitioned_file)
+ .build();
+```
+
+For CSV files with partition columns:
+
+```diff
+- let source = Arc::new(CsvSource::new(true, b',', b'"'));
+- let config = FileScanConfigBuilder::new(url, file_schema, source)
+-     .with_table_partition_cols(vec![Field::new("year", DataType::Int32, false)])
++ let options = CsvOptions {
++ has_header: Some(true),
++ delimiter: b',',
++ quote: b'"',
++ ..Default::default()
++ };
++ let table_schema = TableSchema::new(
++ file_schema,
++ vec![Arc::new(Field::new("year", DataType::Int32, false))],
++ );
++ let source = Arc::new(CsvSource::new(table_schema).with_csv_options(options));
++ let config = FileScanConfigBuilder::new(url, source)
+ .build();
+```
+
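+Putting the migration steps together, the following is a minimal sketch of the new end-to-end pattern. The import paths and the sample schema, file, and partition names below are assumptions and may need adjusting to your crate layout and data:
+
+```rust
+// Sketch of the new construction pattern, assembled from the examples above.
+// NOTE: the exact import paths are assumptions; several of these types are
+// also re-exported elsewhere (e.g. under `datafusion::datasource::*`).
+use std::sync::Arc;
+
+use arrow::datatypes::{DataType, Field, Schema};
+use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
+use datafusion_datasource::{PartitionedFile, TableSchema};
+use datafusion_datasource_parquet::source::ParquetSource;
+use datafusion_execution::object_store::ObjectStoreUrl;
+
+// Columns physically stored in the Parquet files.
+let file_schema = Arc::new(Schema::new(vec![
+    Field::new("id", DataType::Int64, false),
+    Field::new("value", DataType::Float64, true),
+]));
+
+// Partition columns now travel with the schema instead of being attached
+// to the builder via `with_table_partition_cols()`.
+let table_schema = TableSchema::new(
+    file_schema,
+    vec![Arc::new(Field::new("date", DataType::Utf8, false))],
+);
+
+// The file source owns the full table schema up front...
+let source = Arc::new(ParquetSource::new(table_schema));
+
+// ...so the builder only needs the object store URL and the source.
+let config = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source)
+    .with_file(PartitionedFile::new(
+        "data/date=2025-11-09/part-0.parquet".to_string(),
+        1024,
+    ))
+    .build();
+```
+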
### Introduction of `TableSchema` and changes to `FileSource::with_schema()`
method
A new `TableSchema` struct has been introduced in the `datafusion-datasource`
crate to better manage table schemas with partition columns. This struct helps
distinguish between:
@@ -1137,7 +1222,7 @@ Pattern in DataFusion `47.0.0`:
```rust
# /* comment to avoid running
-let config = FileScanConfigBuilder::new(url, schema, Arc::new(file_source))
+let config = FileScanConfigBuilder::new(url, Arc::new(file_source))
.with_statistics(stats)
...
.build();
diff --git a/library-user-guide/upgrading.html b/library-user-guide/upgrading.html
index 728cd37c07..0b82a1d983 100644
--- a/library-user-guide/upgrading.html
+++ b/library-user-guide/upgrading.html
@@ -507,7 +507,7 @@ Users may need to update their paths to account for these
changes.</p>
</div>
<p><strong>Impact on builders:</strong></p>
<p>The <code class="docutils literal notranslate"><span
class="pre">FileScanConfigBuilder::with_projection()</span></code> method has
been deprecated in favor of <code class="docutils literal notranslate"><span
class="pre">with_projection_indices()</span></code>:</p>
-<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span>let config =
FileScanConfigBuilder::new(url, schema, file_source)
+<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span>let config =
FileScanConfigBuilder::new(url, file_source)
<span class="gd">- .with_projection(Some(vec![0, 2, 3]))</span>
<span class="gi">+ .with_projection_indices(Some(vec![0, 2, 3]))</span>
<span class="w"> </span> .build();
@@ -541,6 +541,84 @@ to the previous value you can execute the sql:</p>
<p>This change was made to better support using the default timezone in scalar
UDF functions such as
<code class="docutils literal notranslate"><span
class="pre">now</span></code>, <code class="docutils literal notranslate"><span
class="pre">current_date</span></code>, <code class="docutils literal
notranslate"><span class="pre">current_time</span></code>, and <code
class="docutils literal notranslate"><span
class="pre">to_timestamp</span></code> among others.</p>
</section>
+<section
id="refactoring-of-filesource-constructors-and-filescanconfigbuilder-to-accept-schemas-upfront">
+<h3>Refactoring of <code class="docutils literal notranslate"><span
class="pre">FileSource</span></code> constructors and <code class="docutils
literal notranslate"><span class="pre">FileScanConfigBuilder</span></code> to
accept schemas upfront<a class="headerlink"
href="#refactoring-of-filesource-constructors-and-filescanconfigbuilder-to-accept-schemas-upfront"
title="Link to this heading">#</a></h3>
+<p>The way schemas are passed to file sources and scan configurations has been
significantly refactored. File sources now require the schema (including
partition columns) to be provided at construction time, and <code
class="docutils literal notranslate"><span
class="pre">FileScanConfigBuilder</span></code> no longer takes a separate
schema parameter.</p>
+<p><strong>Who is affected:</strong></p>
+<ul class="simple">
+<li><p>Users who create <code class="docutils literal notranslate"><span
class="pre">FileScanConfig</span></code> or file sources (<code class="docutils
literal notranslate"><span class="pre">ParquetSource</span></code>, <code
class="docutils literal notranslate"><span class="pre">CsvSource</span></code>,
<code class="docutils literal notranslate"><span
class="pre">JsonSource</span></code>, <code class="docutils literal
notranslate"><span class="pre">AvroSource</span></code>) directly</p></li>
+<li><p>Users who implement custom <code class="docutils literal
notranslate"><span class="pre">FileFormat</span></code> implementations</p></li>
+</ul>
+<p><strong>Key changes:</strong></p>
+<ol class="arabic">
+<li><p><strong>FileSource constructors now require TableSchema</strong>: All
built-in file sources now take the schema in their constructor:</p>
+<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span><span class="gd">- let source =
ParquetSource::default();</span>
+<span class="gi">+ let source = ParquetSource::new(table_schema);</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>FileScanConfigBuilder no longer takes schema as a
parameter</strong>: The schema is now passed via the FileSource:</p>
+<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span><span class="gd">-
FileScanConfigBuilder::new(url, schema, source)</span>
+<span class="gi">+ FileScanConfigBuilder::new(url, source)</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>Partition columns are now part of TableSchema</strong>: The
<code class="docutils literal notranslate"><span
class="pre">with_table_partition_cols()</span></code> method has been removed
from <code class="docutils literal notranslate"><span
class="pre">FileScanConfigBuilder</span></code>. Partition columns are now
passed as part of the <code class="docutils literal notranslate"><span
class="pre">TableSchema</span></code> to the FileSource constructor:</p>
+<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span><span class="gi">+ let table_schema =
TableSchema::new(</span>
+<span class="gi">+ file_schema,</span>
+<span class="gi">+ vec![Arc::new(Field::new("date",
DataType::Utf8, false))],</span>
+<span class="gi">+ );</span>
+<span class="gi">+ let source = ParquetSource::new(table_schema);</span>
+<span class="w"> </span> let config = FileScanConfigBuilder::new(url, source)
+<span class="gd">-
.with_table_partition_cols(vec![Field::new("date", DataType::Utf8,
false)])</span>
+<span class="w"> </span> .with_file(partitioned_file)
+<span class="w"> </span> .build();
+</pre></div>
+</div>
+</li>
+<li><p><strong>FileFormat::file_source() now takes TableSchema
parameter</strong>: Custom <code class="docutils literal notranslate"><span
class="pre">FileFormat</span></code> implementations must be updated:</p>
+<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span>impl FileFormat for MyFileFormat {
+<span class="gd">- fn file_source(&self) -> Arc<dyn FileSource>
{</span>
+<span class="gi">+ fn file_source(&self, table_schema: TableSchema)
-> Arc<dyn FileSource> {</span>
+<span class="gd">- Arc::new(MyFileSource::default())</span>
+<span class="gi">+ Arc::new(MyFileSource::new(table_schema))</span>
+<span class="w"> </span> }
+}
+</pre></div>
+</div>
+</li>
+</ol>
+<p><strong>Migration examples:</strong></p>
+<p>For Parquet files:</p>
+<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span><span class="gd">- let source =
Arc::new(ParquetSource::default());</span>
+<span class="gd">- let config = FileScanConfigBuilder::new(url, schema,
source)</span>
+<span class="gi">+ let table_schema = TableSchema::new(schema, vec![]);</span>
+<span class="gi">+ let source =
Arc::new(ParquetSource::new(table_schema));</span>
+<span class="gi">+ let config = FileScanConfigBuilder::new(url, source)</span>
+<span class="w"> </span> .with_file(partitioned_file)
+<span class="w"> </span> .build();
+</pre></div>
+</div>
+<p>For CSV files with partition columns:</p>
+<div class="highlight-diff notranslate"><div
class="highlight"><pre><span></span><span class="gd">- let source =
Arc::new(CsvSource::new(true, b',', b'"'));</span>
+<span class="gd">- let config = FileScanConfigBuilder::new(url, file_schema,
source)</span>
+<span class="gd">-
.with_table_partition_cols(vec![Field::new("year", DataType::Int32,
false)])</span>
+<span class="gi">+ let options = CsvOptions {</span>
+<span class="gi">+ has_header: Some(true),</span>
+<span class="gi">+ delimiter: b',',</span>
+<span class="gi">+ quote: b'"',</span>
+<span class="gi">+ ..Default::default()</span>
+<span class="gi">+ };</span>
+<span class="gi">+ let table_schema = TableSchema::new(</span>
+<span class="gi">+ file_schema,</span>
+<span class="gi">+ vec![Arc::new(Field::new("year",
DataType::Int32, false))],</span>
+<span class="gi">+ );</span>
+<span class="gi">+ let source =
Arc::new(CsvSource::new(table_schema).with_csv_options(options));</span>
+<span class="gi">+ let config = FileScanConfigBuilder::new(url, source)</span>
+<span class="w"> </span> .build();
+</pre></div>
+</div>
+</section>
<section
id="introduction-of-tableschema-and-changes-to-filesource-with-schema-method">
<h3>Introduction of <code class="docutils literal notranslate"><span
class="pre">TableSchema</span></code> and changes to <code class="docutils
literal notranslate"><span class="pre">FileSource::with_schema()</span></code>
method<a class="headerlink"
href="#introduction-of-tableschema-and-changes-to-filesource-with-schema-method"
title="Link to this heading">#</a></h3>
<p>A new <code class="docutils literal notranslate"><span
class="pre">TableSchema</span></code> struct has been introduced in the <code
class="docutils literal notranslate"><span
class="pre">datafusion-datasource</span></code> crate to better manage table
schemas with partition columns. This struct helps distinguish between:</p>
@@ -1280,7 +1358,7 @@ DataFusion 47.0.0 this has been changed to use <code
class="docutils literal not
</pre></div>
</div>
<p>Pattern in DataFusion <code class="docutils literal notranslate"><span
class="pre">47.0.0</span></code>:</p>
-<div class="highlight-rust notranslate"><div
class="highlight"><pre><span></span><span class="kd">let</span><span class="w">
</span><span class="n">config</span><span class="w"> </span><span
class="o">=</span><span class="w"> </span><span
class="n">FileScanConfigBuilder</span><span class="p">::</span><span
class="n">new</span><span class="p">(</span><span class="n">url</span><span
class="p">,</span><span class="w"> </span><span class="n">schema</span><span
class="p">,</span><span class=" [...]
+<div class="highlight-rust notranslate"><div
class="highlight"><pre><span></span><span class="kd">let</span><span class="w">
</span><span class="n">config</span><span class="w"> </span><span
class="o">=</span><span class="w"> </span><span
class="n">FileScanConfigBuilder</span><span class="p">::</span><span
class="n">new</span><span class="p">(</span><span class="n">url</span><span
class="p">,</span><span class="w"> </span><span class="n">Arc</span><span
class="p">::</span><span class="n" [...]
<span class="w"> </span><span class="p">.</span><span
class="n">with_statistics</span><span class="p">(</span><span
class="n">stats</span><span class="p">)</span>
<span class="w"> </span><span class="o">..</span><span class="p">.</span>
<span class="w"> </span><span class="p">.</span><span
class="n">build</span><span class="p">();</span>
@@ -1575,6 +1653,7 @@ take care of constructing the <code class="docutils
literal notranslate"><span c
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link"
href="#filescanconfig-projection-renamed-to-filescanconfig-projection-exprs"><code
class="docutils literal notranslate"><span
class="pre">FileScanConfig::projection</span></code> renamed to <code
class="docutils literal notranslate"><span
class="pre">FileScanConfig::projection_exprs</span></code></a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link"
href="#describe-query-support"><code class="docutils literal notranslate"><span
class="pre">DESCRIBE</span> <span class="pre">query</span></code>
support</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link"
href="#datafusion-execution-time-zone-default-configuration-changed"><code
class="docutils literal notranslate"><span
class="pre">datafusion.execution.time_zone</span></code> default configuration
changed</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link"
href="#refactoring-of-filesource-constructors-and-filescanconfigbuilder-to-accept-schemas-upfront">Refactoring
of <code class="docutils literal notranslate"><span
class="pre">FileSource</span></code> constructors and <code class="docutils
literal notranslate"><span class="pre">FileScanConfigBuilder</span></code> to
accept schemas upfront</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link"
href="#introduction-of-tableschema-and-changes-to-filesource-with-schema-method">Introduction
of <code class="docutils literal notranslate"><span
class="pre">TableSchema</span></code> and changes to <code class="docutils
literal notranslate"><span class="pre">FileSource::with_schema()</span></code>
method</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link"
href="#aggregateudfimpl-is-ordered-set-aggregate-has-been-renamed-to-aggregateudfimpl-supports-within-group-clause"><code
class="docutils literal notranslate"><span
class="pre">AggregateUDFImpl::is_ordered_set_aggregate</span></code> has been
renamed to <code class="docutils literal notranslate"><span
class="pre">AggregateUDFImpl::supports_within_group_clause</span></code></a></li>
</ul>
diff --git a/searchindex.js b/searchindex.js
index 0d58534e58..46f3418a1e 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles":{"!=":[[60,"op-neq"]],"!~":[[60,"op-re-not-match"]],"!~*":[[60,"op-re-not-match-i"]],"!~~":[[60,"id19"]],"!~~*":[[60,"id20"]],"#":[[60,"op-bit-xor"]],"%":[[60,"op-modulo"]],"&":[[60,"op-bit-and"]],"(relation,
name) tuples in logical fields and logical columns are
unique":[[13,"relation-name-tuples-in-logical-fields-and-logical-columns-are-unique"]],"*":[[60,"op-multiply"]],"+":[[60,"op-plus"]],"-":[[60,"op-minus"]],"/":[[60,"op-divide"]],"<":[[60,"op-lt"]],"<
[...]
\ No newline at end of file
+Search.setIndex({"alltitles":{"!=":[[60,"op-neq"]],"!~":[[60,"op-re-not-match"]],"!~*":[[60,"op-re-not-match-i"]],"!~~":[[60,"id19"]],"!~~*":[[60,"id20"]],"#":[[60,"op-bit-xor"]],"%":[[60,"op-modulo"]],"&":[[60,"op-bit-and"]],"(relation,
name) tuples in logical fields and logical columns are
unique":[[13,"relation-name-tuples-in-logical-fields-and-logical-columns-are-unique"]],"*":[[60,"op-multiply"]],"+":[[60,"op-plus"]],"-":[[60,"op-minus"]],"/":[[60,"op-divide"]],"<":[[60,"op-lt"]],"<
[...]
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]