This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 418e6932a2 Publish built docs triggered by 6ab4d216b768c9327982e59376a62a29c69ca436
418e6932a2 is described below

commit 418e6932a2c412cc01905d8505297d1af1234181
Author: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
AuthorDate: Sun Nov 9 07:16:06 2025 +0000

    Publish built docs triggered by 6ab4d216b768c9327982e59376a62a29c69ca436
---
 _sources/library-user-guide/upgrading.md.txt | 89 +++++++++++++++++++++++++++-
 library-user-guide/upgrading.html            | 83 +++++++++++++++++++++++++-
 searchindex.js                               |  2 +-
 3 files changed, 169 insertions(+), 5 deletions(-)

diff --git a/_sources/library-user-guide/upgrading.md.txt b/_sources/library-user-guide/upgrading.md.txt
index 0b227000f7..f08e2c383a 100644
--- a/_sources/library-user-guide/upgrading.md.txt
+++ b/_sources/library-user-guide/upgrading.md.txt
@@ -150,7 +150,7 @@ let projection_exprs = config.projection_exprs;
 The `FileScanConfigBuilder::with_projection()` method has been deprecated in favor of `with_projection_indices()`:
 
 ```diff
-let config = FileScanConfigBuilder::new(url, schema, file_source)
+let config = FileScanConfigBuilder::new(url, file_source)
 -   .with_projection(Some(vec![0, 2, 3]))
 +   .with_projection_indices(Some(vec![0, 2, 3]))
     .build();
@@ -190,6 +190,91 @@ TIMEZONE = '+00:00';
 This change was made to better support using the default timezone in scalar UDF functions such as
 `now`, `current_date`, `current_time`, and `to_timestamp` among others.
 
+### Refactoring of `FileSource` constructors and `FileScanConfigBuilder` to accept schemas upfront
+
+The way schemas are passed to file sources and scan configurations has been significantly refactored. File sources now require the schema (including partition columns) to be provided at construction time, and `FileScanConfigBuilder` no longer takes a separate schema parameter.
+
+**Who is affected:**
+
+- Users who create `FileScanConfig` or file sources (`ParquetSource`, `CsvSource`, `JsonSource`, `AvroSource`) directly
+- Users who maintain custom `FileFormat` implementations
+
+**Key changes:**
+
+1. **FileSource constructors now require TableSchema**: All built-in file sources now take the schema in their constructor:
+
+   ```diff
+   - let source = ParquetSource::default();
+   + let source = ParquetSource::new(table_schema);
+   ```
+
+2. **FileScanConfigBuilder no longer takes schema as a parameter**: The schema is now passed via the FileSource:
+
+   ```diff
+   - FileScanConfigBuilder::new(url, schema, source)
+   + FileScanConfigBuilder::new(url, source)
+   ```
+
+3. **Partition columns are now part of TableSchema**: The `with_table_partition_cols()` method has been removed from `FileScanConfigBuilder`. Partition columns are now passed as part of the `TableSchema` to the FileSource constructor:
+
+   ```diff
+   + let table_schema = TableSchema::new(
+   +     file_schema,
+   +     vec![Arc::new(Field::new("date", DataType::Utf8, false))],
+   + );
+   + let source = ParquetSource::new(table_schema);
+     let config = FileScanConfigBuilder::new(url, source)
+   -     .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)])
+         .with_file(partitioned_file)
+         .build();
+   ```
+
+4. **FileFormat::file_source() now takes TableSchema parameter**: Custom `FileFormat` implementations must be updated:
+   ```diff
+   impl FileFormat for MyFileFormat {
+   -   fn file_source(&self) -> Arc<dyn FileSource> {
+   +   fn file_source(&self, table_schema: TableSchema) -> Arc<dyn FileSource> {
+   -       Arc::new(MyFileSource::default())
+   +       Arc::new(MyFileSource::new(table_schema))
+       }
+   }
+   ```
+
+**Migration examples:**
+
+For Parquet files:
+
+```diff
+- let source = Arc::new(ParquetSource::default());
+- let config = FileScanConfigBuilder::new(url, schema, source)
++ let table_schema = TableSchema::new(schema, vec![]);
++ let source = Arc::new(ParquetSource::new(table_schema));
++ let config = FileScanConfigBuilder::new(url, source)
+      .with_file(partitioned_file)
+      .build();
+```
+
+For CSV files with partition columns:
+
+```diff
+- let source = Arc::new(CsvSource::new(true, b',', b'"'));
+- let config = FileScanConfigBuilder::new(url, file_schema, source)
+-     .with_table_partition_cols(vec![Field::new("year", DataType::Int32, false)])
++ let options = CsvOptions {
++     has_header: Some(true),
++     delimiter: b',',
++     quote: b'"',
++     ..Default::default()
++ };
++ let table_schema = TableSchema::new(
++     file_schema,
++     vec![Arc::new(Field::new("year", DataType::Int32, false))],
++ );
++ let source = Arc::new(CsvSource::new(table_schema).with_csv_options(options));
++ let config = FileScanConfigBuilder::new(url, source)
+      .build();
+```
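+
+As a minimal end-to-end sketch (assuming an object store `url`, a `file_schema`, and a `partitioned_file` are already in scope), the pieces above combine as follows for a Parquet table partitioned by a hypothetical `year` column:
+
+```rust
+# /* comment to avoid running
+// The TableSchema owns both the file schema and the partition columns.
+let table_schema = TableSchema::new(
+    file_schema,
+    vec![Arc::new(Field::new("year", DataType::Int32, false))],
+);
+// The file source now carries the schema,
+let source = Arc::new(ParquetSource::new(table_schema));
+// so the builder only needs the object store URL and the source.
+let config = FileScanConfigBuilder::new(url, source)
+    .with_file(partitioned_file)
+    .build();
+# */
+```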
+
 ### Introduction of `TableSchema` and changes to `FileSource::with_schema()` method
 
 A new `TableSchema` struct has been introduced in the `datafusion-datasource` crate to better manage table schemas with partition columns. This struct helps distinguish between:
@@ -1137,7 +1222,7 @@ Pattern in DataFusion `47.0.0`:
 
 ```rust
 # /* comment to avoid running
-let config = FileScanConfigBuilder::new(url, schema, Arc::new(file_source))
+let config = FileScanConfigBuilder::new(url, Arc::new(file_source))
   .with_statistics(stats)
   ...
   .build();
diff --git a/library-user-guide/upgrading.html b/library-user-guide/upgrading.html
index 728cd37c07..0b82a1d983 100644
--- a/library-user-guide/upgrading.html
+++ b/library-user-guide/upgrading.html
@@ -507,7 +507,7 @@ Users may need to update their paths to account for these 
changes.</p>
 </div>
 <p><strong>Impact on builders:</strong></p>
 <p>The <code class="docutils literal notranslate"><span 
class="pre">FileScanConfigBuilder::with_projection()</span></code> method has 
been deprecated in favor of <code class="docutils literal notranslate"><span 
class="pre">with_projection_indices()</span></code>:</p>
-<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span>let config = 
FileScanConfigBuilder::new(url, schema, file_source)
+<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span>let config = 
FileScanConfigBuilder::new(url, file_source)
 <span class="gd">-   .with_projection(Some(vec![0, 2, 3]))</span>
 <span class="gi">+   .with_projection_indices(Some(vec![0, 2, 3]))</span>
 <span class="w"> </span>   .build();
@@ -541,6 +541,84 @@ to the previous value you can execute the sql:</p>
 <p>This change was made to better support using the default timezone in scalar 
UDF functions such as
 <code class="docutils literal notranslate"><span 
class="pre">now</span></code>, <code class="docutils literal notranslate"><span 
class="pre">current_date</span></code>, <code class="docutils literal 
notranslate"><span class="pre">current_time</span></code>, and <code 
class="docutils literal notranslate"><span 
class="pre">to_timestamp</span></code> among others.</p>
 </section>
+<section 
id="refactoring-of-filesource-constructors-and-filescanconfigbuilder-to-accept-schemas-upfront">
+<h3>Refactoring of <code class="docutils literal notranslate"><span 
class="pre">FileSource</span></code> constructors and <code class="docutils 
literal notranslate"><span class="pre">FileScanConfigBuilder</span></code> to 
accept schemas upfront<a class="headerlink" 
href="#refactoring-of-filesource-constructors-and-filescanconfigbuilder-to-accept-schemas-upfront"
 title="Link to this heading">#</a></h3>
+<p>The way schemas are passed to file sources and scan configurations has been 
significantly refactored. File sources now require the schema (including 
partition columns) to be provided at construction time, and <code 
class="docutils literal notranslate"><span 
class="pre">FileScanConfigBuilder</span></code> no longer takes a separate 
schema parameter.</p>
+<p><strong>Who is affected:</strong></p>
+<ul class="simple">
+<li><p>Users who create <code class="docutils literal notranslate"><span 
class="pre">FileScanConfig</span></code> or file sources (<code class="docutils 
literal notranslate"><span class="pre">ParquetSource</span></code>, <code 
class="docutils literal notranslate"><span class="pre">CsvSource</span></code>, 
<code class="docutils literal notranslate"><span 
class="pre">JsonSource</span></code>, <code class="docutils literal 
notranslate"><span class="pre">AvroSource</span></code>) directly</p></li>
+<li><p>Users who implement custom <code class="docutils literal 
notranslate"><span class="pre">FileFormat</span></code> implementations</p></li>
+</ul>
+<p><strong>Key changes:</strong></p>
+<ol class="arabic">
+<li><p><strong>FileSource constructors now require TableSchema</strong>: All 
built-in file sources now take the schema in their constructor:</p>
+<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span><span class="gd">- let source = 
ParquetSource::default();</span>
+<span class="gi">+ let source = ParquetSource::new(table_schema);</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>FileScanConfigBuilder no longer takes schema as a 
parameter</strong>: The schema is now passed via the FileSource:</p>
+<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span><span class="gd">- 
FileScanConfigBuilder::new(url, schema, source)</span>
+<span class="gi">+ FileScanConfigBuilder::new(url, source)</span>
+</pre></div>
+</div>
+</li>
+<li><p><strong>Partition columns are now part of TableSchema</strong>: The 
<code class="docutils literal notranslate"><span 
class="pre">with_table_partition_cols()</span></code> method has been removed 
from <code class="docutils literal notranslate"><span 
class="pre">FileScanConfigBuilder</span></code>. Partition columns are now 
passed as part of the <code class="docutils literal notranslate"><span 
class="pre">TableSchema</span></code> to the FileSource constructor:</p>
+<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span><span class="gi">+ let table_schema = 
TableSchema::new(</span>
+<span class="gi">+     file_schema,</span>
+<span class="gi">+     vec![Arc::new(Field::new(&quot;date&quot;, 
DataType::Utf8, false))],</span>
+<span class="gi">+ );</span>
+<span class="gi">+ let source = ParquetSource::new(table_schema);</span>
+<span class="w"> </span> let config = FileScanConfigBuilder::new(url, source)
+<span class="gd">-     
.with_table_partition_cols(vec![Field::new(&quot;date&quot;, DataType::Utf8, 
false)])</span>
+<span class="w"> </span>     .with_file(partitioned_file)
+<span class="w"> </span>     .build();
+</pre></div>
+</div>
+</li>
+<li><p><strong>FileFormat::file_source() now takes TableSchema 
parameter</strong>: Custom <code class="docutils literal notranslate"><span 
class="pre">FileFormat</span></code> implementations must be updated:</p>
+<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span>impl FileFormat for MyFileFormat {
+<span class="gd">-   fn file_source(&amp;self) -&gt; Arc&lt;dyn FileSource&gt; 
{</span>
+<span class="gi">+   fn file_source(&amp;self, table_schema: TableSchema) 
-&gt; Arc&lt;dyn FileSource&gt; {</span>
+<span class="gd">-       Arc::new(MyFileSource::default())</span>
+<span class="gi">+       Arc::new(MyFileSource::new(table_schema))</span>
+<span class="w"> </span>   }
+}
+</pre></div>
+</div>
+</li>
+</ol>
+<p><strong>Migration examples:</strong></p>
+<p>For Parquet files:</p>
+<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span><span class="gd">- let source = 
Arc::new(ParquetSource::default());</span>
+<span class="gd">- let config = FileScanConfigBuilder::new(url, schema, 
source)</span>
+<span class="gi">+ let table_schema = TableSchema::new(schema, vec![]);</span>
+<span class="gi">+ let source = 
Arc::new(ParquetSource::new(table_schema));</span>
+<span class="gi">+ let config = FileScanConfigBuilder::new(url, source)</span>
+<span class="w"> </span>     .with_file(partitioned_file)
+<span class="w"> </span>     .build();
+</pre></div>
+</div>
+<p>For CSV files with partition columns:</p>
+<div class="highlight-diff notranslate"><div 
class="highlight"><pre><span></span><span class="gd">- let source = 
Arc::new(CsvSource::new(true, b&#39;,&#39;, b&#39;&quot;&#39;));</span>
+<span class="gd">- let config = FileScanConfigBuilder::new(url, file_schema, 
source)</span>
+<span class="gd">-     
.with_table_partition_cols(vec![Field::new(&quot;year&quot;, DataType::Int32, 
false)])</span>
+<span class="gi">+ let options = CsvOptions {</span>
+<span class="gi">+     has_header: Some(true),</span>
+<span class="gi">+     delimiter: b&#39;,&#39;,</span>
+<span class="gi">+     quote: b&#39;&quot;&#39;,</span>
+<span class="gi">+     ..Default::default()</span>
+<span class="gi">+ };</span>
+<span class="gi">+ let table_schema = TableSchema::new(</span>
+<span class="gi">+     file_schema,</span>
+<span class="gi">+     vec![Arc::new(Field::new(&quot;year&quot;, 
DataType::Int32, false))],</span>
+<span class="gi">+ );</span>
+<span class="gi">+ let source = 
Arc::new(CsvSource::new(table_schema).with_csv_options(options));</span>
+<span class="gi">+ let config = FileScanConfigBuilder::new(url, source)</span>
+<span class="w"> </span>     .build();
+</pre></div>
+</div>
+</section>
 <section 
id="introduction-of-tableschema-and-changes-to-filesource-with-schema-method">
 <h3>Introduction of <code class="docutils literal notranslate"><span 
class="pre">TableSchema</span></code> and changes to <code class="docutils 
literal notranslate"><span class="pre">FileSource::with_schema()</span></code> 
method<a class="headerlink" 
href="#introduction-of-tableschema-and-changes-to-filesource-with-schema-method"
 title="Link to this heading">#</a></h3>
 <p>A new <code class="docutils literal notranslate"><span 
class="pre">TableSchema</span></code> struct has been introduced in the <code 
class="docutils literal notranslate"><span 
class="pre">datafusion-datasource</span></code> crate to better manage table 
schemas with partition columns. This struct helps distinguish between:</p>
@@ -1280,7 +1358,7 @@ DataFusion 47.0.0 this has been changed to use <code 
class="docutils literal not
 </pre></div>
 </div>
 <p>Pattern in DataFusion <code class="docutils literal notranslate"><span 
class="pre">47.0.0</span></code>:</p>
-<div class="highlight-rust notranslate"><div 
class="highlight"><pre><span></span><span class="kd">let</span><span class="w"> 
</span><span class="n">config</span><span class="w"> </span><span 
class="o">=</span><span class="w"> </span><span 
class="n">FileScanConfigBuilder</span><span class="p">::</span><span 
class="n">new</span><span class="p">(</span><span class="n">url</span><span 
class="p">,</span><span class="w"> </span><span class="n">schema</span><span 
class="p">,</span><span class=" [...]
+<div class="highlight-rust notranslate"><div 
class="highlight"><pre><span></span><span class="kd">let</span><span class="w"> 
</span><span class="n">config</span><span class="w"> </span><span 
class="o">=</span><span class="w"> </span><span 
class="n">FileScanConfigBuilder</span><span class="p">::</span><span 
class="n">new</span><span class="p">(</span><span class="n">url</span><span 
class="p">,</span><span class="w"> </span><span class="n">Arc</span><span 
class="p">::</span><span class="n" [...]
 <span class="w">  </span><span class="p">.</span><span 
class="n">with_statistics</span><span class="p">(</span><span 
class="n">stats</span><span class="p">)</span>
 <span class="w">  </span><span class="o">..</span><span class="p">.</span>
 <span class="w">  </span><span class="p">.</span><span 
class="n">build</span><span class="p">();</span>
@@ -1575,6 +1653,7 @@ take care of constructing the <code class="docutils 
literal notranslate"><span c
 <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" 
href="#filescanconfig-projection-renamed-to-filescanconfig-projection-exprs"><code
 class="docutils literal notranslate"><span 
class="pre">FileScanConfig::projection</span></code> renamed to <code 
class="docutils literal notranslate"><span 
class="pre">FileScanConfig::projection_exprs</span></code></a></li>
 <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" 
href="#describe-query-support"><code class="docutils literal notranslate"><span 
class="pre">DESCRIBE</span> <span class="pre">query</span></code> 
support</a></li>
 <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" 
href="#datafusion-execution-time-zone-default-configuration-changed"><code 
class="docutils literal notranslate"><span 
class="pre">datafusion.execution.time_zone</span></code> default configuration 
changed</a></li>
+<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" 
href="#refactoring-of-filesource-constructors-and-filescanconfigbuilder-to-accept-schemas-upfront">Refactoring
 of <code class="docutils literal notranslate"><span 
class="pre">FileSource</span></code> constructors and <code class="docutils 
literal notranslate"><span class="pre">FileScanConfigBuilder</span></code> to 
accept schemas upfront</a></li>
 <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" 
href="#introduction-of-tableschema-and-changes-to-filesource-with-schema-method">Introduction
 of <code class="docutils literal notranslate"><span 
class="pre">TableSchema</span></code> and changes to <code class="docutils 
literal notranslate"><span class="pre">FileSource::with_schema()</span></code> 
method</a></li>
 <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" 
href="#aggregateudfimpl-is-ordered-set-aggregate-has-been-renamed-to-aggregateudfimpl-supports-within-group-clause"><code
 class="docutils literal notranslate"><span 
class="pre">AggregateUDFImpl::is_ordered_set_aggregate</span></code> has been 
renamed to <code class="docutils literal notranslate"><span 
class="pre">AggregateUDFImpl::supports_within_group_clause</span></code></a></li>
 </ul>
diff --git a/searchindex.js b/searchindex.js
index 0d58534e58..46f3418a1e 100644
--- a/searchindex.js
+++ b/searchindex.js
@@ -1 +1 @@
-Search.setIndex({"alltitles":{"!=":[[60,"op-neq"]],"!~":[[60,"op-re-not-match"]],"!~*":[[60,"op-re-not-match-i"]],"!~~":[[60,"id19"]],"!~~*":[[60,"id20"]],"#":[[60,"op-bit-xor"]],"%":[[60,"op-modulo"]],"&":[[60,"op-bit-and"]],"(relation,
 name) tuples in logical fields and logical columns are 
unique":[[13,"relation-name-tuples-in-logical-fields-and-logical-columns-are-unique"]],"*":[[60,"op-multiply"]],"+":[[60,"op-plus"]],"-":[[60,"op-minus"]],"/":[[60,"op-divide"]],"<":[[60,"op-lt"]],"<
 [...]
\ No newline at end of file
+Search.setIndex({"alltitles":{"!=":[[60,"op-neq"]],"!~":[[60,"op-re-not-match"]],"!~*":[[60,"op-re-not-match-i"]],"!~~":[[60,"id19"]],"!~~*":[[60,"id20"]],"#":[[60,"op-bit-xor"]],"%":[[60,"op-modulo"]],"&":[[60,"op-bit-and"]],"(relation,
 name) tuples in logical fields and logical columns are 
unique":[[13,"relation-name-tuples-in-logical-fields-and-logical-columns-are-unique"]],"*":[[60,"op-multiply"]],"+":[[60,"op-plus"]],"-":[[60,"op-minus"]],"/":[[60,"op-divide"]],"<":[[60,"op-lt"]],"<
 [...]
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
