This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/asf-site by this push:
new 387b7b11154 deploy: f38283b49b29f77e1bb2b0b2af07718724db3285
387b7b11154 is described below
commit 387b7b1115469ca7de4175b66069af4063774d54
Author: tustvold <[email protected]>
AuthorDate: Tue Apr 9 10:35:43 2024 +0000
deploy: f38283b49b29f77e1bb2b0b2af07718724db3285
---
parquet/arrow/async_reader/index.html | 2 +-
src/parquet/arrow/async_reader/mod.rs.html | 176 +++++++++++++++++++++++++++++
2 files changed, 177 insertions(+), 1 deletion(-)
diff --git a/parquet/arrow/async_reader/index.html
b/parquet/arrow/async_reader/index.html
index 5b4f9f012df..3b668eeac34 100644
--- a/parquet/arrow/async_reader/index.html
+++ b/parquet/arrow/async_reader/index.html
@@ -1,5 +1,5 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta
name="viewport" content="width=device-width, initial-scale=1.0"><meta
name="generator" content="rustdoc"><meta name="description" content="Provides
`async` API for reading parquet files as
`RecordBatch`es"><title>parquet::arrow::async_reader - Rust</title><script> if
(window.location.protocol !== "file:") document.write(`<link rel="preload"
as="font" type="font/woff2" crossorigin
href="../../../static.files/SourceSerif4-Reg [...]
- <main><div class="width-limiter"><nav class="sub"><form
class="search-form"><span></span><div id="sidebar-button" tabindex="-1"><a
href="../../../parquet/all.html" title="show sidebar"></a></div><input
class="search-input" name="search" aria-label="Run search in the documentation"
autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to
search, ‘?’ for more options…" type="search"><div id="help-button"
tabindex="-1"><a href="../../../help.html" title="help">?</a></div [...]
+ <main><div class="width-limiter"><nav class="sub"><form
class="search-form"><span></span><div id="sidebar-button" tabindex="-1"><a
href="../../../parquet/all.html" title="show sidebar"></a></div><input
class="search-input" name="search" aria-label="Run search in the documentation"
autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to
search, ‘?’ for more options…" type="search"><div id="help-button"
tabindex="-1"><a href="../../../help.html" title="help">?</a></div [...]
[<code>RecordBatch</code>]es</p>
<div class="example-wrap"><pre class="rust rust-example-rendered"><code><span
class="kw">let </span>testdata = arrow::util::test_util::parquet_test_data();
diff --git a/src/parquet/arrow/async_reader/mod.rs.html
b/src/parquet/arrow/async_reader/mod.rs.html
index 314093a69e6..022c09377a2 100644
--- a/src/parquet/arrow/async_reader/mod.rs.html
+++ b/src/parquet/arrow/async_reader/mod.rs.html
@@ -1860,6 +1860,94 @@
<a href="#1858" id="1858">1858</a>
<a href="#1859" id="1859">1859</a>
<a href="#1860" id="1860">1860</a>
+<a href="#1861" id="1861">1861</a>
+<a href="#1862" id="1862">1862</a>
+<a href="#1863" id="1863">1863</a>
+<a href="#1864" id="1864">1864</a>
+<a href="#1865" id="1865">1865</a>
+<a href="#1866" id="1866">1866</a>
+<a href="#1867" id="1867">1867</a>
+<a href="#1868" id="1868">1868</a>
+<a href="#1869" id="1869">1869</a>
+<a href="#1870" id="1870">1870</a>
+<a href="#1871" id="1871">1871</a>
+<a href="#1872" id="1872">1872</a>
+<a href="#1873" id="1873">1873</a>
+<a href="#1874" id="1874">1874</a>
+<a href="#1875" id="1875">1875</a>
+<a href="#1876" id="1876">1876</a>
+<a href="#1877" id="1877">1877</a>
+<a href="#1878" id="1878">1878</a>
+<a href="#1879" id="1879">1879</a>
+<a href="#1880" id="1880">1880</a>
+<a href="#1881" id="1881">1881</a>
+<a href="#1882" id="1882">1882</a>
+<a href="#1883" id="1883">1883</a>
+<a href="#1884" id="1884">1884</a>
+<a href="#1885" id="1885">1885</a>
+<a href="#1886" id="1886">1886</a>
+<a href="#1887" id="1887">1887</a>
+<a href="#1888" id="1888">1888</a>
+<a href="#1889" id="1889">1889</a>
+<a href="#1890" id="1890">1890</a>
+<a href="#1891" id="1891">1891</a>
+<a href="#1892" id="1892">1892</a>
+<a href="#1893" id="1893">1893</a>
+<a href="#1894" id="1894">1894</a>
+<a href="#1895" id="1895">1895</a>
+<a href="#1896" id="1896">1896</a>
+<a href="#1897" id="1897">1897</a>
+<a href="#1898" id="1898">1898</a>
+<a href="#1899" id="1899">1899</a>
+<a href="#1900" id="1900">1900</a>
+<a href="#1901" id="1901">1901</a>
+<a href="#1902" id="1902">1902</a>
+<a href="#1903" id="1903">1903</a>
+<a href="#1904" id="1904">1904</a>
+<a href="#1905" id="1905">1905</a>
+<a href="#1906" id="1906">1906</a>
+<a href="#1907" id="1907">1907</a>
+<a href="#1908" id="1908">1908</a>
+<a href="#1909" id="1909">1909</a>
+<a href="#1910" id="1910">1910</a>
+<a href="#1911" id="1911">1911</a>
+<a href="#1912" id="1912">1912</a>
+<a href="#1913" id="1913">1913</a>
+<a href="#1914" id="1914">1914</a>
+<a href="#1915" id="1915">1915</a>
+<a href="#1916" id="1916">1916</a>
+<a href="#1917" id="1917">1917</a>
+<a href="#1918" id="1918">1918</a>
+<a href="#1919" id="1919">1919</a>
+<a href="#1920" id="1920">1920</a>
+<a href="#1921" id="1921">1921</a>
+<a href="#1922" id="1922">1922</a>
+<a href="#1923" id="1923">1923</a>
+<a href="#1924" id="1924">1924</a>
+<a href="#1925" id="1925">1925</a>
+<a href="#1926" id="1926">1926</a>
+<a href="#1927" id="1927">1927</a>
+<a href="#1928" id="1928">1928</a>
+<a href="#1929" id="1929">1929</a>
+<a href="#1930" id="1930">1930</a>
+<a href="#1931" id="1931">1931</a>
+<a href="#1932" id="1932">1932</a>
+<a href="#1933" id="1933">1933</a>
+<a href="#1934" id="1934">1934</a>
+<a href="#1935" id="1935">1935</a>
+<a href="#1936" id="1936">1936</a>
+<a href="#1937" id="1937">1937</a>
+<a href="#1938" id="1938">1938</a>
+<a href="#1939" id="1939">1939</a>
+<a href="#1940" id="1940">1940</a>
+<a href="#1941" id="1941">1941</a>
+<a href="#1942" id="1942">1942</a>
+<a href="#1943" id="1943">1943</a>
+<a href="#1944" id="1944">1944</a>
+<a href="#1945" id="1945">1945</a>
+<a href="#1946" id="1946">1946</a>
+<a href="#1947" id="1947">1947</a>
+<a href="#1948" id="1948">1948</a>
</pre></div><pre class="rust"><code><span class="comment">// Licensed to the
Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -3719,5 +3807,93 @@
<span class="macro">assert_eq!</span>(total_rows, expected);
}
}
+
+ <span class="attr">#[tokio::test]
+ </span><span class="kw">async fn </span>test_row_filter_nested() {
+ <span class="kw">let </span>a = StringArray::from_iter_values([<span
class="string">"a"</span>, <span class="string">"b"</span>, <span
class="string">"b"</span>, <span class="string">"b"</span>, <span
class="string">"c"</span>, <span class="string">"c"</span>]);
+ <span class="kw">let </span>b = StructArray::from(<span
class="macro">vec!</span>[
+ (
+ Arc::new(Field::new(<span class="string">"aa"</span>,
DataType::Utf8, <span class="bool-val">true</span>)),
+ Arc::new(StringArray::from(<span
class="macro">vec!</span>[<span class="string">"a"</span>, <span
class="string">"b"</span>, <span class="string">"b"</span>, <span
class="string">"b"</span>, <span class="string">"c"</span>, <span
class="string">"c"</span>])) <span class="kw">as </span>ArrayRef,
+ ),
+ (
+ Arc::new(Field::new(<span class="string">"bb"</span>,
DataType::Utf8, <span class="bool-val">true</span>)),
+ Arc::new(StringArray::from(<span
class="macro">vec!</span>[<span class="string">"1"</span>, <span
class="string">"2"</span>, <span class="string">"3"</span>, <span
class="string">"4"</span>, <span class="string">"5"</span>, <span
class="string">"6"</span>])) <span class="kw">as </span>ArrayRef,
+ ),
+ ]);
+ <span class="kw">let </span>c = Int32Array::from_iter(<span
class="number">0</span>..<span class="number">6</span>);
+ <span class="kw">let </span>data = RecordBatch::try_from_iter([
+ (<span class="string">"a"</span>, Arc::new(a) <span class="kw">as
</span>ArrayRef),
+ (<span class="string">"b"</span>, Arc::new(b) <span class="kw">as
</span>ArrayRef),
+ (<span class="string">"c"</span>, Arc::new(c) <span class="kw">as
</span>ArrayRef),
+ ])
+ .unwrap();
+
+ <span class="kw">let </span><span class="kw-2">mut </span>buf =
Vec::with_capacity(<span class="number">1024</span>);
+ <span class="kw">let </span><span class="kw-2">mut </span>writer =
ArrowWriter::try_new(<span class="kw-2">&mut </span>buf, data.schema(),
<span class="prelude-val">None</span>).unwrap();
+ writer.write(<span class="kw-2">&</span>data).unwrap();
+ writer.close().unwrap();
+
+ <span class="kw">let </span>data: Bytes = buf.into();
+ <span class="kw">let </span>metadata = parse_metadata(<span
class="kw-2">&</span>data).unwrap();
+ <span class="kw">let </span>parquet_schema =
metadata.file_metadata().schema_descr_ptr();
+
+ <span class="kw">let </span>test = TestReader {
+ data,
+ metadata: Arc::new(metadata),
+ requests: Default::default(),
+ };
+ <span class="kw">let </span>requests = test.requests.clone();
+
+ <span class="kw">let </span>a_scalar =
StringArray::from_iter_values([<span class="string">"b"</span>]);
+ <span class="kw">let </span>a_filter = ArrowPredicateFn::new(
+ ProjectionMask::leaves(<span
class="kw-2">&</span>parquet_schema, <span class="macro">vec!</span>[<span
class="number">0</span>]),
+ <span class="kw">move </span>|batch| eq(batch.column(<span
class="number">0</span>), <span class="kw-2">&</span>Scalar::new(<span
class="kw-2">&</span>a_scalar)),
+ );
+
+ <span class="kw">let </span>b_scalar =
StringArray::from_iter_values([<span class="string">"4"</span>]);
+ <span class="kw">let </span>b_filter = ArrowPredicateFn::new(
+ ProjectionMask::leaves(<span
class="kw-2">&</span>parquet_schema, <span class="macro">vec!</span>[<span
class="number">2</span>]),
+ <span class="kw">move </span>|batch| {
+ <span class="comment">// Filter on the second element of the
struct.
+ </span><span class="kw">let </span>struct_array = batch
+ .column(<span class="number">0</span>)
+ .as_any()
+ .downcast_ref::<StructArray>()
+ .unwrap();
+ eq(struct_array.column(<span class="number">0</span>), <span
class="kw-2">&</span>Scalar::new(<span class="kw-2">&</span>b_scalar))
+ },
+ );
+
+ <span class="kw">let </span>filter = RowFilter::new(<span
class="macro">vec!</span>[Box::new(a_filter), Box::new(b_filter)]);
+
+ <span class="kw">let </span>mask = ProjectionMask::leaves(<span
class="kw-2">&</span>parquet_schema, <span class="macro">vec!</span>[<span
class="number">0</span>, <span class="number">3</span>]);
+ <span class="kw">let </span>stream =
ParquetRecordBatchStreamBuilder::new(test)
+ .<span class="kw">await
+ </span>.unwrap()
+ .with_projection(mask.clone())
+ .with_batch_size(<span class="number">1024</span>)
+ .with_row_filter(filter)
+ .build()
+ .unwrap();
+
+ <span class="kw">let </span>batches: Vec<<span
class="kw">_</span>> = stream.try_collect().<span
class="kw">await</span>.unwrap();
+ <span class="macro">assert_eq!</span>(batches.len(), <span
class="number">1</span>);
+
+ <span class="kw">let </span>batch = <span
class="kw-2">&</span>batches[<span class="number">0</span>];
+ <span class="macro">assert_eq!</span>(batch.num_rows(), <span
class="number">1</span>);
+ <span class="macro">assert_eq!</span>(batch.num_columns(), <span
class="number">2</span>);
+
+ <span class="kw">let </span>col = batch.column(<span
class="number">0</span>);
+ <span class="kw">let </span>val =
col.as_any().downcast_ref::<StringArray>().unwrap().value(<span
class="number">0</span>);
+ <span class="macro">assert_eq!</span>(val, <span
class="string">"b"</span>);
+
+ <span class="kw">let </span>col = batch.column(<span
class="number">1</span>);
+ <span class="kw">let </span>val =
col.as_any().downcast_ref::<Int32Array>().unwrap().value(<span
class="number">0</span>);
+ <span class="macro">assert_eq!</span>(val, <span
class="number">3</span>);
+
+ <span class="comment">// Should only have made 3 requests
+ </span><span
class="macro">assert_eq!</span>(requests.lock().unwrap().len(), <span
class="number">3</span>);
+ }
}
</code></pre></div></section></main></body></html>
\ No newline at end of file