albertlockett opened a new issue, #7545:
URL: https://github.com/apache/arrow-rs/issues/7545

   **Describe the bug**
   I'm not sure if this is a bug in parquet or datafusion. If this is a 
datafusion bug, I'll close here and open in that repo.
   
   If I write a column of type `Dictionary(u8, FixedSizeBinary(_))`, and try to 
read it using datafusion, I get the error:
   ```
   thread 'main' panicked at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/offset_buffer.rs:133:48:
   called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Expected 
1 buffers in array of type FixedSizeBinary(8), got 2")
   ```
   
   **To Reproduce**
   ```rs
   use std::sync::Arc;
   
   use arrow::{
       datatypes::{DataType, Field, Schema},
       util::pretty::print_batches,
   };
   use arrow_array::{FixedSizeBinaryArray, RecordBatch, UInt8Array, 
UInt8DictionaryArray};
   use datafusion::{
       prelude::{ParquetReadOptions, SessionContext},
       sql::TableReference,
   };
   use object_store::{local::LocalFileSystem, path::Path};
   use parquet::{
       arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, 
async_writer::ParquetObjectWriter, AsyncArrowWriter},
       file::properties::WriterProperties,
   };
   
   // Minimal reproducer for the panic described in this report:
   //   1. build a one-column batch of type Dictionary(UInt8, FixedSizeBinary(8)),
   //   2. write it to /tmp/test.parquet,
   //   3. read it back with the parquet crate's own reader (reported to work),
   //   4. read it back through DataFusion (reported to panic).
   // Every fallible step is unwrapped, so any failure aborts the program at
   // the step that caused it.
   #[tokio::main]
   async fn main() {
       // Single column "a" whose type is a u8-keyed dictionary of 8-byte
       // fixed-size binary values.
       let schema = Arc::new(Schema::new(vec![Field::new(
           "a",
           DataType::Dictionary(
               Box::new(DataType::UInt8),
               Box::new(DataType::FixedSizeBinary(8)),
           ),
           true,
       )]));
   
       // Three rows; the keys index into the two dictionary values below.
       let keys = UInt8Array::from_iter_values(vec![0, 0, 1]);
       // Two dictionary values of 8 bytes each: bytes 0..8 and bytes 24..32.
       let values = FixedSizeBinaryArray::try_from_iter(
           vec![
               (0u8..8u8).into_iter().collect::<Vec<u8>>(),
               (24u8..32u8).into_iter().collect::<Vec<u8>>(),
           ]
           .into_iter(),
       )
       .unwrap();
       let arr = UInt8DictionaryArray::new(keys, Arc::new(values));
       let batch = RecordBatch::try_new(schema, vec![Arc::new(arr)]).unwrap();
   
       // write batch to parquet
       // The object store is rooted at /tmp, so the path "test.parquet" below
       // is the same file that is opened as /tmp/test.parquet further down.
       let object_store = 
Arc::new(LocalFileSystem::new_with_prefix("/tmp").unwrap());
       let parquet_object_writer =
           ParquetObjectWriter::new(object_store.clone(), 
Path::from("test.parquet"));
       let mut parquet_writer = AsyncArrowWriter::try_new(
           parquet_object_writer,
           batch.schema().clone(),
           Some(WriterProperties::default()),
       )
       .unwrap();
       parquet_writer.write(&batch).await.unwrap();
       parquet_writer.close().await.unwrap();
   
       // read directly using parquet (this works)
       // Only the first batch produced by the reader is fetched and printed.
       let file = std::fs::File::open("/tmp/test.parquet").unwrap();
       let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
       let mut reader = builder.build().unwrap();
       let read_batch = reader.next().unwrap().unwrap();
       print_batches(&[read_batch]).unwrap();
   
       // read using datafusion (this does not work)
       // Per the stack trace in this report, the panic is raised while the
       // query results are collected, from an unwrap inside the parquet
       // crate's OffsetBuffer::into_array.
       let ctx = SessionContext::new();
       ctx.register_parquet(
           TableReference::bare("tab"),
           "/tmp/test.parquet",
           ParquetReadOptions::default(),
       )
       .await
       .unwrap();
       let df = ctx.sql("select * from tab").await.unwrap();
       let batches = df.collect().await.unwrap();
       print_batches(&batches).unwrap();
   }
   ```
   
   **Expected behavior**
   I think I should be able to read the column in this table.
   
   **Additional context**
   Full stack trace:
   ```
   thread 'main' panicked at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/offset_buffer.rs:133:48:
   called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Expected 
1 buffers in array of type FixedSizeBinary(8), got 2")
   stack backtrace:
      0: rust_begin_unwind
                at 
/rustc/05f9846f893b09a1be1fc8560e33fc3c815cfecb/library/std/src/panicking.rs:695:5
      1: core::panicking::panic_fmt
                at 
/rustc/05f9846f893b09a1be1fc8560e33fc3c815cfecb/library/core/src/panicking.rs:75:14
      2: core::result::unwrap_failed
                at 
/rustc/05f9846f893b09a1be1fc8560e33fc3c815cfecb/library/core/src/result.rs:1704:5
      3: core::result::Result<T,E>::unwrap
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/result.rs:1109:23
      4: parquet::arrow::buffer::offset_buffer::OffsetBuffer<I>::into_array
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/offset_buffer.rs:133:21
      5: 
parquet::arrow::buffer::dictionary_buffer::DictionaryBuffer<K,V>::into_array
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/dictionary_buffer.rs:187:39
      6: 
<parquet::arrow::array_reader::byte_array_dictionary::ByteArrayDictionaryReader<K,V>
 as parquet::arrow::array_reader::ArrayReader>::consume_batch
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/array_reader/byte_array_dictionary.rs:170:21
      7: <parquet::arrow::array_reader::struct_array::StructArrayReader as 
parquet::arrow::array_reader::ArrayReader>::consume_batch::{{closure}}
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/array_reader/struct_array.rs:111:27
      8: core::iter::adapters::map::map_try_fold::{{closure}}
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:95:28
      9: core::iter::traits::iterator::Iterator::try_fold
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:2370:21
     10: <core::iter::adapters::map::Map<I,F> as 
core::iter::traits::iterator::Iterator>::try_fold
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:121:9
     11: <core::iter::adapters::GenericShunt<I,R> as 
core::iter::traits::iterator::Iterator>::try_fold
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/mod.rs:191:9
     12: core::iter::traits::iterator::Iterator::try_for_each
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:2431:9
     13: <core::iter::adapters::GenericShunt<I,R> as 
core::iter::traits::iterator::Iterator>::next
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/mod.rs:174:14
     14: <alloc::vec::Vec<T> as 
alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/spec_from_iter_nested.rs:25:32
     15: <alloc::vec::Vec<T> as 
alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/spec_from_iter.rs:34:9
     16: <alloc::vec::Vec<T> as 
core::iter::traits::collect::FromIterator<T>>::from_iter
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/mod.rs:3424:9
     17: core::iter::traits::iterator::Iterator::collect
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:1971:9
     18: <core::result::Result<V,E> as 
core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter::{{closure}}
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/result.rs:1985:51
     19: core::iter::adapters::try_process
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/mod.rs:160:17
     20: <core::result::Result<V,E> as 
core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/result.rs:1985:9
     21: core::iter::traits::iterator::Iterator::collect
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:1971:9
     22: <parquet::arrow::array_reader::struct_array::StructArrayReader as 
parquet::arrow::array_reader::ArrayReader>::consume_batch
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/array_reader/struct_array.rs:108:30
     23: <parquet::arrow::arrow_reader::ParquetRecordBatchReader as 
core::iter::traits::iterator::Iterator>::next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/arrow_reader/mod.rs:855:15
     24: <parquet::arrow::async_reader::ParquetRecordBatchStream<T> as 
futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/async_reader/mod.rs:811:62
     25: <S as futures_core::stream::TryStream>::try_poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:206:9
     26: <futures_util::stream::try_stream::into_stream::IntoStream<St> as 
futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/try_stream/into_stream.rs:38:9
     27: <futures_util::stream::stream::map::Map<St,F> as 
futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/stream/map.rs:58:26
     28: <futures_util::stream::try_stream::MapErr<St,F> as 
futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/lib.rs:97:13
     29: <futures_util::stream::stream::map::Map<St,F> as 
futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/stream/map.rs:58:26
     30: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:130:9
     31: futures_util::stream::stream::StreamExt::poll_next_unpin
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/stream/mod.rs:1638:9
     32: datafusion_datasource::file_stream::FileStream::poll_inner
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-datasource-47.0.0/src/file_stream.rs:220:34
     33: <datafusion_datasource::file_stream::FileStream as 
futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-datasource-47.0.0/src/file_stream.rs:333:22
     34: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:130:9
     35: <S as futures_core::stream::TryStream>::try_poll_next
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:206:9
     36: <futures_util::stream::try_stream::try_collect::TryCollect<St,C> as 
core::future::future::Future>::poll
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/try_stream/try_collect.rs:46:26
     37: datafusion_physical_plan::common::collect::{{closure}}
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-physical-plan-47.0.0/src/common.rs:45:36
     38: datafusion_physical_plan::execution_plan::collect::{{closure}}
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-physical-plan-47.0.0/src/execution_plan.rs:868:36
     39: datafusion::dataframe::DataFrame::collect::{{closure}}
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-47.0.0/src/dataframe/mod.rs:1351:33
     40: parquet_bug_repro::main::{{closure}}
                at ./src/bin/parquet_bug_repro.rs:72:32
     41: <core::pin::Pin<P> as core::future::future::Future>::poll
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/future/future.rs:124:9
     42: tokio::runtime::park::CachedParkThread::block_on::{{closure}}
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/park.rs:284:60
     43: tokio::task::coop::with_budget
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/task/coop/mod.rs:167:5
     44: tokio::task::coop::budget
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/task/coop/mod.rs:133:5
     45: tokio::runtime::park::CachedParkThread::block_on
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/park.rs:284:31
     46: tokio::runtime::context::blocking::BlockingRegionGuard::block_on
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/context/blocking.rs:66:9
     47: 
tokio::runtime::scheduler::multi_thread::MultiThread::block_on::{{closure}}
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/scheduler/multi_thread/mod.rs:87:13
     48: tokio::runtime::context::runtime::enter_runtime
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/context/runtime.rs:65:16
     49: tokio::runtime::scheduler::multi_thread::MultiThread::block_on
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/scheduler/multi_thread/mod.rs:86:9
     50: tokio::runtime::runtime::Runtime::block_on_inner
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/runtime.rs:358:45
     51: tokio::runtime::runtime::Runtime::block_on
                at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/runtime.rs:328:13
     52: parquet_bug_repro::main
                at ./src/bin/parquet_bug_repro.rs:73:5
     53: core::ops::function::FnOnce::call_once
                at 
/Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5
   note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose 
backtrace.
   ```
   
   Versions:
   ```toml
   arrow = { version = "55", features = ["prettyprint", "chrono-tz"] }
   arrow-array = "55"
   datafusion = "47"
   parquet = { version = "55", features = ["arrow", "async", "object_store"]}
   object_store = "0.12"
   tokio = { version = "1", features = ["full"] }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to