albertlockett opened a new issue, #7545: URL: https://github.com/apache/arrow-rs/issues/7545
**Describe the bug** I'm not sure if this is a bug in parquet or datafusion. If this is a datafusion bug, I'll close here and open in that repo. If I write a column of type `Dictionary(u8, FixedSizeBinary(_))`, and try to read it using datafusion, I get the error: ``` thread 'main' panicked at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/offset_buffer.rs:133:48: called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Expected 1 buffers in array of type FixedSizeBinary(8), got 2") ``` **To Reproduce** ```rs use std::sync::Arc; use arrow::{ datatypes::{DataType, Field, Schema}, util::pretty::print_batches, }; use arrow_array::{FixedSizeBinaryArray, RecordBatch, UInt8Array, UInt8DictionaryArray}; use datafusion::{ prelude::{ParquetReadOptions, SessionContext}, sql::TableReference, }; use object_store::{local::LocalFileSystem, path::Path}; use parquet::{ arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, async_writer::ParquetObjectWriter, AsyncArrowWriter}, file::properties::WriterProperties, }; #[tokio::main] async fn main() { let schema = Arc::new(Schema::new(vec![Field::new( "a", DataType::Dictionary( Box::new(DataType::UInt8), Box::new(DataType::FixedSizeBinary(8)), ), true, )])); let keys = UInt8Array::from_iter_values(vec![0, 0, 1]); // let values = ; let values = FixedSizeBinaryArray::try_from_iter( vec![ (0u8..8u8).into_iter().collect::<Vec<u8>>(), (24u8..32u8).into_iter().collect::<Vec<u8>>(), ] .into_iter(), ) .unwrap(); let arr = UInt8DictionaryArray::new(keys, Arc::new(values)); let batch = RecordBatch::try_new(schema, vec![Arc::new(arr)]).unwrap(); // write batch to parquet let object_store = Arc::new(LocalFileSystem::new_with_prefix("/tmp").unwrap()); let parquet_object_writer = ParquetObjectWriter::new(object_store.clone(), Path::from("test.parquet")); let mut parquet_writer = AsyncArrowWriter::try_new( parquet_object_writer, batch.schema().clone(), 
Some(WriterProperties::default()), ) .unwrap(); parquet_writer.write(&batch).await.unwrap(); parquet_writer.close().await.unwrap(); // read directly using parquet (this works) let file = std::fs::File::open("/tmp/test.parquet").unwrap(); let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); let mut reader = builder.build().unwrap(); let read_batch = reader.next().unwrap().unwrap(); print_batches(&[read_batch]).unwrap(); // read using datafusion (this does not work) let ctx = SessionContext::new(); ctx.register_parquet( TableReference::bare("tab"), "/tmp/test.parquet", ParquetReadOptions::default(), ) .await .unwrap(); let df = ctx.sql("select * from tab").await.unwrap(); let batches = df.collect().await.unwrap(); print_batches(&batches).unwrap(); } ``` **Expected behavior** I think I should be able to read the column in this table. **Additional context** Full stack trace: ``` thread 'main' panicked at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/offset_buffer.rs:133:48: called `Result::unwrap()` on an `Err` value: InvalidArgumentError("Expected 1 buffers in array of type FixedSizeBinary(8), got 2") stack backtrace: 0: rust_begin_unwind at /rustc/05f9846f893b09a1be1fc8560e33fc3c815cfecb/library/std/src/panicking.rs:695:5 1: core::panicking::panic_fmt at /rustc/05f9846f893b09a1be1fc8560e33fc3c815cfecb/library/core/src/panicking.rs:75:14 2: core::result::unwrap_failed at /rustc/05f9846f893b09a1be1fc8560e33fc3c815cfecb/library/core/src/result.rs:1704:5 3: core::result::Result<T,E>::unwrap at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/result.rs:1109:23 4: parquet::arrow::buffer::offset_buffer::OffsetBuffer<I>::into_array at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/offset_buffer.rs:133:21 5: parquet::arrow::buffer::dictionary_buffer::DictionaryBuffer<K,V>::into_array at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/buffer/dictionary_buffer.rs:187:39 6: <parquet::arrow::array_reader::byte_array_dictionary::ByteArrayDictionaryReader<K,V> as parquet::arrow::array_reader::ArrayReader>::consume_batch at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/array_reader/byte_array_dictionary.rs:170:21 7: <parquet::arrow::array_reader::struct_array::StructArrayReader as parquet::arrow::array_reader::ArrayReader>::consume_batch::{{closure}} at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/array_reader/struct_array.rs:111:27 8: core::iter::adapters::map::map_try_fold::{{closure}} at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:95:28 9: core::iter::traits::iterator::Iterator::try_fold at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:2370:21 10: <core::iter::adapters::map::Map<I,F> as core::iter::traits::iterator::Iterator>::try_fold at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/map.rs:121:9 11: <core::iter::adapters::GenericShunt<I,R> as core::iter::traits::iterator::Iterator>::try_fold at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/mod.rs:191:9 12: core::iter::traits::iterator::Iterator::try_for_each at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:2431:9 13: <core::iter::adapters::GenericShunt<I,R> as core::iter::traits::iterator::Iterator>::next at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/mod.rs:174:14 14: <alloc::vec::Vec<T> as 
alloc::vec::spec_from_iter_nested::SpecFromIterNested<T,I>>::from_iter at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/spec_from_iter_nested.rs:25:32 15: <alloc::vec::Vec<T> as alloc::vec::spec_from_iter::SpecFromIter<T,I>>::from_iter at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/spec_from_iter.rs:34:9 16: <alloc::vec::Vec<T> as core::iter::traits::collect::FromIterator<T>>::from_iter at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/alloc/src/vec/mod.rs:3424:9 17: core::iter::traits::iterator::Iterator::collect at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:1971:9 18: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter::{{closure}} at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/result.rs:1985:51 19: core::iter::adapters::try_process at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/adapters/mod.rs:160:17 20: <core::result::Result<V,E> as core::iter::traits::collect::FromIterator<core::result::Result<A,E>>>::from_iter at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/result.rs:1985:9 21: core::iter::traits::iterator::Iterator::collect at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:1971:9 22: <parquet::arrow::array_reader::struct_array::StructArrayReader as parquet::arrow::array_reader::ArrayReader>::consume_batch at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/array_reader/struct_array.rs:108:30 23: <parquet::arrow::arrow_reader::ParquetRecordBatchReader as 
core::iter::traits::iterator::Iterator>::next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/arrow_reader/mod.rs:855:15 24: <parquet::arrow::async_reader::ParquetRecordBatchStream<T> as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/parquet-55.1.0/src/arrow/async_reader/mod.rs:811:62 25: <S as futures_core::stream::TryStream>::try_poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:206:9 26: <futures_util::stream::try_stream::into_stream::IntoStream<St> as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/try_stream/into_stream.rs:38:9 27: <futures_util::stream::stream::map::Map<St,F> as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/stream/map.rs:58:26 28: <futures_util::stream::try_stream::MapErr<St,F> as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/lib.rs:97:13 29: <futures_util::stream::stream::map::Map<St,F> as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/stream/map.rs:58:26 30: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:130:9 31: futures_util::stream::stream::StreamExt::poll_next_unpin at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/stream/mod.rs:1638:9 32: datafusion_datasource::file_stream::FileStream::poll_inner at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-datasource-47.0.0/src/file_stream.rs:220:34 33: <datafusion_datasource::file_stream::FileStream as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-datasource-47.0.0/src/file_stream.rs:333:22 34: <core::pin::Pin<P> as futures_core::stream::Stream>::poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:130:9 35: <S as futures_core::stream::TryStream>::try_poll_next at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-core-0.3.31/src/stream.rs:206:9 36: <futures_util::stream::try_stream::try_collect::TryCollect<St,C> as core::future::future::Future>::poll at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/futures-util-0.3.31/src/stream/try_stream/try_collect.rs:46:26 37: datafusion_physical_plan::common::collect::{{closure}} at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-physical-plan-47.0.0/src/common.rs:45:36 38: datafusion_physical_plan::execution_plan::collect::{{closure}} at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-physical-plan-47.0.0/src/execution_plan.rs:868:36 39: datafusion::dataframe::DataFrame::collect::{{closure}} at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/datafusion-47.0.0/src/dataframe/mod.rs:1351:33 40: parquet_bug_repro::main::{{closure}} at ./src/bin/parquet_bug_repro.rs:72:32 41: <core::pin::Pin<P> as core::future::future::Future>::poll at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/future/future.rs:124:9 42: tokio::runtime::park::CachedParkThread::block_on::{{closure}} at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/park.rs:284:60 43: tokio::task::coop::with_budget at 
/Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/task/coop/mod.rs:167:5 44: tokio::task::coop::budget at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/task/coop/mod.rs:133:5 45: tokio::runtime::park::CachedParkThread::block_on at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/park.rs:284:31 46: tokio::runtime::context::blocking::BlockingRegionGuard::block_on at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/context/blocking.rs:66:9 47: tokio::runtime::scheduler::multi_thread::MultiThread::block_on::{{closure}} at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/scheduler/multi_thread/mod.rs:87:13 48: tokio::runtime::context::runtime::enter_runtime at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/context/runtime.rs:65:16 49: tokio::runtime::scheduler::multi_thread::MultiThread::block_on at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/scheduler/multi_thread/mod.rs:86:9 50: tokio::runtime::runtime::Runtime::block_on_inner at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/runtime.rs:358:45 51: tokio::runtime::runtime::Runtime::block_on at /Users/a.lockett/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/tokio-1.45.0/src/runtime/runtime.rs:328:13 52: parquet_bug_repro::main at ./src/bin/parquet_bug_repro.rs:73:5 53: core::ops::function::FnOnce::call_once at /Users/a.lockett/.rustup/toolchains/stable-aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/ops/function.rs:250:5 note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace. 
``` Versions: ```toml arrow = { version = "55", features = ["prettyprint", "chrono-tz"] } arrow-array = "55" datafusion = "47" parquet = { version = "55", features = ["arrow", "async", "object_store"]} object_store = "0.12" tokio = { version = "1", features = ["full"] } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org