jorgecarleitao opened a new issue #786: URL: https://github.com/apache/arrow-rs/issues/786
```rust use std::sync::Arc; use parquet::arrow::*; use parquet::file::reader::SerializedFileReader; use parquet::file::serialized_reader::SliceableCursor; fn read(buffer: Arc<Vec<u8>>, column: usize) { let file = SliceableCursor::new(buffer); let file_reader = SerializedFileReader::new(file).unwrap(); let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader)); let mut record_batch_reader = arrow_reader .get_record_reader_by_columns(vec![column], 10) .unwrap(); let batch = record_batch_reader.next().unwrap().unwrap(); println!("{:?}", batch.column(0)); } fn main() { // a parquet file with 1 column with invalid utf8 let data = vec![ 80, 65, 82, 49, 21, 6, 21, 22, 21, 22, 92, 21, 2, 21, 0, 21, 2, 21, 0, 21, 4, 21, 0, 18, 28, 54, 0, 40, 5, 104, 101, 255, 108, 111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 3, 1, 5, 0, 0, 0, 104, 101, 255, 108, 111, 38, 110, 28, 21, 12, 25, 37, 6, 0, 25, 24, 2, 99, 49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60, 54, 0, 40, 5, 104, 101, 255, 108, 111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 21, 4, 25, 44, 72, 4, 114, 111, 111, 116, 21, 2, 0, 21, 12, 37, 2, 24, 2, 99, 49, 37, 0, 76, 28, 0, 0, 0, 22, 2, 25, 28, 25, 28, 38, 110, 28, 21, 12, 25, 37, 6, 0, 25, 24, 2, 99, 49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60, 54, 0, 40, 5, 104, 101, 255, 108, 111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 22, 102, 22, 2, 0, 40, 44, 65, 114, 114, 111, 119, 50, 32, 45, 32, 78, 97, 116, 105, 118, 101, 32, 82, 117, 115, 116, 32, 105, 109, 112, 108, 101, 109, 101, 110, 116, 97, 116, 105, 111, 110, 32, 111, 102, 32, 65, 114, 114, 111, 119, 0, 130, 0, 0, 0, 80, 65, 82, 49, ]; read(Arc::new(data), 0) } ``` miri output ``` cargo miri run --example unsafe StringArray [ error: Undefined Behavior: type validation failed: encountered 0x001ecbc0, but expected a valid unicode scalar value (in `0..=0x10FFFF` but not in `0xD800..=0xDFFF`) --> /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78 | 94 | if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { unsafe { transmute(i) } } | ^^^^^^^^^^^^ type validation failed: encountered 0x001ecbc0, but expected a valid unicode scalar value (in `0..=0x10FFFF` but not in `0xD800..=0xDFFF`) | = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information = note: inside `std::char::from_u32_unchecked` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78 = note: inside closure at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:43:22 = note: inside `std::option::Option::<u32>::map::<char, [closure@<std::str::Chars as std::iter::Iterator>::next::{closure#0}]>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/option.rs:489:29 = note: inside `<std::str::Chars as std::iter::Iterator>::next` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:41:9 = note: inside `<std::str::CharIndices as std::iter::Iterator>::next` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:140:15 = note: inside `<str as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2079:23 = note: inside `<&str as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62 = note: inside closure at /home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:277:13 = note: inside `arrow::array::array::print_long_array::<arrow::array::array_string::GenericStringArray<i32>, [closure@<arrow::array::array_string::GenericStringArray<i32> as std::fmt::Debug>::fmt::{closure#0}]>` at /home/azureuser/projects/arrow-rs/arrow/src/array/array.rs:561:13 = note: inside `<arrow::array::array_string::GenericStringArray<i32> as std::fmt::Debug>::fmt` at /home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:276:9 = note: inside `<std::sync::Arc<dyn arrow::array::array::Array> as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/sync.rs:2266:9 = note: inside `<&std::sync::Arc<dyn arrow::array::array::Array> as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62 = note: inside `std::fmt::write` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:1112:17 = note: inside `<std::io::StdoutLock as std::io::Write>::write_fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/mod.rs:1640:15 = note: inside `<&std::io::Stdout as std::io::Write>::write_fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:657:9 = note: inside `<std::io::Stdout as std::io::Write>::write_fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:631:9 = note: inside `std::io::stdio::print_to::<std::io::Stdout>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:934:21 = note: inside `std::io::_print` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:947:5 note: inside `read` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/macros.rs:97:9 --> parquet/examples/unsafe.rs:18:5 | 18 | println!("{:?}", batch.column(0)); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ note: inside `main` at parquet/examples/unsafe.rs:38:5 --> parquet/examples/unsafe.rs:38:5 | 38 | read(Arc::new(data), 0) | ^^^^^^^^^^^^^^^^^^^^^^^ = note: inside `<fn() as std::ops::FnOnce<()>>::call_once - shim(fn())` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:227:5 = note: inside `std::sys_common::backtrace::__rust_begin_short_backtrace::<fn(), ()>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:125:18 = note: inside closure at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:63:18 = note: inside `std::ops::function::impls::<impl std::ops::FnOnce<()> for &dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe>::call_once` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:259:13 = note: inside `std::panicking::r#try::do_call::<&dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe, i32>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40 = note: inside `std::panicking::r#try::<i32, &dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19 = note: inside `std::panic::catch_unwind::<&dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe, i32>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14 = note: inside closure at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:48 = note: inside `std::panicking::r#try::do_call::<[closure@std::rt::lang_start_internal::{closure#2}], isize>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40 = note: inside `std::panicking::r#try::<isize, [closure@std::rt::lang_start_internal::{closure#2}]>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19 = note: inside `std::panic::catch_unwind::<[closure@std::rt::lang_start_internal::{closure#2}], isize>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14 = note: inside `std::rt::lang_start_internal` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:20 = note: inside `std::rt::lang_start::<()>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:62:5 = note: this error originates in the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to previous error ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org