jorgecarleitao opened a new issue #786:
URL: https://github.com/apache/arrow-rs/issues/786
```rust
use std::sync::Arc;
use parquet::arrow::*;
use parquet::file::reader::SerializedFileReader;
use parquet::file::serialized_reader::SliceableCursor;
/// Reads the in-memory file held in `buffer` through the parquet Arrow reader,
/// projecting only the column at index `column`, and prints column 0 of the
/// first record batch using its `Debug` representation.
///
/// This is a minimal reproducer, so every fallible step is unwrapped: a
/// failure to open the file, to build the record reader, or to produce a
/// first batch panics instead of being reported.
fn read(buffer: Arc<Vec<u8>>, column: usize) {
    // Second argument of `get_record_reader_by_columns`
    // (presumably the maximum number of rows per batch; confirm against the parquet docs).
    let batch_size = 10;
    let projection = vec![column];

    let cursor = SliceableCursor::new(buffer);
    let parquet_reader = SerializedFileReader::new(cursor).unwrap();
    let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(parquet_reader));

    let mut batches = arrow_reader
        .get_record_reader_by_columns(projection, batch_size)
        .unwrap();

    // Only the first batch is needed to trigger the reported behaviour.
    let first_batch = batches.next().unwrap().unwrap();
    let first_column = first_batch.column(0);
    println!("{:?}", first_column);
}
/// Builds a small parquet file in memory and hands it to `read`, asking for
/// column index 0.
///
/// The byte literal starts and ends with `80, 65, 82, 49` (ASCII `PAR1`).
/// The recurring run `104, 101, 255, 108, 111` is `h`, `e`, `0xFF`, `l`, `o`;
/// `0xFF` can never appear in well-formed UTF-8, so the stored string value
/// is not valid UTF-8.
fn main() {
// A parquet file with a single column whose string value contains invalid
// UTF-8 (the 255 byte inside `104, 101, 255, 108, 111`).
// The bytes must be kept exactly as they are: they are the reproducer.
let data = vec![
80, 65, 82, 49, 21, 6, 21, 22, 21, 22, 92, 21, 2, 21, 0, 21, 2, 21,
0, 21, 4, 21,
0, 18, 28, 54, 0, 40, 5, 104, 101, 255, 108, 111, 24, 5, 104, 101,
255, 108, 111,
0, 0, 0, 3, 1, 5, 0, 0, 0, 104, 101, 255, 108, 111, 38, 110, 28, 21,
12, 25, 37,
6, 0, 25, 24, 2, 99, 49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60,
54, 0, 40, 5,
104, 101, 255, 108, 111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0,
21, 4, 25, 44,
72, 4, 114, 111, 111, 116, 21, 2, 0, 21, 12, 37, 2, 24, 2, 99, 49,
37, 0, 76, 28,
0, 0, 0, 22, 2, 25, 28, 25, 28, 38, 110, 28, 21, 12, 25, 37, 6, 0,
25, 24, 2, 99,
49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60, 54, 0, 40, 5, 104,
101, 255, 108,
111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 22, 102, 22, 2, 0, 40,
44, 65, 114,
114, 111, 119, 50, 32, 45, 32, 78, 97, 116, 105, 118, 101, 32, 82,
117, 115, 116,
32, 105, 109, 112, 108, 101, 109, 101, 110, 116, 97, 116, 105, 111,
110, 32, 111,
102, 32, 65, 114, 114, 111, 119, 0, 130, 0, 0, 0, 80, 65, 82, 49,
];
// Read column index 0 of the in-memory file and print it.
read(Arc::new(data), 0)
}
```
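Running this example under Miri (`cargo miri run --example unsafe`) reports undefined behavior while `Debug`-formatting the resulting `StringArray`. Miri output: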
```
cargo miri run --example unsafe
StringArray
[
error: Undefined Behavior: type validation failed: encountered 0x001ecbc0,
but expected a valid unicode scalar value (in `0..=0x10FFFF` but not in
`0xD800..=0xDFFF`)
-->
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78
|
94 | if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else {
unsafe { transmute(i) } }
|
^^^^^^^^^^^^ type validation failed: encountered 0x001ecbc0, but expected
a valid unicode scalar value (in `0..=0x10FFFF` but not in `0xD800..=0xDFFF`)
|
= help: this indicates a bug in the program: it performed an invalid
operation, and caused Undefined Behavior
= help: see
https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html
for further information
= note: inside `std::char::from_u32_unchecked` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78
= note: inside closure at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:43:22
= note: inside `std::option::Option::<u32>::map::<char,
[closure@<std::str::Chars as std::iter::Iterator>::next::{closure#0}]>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/option.rs:489:29
= note: inside `<std::str::Chars as std::iter::Iterator>::next` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:41:9
= note: inside `<std::str::CharIndices as std::iter::Iterator>::next` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:140:15
= note: inside `<str as std::fmt::Debug>::fmt` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2079:23
= note: inside `<&str as std::fmt::Debug>::fmt` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62
= note: inside closure at
/home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:277:13
= note: inside
`arrow::array::array::print_long_array::<arrow::array::array_string::GenericStringArray<i32>,
[closure@<arrow::array::array_string::GenericStringArray<i32> as
std::fmt::Debug>::fmt::{closure#0}]>` at
/home/azureuser/projects/arrow-rs/arrow/src/array/array.rs:561:13
= note: inside `<arrow::array::array_string::GenericStringArray<i32> as
std::fmt::Debug>::fmt` at
/home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:276:9
= note: inside `<std::sync::Arc<dyn arrow::array::array::Array> as
std::fmt::Debug>::fmt` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/sync.rs:2266:9
= note: inside `<&std::sync::Arc<dyn arrow::array::array::Array> as
std::fmt::Debug>::fmt` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62
= note: inside `std::fmt::write` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:1112:17
= note: inside `<std::io::StdoutLock as std::io::Write>::write_fmt` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/mod.rs:1640:15
= note: inside `<&std::io::Stdout as std::io::Write>::write_fmt` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:657:9
= note: inside `<std::io::Stdout as std::io::Write>::write_fmt` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:631:9
= note: inside `std::io::stdio::print_to::<std::io::Stdout>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:934:21
= note: inside `std::io::_print` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:947:5
note: inside `read` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/macros.rs:97:9
--> parquet/examples/unsafe.rs:18:5
|
18 | println!("{:?}", batch.column(0));
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `main` at parquet/examples/unsafe.rs:38:5
--> parquet/examples/unsafe.rs:38:5
|
38 | read(Arc::new(data), 0)
| ^^^^^^^^^^^^^^^^^^^^^^^
= note: inside `<fn() as std::ops::FnOnce<()>>::call_once - shim(fn())`
at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:227:5
= note: inside
`std::sys_common::backtrace::__rust_begin_short_backtrace::<fn(), ()>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:125:18
= note: inside closure at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:63:18
= note: inside `std::ops::function::impls::<impl std::ops::FnOnce<()> for
&dyn std::ops::Fn() -> i32 + std::marker::Sync +
std::panic::RefUnwindSafe>::call_once` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:259:13
= note: inside `std::panicking::r#try::do_call::<&dyn std::ops::Fn() ->
i32 + std::marker::Sync + std::panic::RefUnwindSafe, i32>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40
= note: inside `std::panicking::r#try::<i32, &dyn std::ops::Fn() -> i32 +
std::marker::Sync + std::panic::RefUnwindSafe>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19
= note: inside `std::panic::catch_unwind::<&dyn std::ops::Fn() -> i32 +
std::marker::Sync + std::panic::RefUnwindSafe, i32>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14
= note: inside closure at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:48
= note: inside
`std::panicking::r#try::do_call::<[closure@std::rt::lang_start_internal::{closure#2}],
isize>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40
= note: inside `std::panicking::r#try::<isize,
[closure@std::rt::lang_start_internal::{closure#2}]>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19
= note: inside
`std::panic::catch_unwind::<[closure@std::rt::lang_start_internal::{closure#2}],
isize>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14
= note: inside `std::rt::lang_start_internal` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:20
= note: inside `std::rt::lang_start::<()>` at
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:62:5
= note: this error originates in the macro `println` (in Nightly builds,
run with -Z macro-backtrace for more info)
error: aborting due to previous error
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]