jorgecarleitao opened a new issue #786:
URL: https://github.com/apache/arrow-rs/issues/786


   ```rust
   use std::sync::Arc;
   
   use parquet::arrow::*;
   use parquet::file::reader::SerializedFileReader;
   use parquet::file::serialized_reader::SliceableCursor;
   
   /// Reads the first record batch of one column from an in-memory Parquet
   /// file and prints that column using its `Debug` formatting.
   ///
   /// `buffer` holds the complete bytes of the file; `column` is the index
   /// handed to `get_record_reader_by_columns` to select what is decoded.
   ///
   /// Every fallible step is `unwrap`ped, so any failure while opening or
   /// decoding (or a reader that yields no batch) panics instead of being
   /// returned to the caller.
   fn read(buffer: Arc<Vec<u8>>, column: usize) {
       // Wrap the shared byte buffer in a cursor that is handed to the Parquet reader below.
       let file = SliceableCursor::new(buffer);
   
       let file_reader = SerializedFileReader::new(file).unwrap();
       let mut arrow_reader = 
ParquetFileArrowReader::new(Arc::new(file_reader));
   
       // Only the requested column is read.
       // NOTE(review): 10 is presumably the batch size (rows per batch) — confirm against the parquet API.
       let mut record_batch_reader = arrow_reader
           .get_record_reader_by_columns(vec![column], 10)
           .unwrap();
   
       // Take just the first batch: outer unwrap = "a batch exists", inner unwrap = "it decoded without error".
       let batch = record_batch_reader.next().unwrap().unwrap();
       // Debug-formatting the column is the call that the Miri report further down flags:
       // the trace goes from this `println!` into `<str as Debug>::fmt` for a `GenericStringArray<i32>`.
       println!("{:?}", batch.column(0));
   }
   
   /// Entry point of the reproduction: builds the bytes of a small Parquet
   /// file in memory and reads column 0 from it via `read`.
   fn main() {
       // a parquet file with 1 column with invalid utf8
       //
       // The buffer starts and ends with 80, 65, 82, 49 (ASCII "PAR1").
       // The repeated byte run 104, 101, 255, 108, 111 is 'h', 'e', 0xFF, 'l', 'o';
       // 0xFF can never occur in well-formed UTF-8, which is what makes the data invalid.
       let data = vec![
           80, 65, 82, 49, 21, 6, 21, 22, 21, 22, 92, 21, 2, 21, 0, 21, 2, 21, 
0, 21, 4, 21,
           0, 18, 28, 54, 0, 40, 5, 104, 101, 255, 108, 111, 24, 5, 104, 101, 
255, 108, 111,
           0, 0, 0, 3, 1, 5, 0, 0, 0, 104, 101, 255, 108, 111, 38, 110, 28, 21, 
12, 25, 37,
           6, 0, 25, 24, 2, 99, 49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60, 
54, 0, 40, 5,
           104, 101, 255, 108, 111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 
21, 4, 25, 44,
           72, 4, 114, 111, 111, 116, 21, 2, 0, 21, 12, 37, 2, 24, 2, 99, 49, 
37, 0, 76, 28,
           0, 0, 0, 22, 2, 25, 28, 25, 28, 38, 110, 28, 21, 12, 25, 37, 6, 0, 
25, 24, 2, 99,
           49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60, 54, 0, 40, 5, 104, 
101, 255, 108,
           111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 22, 102, 22, 2, 0, 40, 
44, 65, 114,
           114, 111, 119, 50, 32, 45, 32, 78, 97, 116, 105, 118, 101, 32, 82, 
117, 115, 116,
           32, 105, 109, 112, 108, 101, 109, 101, 110, 116, 97, 116, 105, 111, 
110, 32, 111,
           102, 32, 65, 114, 114, 111, 119, 0, 130, 0, 0, 0, 80, 65, 82, 49,
       ];
   
       // Decode and print the file's first column (index 0); this is the call that reaches the reported UB.
       read(Arc::new(data), 0)
   }
   ```
   
   Miri output when the example above is run under `cargo miri`:
   
   ```
   cargo miri run --example unsafe
   
   StringArray
   [
   error: Undefined Behavior: type validation failed: encountered 0x001ecbc0, 
but expected a valid unicode scalar value (in `0..=0x10FFFF` but not in 
`0xD800..=0xDFFF`)
     --> 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78
      |
   94 |     if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { 
unsafe { transmute(i) } }
      |                                                                         
     ^^^^^^^^^^^^ type validation failed: encountered 0x001ecbc0, but expected 
a valid unicode scalar value (in `0..=0x10FFFF` but not in `0xD800..=0xDFFF`)
      |
      = help: this indicates a bug in the program: it performed an invalid 
operation, and caused Undefined Behavior
      = help: see 
https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html 
for further information
              
      = note: inside `std::char::from_u32_unchecked` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78
      = note: inside closure at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:43:22
      = note: inside `std::option::Option::<u32>::map::<char, 
[closure@<std::str::Chars as std::iter::Iterator>::next::{closure#0}]>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/option.rs:489:29
      = note: inside `<std::str::Chars as std::iter::Iterator>::next` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:41:9
      = note: inside `<std::str::CharIndices as std::iter::Iterator>::next` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:140:15
      = note: inside `<str as std::fmt::Debug>::fmt` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2079:23
      = note: inside `<&str as std::fmt::Debug>::fmt` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62
      = note: inside closure at 
/home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:277:13
      = note: inside 
`arrow::array::array::print_long_array::<arrow::array::array_string::GenericStringArray<i32>,
 [closure@<arrow::array::array_string::GenericStringArray<i32> as 
std::fmt::Debug>::fmt::{closure#0}]>` at 
/home/azureuser/projects/arrow-rs/arrow/src/array/array.rs:561:13
      = note: inside `<arrow::array::array_string::GenericStringArray<i32> as 
std::fmt::Debug>::fmt` at 
/home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:276:9
      = note: inside `<std::sync::Arc<dyn arrow::array::array::Array> as 
std::fmt::Debug>::fmt` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/sync.rs:2266:9
      = note: inside `<&std::sync::Arc<dyn arrow::array::array::Array> as 
std::fmt::Debug>::fmt` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62
      = note: inside `std::fmt::write` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:1112:17
      = note: inside `<std::io::StdoutLock as std::io::Write>::write_fmt` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/mod.rs:1640:15
      = note: inside `<&std::io::Stdout as std::io::Write>::write_fmt` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:657:9
      = note: inside `<std::io::Stdout as std::io::Write>::write_fmt` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:631:9
      = note: inside `std::io::stdio::print_to::<std::io::Stdout>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:934:21
      = note: inside `std::io::_print` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:947:5
   note: inside `read` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/macros.rs:97:9
     --> parquet/examples/unsafe.rs:18:5
      |
   18 |     println!("{:?}", batch.column(0));
      |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   note: inside `main` at parquet/examples/unsafe.rs:38:5
     --> parquet/examples/unsafe.rs:38:5
      |
   38 |     read(Arc::new(data), 0)
      |     ^^^^^^^^^^^^^^^^^^^^^^^
      = note: inside `<fn() as std::ops::FnOnce<()>>::call_once - shim(fn())` 
at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:227:5
      = note: inside 
`std::sys_common::backtrace::__rust_begin_short_backtrace::<fn(), ()>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:125:18
      = note: inside closure at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:63:18
      = note: inside `std::ops::function::impls::<impl std::ops::FnOnce<()> for 
&dyn std::ops::Fn() -> i32 + std::marker::Sync + 
std::panic::RefUnwindSafe>::call_once` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:259:13
      = note: inside `std::panicking::r#try::do_call::<&dyn std::ops::Fn() -> 
i32 + std::marker::Sync + std::panic::RefUnwindSafe, i32>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40
      = note: inside `std::panicking::r#try::<i32, &dyn std::ops::Fn() -> i32 + 
std::marker::Sync + std::panic::RefUnwindSafe>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19
      = note: inside `std::panic::catch_unwind::<&dyn std::ops::Fn() -> i32 + 
std::marker::Sync + std::panic::RefUnwindSafe, i32>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14
      = note: inside closure at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:48
      = note: inside 
`std::panicking::r#try::do_call::<[closure@std::rt::lang_start_internal::{closure#2}],
 isize>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40
      = note: inside `std::panicking::r#try::<isize, 
[closure@std::rt::lang_start_internal::{closure#2}]>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19
      = note: inside 
`std::panic::catch_unwind::<[closure@std::rt::lang_start_internal::{closure#2}],
 isize>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14
      = note: inside `std::rt::lang_start_internal` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:20
      = note: inside `std::rt::lang_start::<()>` at 
/home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:62:5
      = note: this error originates in the macro `println` (in Nightly builds, 
run with -Z macro-backtrace for more info)
   
   error: aborting due to previous error
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to