alamb commented on pull request #8645:
URL: https://github.com/apache/arrow/pull/8645#issuecomment-726736494
In terms of evidence that there is a problem on master, I ran the arrow test
suite under `valgrind` @ 30516049522c1a527ffb375e7790102f58edb4f9 on master and
it does flag an invalid read (that could cause a segfault depending on the
circumstances). Here is the error it flagged:
```
test
compute::kernels::cast::tests::test_cast_dict_to_dict_bad_index_value_utf8 ...
==5483== Invalid read of size 1
==5483== at 0x55D7B2: arrow::util::bit_util::set_bits_raw
(bit_util.rs:128)
==5483== by 0x605D49: <arrow::array::builder::BufferBuilder<T> as
arrow::array::builder::BufferBuilderTrait<T>>::append_n (builder.rs:339)
==5483== by 0x62C854:
arrow::array::builder::PrimitiveBuilder<T>::append_slice (builder.rs:591)
==5483== by 0xA992F6: arrow::array::builder::StringBuilder::append_value
(builder.rs:1781)
==5483== by 0x6A309B:
arrow::array::builder::StringDictionaryBuilder<K>::append (builder.rs:2435)
==5483== by 0x302DA3:
arrow::compute::kernels::cast::tests::test_cast_dict_to_dict_bad_index_value_utf8
(cast.rs:2612)
==5483== by 0x31E499:
arrow::compute::kernels::cast::tests::test_cast_dict_to_dict_bad_index_value_utf8::{{closure}}
(cast.rs:2598)
==5483== by 0xA751ED: core::ops::function::FnOnce::call_once
(function.rs:232)
==5483== by 0xB97365: call_once<(),FnOnce<()>> (boxed.rs:1008)
==5483== by 0xB97365: call_once<(),alloc::boxed::Box<FnOnce<()>>>
(panic.rs:318)
==5483== by 0xB97365:
do_call<std::panic::AssertUnwindSafe<alloc::boxed::Box<FnOnce<()>>>,()>
(panicking.rs:331)
==5483== by 0xB97365:
try<(),std::panic::AssertUnwindSafe<alloc::boxed::Box<FnOnce<()>>>>
(panicking.rs:274)
==5483== by 0xB97365:
catch_unwind<std::panic::AssertUnwindSafe<alloc::boxed::Box<FnOnce<()>>>,()>
(panic.rs:394)
==5483== by 0xB97365: run_test_in_process (lib.rs:541)
==5483== by 0xB97365: test::run_test::run_test_inner::{{closure}}
(lib.rs:450)
==5483== by 0xB969F8: test::run_test::run_test_inner (lib.rs:475)
==5483== by 0xB94BE9: test::run_test (lib.rs:505)
==5483== by 0xB829D8: run_tests<closure-2> (lib.rs:284)
==5483== by 0xB829D8: test::console::run_tests_console (console.rs:280)
==5483== Address 0x65c1900 is 0 bytes after a block of size 128 alloc'd
==5483== at 0x4C34443: memalign (vg_replace_malloc.c:906)
==5483== by 0x4C34546: posix_memalign (vg_replace_malloc.c:1070)
==5483== by 0xEB42F3: aligned_malloc (alloc.rs:95)
==5483== by 0xEB42F3: alloc (alloc.rs:22)
==5483== by 0xEB42F3: realloc_fallback (alloc.rs:39)
==5483== by 0xEB42F3: realloc (alloc.rs:50)
==5483== by 0xEB42F3: __rdl_realloc (alloc.rs:320)
==5483== by 0x55348C: alloc::alloc::realloc (alloc.rs:124)
==5483== by 0x71FB58: arrow::memory::reallocate (memory.rs:187)
==5483== by 0x9D5606: arrow::buffer::MutableBuffer::reserve
(buffer.rs:665)
==5483== by 0x5FFC65: <arrow::array::builder::BufferBuilder<T> as
arrow::array::builder::BufferBuilderTrait<T>>::reserve (builder.rs:307)
==5483== by 0x605BEF: <arrow::array::builder::BufferBuilder<T> as
arrow::array::builder::BufferBuilderTrait<T>>::append_n (builder.rs:335)
==5483== by 0x62C854:
arrow::array::builder::PrimitiveBuilder<T>::append_slice (builder.rs:591)
==5483== by 0xA992F6: arrow::array::builder::StringBuilder::append_value
(builder.rs:1781)
==5483== by 0x6A309B:
arrow::array::builder::StringDictionaryBuilder<K>::append (builder.rs:2435)
==5483== by 0x302DA3:
arrow::compute::kernels::cast::tests::test_cast_dict_to_dict_bad_index_value_utf8
(cast.rs:2612)
==5483==
==5483== Invalid write of size 1
==5483== at 0x55D7B4: arrow::util::bit_util::set_bits_raw
(bit_util.rs:128)
==5483== by 0x605D49: <arrow::array::builder::BufferBuilder<T> as
arrow::array::builder::BufferBuilderTrait<T>>::append_n (builder.rs:339)
==5483== by 0x62C854:
arrow::array::builder::PrimitiveBuilder<T>::append_slice (builder.rs:591)
==5483== by 0xA992F6: arrow::array::builder::StringBuilder::append_value
(builder.rs:1781)
==5483== by 0x6A309B:
arrow::array::builder::StringDictionaryBuilder<K>::append (builder.rs:2435)
==5483== by 0x302DA3:
arrow::compute::kernels::cast::tests::test_cast_dict_to_dict_bad_index_value_utf8
(cast.rs:2612)
==5483== by 0x31E499:
arrow::compute::kernels::cast::tests::test_cast_dict_to_dict_bad_index_value_utf8::{{closure}}
(cast.rs:2598)
==5483== by 0xA751ED: core::ops::function::FnOnce::call_once
(function.rs:232)
==5483== by 0xB97365: call_once<(),FnOnce<()>> (boxed.rs:1008)
==5483== by 0xB97365: call_once<(),alloc::boxed::Box<FnOnce<()>>>
(panic.rs:318)
==5483== by 0xB97365:
do_call<std::panic::AssertUnwindSafe<alloc::boxed::Box<FnOnce<()>>>,()>
(panicking.rs:331)
==5483== by 0xB97365:
try<(),std::panic::AssertUnwindSafe<alloc::boxed::Box<FnOnce<()>>>>
(panicking.rs:274)
==5483== by 0xB97365:
catch_unwind<std::panic::AssertUnwindSafe<alloc::boxed::Box<FnOnce<()>>>,()>
(panic.rs:394)
==5483== by 0xB97365: run_test_in_process (lib.rs:541)
==5483== by 0xB97365: test::run_test::run_test_inner::{{closure}}
(lib.rs:450)
==5483== by 0xB969F8: test::run_test::run_test_inner (lib.rs:475)
==5483== by 0xB94BE9: test::run_test (lib.rs:505)
==5483== by 0xB829D8: run_tests<closure-2> (lib.rs:284)
==5483== by 0xB829D8: test::console::run_tests_console (console.rs:280)
==5483== Address 0x65c1900 is 0 bytes after a block of size 128 alloc'd
==5483== at 0x4C34443: memalign (vg_replace_malloc.c:906)
==5483== by 0x4C34546: posix_memalign (vg_replace_malloc.c:1070)
==5483== by 0xEB42F3: aligned_malloc (alloc.rs:95)
==5483== by 0xEB42F3: alloc (alloc.rs:22)
==5483== by 0xEB42F3: realloc_fallback (alloc.rs:39)
==5483== by 0xEB42F3: realloc (alloc.rs:50)
==5483== by 0xEB42F3: __rdl_realloc (alloc.rs:320)
==5483== by 0x55348C: alloc::alloc::realloc (alloc.rs:124)
==5483== by 0x71FB58: arrow::memory::reallocate (memory.rs:187)
==5483== by 0x9D5606: arrow::buffer::MutableBuffer::reserve
(buffer.rs:665)
==5483== by 0x5FFC65: <arrow::array::builder::BufferBuilder<T> as
arrow::array::builder::BufferBuilderTrait<T>>::reserve (builder.rs:307)
==5483== by 0x605BEF: <arrow::array::builder::BufferBuilder<T> as
arrow::array::builder::BufferBuilderTrait<T>>::append_n (builder.rs:335)
==5483== by 0x62C854:
arrow::array::builder::PrimitiveBuilder<T>::append_slice (builder.rs:591)
==5483== by 0xA992F6: arrow::array::builder::StringBuilder::append_value
(builder.rs:1781)
==5483== by 0x6A309B:
arrow::array::builder::StringDictionaryBuilder<K>::append (builder.rs:2435)
==5483== by 0x302DA3:
arrow::compute::kernels::cast::tests::test_cast_dict_to_dict_bad_index_value_utf8
(cast.rs:2612)
==5483==
```
My interpretation of this report is that
`arrow::buffer::MutableBuffer::reserve` is reading off the end of the array. I
haven't studied the code closely.
The actual command I used is below, in case anyone is interested:
PARQUET_TEST_DATA=`pwd`/../../cpp/submodules/parquet-testing/data
ARROW_TEST_DATA=`pwd`/../../testing/data valgrind
/home/andrew/Software/arrow/rust/target/debug/deps/arrow-b9bea680be7dc6e4
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]