This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9b134ec286 GH-49477: [C++][Parquet] Fix multiplication overflow in
PLAIN BYTE_ARRAY decoder (#49478)
9b134ec286 is described below
commit 9b134ec2869b7602496c7f34a1c18424c5027026
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Mar 11 11:46:36 2026 +0100
GH-49477: [C++][Parquet] Fix multiplication overflow in PLAIN BYTE_ARRAY
decoder (#49478)
### Rationale for this change
Issue found by OSS-Fuzz: https://issues.oss-fuzz.com/issues/489948953
### Are these changes tested?
Yes, by the added regression file.
### Are there any user-facing changes?
No.
**This PR contains a "Critical Fix".** Signed integer overflow is undefined
behavior, so this could result in any kind of misbehavior.
* GitHub Issue: #49477
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
ci/scripts/cpp_test.sh | 2 +-
cpp/src/parquet/decoder.cc | 3 ++-
testing | 2 +-
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh
index 921983fdb0..b88b358c06 100755
--- a/ci/scripts/cpp_test.sh
+++ b/ci/scripts/cpp_test.sh
@@ -222,7 +222,7 @@ if [ "${ARROW_FUZZING}" == "ON" ]; then
"${binary_output_dir}/arrow-ipc-tensor-stream-fuzz" arrow-ipc-tensor-stream/*-testcase-*
if [ "${ARROW_PARQUET}" == "ON" ]; then
"${binary_output_dir}/parquet-arrow-fuzz" parquet/fuzzing/*-testcase-*
- # TODO replay encoding regression files when we have some
+ "${binary_output_dir}/parquet-encoding-fuzz" parquet/encoding-fuzzing/*-testcase-*
fi
"${binary_output_dir}/arrow-csv-fuzz" csv/fuzzing/*-testcase-*
popd
diff --git a/cpp/src/parquet/decoder.cc b/cpp/src/parquet/decoder.cc
index 4500a72f01..df7a9becad 100644
--- a/cpp/src/parquet/decoder.cc
+++ b/cpp/src/parquet/decoder.cc
@@ -759,7 +759,8 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType> {
// We're going to decode `num_values - null_count` PLAIN values,
// and each value has a 4-byte length header that doesn't count for the
// Arrow binary data length.
- int64_t estimated_data_length = len_ - 4 * (num_values - null_count);
+ int64_t estimated_data_length =
+ len_ - 4 * static_cast<int64_t>(num_values - null_count);
if (ARROW_PREDICT_FALSE(estimated_data_length < 0)) {
return Status::Invalid("Invalid or truncated PLAIN-encoded BYTE_ARRAY data");
}
diff --git a/testing b/testing
index fff99f68b7..afcdeba440 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit fff99f68b7085b7ac1c210f79f1087f6a085a06b
+Subproject commit afcdeba44006e05b2b09c0971373b36bb263e0aa