This is an automated email from the ASF dual-hosted git repository.
wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4e2546126d GH-50051: [C++][Parquet] Avoid size overflow in
WKBBuffer::ReadCoords (#50036)
4e2546126d is described below
commit 4e2546126d44779b00835ce817a4ffc06c17d69b
Author: jmestwa-coder <[email protected]>
AuthorDate: Wed Jun 10 12:21:29 2026 +0530
GH-50051: [C++][Parquet] Avoid size overflow in WKBBuffer::ReadCoords
(#50036)
### Rationale for this change
`WKBBuffer::ReadCoords()` computes the coordinate sequence byte size using
`n_coords * sizeof(Coord)` before validating the remaining buffer size.
On 32-bit targets such as wasm32, this multiplication can overflow when
`n_coords` is derived from malformed WKB input, causing the bounds check to
validate a truncated value before the subsequent coordinate read loop processes
the coordinate sequence.
This change widens the computation before validation so the bounds check
remains correct across supported architectures.
Related issue: #50051
### What changes are included in this PR?
* Widen the coordinate sequence byte-size computation in
`WKBBuffer::ReadCoords()`
* Prevent overflow-before-bounds-check behavior on 32-bit targets
* Audit and harden closely related WKB parsing size computations with the
same externally controlled arithmetic pattern
### Are these changes tested?
Yes.
Added coverage for malformed WKB inputs that exercise overflow-sensitive
coordinate size calculations and verified the parser rejects invalid input
safely.
### Are there any user-facing changes?
No.
**This PR contains a "Critical Fix".**
Malformed WKB input could cause coordinate sequence size calculations to
overflow on 32-bit targets before bounds validation occurs, potentially
resulting in out-of-bounds reads during parsing.
This change ensures bounds validation uses widened arithmetic before
parsing coordinate sequences.
* GitHub Issue: #50051
Authored-by: jmestwa-coder <[email protected]>
Signed-off-by: Gang Wu <[email protected]>
---
cpp/src/parquet/geospatial/util_internal.cc | 8 ++++++--
cpp/src/parquet/geospatial/util_internal_test.cc | 15 +++++++++++++++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/cpp/src/parquet/geospatial/util_internal.cc
b/cpp/src/parquet/geospatial/util_internal.cc
index 59551ae870..54e82f6c04 100644
--- a/cpp/src/parquet/geospatial/util_internal.cc
+++ b/cpp/src/parquet/geospatial/util_internal.cc
@@ -21,6 +21,7 @@
#include <sstream>
#include "arrow/util/endian.h"
+#include "arrow/util/int_util_overflow.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
#include "parquet/exception.h"
@@ -64,8 +65,11 @@ class WKBBuffer {
template <typename Coord, typename Visit>
void ReadCoords(uint32_t n_coords, bool swap, Visit&& visit) {
- size_t total_bytes = n_coords * sizeof(Coord);
- if (size_ < total_bytes) {
+ uint64_t total_bytes = 0;
+ if
(::arrow::internal::MultiplyWithOverflow(static_cast<uint64_t>(n_coords),
+
static_cast<uint64_t>(sizeof(Coord)),
+ &total_bytes) ||
+ size_ < total_bytes) {
throw ParquetException("Can't read coordinate sequence of ", total_bytes,
" bytes from WKBBuffer with ", size_, "
remaining");
}
diff --git a/cpp/src/parquet/geospatial/util_internal_test.cc
b/cpp/src/parquet/geospatial/util_internal_test.cc
index 527bcc5050..4104fe6afb 100644
--- a/cpp/src/parquet/geospatial/util_internal_test.cc
+++ b/cpp/src/parquet/geospatial/util_internal_test.cc
@@ -166,6 +166,21 @@ TEST_P(WKBTestFixture,
TestWKBBounderErrorForInputWithTooManyBytes) {
ASSERT_THROW(bounder.MergeGeometry(wkb_with_extra_byte), ParquetException);
}
+TEST(TestGeometryUtil, TestWKBBounderErrorForCoordinateSequenceSizeOverflow) {
+ WKBGeometryBounder bounder;
+
+ // LINESTRING with 0x10000001 XY coordinates but only one coordinate in the
buffer.
+ // On 32-bit targets, 0x10000001 * sizeof(XY) would overflow to 16 bytes.
+ std::vector<uint8_t> wkb = {0x01, // little endian
+ 0x02, 0x00, 0x00, 0x00, // LINESTRING
+ 0x01, 0x00, 0x00, 0x10, // 0x10000001
coordinates
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// x = 0
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// y = 0
+ 0x00};
+
+ ASSERT_THROW(bounder.MergeGeometry(wkb), ParquetException);
+}
+
INSTANTIATE_TEST_SUITE_P(
TestGeometryUtil, WKBTestFixture,
::testing::Values(