This is an automated email from the ASF dual-hosted git repository.

wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4e2546126d GH-50051: [C++][Parquet] Avoid size overflow in 
WKBBuffer::ReadCoords (#50036)
4e2546126d is described below

commit 4e2546126d44779b00835ce817a4ffc06c17d69b
Author: jmestwa-coder <[email protected]>
AuthorDate: Wed Jun 10 12:21:29 2026 +0530

    GH-50051: [C++][Parquet] Avoid size overflow in WKBBuffer::ReadCoords 
(#50036)
    
    ### Rationale for this change
    
    `WKBBuffer::ReadCoords()` computes the coordinate sequence byte size using 
`n_coords * sizeof(Coord)` before validating the remaining buffer size.
    
    On 32-bit targets such as wasm32, this multiplication can overflow when 
`n_coords` is derived from malformed WKB input, causing the bounds check to 
validate a truncated value before the subsequent coordinate read loop processes 
the coordinate sequence.
    
    This change widens the computation before validation so the bounds check 
remains correct across supported architectures.
    
    Related issue: #50051
    
    ### What changes are included in this PR?
    
    * Widen the coordinate sequence byte-size computation in 
`WKBBuffer::ReadCoords()`
    * Prevent overflow-before-bounds-check behavior on 32-bit targets
    * Audit and harden closely related WKB parsing size computations with the 
same externally controlled arithmetic pattern
    
    ### Are these changes tested?
    
    Yes.
    
    Added coverage for malformed WKB inputs that exercise overflow-sensitive 
coordinate size calculations and verified the parser rejects invalid input 
safely.
    
    ### Are there any user-facing changes?
    
    No.
    
    **This PR contains a "Critical Fix".**
    
    Malformed WKB input could cause coordinate sequence size calculations to 
overflow on 32-bit targets before bounds validation occurs, potentially 
resulting in out-of-bounds reads during parsing.
    
    This change ensures bounds validation uses widened arithmetic before 
parsing coordinate sequences.
    * GitHub Issue: #50051
    
    Authored-by: jmestwa-coder <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 cpp/src/parquet/geospatial/util_internal.cc      |  8 ++++++--
 cpp/src/parquet/geospatial/util_internal_test.cc | 15 +++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/cpp/src/parquet/geospatial/util_internal.cc 
b/cpp/src/parquet/geospatial/util_internal.cc
index 59551ae870..54e82f6c04 100644
--- a/cpp/src/parquet/geospatial/util_internal.cc
+++ b/cpp/src/parquet/geospatial/util_internal.cc
@@ -21,6 +21,7 @@
 #include <sstream>
 
 #include "arrow/util/endian.h"
+#include "arrow/util/int_util_overflow.h"
 #include "arrow/util/macros.h"
 #include "arrow/util/ubsan.h"
 #include "parquet/exception.h"
@@ -64,8 +65,11 @@ class WKBBuffer {
 
   template <typename Coord, typename Visit>
   void ReadCoords(uint32_t n_coords, bool swap, Visit&& visit) {
-    size_t total_bytes = n_coords * sizeof(Coord);
-    if (size_ < total_bytes) {
+    uint64_t total_bytes = 0;
+    if 
(::arrow::internal::MultiplyWithOverflow(static_cast<uint64_t>(n_coords),
+                                                
static_cast<uint64_t>(sizeof(Coord)),
+                                                &total_bytes) ||
+        size_ < total_bytes) {
       throw ParquetException("Can't read coordinate sequence of ", total_bytes,
                              " bytes from WKBBuffer with ", size_, " 
remaining");
     }
diff --git a/cpp/src/parquet/geospatial/util_internal_test.cc 
b/cpp/src/parquet/geospatial/util_internal_test.cc
index 527bcc5050..4104fe6afb 100644
--- a/cpp/src/parquet/geospatial/util_internal_test.cc
+++ b/cpp/src/parquet/geospatial/util_internal_test.cc
@@ -166,6 +166,21 @@ TEST_P(WKBTestFixture, 
TestWKBBounderErrorForInputWithTooManyBytes) {
   ASSERT_THROW(bounder.MergeGeometry(wkb_with_extra_byte), ParquetException);
 }
 
+TEST(TestGeometryUtil, TestWKBBounderErrorForCoordinateSequenceSizeOverflow) {
+  WKBGeometryBounder bounder;
+
+  // LINESTRING with 0x10000001 XY coordinates but only one coordinate in the 
buffer.
+  // On 32-bit targets, 0x10000001 * sizeof(XY) would overflow to 16 bytes.
+  std::vector<uint8_t> wkb = {0x01,                    // little endian
+                              0x02, 0x00, 0x00, 0x00,  // LINESTRING
+                              0x01, 0x00, 0x00, 0x10,  // 0x10000001 
coordinates
+                              0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,        
// x = 0
+                              0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  
// y = 0
+                              0x00};
+
+  ASSERT_THROW(bounder.MergeGeometry(wkb), ParquetException);
+}
+
 INSTANTIATE_TEST_SUITE_P(
     TestGeometryUtil, WKBTestFixture,
     ::testing::Values(

Reply via email to