This is an automated email from the ASF dual-hosted git repository.

wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 83aa9884 fix: preserve snapshot ID width in UseRef (#661)
83aa9884 is described below

commit 83aa9884555fbdb37d6e171dea54606bac38531a
Author: Minh Vu <[email protected]>
AuthorDate: Wed May 20 07:40:21 2026 +0200

    fix: preserve snapshot ID width in UseRef (#661)
    
    ## Summary
    
    - keep `UseRef()` snapshot IDs as `int64_t` while resolving refs
    - add a regression test for a ref pointing at `INT32_MAX + 42`
    
    ## Root Cause
    
    `TableScanBuilder::UseRef()` copied `SnapshotRef::snapshot_id` into an
    `int32_t`, even though snapshot IDs are modeled as `int64_t` in refs,
    metadata lookup, and scan context.
    
    ## Testing
    
    - `uvx cmake --build build --target scan_test -j 8`
    - `./build/src/iceberg/test/scan_test --gtest_filter=*UseRefPreservesInt64SnapshotIds*`
    - `./build/src/iceberg/test/scan_test`
    
    Fixes #660
---
 src/iceberg/table_scan.cc           |  2 +-
 src/iceberg/test/table_scan_test.cc | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc
index f61bd3a0..71075d90 100644
--- a/src/iceberg/table_scan.cc
+++ b/src/iceberg/table_scan.cc
@@ -430,7 +430,7 @@ TableScanBuilder<ScanType>& TableScanBuilder<ScanType>::UseRef(const std::string
   auto iter = metadata_->refs.find(ref);
   ICEBERG_BUILDER_CHECK(iter != metadata_->refs.end(), "Cannot find ref {}", ref);
   ICEBERG_BUILDER_CHECK(iter->second != nullptr, "Ref {} is null", ref);
-  int32_t snapshot_id = iter->second->snapshot_id;
+  const int64_t snapshot_id = iter->second->snapshot_id;
   ICEBERG_BUILDER_ASSIGN_OR_RETURN(std::ignore, metadata_->SnapshotById(snapshot_id));
   context_.snapshot_id = snapshot_id;
 
diff --git a/src/iceberg/test/table_scan_test.cc b/src/iceberg/test/table_scan_test.cc
index e4a3d21f..11905a87 100644
--- a/src/iceberg/test/table_scan_test.cc
+++ b/src/iceberg/test/table_scan_test.cc
@@ -17,6 +17,7 @@
  * under the License.
  */
 
+#include <limits>
 #include <memory>
 #include <optional>
 #include <string>
@@ -205,6 +206,30 @@ TEST_P(TableScanTest, TableScanBuilderOptions) {
   EXPECT_EQ(snapshot->snapshot_id, 1000L);
 }
 
+TEST_P(TableScanTest, UseRefPreservesInt64SnapshotIds) {
+  constexpr int64_t kLargeSnapshotId =
+      static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 42;
+  table_metadata_->snapshots.push_back(std::make_shared<Snapshot>(
+      Snapshot{.snapshot_id = kLargeSnapshotId,
+               .parent_snapshot_id = table_metadata_->current_snapshot_id,
+               .sequence_number = 2L,
+               .timestamp_ms = TimePointMsFromUnixMs(1609459201000L),
+               .manifest_list = "/tmp/metadata/snap-large-2-manifest-list.avro",
+               .schema_id = schema_->schema_id()}));
+  table_metadata_->refs["branch-with-large-snapshot-id"] = std::make_shared<SnapshotRef>(
+      SnapshotRef{.snapshot_id = kLargeSnapshotId, .retention = SnapshotRef::Branch{}});
+
+  ICEBERG_UNWRAP_OR_FAIL(auto builder,
+                         DataTableScanBuilder::Make(table_metadata_, file_io_));
+  builder->UseRef("branch-with-large-snapshot-id");
+  ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build());
+
+  ASSERT_TRUE(scan->context().snapshot_id.has_value());
+  EXPECT_EQ(scan->context().snapshot_id.value(), kLargeSnapshotId);
+  ICEBERG_UNWRAP_OR_FAIL(auto snapshot, scan->snapshot());
+  EXPECT_EQ(snapshot->snapshot_id, kLargeSnapshotId);
+}
+
 TEST_P(TableScanTest, TableScanBuilderValidationErrors) {
   // Test negative min rows
   ICEBERG_UNWRAP_OR_FAIL(auto builder,

Reply via email to