This is an automated email from the ASF dual-hosted git repository.
wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 83aa9884 fix: preserve snapshot ID width in UseRef (#661)
83aa9884 is described below
commit 83aa9884555fbdb37d6e171dea54606bac38531a
Author: Minh Vu <[email protected]>
AuthorDate: Wed May 20 07:40:21 2026 +0200
fix: preserve snapshot ID width in UseRef (#661)
## Summary
- keep `UseRef()` snapshot IDs as `int64_t` while resolving refs
- add a regression test for a ref pointing at `INT32_MAX + 42`
## Root Cause
`TableScanBuilder::UseRef()` copied `SnapshotRef::snapshot_id` into an
`int32_t`, even though snapshot IDs are modeled as `int64_t` in refs,
metadata lookup, and scan context.
## Testing
- `uvx cmake --build build --target scan_test -j 8`
- `./build/src/iceberg/test/scan_test --gtest_filter=*UseRefPreservesInt64SnapshotIds*`
- `./build/src/iceberg/test/scan_test`
Fixes #660
---
src/iceberg/table_scan.cc | 2 +-
src/iceberg/test/table_scan_test.cc | 25 +++++++++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/src/iceberg/table_scan.cc b/src/iceberg/table_scan.cc
index f61bd3a0..71075d90 100644
--- a/src/iceberg/table_scan.cc
+++ b/src/iceberg/table_scan.cc
@@ -430,7 +430,7 @@ TableScanBuilder<ScanType>&
TableScanBuilder<ScanType>::UseRef(const std::string
auto iter = metadata_->refs.find(ref);
ICEBERG_BUILDER_CHECK(iter != metadata_->refs.end(), "Cannot find ref {}",
ref);
ICEBERG_BUILDER_CHECK(iter->second != nullptr, "Ref {} is null", ref);
- int32_t snapshot_id = iter->second->snapshot_id;
+ const int64_t snapshot_id = iter->second->snapshot_id;
ICEBERG_BUILDER_ASSIGN_OR_RETURN(std::ignore,
metadata_->SnapshotById(snapshot_id));
context_.snapshot_id = snapshot_id;
diff --git a/src/iceberg/test/table_scan_test.cc
b/src/iceberg/test/table_scan_test.cc
index e4a3d21f..11905a87 100644
--- a/src/iceberg/test/table_scan_test.cc
+++ b/src/iceberg/test/table_scan_test.cc
@@ -17,6 +17,7 @@
* under the License.
*/
+#include <limits>
#include <memory>
#include <optional>
#include <string>
@@ -205,6 +206,30 @@ TEST_P(TableScanTest, TableScanBuilderOptions) {
EXPECT_EQ(snapshot->snapshot_id, 1000L);
}
+TEST_P(TableScanTest, UseRefPreservesInt64SnapshotIds) {
+ constexpr int64_t kLargeSnapshotId =
+ static_cast<int64_t>(std::numeric_limits<int32_t>::max()) + 42;
+ table_metadata_->snapshots.push_back(std::make_shared<Snapshot>(
+ Snapshot{.snapshot_id = kLargeSnapshotId,
+ .parent_snapshot_id = table_metadata_->current_snapshot_id,
+ .sequence_number = 2L,
+ .timestamp_ms = TimePointMsFromUnixMs(1609459201000L),
+ .manifest_list =
"/tmp/metadata/snap-large-2-manifest-list.avro",
+ .schema_id = schema_->schema_id()}));
+ table_metadata_->refs["branch-with-large-snapshot-id"] =
std::make_shared<SnapshotRef>(
+ SnapshotRef{.snapshot_id = kLargeSnapshotId, .retention =
SnapshotRef::Branch{}});
+
+ ICEBERG_UNWRAP_OR_FAIL(auto builder,
+ DataTableScanBuilder::Make(table_metadata_,
file_io_));
+ builder->UseRef("branch-with-large-snapshot-id");
+ ICEBERG_UNWRAP_OR_FAIL(auto scan, builder->Build());
+
+ ASSERT_TRUE(scan->context().snapshot_id.has_value());
+ EXPECT_EQ(scan->context().snapshot_id.value(), kLargeSnapshotId);
+ ICEBERG_UNWRAP_OR_FAIL(auto snapshot, scan->snapshot());
+ EXPECT_EQ(snapshot->snapshot_id, kLargeSnapshotId);
+}
+
TEST_P(TableScanTest, TableScanBuilderValidationErrors) {
// Test negative min rows
ICEBERG_UNWRAP_OR_FAIL(auto builder,