This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 4401ceb0 feat: implement table requirements (#380)
4401ceb0 is described below
commit 4401ceb0a275ca8bb656c2f488b4fe43a03f6beb
Author: Feiyang Li <[email protected]>
AuthorDate: Tue Dec 2 13:06:30 2025 +0800
feat: implement table requirements (#380)
---
src/iceberg/table_requirements.cc | 24 +++-
src/iceberg/table_requirements.h | 29 ++++
src/iceberg/table_update.cc | 14 +-
src/iceberg/test/CMakeLists.txt | 1 +
src/iceberg/test/table_requirements_test.cc | 210 ++++++++++++++++++++++++++++
src/iceberg/test/table_update_test.cc | 7 +-
6 files changed, 266 insertions(+), 19 deletions(-)
diff --git a/src/iceberg/table_requirements.cc
b/src/iceberg/table_requirements.cc
index fcb744aa..c9d34c29 100644
--- a/src/iceberg/table_requirements.cc
+++ b/src/iceberg/table_requirements.cc
@@ -19,7 +19,10 @@
#include "iceberg/table_requirements.h"
+#include <memory>
+
#include "iceberg/table_metadata.h"
+#include "iceberg/table_requirement.h"
#include "iceberg/table_update.h"
namespace iceberg {
@@ -34,19 +37,34 @@ Result<std::vector<std::unique_ptr<TableRequirement>>>
TableUpdateContext::Build
Result<std::vector<std::unique_ptr<TableRequirement>>>
TableRequirements::ForCreateTable(
const std::vector<std::unique_ptr<TableUpdate>>& table_updates) {
- return NotImplemented("TableRequirements::ForCreateTable not implemented");
+ TableUpdateContext context(nullptr, false);
+ context.AddRequirement(std::make_unique<table::AssertDoesNotExist>());
+ for (const auto& update : table_updates) {
+ ICEBERG_RETURN_UNEXPECTED(update->GenerateRequirements(context));
+ }
+ return context.Build();
}
Result<std::vector<std::unique_ptr<TableRequirement>>>
TableRequirements::ForReplaceTable(
const TableMetadata& base,
const std::vector<std::unique_ptr<TableUpdate>>& table_updates) {
- return NotImplemented("TableRequirements::ForReplaceTable not implemented");
+ TableUpdateContext context(&base, true);
+ context.AddRequirement(std::make_unique<table::AssertUUID>(base.table_uuid));
+ for (const auto& update : table_updates) {
+ ICEBERG_RETURN_UNEXPECTED(update->GenerateRequirements(context));
+ }
+ return context.Build();
}
Result<std::vector<std::unique_ptr<TableRequirement>>>
TableRequirements::ForUpdateTable(
const TableMetadata& base,
const std::vector<std::unique_ptr<TableUpdate>>& table_updates) {
- return NotImplemented("TableRequirements::ForUpdateTable not implemented");
+ TableUpdateContext context(&base, false);
+ context.AddRequirement(std::make_unique<table::AssertUUID>(base.table_uuid));
+ for (const auto& update : table_updates) {
+ ICEBERG_RETURN_UNEXPECTED(update->GenerateRequirements(context));
+ }
+ return context.Build();
}
} // namespace iceberg
diff --git a/src/iceberg/table_requirements.h b/src/iceberg/table_requirements.h
index 327e7d53..7af2fb2d 100644
--- a/src/iceberg/table_requirements.h
+++ b/src/iceberg/table_requirements.h
@@ -68,11 +68,40 @@ class ICEBERG_EXPORT TableUpdateContext {
/// \brief Build and return the list of requirements
Result<std::vector<std::unique_ptr<TableRequirement>>> Build();
+ // Getters for deduplication flags
+ bool added_last_assigned_field_id() const { return
added_last_assigned_field_id_; }
+ bool added_current_schema_id() const { return added_current_schema_id_; }
+ bool added_last_assigned_partition_id() const {
+ return added_last_assigned_partition_id_;
+ }
+ bool added_default_spec_id() const { return added_default_spec_id_; }
+ bool added_default_sort_order_id() const { return
added_default_sort_order_id_; }
+
+ // Setters for deduplication flags
+ void set_added_last_assigned_field_id(bool value) {
+ added_last_assigned_field_id_ = value;
+ }
+ void set_added_current_schema_id(bool value) { added_current_schema_id_ =
value; }
+ void set_added_last_assigned_partition_id(bool value) {
+ added_last_assigned_partition_id_ = value;
+ }
+ void set_added_default_spec_id(bool value) { added_default_spec_id_ = value;
}
+ void set_added_default_sort_order_id(bool value) {
+ added_default_sort_order_id_ = value;
+ }
+
private:
const TableMetadata* base_;
const bool is_replace_;
std::vector<std::unique_ptr<TableRequirement>> requirements_;
+
+ // flags to avoid adding duplicate requirements
+ bool added_last_assigned_field_id_ = false;
+ bool added_current_schema_id_ = false;
+ bool added_last_assigned_partition_id_ = false;
+ bool added_default_spec_id_ = false;
+ bool added_default_sort_order_id_ = false;
};
/// \brief Factory class for generating table requirements
diff --git a/src/iceberg/table_update.cc b/src/iceberg/table_update.cc
index fcb7a58c..71fa30ee 100644
--- a/src/iceberg/table_update.cc
+++ b/src/iceberg/table_update.cc
@@ -33,19 +33,7 @@ void AssignUUID::ApplyTo(TableMetadataBuilder& builder)
const {
}
Status AssignUUID::GenerateRequirements(TableUpdateContext& context) const {
- // AssignUUID operation generates a requirement to assert the table's UUID
- // if a base metadata exists (i.e., this is an update operation)
-
- const TableMetadata* base = context.base();
-
- if (base != nullptr && !base->table_uuid.empty()) {
- // For table updates, assert that the current UUID matches what we expect
- context.AddRequirement(std::make_unique<AssertUUID>(base->table_uuid));
- }
-
- // Note: For table creation (base == nullptr), no UUID requirement is needed
- // as the table doesn't exist yet
-
+ // AssignUUID does not generate additional requirements.
return {};
}
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index 54e69ac6..1a7e61ac 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -85,6 +85,7 @@ add_iceberg_test(table_test
table_test.cc
table_metadata_builder_test.cc
table_requirement_test.cc
+ table_requirements_test.cc
table_update_test.cc)
add_iceberg_test(expression_test
diff --git a/src/iceberg/test/table_requirements_test.cc
b/src/iceberg/test/table_requirements_test.cc
new file mode 100644
index 00000000..441e72d4
--- /dev/null
+++ b/src/iceberg/test/table_requirements_test.cc
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/table_requirements.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/partition_spec.h"
+#include "iceberg/snapshot.h"
+#include "iceberg/sort_order.h"
+#include "iceberg/table_metadata.h"
+#include "iceberg/table_requirement.h"
+#include "iceberg/table_update.h"
+#include "iceberg/test/matchers.h"
+
+namespace iceberg {
+
+namespace {
+
+// Helper function to create base metadata for tests
+std::unique_ptr<TableMetadata> CreateBaseMetadata(
+ const std::string& uuid = "test-uuid-1234") {
+ auto metadata = std::make_unique<TableMetadata>();
+ metadata->format_version = 2;
+ metadata->table_uuid = uuid;
+ metadata->location = "s3://bucket/test";
+ metadata->last_sequence_number = 0;
+ metadata->last_updated_ms = TimePointMs{std::chrono::milliseconds(1000)};
+ metadata->last_column_id = 0;
+ metadata->default_spec_id = PartitionSpec::kInitialSpecId;
+ metadata->last_partition_id = 0;
+ metadata->current_snapshot_id = Snapshot::kInvalidSnapshotId;
+ metadata->default_sort_order_id = SortOrder::kInitialSortOrderId;
+ metadata->next_row_id = TableMetadata::kInitialRowId;
+ return metadata;
+}
+
+} // namespace
+
+TEST(TableRequirementsTest, EmptyUpdatesForCreateTable) {
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+
+ auto result = TableRequirements::ForCreateTable(updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Should have only AssertDoesNotExist requirement
+ auto* assert_does_not_exist =
+ dynamic_cast<table::AssertDoesNotExist*>(requirements[0].get());
+ EXPECT_NE(assert_does_not_exist, nullptr);
+}
+
+TEST(TableRequirementsTest, EmptyUpdatesForUpdateTable) {
+ auto metadata = CreateBaseMetadata();
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+
+ auto result = TableRequirements::ForUpdateTable(*metadata, updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Should have only AssertUUID requirement
+ auto* assert_uuid = dynamic_cast<table::AssertUUID*>(requirements[0].get());
+ ASSERT_NE(assert_uuid, nullptr);
+ EXPECT_EQ(assert_uuid->uuid(), metadata->table_uuid);
+}
+
+TEST(TableRequirementsTest, EmptyUpdatesForReplaceTable) {
+ auto metadata = CreateBaseMetadata();
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+
+ auto result = TableRequirements::ForReplaceTable(*metadata, updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Should have only AssertUUID requirement
+ auto* assert_uuid = dynamic_cast<table::AssertUUID*>(requirements[0].get());
+ ASSERT_NE(assert_uuid, nullptr);
+ EXPECT_EQ(assert_uuid->uuid(), metadata->table_uuid);
+}
+
+TEST(TableRequirementsTest, TableAlreadyExists) {
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+
+ auto result = TableRequirements::ForCreateTable(updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Validate against existing metadata (should fail)
+ auto metadata = CreateBaseMetadata();
+ auto status = requirements[0]->Validate(metadata.get());
+ EXPECT_THAT(status, IsError(ErrorKind::kCommitFailed));
+ EXPECT_THAT(status, HasErrorMessage("table already exists"));
+}
+
+TEST(TableRequirementsTest, TableDoesNotExist) {
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+
+ auto result = TableRequirements::ForCreateTable(updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Validate against null metadata (should succeed)
+ auto status = requirements[0]->Validate(nullptr);
+ EXPECT_THAT(status, IsOk());
+}
+
+// Test for AssignUUID update
+TEST(TableRequirementsTest, AssignUUID) {
+ auto metadata = CreateBaseMetadata("original-uuid");
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+
+ // Add multiple AssignUUID updates
+ updates.push_back(std::make_unique<table::AssignUUID>(metadata->table_uuid));
+ updates.push_back(std::make_unique<table::AssignUUID>("new-uuid-1"));
+ updates.push_back(std::make_unique<table::AssignUUID>("new-uuid-2"));
+
+ auto result = TableRequirements::ForUpdateTable(*metadata, updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ // After deduplication: only 1 AssertUUID from ForUpdateTable
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Should have AssertUUID requirement with original UUID
+ auto* assert_uuid = dynamic_cast<table::AssertUUID*>(requirements[0].get());
+ ASSERT_NE(assert_uuid, nullptr);
+ EXPECT_EQ(assert_uuid->uuid(), "original-uuid");
+
+ auto status = requirements[0]->Validate(metadata.get());
+ EXPECT_THAT(status, IsOk());
+}
+
+TEST(TableRequirementsTest, AssignUUIDFailure) {
+ auto metadata = CreateBaseMetadata("original-uuid");
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+ updates.push_back(std::make_unique<table::AssignUUID>(metadata->table_uuid));
+
+ auto result = TableRequirements::ForUpdateTable(*metadata, updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ // After deduplication: only 1 AssertUUID from ForUpdateTable
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Create updated metadata with different UUID
+ auto updated = CreateBaseMetadata("different-uuid");
+
+ // Validate against updated metadata (should fail)
+ auto status = requirements[0]->Validate(updated.get());
+ EXPECT_THAT(status, IsError(ErrorKind::kCommitFailed));
+ EXPECT_THAT(status, HasErrorMessage("UUID does not match"));
+}
+
+TEST(TableRequirementsTest, AssignUUIDForReplaceTable) {
+ auto metadata = CreateBaseMetadata("original-uuid");
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+
+ // Add multiple AssignUUID updates
+ updates.push_back(std::make_unique<table::AssignUUID>(metadata->table_uuid));
+ updates.push_back(std::make_unique<table::AssignUUID>("new-uuid-1"));
+ updates.push_back(std::make_unique<table::AssignUUID>("new-uuid-2"));
+
+ auto result = TableRequirements::ForReplaceTable(*metadata, updates);
+ ASSERT_THAT(result, IsOk());
+
+ auto& requirements = result.value();
+ // After deduplication: only 1 AssertUUID from ForReplaceTable
+ ASSERT_EQ(requirements.size(), 1);
+
+ // Should have AssertUUID requirement
+ auto* assert_uuid = dynamic_cast<table::AssertUUID*>(requirements[0].get());
+ ASSERT_NE(assert_uuid, nullptr);
+ EXPECT_EQ(assert_uuid->uuid(), "original-uuid");
+
+ // Validate against base metadata (should succeed)
+ auto status = requirements[0]->Validate(metadata.get());
+ EXPECT_THAT(status, IsOk());
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/test/table_update_test.cc
b/src/iceberg/test/table_update_test.cc
index 9607db59..298141a9 100644
--- a/src/iceberg/test/table_update_test.cc
+++ b/src/iceberg/test/table_update_test.cc
@@ -71,14 +71,15 @@ std::unique_ptr<TableMetadata> CreateBaseMetadata() {
TEST(TableUpdateTest, AssignUUIDGenerateRequirements) {
table::AssignUUID update("new-uuid");
- // New table - no requirements
+ // New table - no requirements (AssignUUID doesn't generate requirements)
auto new_table_reqs = GenerateRequirements(update, nullptr);
EXPECT_TRUE(new_table_reqs.empty());
- // Existing table - should generate AssertUUID requirement
+ // Existing table - AssignUUID doesn't generate requirements anymore
+ // The UUID assertion is added by ForUpdateTable/ForReplaceTable methods
auto base = CreateBaseMetadata();
auto existing_table_reqs = GenerateRequirements(update, base.get());
- EXPECT_EQ(existing_table_reqs.size(), 1);
+ EXPECT_TRUE(existing_table_reqs.empty());
// Existing table with empty UUID - no requirements
base->table_uuid = "";