This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new fc2cde01 feat: implement InMemoryCatalog's UpdateTable (#386)
fc2cde01 is described below
commit fc2cde0138befcdf3fdbbfee14ba363311cbd7d2
Author: wzhuo <[email protected]>
AuthorDate: Tue Dec 16 22:46:01 2025 +0800
feat: implement InMemoryCatalog's UpdateTable (#386)
---
src/iceberg/catalog/memory/in_memory_catalog.cc | 68 ++++++++--
src/iceberg/json_internal.cc | 4 +-
src/iceberg/table.cc | 11 +-
src/iceberg/table.h | 11 +-
src/iceberg/table_metadata.cc | 167 +++++++++++++++++++-----
src/iceberg/table_metadata.h | 95 +++++++++++++-
src/iceberg/table_properties.cc | 10 +-
src/iceberg/table_properties.h | 12 +-
src/iceberg/test/CMakeLists.txt | 1 +
src/iceberg/test/config_test.cc | 17 ++-
src/iceberg/test/in_memory_catalog_test.cc | 67 +++++++++-
src/iceberg/test/location_util_test.cc | 64 +++++++++
src/iceberg/test/meson.build | 1 +
src/iceberg/test/metadata_io_test.cc | 82 +++++++++++-
src/iceberg/test/table_metadata_builder_test.cc | 67 ++++++++--
src/iceberg/util/config.h | 2 +-
src/iceberg/util/location_util.h | 43 ++++++
src/iceberg/util/meson.build | 1 +
18 files changed, 636 insertions(+), 87 deletions(-)
diff --git a/src/iceberg/catalog/memory/in_memory_catalog.cc
b/src/iceberg/catalog/memory/in_memory_catalog.cc
index 9e4a485a..dc6d9d00 100644
--- a/src/iceberg/catalog/memory/in_memory_catalog.cc
+++ b/src/iceberg/catalog/memory/in_memory_catalog.cc
@@ -23,7 +23,10 @@
#include <iterator>
#include "iceberg/table.h"
+#include "iceberg/table_identifier.h"
#include "iceberg/table_metadata.h"
+#include "iceberg/table_requirement.h"
+#include "iceberg/table_update.h"
#include "iceberg/util/macros.h"
namespace iceberg {
@@ -120,6 +123,13 @@ class ICEBERG_EXPORT InMemoryNamespace {
/// \return The metadata location if the table exists; error otherwise.
Result<std::string> GetTableMetadataLocation(const TableIdentifier&
table_ident) const;
+ /// \brief Updates the metadata location for the specified table.
+ ///
+ /// \param table_ident The identifier of the table.
+ /// \param metadata_location The new metadata location.
+ Status UpdateTableMetadataLocation(const TableIdentifier& table_ident,
+ const std::string& metadata_location);
+
/// \brief Internal utility for retrieving a namespace node pointer from the
tree.
///
/// \tparam NamespacePtr The type of the namespace node pointer.
@@ -278,7 +288,7 @@ Result<std::vector<std::string>>
InMemoryNamespace::ListTables(
return table_names;
}
-Status InMemoryNamespace::RegisterTable(TableIdentifier const& table_ident,
+Status InMemoryNamespace::RegisterTable(const TableIdentifier& table_ident,
const std::string& metadata_location) {
const auto ns = GetNamespace(this, table_ident.ns);
ICEBERG_RETURN_UNEXPECTED(ns);
@@ -289,21 +299,21 @@ Status InMemoryNamespace::RegisterTable(TableIdentifier
const& table_ident,
return {};
}
-Status InMemoryNamespace::UnregisterTable(TableIdentifier const& table_ident) {
+Status InMemoryNamespace::UnregisterTable(const TableIdentifier& table_ident) {
const auto ns = GetNamespace(this, table_ident.ns);
ICEBERG_RETURN_UNEXPECTED(ns);
ns.value()->table_metadata_locations_.erase(table_ident.name);
return {};
}
-Result<bool> InMemoryNamespace::TableExists(TableIdentifier const&
table_ident) const {
+Result<bool> InMemoryNamespace::TableExists(const TableIdentifier&
table_ident) const {
const auto ns = GetNamespace(this, table_ident.ns);
ICEBERG_RETURN_UNEXPECTED(ns);
return ns.value()->table_metadata_locations_.contains(table_ident.name);
}
Result<std::string> InMemoryNamespace::GetTableMetadataLocation(
- TableIdentifier const& table_ident) const {
+ const TableIdentifier& table_ident) const {
const auto ns = GetNamespace(this, table_ident.ns);
ICEBERG_RETURN_UNEXPECTED(ns);
const auto it = ns.value()->table_metadata_locations_.find(table_ident.name);
@@ -313,17 +323,24 @@ Result<std::string>
InMemoryNamespace::GetTableMetadataLocation(
return it->second;
}
+Status InMemoryNamespace::UpdateTableMetadataLocation(
+ const TableIdentifier& table_ident, const std::string& metadata_location) {
+ ICEBERG_ASSIGN_OR_RAISE(auto ns, GetNamespace(this, table_ident.ns));
+ ns->table_metadata_locations_[table_ident.name] = metadata_location;
+ return {};
+}
+
std::shared_ptr<InMemoryCatalog> InMemoryCatalog::Make(
- std::string const& name, std::shared_ptr<FileIO> const& file_io,
- std::string const& warehouse_location,
- std::unordered_map<std::string, std::string> const& properties) {
+ const std::string& name, const std::shared_ptr<FileIO>& file_io,
+ const std::string& warehouse_location,
+ const std::unordered_map<std::string, std::string>& properties) {
return std::make_shared<InMemoryCatalog>(name, file_io, warehouse_location,
properties);
}
InMemoryCatalog::InMemoryCatalog(
- std::string const& name, std::shared_ptr<FileIO> const& file_io,
- std::string const& warehouse_location,
- std::unordered_map<std::string, std::string> const& properties)
+ const std::string& name, const std::shared_ptr<FileIO>& file_io,
+ const std::string& warehouse_location,
+ const std::unordered_map<std::string, std::string>& properties)
: catalog_name_(std::move(name)),
properties_(std::move(properties)),
file_io_(std::move(file_io)),
@@ -395,7 +412,31 @@ Result<std::unique_ptr<Table>>
InMemoryCatalog::UpdateTable(
const std::vector<std::unique_ptr<TableRequirement>>& requirements,
const std::vector<std::unique_ptr<TableUpdate>>& updates) {
std::unique_lock lock(mutex_);
- return NotImplemented("update table");
+ ICEBERG_ASSIGN_OR_RAISE(auto base_metadata_location,
+
root_namespace_->GetTableMetadataLocation(identifier));
+
+ ICEBERG_ASSIGN_OR_RAISE(auto base,
+ TableMetadataUtil::Read(*file_io_,
base_metadata_location));
+
+ for (const auto& requirement : requirements) {
+ ICEBERG_RETURN_UNEXPECTED(requirement->Validate(base.get()));
+ }
+
+ auto builder = TableMetadataBuilder::BuildFrom(base.get());
+ for (const auto& update : updates) {
+ update->ApplyTo(*builder);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto updated, builder->Build());
+ ICEBERG_ASSIGN_OR_RAISE(
+ auto new_metadata_location,
+ TableMetadataUtil::Write(*file_io_, base.get(), base_metadata_location,
*updated));
+ ICEBERG_RETURN_UNEXPECTED(
+ root_namespace_->UpdateTableMetadataLocation(identifier,
new_metadata_location));
+ TableMetadataUtil::DeleteRemovedMetadataFiles(*file_io_, base.get(),
*updated);
+
+ return std::make_unique<Table>(identifier, std::move(updated),
+ std::move(new_metadata_location), file_io_,
+
std::static_pointer_cast<Catalog>(shared_from_this()));
}
Result<std::shared_ptr<Transaction>> InMemoryCatalog::StageCreateTable(
@@ -438,9 +479,8 @@ Result<std::unique_ptr<Table>> InMemoryCatalog::LoadTable(
ICEBERG_ASSIGN_OR_RAISE(auto metadata,
TableMetadataUtil::Read(*file_io_,
metadata_location));
-
- return std::make_unique<Table>(identifier, std::move(metadata),
metadata_location,
- file_io_,
+ return std::make_unique<Table>(identifier, std::move(metadata),
+ std::move(metadata_location), file_io_,
std::static_pointer_cast<Catalog>(shared_from_this()));
}
diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc
index cee309a1..7f25a174 100644
--- a/src/iceberg/json_internal.cc
+++ b/src/iceberg/json_internal.cc
@@ -797,9 +797,7 @@ nlohmann::json ToJson(const TableMetadata& table_metadata) {
json[kSortOrders] = ToJsonList(table_metadata.sort_orders);
// write properties map
- if (table_metadata.properties) {
- json[kProperties] = table_metadata.properties->configs();
- }
+ json[kProperties] = table_metadata.properties.configs();
if (std::ranges::find_if(table_metadata.snapshots, [&](const auto& snapshot)
{
return snapshot->snapshot_id == table_metadata.current_snapshot_id;
diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc
index 4a1798c3..45005d8e 100644
--- a/src/iceberg/table.cc
+++ b/src/iceberg/table.cc
@@ -51,9 +51,8 @@ Status Table::Refresh() {
}
ICEBERG_ASSIGN_OR_RAISE(auto refreshed_table,
catalog_->LoadTable(identifier_));
- if (metadata_location_ != refreshed_table->metadata_location_) {
+ if (metadata_location_ != refreshed_table->metadata_file_location()) {
metadata_ = std::move(refreshed_table->metadata_);
- metadata_location_ = std::move(refreshed_table->metadata_location_);
io_ = std::move(refreshed_table->io_);
metadata_cache_ = std::make_unique<TableMetadataCache>(metadata_.get());
}
@@ -87,12 +86,14 @@ Table::sort_orders() const {
return metadata_cache_->GetSortOrdersById();
}
-const std::shared_ptr<TableProperties>& Table::properties() const {
- return metadata_->properties;
-}
+const TableProperties& Table::properties() const { return
metadata_->properties; }
+
+const std::string& Table::metadata_file_location() const { return
metadata_location_; }
const std::string& Table::location() const { return metadata_->location; }
+const TimePointMs& Table::last_updated_ms() const { return
metadata_->last_updated_ms; }
+
Result<std::shared_ptr<Snapshot>> Table::current_snapshot() const {
return metadata_->Snapshot();
}
diff --git a/src/iceberg/table.h b/src/iceberg/table.h
index 00c6deac..0745fb69 100644
--- a/src/iceberg/table.h
+++ b/src/iceberg/table.h
@@ -29,6 +29,7 @@
#include "iceberg/snapshot.h"
#include "iceberg/table_identifier.h"
#include "iceberg/type_fwd.h"
+#include "iceberg/util/timepoint.h"
namespace iceberg {
@@ -82,11 +83,19 @@ class ICEBERG_EXPORT Table {
sort_orders() const;
/// \brief Return a map of string properties for this table
- const std::shared_ptr<TableProperties>& properties() const;
+ const TableProperties& properties() const;
+
+ /// \brief Return the table's metadata file location
+ const std::string& metadata_file_location() const;
/// \brief Return the table's base location
const std::string& location() const;
+ /// \brief Get the time when this table was last updated
+ ///
+ /// \return the time when this table was last updated
+ const TimePointMs& last_updated_ms() const;
+
/// \brief Return the table's current snapshot, return NotFoundError if not
found
Result<std::shared_ptr<Snapshot>> current_snapshot() const;
diff --git a/src/iceberg/table_metadata.cc b/src/iceberg/table_metadata.cc
index 5d6cec1d..4e814e02 100644
--- a/src/iceberg/table_metadata.cc
+++ b/src/iceberg/table_metadata.cc
@@ -20,13 +20,16 @@
#include "iceberg/table_metadata.h"
#include <algorithm>
+#include <charconv>
#include <chrono>
#include <cstdint>
#include <format>
+#include <memory>
#include <optional>
#include <ranges>
#include <string>
#include <unordered_map>
+#include <utility>
#include <nlohmann/json.hpp>
@@ -41,12 +44,14 @@
#include "iceberg/table_properties.h"
#include "iceberg/table_update.h"
#include "iceberg/util/gzip_internal.h"
+#include "iceberg/util/location_util.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/uuid.h"
namespace iceberg {
namespace {
const TimePointMs kInvalidLastUpdatedMs = TimePointMs::min();
constexpr int32_t kLastAdded = -1;
+constexpr std::string_view kMetadataFolderName = "metadata";
} // namespace
std::string ToString(const SnapshotLogEntry& entry) {
@@ -142,19 +147,6 @@ bool SnapshotRefEquals(
return true;
}
-bool TablePropertiesEquals(const std::shared_ptr<TableProperties>& lhs,
- const std::shared_ptr<TableProperties>& rhs) {
- bool left_is_empty = lhs == nullptr || lhs->configs().empty();
- bool right_is_empty = rhs == nullptr || rhs->configs().empty();
- if (left_is_empty && right_is_empty) {
- return true;
- }
- if (left_is_empty || right_is_empty) {
- return false;
- }
- return lhs->configs() == rhs->configs();
-}
-
} // namespace
bool operator==(const TableMetadata& lhs, const TableMetadata& rhs) {
@@ -167,7 +159,7 @@ bool operator==(const TableMetadata& lhs, const
TableMetadata& rhs) {
SharedPtrVectorEquals(lhs.schemas, rhs.schemas) &&
lhs.default_spec_id == rhs.default_spec_id &&
lhs.last_partition_id == rhs.last_partition_id &&
- TablePropertiesEquals(lhs.properties, rhs.properties) &&
+ lhs.properties == rhs.properties &&
lhs.current_snapshot_id == rhs.current_snapshot_id &&
SharedPtrVectorEquals(lhs.snapshots, rhs.snapshots) &&
lhs.snapshot_log == rhs.snapshot_log && lhs.metadata_log ==
rhs.metadata_log &&
@@ -234,30 +226,57 @@ Result<TableMetadataCache::SnapshotsMap>
TableMetadataCache::InitSnapshotMap(
std::ranges::to<SnapshotsMap>();
}
-// TableMetadataUtil implementation
+Result<MetadataFileCodecType> TableMetadataUtil::Codec::FromString(
+ std::string_view name) {
+ std::string name_upper = StringUtils::ToUpper(name);
+ if (name_upper == kCodecTypeGzip) {
+ return MetadataFileCodecType::kGzip;
+ } else if (name_upper == kCodecTypeNone) {
+ return MetadataFileCodecType::kNone;
+ }
+ return InvalidArgument("Invalid codec name: {}", name);
+}
-Result<MetadataFileCodecType> TableMetadataUtil::CodecFromFileName(
+Result<MetadataFileCodecType> TableMetadataUtil::Codec::FromFileName(
std::string_view file_name) {
- auto pos = file_name.find_last_of(".metadata.json");
+ auto pos = file_name.find_last_of(kTableMetadataFileSuffix);
if (pos == std::string::npos) {
return InvalidArgument("{} is not a valid metadata file", file_name);
}
// We have to be backward-compatible with .metadata.json.gz files
- if (file_name.ends_with(".metadata.json.gz")) {
+ if (file_name.ends_with(kCompGzipTableMetadataFileSuffix)) {
return MetadataFileCodecType::kGzip;
}
std::string_view file_name_without_suffix = file_name.substr(0, pos);
- if (file_name_without_suffix.ends_with(".gz")) {
+ if (file_name_without_suffix.ends_with(kGzipTableMetadataFileExtension)) {
return MetadataFileCodecType::kGzip;
}
return MetadataFileCodecType::kNone;
}
+Result<std::string> TableMetadataUtil::Codec::NameToFileExtension(
+ std::string_view codec) {
+ ICEBERG_ASSIGN_OR_RAISE(MetadataFileCodecType codec_type, FromString(codec));
+ return TypeToFileExtension(codec_type);
+}
+
+std::string
TableMetadataUtil::Codec::TypeToFileExtension(MetadataFileCodecType codec) {
+ switch (codec) {
+ case MetadataFileCodecType::kGzip:
+ return std::string(kGzipTableMetadataFileSuffix);
+ case MetadataFileCodecType::kNone:
+ return std::string(kTableMetadataFileSuffix);
+ }
+ std::unreachable();
+}
+
+// TableMetadataUtil implementation
+
Result<std::unique_ptr<TableMetadata>> TableMetadataUtil::Read(
FileIO& io, const std::string& location, std::optional<size_t> length) {
- ICEBERG_ASSIGN_OR_RAISE(auto codec_type, CodecFromFileName(location));
+ ICEBERG_ASSIGN_OR_RAISE(auto codec_type, Codec::FromFileName(location));
ICEBERG_ASSIGN_OR_RAISE(auto content, io.ReadFile(location, length));
if (codec_type == MetadataFileCodecType::kGzip) {
@@ -272,6 +291,16 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataUtil::Read(
return TableMetadataFromJson(json);
}
+Result<std::string> TableMetadataUtil::Write(FileIO& io, const TableMetadata*
base,
+ const std::string&
base_metadata_location,
+ TableMetadata& metadata) {
+ int32_t version = ParseVersionFromLocation(base_metadata_location);
+ ICEBERG_ASSIGN_OR_RAISE(auto new_file_location,
+ NewTableMetadataFilePath(metadata, version + 1));
+ ICEBERG_RETURN_UNEXPECTED(Write(io, new_file_location, metadata));
+ return new_file_location;
+}
+
Status TableMetadataUtil::Write(FileIO& io, const std::string& location,
const TableMetadata& metadata) {
auto json = ToJson(metadata);
@@ -279,6 +308,57 @@ Status TableMetadataUtil::Write(FileIO& io, const
std::string& location,
return io.WriteFile(location, json_string);
}
+void TableMetadataUtil::DeleteRemovedMetadataFiles(FileIO& io, const
TableMetadata* base,
+ const TableMetadata&
metadata) {
+ if (!base) {
+ return;
+ }
+
+ bool delete_after_commit =
+
metadata.properties.Get(TableProperties::kMetadataDeleteAfterCommitEnabled);
+ if (delete_after_commit) {
+ auto current_files =
+ metadata.metadata_log |
std::ranges::to<std::unordered_set<MetadataLogEntry>>();
+ std::ranges::for_each(
+ base->metadata_log | std::views::filter([¤t_files](const auto&
entry) {
+ return !current_files.contains(entry);
+ }),
+ [&io](const auto& entry) { std::ignore =
io.DeleteFile(entry.metadata_file); });
+ }
+}
+
+int32_t TableMetadataUtil::ParseVersionFromLocation(std::string_view
metadata_location) {
+ size_t version_start = metadata_location.find_last_of('/') + 1;
+ size_t version_end = metadata_location.find('-', version_start);
+
+ int32_t version = -1;
+ if (version_end != std::string::npos) {
+ std::from_chars(metadata_location.data() + version_start,
+ metadata_location.data() + version_end, version);
+ }
+ return version;
+}
+
+Result<std::string> TableMetadataUtil::NewTableMetadataFilePath(const
TableMetadata& meta,
+ int32_t
new_version) {
+ auto codec_name =
+ meta.properties.Get<std::string>(TableProperties::kMetadataCompression);
+ ICEBERG_ASSIGN_OR_RAISE(std::string file_extension,
+ Codec::NameToFileExtension(codec_name));
+
+ std::string uuid = Uuid::GenerateV7().ToString();
+ std::string filename = std::format("{:05d}-{}{}", new_version, uuid,
file_extension);
+
+ auto metadata_location =
+
meta.properties.Get<std::string>(TableProperties::kWriteMetadataLocation);
+ if (!metadata_location.empty()) {
+ return std::format("{}/{}",
LocationUtil::StripTrailingSlash(metadata_location),
+ filename);
+ } else {
+ return std::format("{}/{}/{}", meta.location, kMetadataFolderName,
filename);
+ }
+}
+
// TableMetadataBuilder implementation
struct TableMetadataBuilder::Impl {
@@ -295,8 +375,8 @@ struct TableMetadataBuilder::Impl {
std::optional<int32_t> last_added_spec_id;
// Metadata location tracking
- std::optional<std::string> metadata_location;
- std::optional<std::string> previous_metadata_location;
+ std::string metadata_location;
+ std::string previous_metadata_location;
// indexes for convenience
std::unordered_map<int32_t, std::shared_ptr<Schema>> schemas_by_id;
@@ -314,11 +394,11 @@ struct TableMetadataBuilder::Impl {
metadata.current_snapshot_id = Snapshot::kInvalidSnapshotId;
metadata.default_sort_order_id = SortOrder::kInitialSortOrderId;
metadata.next_row_id = TableMetadata::kInitialRowId;
- metadata.properties = TableProperties::default_properties();
}
// Constructor from existing metadata
- explicit Impl(const TableMetadata* base_metadata)
+ explicit Impl(const TableMetadata* base_metadata,
+ std::string base_metadata_location = "")
: base(base_metadata), metadata(*base_metadata) {
// Initialize index maps from base metadata
for (const auto& schema : metadata.schemas) {
@@ -334,6 +414,8 @@ struct TableMetadataBuilder::Impl {
for (const auto& order : metadata.sort_orders) {
sort_orders_by_id.emplace(order->order_id(), order);
}
+
+ metadata.last_updated_ms = kInvalidLastUpdatedMs;
}
};
@@ -363,12 +445,22 @@ std::unique_ptr<TableMetadataBuilder>
TableMetadataBuilder::BuildFrom(
TableMetadataBuilder& TableMetadataBuilder::SetMetadataLocation(
std::string_view metadata_location) {
- throw IcebergError(std::format("{} not implemented", __FUNCTION__));
+ impl_->metadata_location = std::string(metadata_location);
+ if (impl_->base != nullptr) {
+ // Carry over lastUpdatedMillis from base and set previousFileLocation to
null to
+ // avoid writing a new metadata log entry.
+ // This is safe since setting metadata location doesn't cause any changes
and no other
+ // changes can be added when metadata location is configured
+ impl_->previous_metadata_location = std::string();
+ impl_->metadata.last_updated_ms = impl_->base->last_updated_ms;
+ }
+ return *this;
}
TableMetadataBuilder& TableMetadataBuilder::SetPreviousMetadataLocation(
std::string_view previous_metadata_location) {
- throw IcebergError(std::format("{} not implemented", __FUNCTION__));
+ impl_->previous_metadata_location = std::string(previous_metadata_location);
+ return *this;
}
TableMetadataBuilder& TableMetadataBuilder::AssignUUID() {
@@ -626,7 +718,7 @@ TableMetadataBuilder& TableMetadataBuilder::SetProperties(
// Add all updated properties to the metadata properties
for (const auto& [key, value] : updated) {
- impl_->metadata.properties->mutable_configs()[key] = value;
+ impl_->metadata.properties.mutable_configs()[key] = value;
}
// Record the change
@@ -644,7 +736,7 @@ TableMetadataBuilder&
TableMetadataBuilder::RemoveProperties(
// Remove each property from the metadata properties
for (const auto& key : removed) {
- impl_->metadata.properties->mutable_configs().erase(key);
+ impl_->metadata.properties.mutable_configs().erase(key);
}
// Record the change
@@ -679,10 +771,23 @@ Result<std::unique_ptr<TableMetadata>>
TableMetadataBuilder::Build() {
std::chrono::system_clock::now().time_since_epoch())};
}
- // 4. Create and return the TableMetadata
- auto result = std::make_unique<TableMetadata>(std::move(impl_->metadata));
+ // 4. Buildup metadata_log from base metadata
+ int32_t max_metadata_log_size =
+
impl_->metadata.properties.Get(TableProperties::kMetadataPreviousVersionsMax);
+ if (impl_->base != nullptr && !impl_->previous_metadata_location.empty()) {
+ impl_->metadata.metadata_log.emplace_back(impl_->base->last_updated_ms,
+
impl_->previous_metadata_location);
+ }
+ if (impl_->metadata.metadata_log.size() > max_metadata_log_size) {
+ impl_->metadata.metadata_log.erase(
+ impl_->metadata.metadata_log.begin(),
+ impl_->metadata.metadata_log.end() - max_metadata_log_size);
+ }
+
+ // TODO(anyone): 5. update snapshot_log
- return result;
+ // 6. Create and return the TableMetadata
+ return std::make_unique<TableMetadata>(std::move(impl_->metadata));
}
} // namespace iceberg
diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h
index bc73a0ba..f428fd34 100644
--- a/src/iceberg/table_metadata.h
+++ b/src/iceberg/table_metadata.h
@@ -29,6 +29,7 @@
#include <vector>
#include "iceberg/iceberg_export.h"
+#include "iceberg/table_properties.h"
#include "iceberg/type_fwd.h"
#include "iceberg/util/error_collector.h"
#include "iceberg/util/lazy.h"
@@ -99,7 +100,7 @@ struct ICEBERG_EXPORT TableMetadata {
/// The highest assigned partition field ID across all partition specs for
the table
int32_t last_partition_id;
/// A string to string map of table properties
- std::shared_ptr<TableProperties> properties;
+ TableProperties properties;
/// ID of the current table snapshot
int64_t current_snapshot_id;
/// A list of valid snapshots
@@ -460,11 +461,37 @@ enum class ICEBERG_EXPORT MetadataFileCodecType {
/// \brief Utility class for table metadata
struct ICEBERG_EXPORT TableMetadataUtil {
- /// \brief Get the codec type from the table metadata file name.
- ///
- /// \param file_name The name of the table metadata file.
- /// \return The codec type of the table metadata file.
- static Result<MetadataFileCodecType> CodecFromFileName(std::string_view
file_name);
+ struct ICEBERG_EXPORT Codec {
+ /// \brief Returns the MetadataFileCodecType corresponding to the given
string.
+ ///
+ /// \param name The string to parse.
+ /// \return The MetadataFileCodecType corresponding to the given string.
+ static Result<MetadataFileCodecType> FromString(std::string_view name);
+
+ /// \brief Get the codec type from the table metadata file name.
+ ///
+ /// \param file_name The name of the table metadata file.
+ /// \return The codec type of the table metadata file.
+ static Result<MetadataFileCodecType> FromFileName(std::string_view
file_name);
+
+ /// \brief Get the file extension from the codec type.
+ /// \param codec The codec name.
+ /// \return The file extension of the codec.
+ static Result<std::string> NameToFileExtension(std::string_view codec);
+
+ /// \brief Get the file extension from the codec type.
+ /// \param codec The codec type.
+ /// \return The file extension of the codec.
+ static std::string TypeToFileExtension(MetadataFileCodecType codec);
+
+ static constexpr std::string_view kTableMetadataFileSuffix =
".metadata.json";
+ static constexpr std::string_view kCompGzipTableMetadataFileSuffix =
+ ".metadata.json.gz";
+ static constexpr std::string_view kGzipTableMetadataFileSuffix =
".gz.metadata.json";
+ static constexpr std::string_view kGzipTableMetadataFileExtension = ".gz";
+ static constexpr std::string_view kCodecTypeGzip = "GZIP";
+ static constexpr std::string_view kCodecTypeNone = "NONE";
+ };
/// \brief Read the table metadata file.
///
@@ -476,6 +503,32 @@ struct ICEBERG_EXPORT TableMetadataUtil {
class FileIO& io, const std::string& location,
std::optional<size_t> length = std::nullopt);
+ /// \brief Write a new metadata file to storage.
+ ///
+ /// Serializes the table metadata to JSON and writes it to a new metadata
+ /// file. If no location is specified in the metadata, generates a new
+ /// file path based on the version number.
+ ///
+ /// \param io The FileIO instance for writing files
+ /// \param base The base metadata (can be null for new tables)
+ /// \param metadata The metadata to write, which will be updated with the
new location
+ /// \return The new metadata location
+ static Result<std::string> Write(FileIO& io, const TableMetadata* base,
+ const std::string& base_metadata_location,
+ TableMetadata& metadata);
+
+ /// \brief Delete removed metadata files based on retention policy.
+ ///
+ /// Removes obsolete metadata files that are no longer referenced in the
+ /// current metadata log, based on the metadata.delete-after-commit.enabled
+ /// property.
+ ///
+ /// \param io The FileIO instance for deleting files
+ /// \param base The previous metadata version
+ /// \param metadata The current metadata containing the updated log
+ static void DeleteRemovedMetadataFiles(FileIO& io, const TableMetadata* base,
+ const TableMetadata& metadata);
+
/// \brief Write the table metadata to a file.
///
/// \param io The file IO to use to write the table metadata.
@@ -483,6 +536,36 @@ struct ICEBERG_EXPORT TableMetadataUtil {
/// \param metadata The table metadata to write.
static Status Write(FileIO& io, const std::string& location,
const TableMetadata& metadata);
+
+ private:
+ /// \brief Parse the version number from a metadata file location.
+ ///
+ /// Extracts the version number from a metadata file path which follows
+ /// the format: vvvvv-uuid.metadata.json where vvvvv is the zero-padded
+ /// version number.
+ ///
+ /// \param metadata_location The metadata file location string
+ /// \return The parsed version number, or -1 if parsing fails or the
+ /// location doesn't contain a version
+ static int32_t ParseVersionFromLocation(std::string_view metadata_location);
+
+ /// \brief Generate a new metadata file path for a table.
+ ///
+ /// \param metadata The table metadata.
+ /// \param version The version number for the new metadata file.
+ /// \return The generated metadata file path.
+ static Result<std::string> NewTableMetadataFilePath(const TableMetadata&
metadata,
+ int32_t version);
};
} // namespace iceberg
+
+namespace std {
+template <>
+struct hash<iceberg::MetadataLogEntry> {
+ size_t operator()(const iceberg::MetadataLogEntry& m) const noexcept {
+ return std::hash<std::string>{}(m.metadata_file);
+ }
+};
+
+} // namespace std
diff --git a/src/iceberg/table_properties.cc b/src/iceberg/table_properties.cc
index 035a6dda..96633bc2 100644
--- a/src/iceberg/table_properties.cc
+++ b/src/iceberg/table_properties.cc
@@ -31,14 +31,12 @@ const std::unordered_set<std::string>&
TableProperties::reserved_properties() {
return kReservedProperties;
}
-std::unique_ptr<TableProperties> TableProperties::default_properties() {
- return std::unique_ptr<TableProperties>(new TableProperties());
-}
+TableProperties TableProperties::default_properties() { return {}; }
-std::unique_ptr<TableProperties> TableProperties::FromMap(
+TableProperties TableProperties::FromMap(
std::unordered_map<std::string, std::string> properties) {
- auto table_properties = std::unique_ptr<TableProperties>(new
TableProperties());
- table_properties->configs_ = std::move(properties);
+ TableProperties table_properties;
+ table_properties.configs_ = std::move(properties);
return table_properties;
}
diff --git a/src/iceberg/table_properties.h b/src/iceberg/table_properties.h
index e9131cd9..debe61da 100644
--- a/src/iceberg/table_properties.h
+++ b/src/iceberg/table_properties.h
@@ -252,7 +252,7 @@ class ICEBERG_EXPORT TableProperties : public
ConfigBase<TableProperties> {
inline static Entry<int32_t>
kMinSnapshotsToKeep{"history.expire.min-snapshots-to-keep",
1};
inline static Entry<int64_t> kMaxRefAgeMs{"history.expire.max-ref-age-ms",
-
std::numeric_limits<int64_t>::max()};
+
(std::numeric_limits<int64_t>::max)()};
// Delete/Update/Merge properties
@@ -289,17 +289,17 @@ class ICEBERG_EXPORT TableProperties : public
ConfigBase<TableProperties> {
/// \brief Create a default TableProperties instance.
///
/// \return A unique pointer to a TableProperties instance with default
values
- static std::unique_ptr<TableProperties> default_properties();
+ static TableProperties default_properties();
/// \brief Create a TableProperties instance from a map of key-value pairs.
///
/// \param properties The map containing property key-value pairs
/// \return A unique pointer to a TableProperties instance
- static std::unique_ptr<TableProperties> FromMap(
- std::unordered_map<std::string, std::string> properties);
+ static TableProperties FromMap(std::unordered_map<std::string, std::string>
properties);
- private:
- TableProperties() = default;
+ bool operator==(const TableProperties& other) const {
+ return configs_ == other.configs_;
+ }
};
} // namespace iceberg
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index c831ce02..2c4e0f51 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -106,6 +106,7 @@ add_iceberg_test(util_test
decimal_test.cc
endian_test.cc
formatter_test.cc
+ location_util_test.cc
string_util_test.cc
truncate_util_test.cc
uuid_test.cc
diff --git a/src/iceberg/test/config_test.cc b/src/iceberg/test/config_test.cc
index ec3b3915..b7f378bf 100644
--- a/src/iceberg/test/config_test.cc
+++ b/src/iceberg/test/config_test.cc
@@ -96,13 +96,26 @@ TEST(ConfigTest, BasicOperations) {
ASSERT_EQ(config->Get(TestConfig::kEnumConfig), TestEnum::VALUE2);
ASSERT_EQ(config->Get(TestConfig::kDoubleConfig), 2.99);
+ // Test setting values again
+ config->Set(TestConfig::kStringConfig, std::string("newer_value"));
+ config->Set(TestConfig::kIntConfig, 200);
+ config->Set(TestConfig::kBoolConfig, false);
+ config->Set(TestConfig::kEnumConfig, TestEnum::VALUE1);
+ config->Set(TestConfig::kDoubleConfig, 3.99);
+
+ ASSERT_EQ(config->Get(TestConfig::kStringConfig), "newer_value");
+ ASSERT_EQ(config->Get(TestConfig::kIntConfig), 200);
+ ASSERT_EQ(config->Get(TestConfig::kBoolConfig), false);
+ ASSERT_EQ(config->Get(TestConfig::kEnumConfig), TestEnum::VALUE1);
+ ASSERT_EQ(config->Get(TestConfig::kDoubleConfig), 3.99);
+
// Test unsetting a value
config->Unset(TestConfig::kIntConfig);
config->Unset(TestConfig::kEnumConfig);
config->Unset(TestConfig::kDoubleConfig);
ASSERT_EQ(config->Get(TestConfig::kIntConfig), 25);
- ASSERT_EQ(config->Get(TestConfig::kStringConfig), "new_value");
- ASSERT_EQ(config->Get(TestConfig::kBoolConfig), true);
+ ASSERT_EQ(config->Get(TestConfig::kStringConfig), "newer_value");
+ ASSERT_EQ(config->Get(TestConfig::kBoolConfig), false);
ASSERT_EQ(config->Get(TestConfig::kEnumConfig), TestEnum::VALUE1);
ASSERT_EQ(config->Get(TestConfig::kDoubleConfig), 3.14);
diff --git a/src/iceberg/test/in_memory_catalog_test.cc
b/src/iceberg/test/in_memory_catalog_test.cc
index f7e2f50a..d1a8ccef 100644
--- a/src/iceberg/test/in_memory_catalog_test.cc
+++ b/src/iceberg/test/in_memory_catalog_test.cc
@@ -20,6 +20,8 @@
#include "iceberg/catalog/memory/in_memory_catalog.h"
#include <filesystem>
+#include <string>
+#include <unordered_map>
#include <arrow/filesystem/localfs.h>
#include <gmock/gmock.h>
@@ -28,10 +30,14 @@
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
#include "iceberg/schema.h"
#include "iceberg/table.h"
+#include "iceberg/table_identifier.h"
#include "iceberg/table_metadata.h"
+#include "iceberg/table_requirement.h"
+#include "iceberg/table_update.h"
#include "iceberg/test/matchers.h"
#include "iceberg/test/mock_catalog.h"
#include "iceberg/test/test_resource.h"
+#include "iceberg/util/uuid.h"
namespace iceberg {
@@ -63,7 +69,7 @@ class InMemoryCatalogTest : public ::testing::Test {
info->test_suite_name(), info->name(),
table_name);
// generate a unique directory for the table
std::error_code ec;
- std::filesystem::create_directories(table_location, ec);
+ std::filesystem::create_directories(table_location + "metadata", ec);
if (ec) {
throw std::runtime_error(
std::format("Failed to create temporary directory: {}, error
message: {}",
@@ -166,6 +172,65 @@ TEST_F(InMemoryCatalogTest, RefreshTable) {
ASSERT_EQ(loaded_table->current_snapshot().value()->snapshot_id, 2);
}
+TEST_F(InMemoryCatalogTest, UpdateTable) {
+ // First, create and register a table
+ TableIdentifier table_ident{.ns = {}, .name = "t1"};
+
+ ICEBERG_UNWRAP_OR_FAIL(auto metadata,
+
ReadTableMetadataFromResource("TableMetadataV2Valid.json"));
+
+ auto table_location = GenerateTestTableLocation(table_ident.name);
+ auto metadata_location = std::format("{}/metadata/00001-{}.metadata.json",
+ table_location,
Uuid::GenerateV7().ToString());
+ metadata->location = table_location;
+ auto status = TableMetadataUtil::Write(*file_io_, metadata_location,
*metadata);
+ EXPECT_THAT(status, IsOk());
+
+ auto table = catalog_->RegisterTable(table_ident, metadata_location);
+ EXPECT_THAT(table, IsOk());
+ ASSERT_EQ(table.value()->name().name, "t1");
+ ASSERT_EQ(table.value()->metadata_file_location(), metadata_location);
+
+ // Prepare updates - add a new property
+ std::vector<std::unique_ptr<TableUpdate>> updates;
+ auto property_update = std::make_unique<table::SetProperties>(
+ std::unordered_map<std::string, std::string>{{"property2", "value2"}});
+ updates.push_back(std::move(property_update));
+
+ // Prepare requirements - assert table must exist
+ std::vector<std::unique_ptr<TableRequirement>> requirements;
+
requirements.push_back(std::make_unique<table::AssertUUID>(metadata->table_uuid));
+
+ // Perform the update on a nonexist table
+ TableIdentifier nonexist_table_ident{.ns = {}, .name = "nonexist_table"};
+ auto res = catalog_->UpdateTable(nonexist_table_ident,
std::move(requirements),
+ std::move(updates));
+ EXPECT_THAT(res, IsError(ErrorKind::kNotFound));
+
+ // Verify requirements failed on an exist table
+ std::vector<std::unique_ptr<TableRequirement>> bad_requirements;
+
bad_requirements.push_back(std::make_unique<table::AssertUUID>("invalid-uuid"));
+ res =
+ catalog_->UpdateTable(table_ident, std::move(bad_requirements),
std::move(updates));
+ EXPECT_THAT(res, IsError(ErrorKind::kCommitFailed));
+
+ // Perform the update
+ auto update_result = catalog_->UpdateTable(table_ident, requirements,
updates);
+ EXPECT_THAT(update_result, IsOk());
+
+ // Verify the update by loading the table and checking properties
+ auto load_result = catalog_->LoadTable(table_ident);
+ EXPECT_THAT(load_result, IsOk());
+
+ auto updated_table = std::move(load_result.value());
+
+ // Verify that metadata file was updated (should have a new version)
+ EXPECT_EQ(table.value()->uuid(), updated_table->uuid());
+ EXPECT_GT(updated_table->last_updated_ms(),
table.value()->last_updated_ms());
+ EXPECT_THAT(updated_table->metadata_file_location(),
+ testing::HasSubstr("metadata/00002-"));
+}
+
TEST_F(InMemoryCatalogTest, DropTable) {
TableIdentifier tableIdent{.ns = {}, .name = "t1"};
auto result = catalog_->DropTable(tableIdent, false);
diff --git a/src/iceberg/test/location_util_test.cc
b/src/iceberg/test/location_util_test.cc
new file mode 100644
index 00000000..7098868a
--- /dev/null
+++ b/src/iceberg/test/location_util_test.cc
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/location_util.h"
+
+#include <gtest/gtest.h>
+
+namespace iceberg {
+
+TEST(LocationUtilTest, StripTrailingSlash) {
+ // Test normal paths with trailing slashes
+ ASSERT_EQ("/path/to/dir", LocationUtil::StripTrailingSlash("/path/to/dir/"));
+ ASSERT_EQ("/path/to/dir",
LocationUtil::StripTrailingSlash("/path/to/dir//"));
+ ASSERT_EQ("/path/to/dir",
LocationUtil::StripTrailingSlash("/path/to/dir///"));
+
+ // Test paths without trailing slashes
+ ASSERT_EQ("/path/to/dir", LocationUtil::StripTrailingSlash("/path/to/dir"));
+ ASSERT_EQ("/path/to/file.txt",
LocationUtil::StripTrailingSlash("/path/to/file.txt"));
+
+ // Test root path
+ ASSERT_EQ("", LocationUtil::StripTrailingSlash("/"));
+ ASSERT_EQ("", LocationUtil::StripTrailingSlash("//"));
+
+ // Test empty string
+ ASSERT_EQ("", LocationUtil::StripTrailingSlash(""));
+
+ // Test URLs with protocols
+ ASSERT_EQ("http://example.com",
+ LocationUtil::StripTrailingSlash("http://example.com/"));
+ ASSERT_EQ("https://example.com/path",
+ LocationUtil::StripTrailingSlash("https://example.com/path/"));
+
+ // Test that protocol endings are preserved
+ ASSERT_EQ("http://", LocationUtil::StripTrailingSlash("http://"));
+ ASSERT_EQ("https://", LocationUtil::StripTrailingSlash("https://"));
+ ASSERT_EQ("s3://", LocationUtil::StripTrailingSlash("s3://"));
+
+ // Test paths with protocol-like substrings in the middle
+ ASSERT_EQ("/path/http://test",
LocationUtil::StripTrailingSlash("/path/http://test/"));
+ ASSERT_EQ("/path/https://test",
+ LocationUtil::StripTrailingSlash("/path/https://test/"));
+
+ // Test multiple slashes not at the end
+ ASSERT_EQ("/path//to/dir",
LocationUtil::StripTrailingSlash("/path//to/dir/"));
+ ASSERT_EQ("/path///to/dir",
LocationUtil::StripTrailingSlash("/path///to/dir/"));
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 6a2a9e9a..f12b43af 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -85,6 +85,7 @@ iceberg_tests = {
'decimal_test.cc',
'endian_test.cc',
'formatter_test.cc',
+ 'location_util_test.cc',
'string_util_test.cc',
'truncate_util_test.cc',
'uuid_test.cc',
diff --git a/src/iceberg/test/metadata_io_test.cc
b/src/iceberg/test/metadata_io_test.cc
index a7ce162d..ea4555d3 100644
--- a/src/iceberg/test/metadata_io_test.cc
+++ b/src/iceberg/test/metadata_io_test.cc
@@ -17,16 +17,20 @@
* under the License.
*/
+#include <filesystem>
+
#include <arrow/filesystem/localfs.h>
#include <arrow/io/compressed.h>
#include <arrow/io/file.h>
#include <arrow/util/compression.h>
+#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <nlohmann/json.hpp>
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
#include "iceberg/file_io.h"
#include "iceberg/json_internal.h"
+#include "iceberg/result.h"
#include "iceberg/schema.h"
#include "iceberg/snapshot.h"
#include "iceberg/table_metadata.h"
@@ -42,7 +46,10 @@ class MetadataIOTest : public TempFileTestBase {
TempFileTestBase::SetUp();
io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(
std::make_shared<::arrow::fs::LocalFileSystem>());
- temp_filepath_ = CreateNewTempFilePathWithSuffix(".metadata.json");
+ location_ = CreateTempDirectory();
+ temp_filepath_ = std::format("{}/{}", location_,
"metadata/00000-xxx.metadata.json");
+ ASSERT_TRUE(
+ std::filesystem::create_directories(std::format("{}/metadata",
location_)));
}
TableMetadata PrepareMetadata() {
@@ -53,7 +60,7 @@ class MetadataIOTest : public TempFileTestBase {
return TableMetadata{.format_version = 1,
.table_uuid = "1234567890",
- .location = "s3://bucket/path",
+ .location = location_,
.last_sequence_number = 0,
.schemas = {schema},
.current_schema_id = 1,
@@ -73,6 +80,7 @@ class MetadataIOTest : public TempFileTestBase {
}
std::shared_ptr<iceberg::FileIO> io_;
+ std::string location_;
std::string temp_filepath_;
};
@@ -126,4 +134,74 @@ TEST_F(MetadataIOTest, ReadWriteCompressedMetadata) {
EXPECT_EQ(*metadata_read, metadata);
}
+TEST_F(MetadataIOTest, WriteMetadataWithBase) {
+ TableMetadata base = PrepareMetadata();
+
+ {
+ // Invalid base metadata_file_location, set version to 0
+ TableMetadata new_metadata = PrepareMetadata();
+ ICEBERG_UNWRAP_OR_FAIL(
+ auto new_metadata_location,
+ TableMetadataUtil::Write(*io_, &base, "invalid_location",
new_metadata));
+ EXPECT_THAT(new_metadata_location, testing::HasSubstr("/metadata/00000-"));
+ }
+
+ // Reset base metadata_file_location
+ // base.metadata_file_location = temp_filepath_;
+
+ {
+ // Specify write location property
+ TableMetadata new_metadata = PrepareMetadata();
+ new_metadata.properties.Set(TableProperties::kWriteMetadataLocation,
location_);
+ ICEBERG_UNWRAP_OR_FAIL(
+ auto new_metadata_location,
+ TableMetadataUtil::Write(*io_, &base, temp_filepath_, new_metadata));
+ EXPECT_THAT(new_metadata_location,
+ testing::HasSubstr(std::format("{}/00001-", location_)));
+ }
+
+ {
+ // Default write location
+ TableMetadata new_metadata = PrepareMetadata();
+ ICEBERG_UNWRAP_OR_FAIL(
+ auto new_metadata_location,
+ TableMetadataUtil::Write(*io_, &base, temp_filepath_, new_metadata));
+ EXPECT_THAT(new_metadata_location,
+ testing::HasSubstr(std::format("{}/metadata/00001-",
location_)));
+ }
+}
+
+TEST_F(MetadataIOTest, RemoveDeletedMetadataFiles) {
+ TableMetadata base1 = PrepareMetadata();
+ base1.properties.Set(TableProperties::kMetadataPreviousVersionsMax, 1);
+ ICEBERG_UNWRAP_OR_FAIL(auto base1_metadata_location,
+ TableMetadataUtil::Write(*io_, nullptr, "", base1));
+
+ ICEBERG_UNWRAP_OR_FAIL(auto base2,
+ TableMetadataBuilder::BuildFrom(&base1)
+
->SetPreviousMetadataLocation(base1_metadata_location)
+ .Build());
+ ICEBERG_UNWRAP_OR_FAIL(
+ auto base2_metadata_location,
+ TableMetadataUtil::Write(*io_, &base1, base1_metadata_location, *base2));
+
+ ICEBERG_UNWRAP_OR_FAIL(auto new_metadata,
+ TableMetadataBuilder::BuildFrom(base2.get())
+
->SetPreviousMetadataLocation(base2_metadata_location)
+ .Build());
+ ICEBERG_UNWRAP_OR_FAIL(auto new_metadata_location,
+ TableMetadataUtil::Write(
+ *io_, base2.get(), base2_metadata_location,
*new_metadata));
+
+ // The first metadata file should not be deleted
+
new_metadata->properties.Set(TableProperties::kMetadataDeleteAfterCommitEnabled,
false);
+ TableMetadataUtil::DeleteRemovedMetadataFiles(*io_, base2.get(),
*new_metadata);
+ EXPECT_TRUE(std::filesystem::exists(base1_metadata_location));
+
+ // The first metadata file should be deleted
+
new_metadata->properties.Set(TableProperties::kMetadataDeleteAfterCommitEnabled,
true);
+ TableMetadataUtil::DeleteRemovedMetadataFiles(*io_, base2.get(),
*new_metadata);
+ EXPECT_FALSE(std::filesystem::exists(base1_metadata_location));
+}
+
} // namespace iceberg
diff --git a/src/iceberg/test/table_metadata_builder_test.cc
b/src/iceberg/test/table_metadata_builder_test.cc
index 5a270c49..bb9ce8c0 100644
--- a/src/iceberg/test/table_metadata_builder_test.cc
+++ b/src/iceberg/test/table_metadata_builder_test.cc
@@ -21,9 +21,11 @@
#include <string>
#include <vector>
+#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "iceberg/partition_spec.h"
+#include "iceberg/result.h"
#include "iceberg/schema.h"
#include "iceberg/snapshot.h"
#include "iceberg/sort_field.h"
@@ -85,11 +87,14 @@ TEST(TableMetadataBuilderTest, BuildFromEmpty) {
EXPECT_EQ(metadata->default_spec_id, PartitionSpec::kInitialSpecId);
EXPECT_EQ(metadata->default_sort_order_id, SortOrder::kInitialSortOrderId);
EXPECT_EQ(metadata->current_snapshot_id, Snapshot::kInvalidSnapshotId);
+ EXPECT_TRUE(metadata->metadata_log.empty());
}
TEST(TableMetadataBuilderTest, BuildFromExisting) {
auto base = CreateBaseMetadata();
+ std::string base_metadata_location =
"s3://bucket/test/00010-xxx.metadata.json";
auto builder = TableMetadataBuilder::BuildFrom(base.get());
+ builder->SetPreviousMetadataLocation(base_metadata_location);
ASSERT_NE(builder, nullptr);
ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
@@ -98,6 +103,50 @@ TEST(TableMetadataBuilderTest, BuildFromExisting) {
EXPECT_EQ(metadata->format_version, 2);
EXPECT_EQ(metadata->table_uuid, "test-uuid-1234");
EXPECT_EQ(metadata->location, "s3://bucket/test");
+ ASSERT_EQ(1, metadata->metadata_log.size());
+ EXPECT_EQ(base_metadata_location, metadata->metadata_log[0].metadata_file);
+ EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[0].timestamp_ms);
+}
+
+TEST(TableMetadataBuilderTest, BuildupMetadataLog) {
+ auto base = CreateBaseMetadata();
+ std::string base_metadata_location =
"s3://bucket/test/00010-xxx.metadata.json";
+ base->metadata_log = {
+ {.timestamp_ms = TimePointMs{std::chrono::milliseconds(100)},
+ .metadata_file = "s3://bucket/test/00000-aaa.metadata.json"},
+ {.timestamp_ms = TimePointMs{std::chrono::milliseconds(200)},
+ .metadata_file = "s3://bucket/test/00001-bbb.metadata.json"},
+ };
+
+ {
+ // Base metadata_log size less than max size
+ base->properties.Set(TableProperties::kMetadataPreviousVersionsMax, 3);
+ auto builder = TableMetadataBuilder::BuildFrom(base.get());
+ builder->SetPreviousMetadataLocation(base_metadata_location);
+ ASSERT_NE(builder, nullptr);
+ ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
+ EXPECT_EQ(3, metadata->metadata_log.size());
+ EXPECT_EQ(base->metadata_log[0].metadata_file,
+ metadata->metadata_log[0].metadata_file);
+ EXPECT_EQ(base->metadata_log[1].metadata_file,
+ metadata->metadata_log[1].metadata_file);
+ EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[2].timestamp_ms);
+ EXPECT_EQ(base_metadata_location, metadata->metadata_log[2].metadata_file);
+ }
+
+ {
+ // Base metadata_log size greater than max size
+ base->properties.Set(TableProperties::kMetadataPreviousVersionsMax, 2);
+ auto builder = TableMetadataBuilder::BuildFrom(base.get());
+ builder->SetPreviousMetadataLocation(base_metadata_location);
+ ASSERT_NE(builder, nullptr);
+ ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
+ EXPECT_EQ(2, metadata->metadata_log.size());
+ EXPECT_EQ(base->metadata_log[1].metadata_file,
+ metadata->metadata_log[0].metadata_file);
+ EXPECT_EQ(base->last_updated_ms, metadata->metadata_log[1].timestamp_ms);
+ EXPECT_EQ(base_metadata_location, metadata->metadata_log[1].metadata_file);
+ }
}
// Test AssignUUID
@@ -147,9 +196,9 @@ TEST(TableMetadataBuilderTest, SetProperties) {
builder->SetProperties({{"key1", "value1"}, {"key2", "value2"}});
ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
- EXPECT_EQ(metadata->properties->configs().size(), 2);
- EXPECT_EQ(metadata->properties->configs().at("key1"), "value1");
- EXPECT_EQ(metadata->properties->configs().at("key2"), "value2");
+ EXPECT_EQ(metadata->properties.configs().size(), 2);
+ EXPECT_EQ(metadata->properties.configs().at("key1"), "value1");
+ EXPECT_EQ(metadata->properties.configs().at("key2"), "value2");
// Update existing property and add new one
builder = TableMetadataBuilder::BuildFromEmpty(2);
@@ -157,9 +206,9 @@ TEST(TableMetadataBuilderTest, SetProperties) {
builder->SetProperties({{"key1", "new_value1"}, {"key3", "value3"}});
ICEBERG_UNWRAP_OR_FAIL(metadata, builder->Build());
- EXPECT_EQ(metadata->properties->configs().size(), 2);
- EXPECT_EQ(metadata->properties->configs().at("key1"), "new_value1");
- EXPECT_EQ(metadata->properties->configs().at("key3"), "value3");
+ EXPECT_EQ(metadata->properties.configs().size(), 2);
+ EXPECT_EQ(metadata->properties.configs().at("key1"), "new_value1");
+ EXPECT_EQ(metadata->properties.configs().at("key3"), "value3");
}
TEST(TableMetadataBuilderTest, RemoveProperties) {
@@ -168,9 +217,9 @@ TEST(TableMetadataBuilderTest, RemoveProperties) {
builder->RemoveProperties({"key2", "key4"}); // key4 does not exist
ICEBERG_UNWRAP_OR_FAIL(auto metadata, builder->Build());
- EXPECT_EQ(metadata->properties->configs().size(), 2);
- EXPECT_EQ(metadata->properties->configs().at("key1"), "value1");
- EXPECT_EQ(metadata->properties->configs().at("key3"), "value3");
+ EXPECT_EQ(metadata->properties.configs().size(), 2);
+ EXPECT_EQ(metadata->properties.configs().at("key1"), "value1");
+ EXPECT_EQ(metadata->properties.configs().at("key3"), "value3");
}
TEST(TableMetadataBuilderTest, UpgradeFormatVersion) {
diff --git a/src/iceberg/util/config.h b/src/iceberg/util/config.h
index 7e6742e9..5adbc96d 100644
--- a/src/iceberg/util/config.h
+++ b/src/iceberg/util/config.h
@@ -89,7 +89,7 @@ class ConfigBase {
template <typename T>
ConfigBase& Set(const Entry<T>& entry, const T& val) {
- configs_.emplace(entry.key_, entry.to_str_(val));
+ configs_[entry.key_] = entry.to_str_(val);
return *this;
}
diff --git a/src/iceberg/util/location_util.h b/src/iceberg/util/location_util.h
new file mode 100644
index 00000000..547dd5ce
--- /dev/null
+++ b/src/iceberg/util/location_util.h
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#include "iceberg/iceberg_export.h"
+
+namespace iceberg {
+
+class ICEBERG_EXPORT LocationUtil {
+ public:
+ static std::string StripTrailingSlash(const std::string& path) {
+ if (path.empty()) {
+ return "";
+ }
+
+ std::string_view result = path;
+ while (result.ends_with("/") && !result.ends_with("://")) {
+ result.remove_suffix(1);
+ }
+ return std::string(result);
+ }
+};
+
+} // namespace iceberg
diff --git a/src/iceberg/util/meson.build b/src/iceberg/util/meson.build
index 0e050847..9f327753 100644
--- a/src/iceberg/util/meson.build
+++ b/src/iceberg/util/meson.build
@@ -28,6 +28,7 @@ install_headers(
'formatter.h',
'int128.h',
'lazy.h',
+ 'location_util.h',
'macros.h',
'partition_value_util.h',
'string_util.h',