This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 39a9c8b3 feat: add UpdateSchema interface skeleton (#460)
39a9c8b3 is described below

commit 39a9c8b3129770b465e0c21f51ac4d90c904d85b
Author: Guotao Yu <[email protected]>
AuthorDate: Wed Dec 31 16:46:58 2025 +0800

    feat: add UpdateSchema interface skeleton (#460)
    
    - Define `UpdateSchema` class interface with full API documentation
---
 src/iceberg/CMakeLists.txt          |   1 +
 src/iceberg/meson.build             |   1 +
 src/iceberg/table.cc                |  12 ++
 src/iceberg/table.h                 |   6 +
 src/iceberg/transaction.cc          |  15 ++
 src/iceberg/transaction.h           |   4 +
 src/iceberg/type_fwd.h              |   1 +
 src/iceberg/update/meson.build      |   1 +
 src/iceberg/update/pending_update.h |   1 +
 src/iceberg/update/update_schema.cc | 207 +++++++++++++++++++++
 src/iceberg/update/update_schema.h  | 356 ++++++++++++++++++++++++++++++++++++
 11 files changed, 605 insertions(+)

diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index bc7182ae..a6b836c4 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -83,6 +83,7 @@ set(ICEBERG_SOURCES
     update/pending_update.cc
     update/update_partition_spec.cc
     update/update_properties.cc
+    update/update_schema.cc
     update/update_sort_order.cc
     util/bucket_util.cc
     util/content_file_util.cc
diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build
index 55349d8d..34538bde 100644
--- a/src/iceberg/meson.build
+++ b/src/iceberg/meson.build
@@ -104,6 +104,7 @@ iceberg_sources = files(
     'update/pending_update.cc',
     'update/update_partition_spec.cc',
     'update/update_properties.cc',
+    'update/update_schema.cc',
     'update/update_sort_order.cc',
     'util/bucket_util.cc',
     'util/content_file_util.cc',
diff --git a/src/iceberg/table.cc b/src/iceberg/table.cc
index 6b4d317b..ee3ce594 100644
--- a/src/iceberg/table.cc
+++ b/src/iceberg/table.cc
@@ -32,6 +32,7 @@
 #include "iceberg/transaction.h"
 #include "iceberg/update/update_partition_spec.h"
 #include "iceberg/update/update_properties.h"
+#include "iceberg/update/update_schema.h"
 #include "iceberg/util/macros.h"
 
 namespace iceberg {
@@ -171,6 +172,13 @@ Result<std::shared_ptr<UpdateSortOrder>> 
Table::NewUpdateSortOrder() {
   return transaction->NewUpdateSortOrder();
 }
 
+// Starts an auto-committing kUpdate transaction on this table and returns the
+// UpdateSchema created by that transaction. ICEBERG_ASSIGN_OR_RAISE is
+// expected to return early with the error if Transaction::Make() fails.
+Result<std::shared_ptr<UpdateSchema>> Table::NewUpdateSchema() {
+  ICEBERG_ASSIGN_OR_RAISE(auto txn, Transaction::Make(shared_from_this(),
+                                                      Transaction::Kind::kUpdate,
+                                                      /*auto_commit=*/true));
+  return txn->NewUpdateSchema();
+}
+
 Result<std::shared_ptr<StagedTable>> StagedTable::Make(
     TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
     std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -221,4 +229,8 @@ Result<std::shared_ptr<UpdateProperties>> 
StaticTable::NewUpdateProperties() {
   return NotSupported("Cannot create an update properties for a static table");
 }
 
+// Static tables reject schema updates: this override unconditionally returns
+// a NotSupported error and never creates an UpdateSchema.
+Result<std::shared_ptr<UpdateSchema>> StaticTable::NewUpdateSchema() {
+  return NotSupported("Cannot create an update schema for a static table");
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/table.h b/src/iceberg/table.h
index 30ad14c1..31139585 100644
--- a/src/iceberg/table.h
+++ b/src/iceberg/table.h
@@ -140,6 +140,10 @@ class ICEBERG_EXPORT Table : public 
std::enable_shared_from_this<Table> {
   /// changes.
   virtual Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
 
+  /// \brief Create a new UpdateSchema to alter the columns of this table and 
commit the
+  /// changes.
+  virtual Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
+
  protected:
   Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
         std::string metadata_location, std::shared_ptr<FileIO> io,
@@ -187,6 +191,8 @@ class ICEBERG_EXPORT StaticTable final : public Table {
 
   Result<std::shared_ptr<UpdateProperties>> NewUpdateProperties() override;
 
+  Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema() override;
+
  private:
   using Table::Table;
 };
diff --git a/src/iceberg/transaction.cc b/src/iceberg/transaction.cc
index c8446e8b..6641a1af 100644
--- a/src/iceberg/transaction.cc
+++ b/src/iceberg/transaction.cc
@@ -22,6 +22,7 @@
 #include <memory>
 
 #include "iceberg/catalog.h"
+#include "iceberg/schema.h"
 #include "iceberg/table.h"
 #include "iceberg/table_metadata.h"
 #include "iceberg/table_requirement.h"
@@ -30,6 +31,7 @@
 #include "iceberg/update/pending_update.h"
 #include "iceberg/update/update_partition_spec.h"
 #include "iceberg/update/update_properties.h"
+#include "iceberg/update/update_schema.h"
 #include "iceberg/update/update_sort_order.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/macros.h"
@@ -105,6 +107,12 @@ Status Transaction::Apply(PendingUpdate& update) {
         metadata_builder_->AddPartitionSpec(std::move(result.spec));
       }
     } break;
+    case PendingUpdate::Kind::kUpdateSchema: {
+      auto& update_schema = internal::checked_cast<UpdateSchema&>(update);
+      ICEBERG_ASSIGN_OR_RAISE(auto result, update_schema.Apply());
+      metadata_builder_->SetCurrentSchema(std::move(result.schema),
+                                          result.new_last_column_id);
+    } break;
     default:
       return NotSupported("Unsupported pending update: {}",
                           static_cast<int32_t>(update.kind()));
@@ -178,4 +186,11 @@ Result<std::shared_ptr<UpdateSortOrder>> 
Transaction::NewUpdateSortOrder() {
   return update_sort_order;
 }
 
+// Creates an UpdateSchema bound to this transaction, registers it through
+// AddUpdate(), and returns it. If either UpdateSchema::Make() or AddUpdate()
+// fails, the macros are expected to return that error to the caller instead.
+Result<std::shared_ptr<UpdateSchema>> Transaction::NewUpdateSchema() {
+  ICEBERG_ASSIGN_OR_RAISE(auto schema_update,
+                          UpdateSchema::Make(shared_from_this()));
+  ICEBERG_RETURN_UNEXPECTED(AddUpdate(schema_update));
+  return schema_update;
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/transaction.h b/src/iceberg/transaction.h
index 87a2139b..ea918a17 100644
--- a/src/iceberg/transaction.h
+++ b/src/iceberg/transaction.h
@@ -68,6 +68,10 @@ class ICEBERG_EXPORT Transaction : public 
std::enable_shared_from_this<Transacti
   /// changes.
   Result<std::shared_ptr<UpdateSortOrder>> NewUpdateSortOrder();
 
+  /// \brief Create a new UpdateSchema to alter the columns of this table and 
commit the
+  /// changes.
+  Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();
+
  private:
   Transaction(std::shared_ptr<Table> table, Kind kind, bool auto_commit,
               std::unique_ptr<TableMetadataBuilder> metadata_builder);
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index 65afeb87..2daf39e6 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -190,6 +190,7 @@ class Transaction;
 class PendingUpdate;
 class UpdatePartitionSpec;
 class UpdateProperties;
+class UpdateSchema;
 class UpdateSortOrder;
 
 /// 
----------------------------------------------------------------------------
diff --git a/src/iceberg/update/meson.build b/src/iceberg/update/meson.build
index 3fdfda98..e4c786f4 100644
--- a/src/iceberg/update/meson.build
+++ b/src/iceberg/update/meson.build
@@ -19,6 +19,7 @@ install_headers(
     [
         'pending_update.h',
         'update_partition_spec.h',
+        'update_schema.h',
         'update_sort_order.h',
         'update_properties.h',
     ],
diff --git a/src/iceberg/update/pending_update.h 
b/src/iceberg/update/pending_update.h
index 95580f40..90723987 100644
--- a/src/iceberg/update/pending_update.h
+++ b/src/iceberg/update/pending_update.h
@@ -44,6 +44,7 @@ class ICEBERG_EXPORT PendingUpdate : public ErrorCollector {
   enum class Kind : uint8_t {
     kUpdatePartitionSpec,
     kUpdateProperties,
+    kUpdateSchema,
     kUpdateSortOrder,
   };
 
diff --git a/src/iceberg/update/update_schema.cc 
b/src/iceberg/update/update_schema.cc
new file mode 100644
index 00000000..14b962bd
--- /dev/null
+++ b/src/iceberg/update/update_schema.cc
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/update/update_schema.h"
+
+#include <memory>
+#include <optional>
+#include <ranges>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+
+#include "iceberg/schema.h"
+#include "iceberg/table_metadata.h"
+#include "iceberg/transaction.h"
+#include "iceberg/type.h"
+#include "iceberg/util/error_collector.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+// Factory for UpdateSchema. A null transaction is rejected through
+// ICEBERG_PRECHECK; otherwise a newly constructed UpdateSchema is wrapped in a
+// shared_ptr. `new` is used directly because the constructor is private.
+Result<std::shared_ptr<UpdateSchema>> UpdateSchema::Make(
+    std::shared_ptr<Transaction> transaction) {
+  ICEBERG_PRECHECK(transaction != nullptr,
+                   "Cannot create UpdateSchema without transaction");
+  std::shared_ptr<UpdateSchema> created(
+      new UpdateSchema(std::move(transaction)));
+  return created;
+}
+
+// Captures the state that pending schema changes start from, read from the
+// transaction's current table metadata: its last_column_id, the schema it
+// returns, and that schema's identifier field names. A constructor cannot
+// return a Result, so a failed lookup is handed to AddError() and the rest of
+// the initialization is skipped.
+UpdateSchema::UpdateSchema(std::shared_ptr<Transaction> transaction)
+    : PendingUpdate(std::move(transaction)) {
+  const TableMetadata& base_metadata = transaction_->current();
+
+  // Assign last_column_id_ before any early return: the member has no default
+  // initializer, so bailing out first would leave it indeterminate.
+  last_column_id_ = base_metadata.last_column_id;
+
+  // Get the current schema
+  auto schema_result = base_metadata.Schema();
+  if (!schema_result.has_value()) {
+    AddError(schema_result.error());
+    return;
+  }
+  schema_ = std::move(schema_result.value());
+
+  // Initialize identifier field names from the current schema
+  auto identifier_names_result = schema_->IdentifierFieldNames();
+  if (!identifier_names_result.has_value()) {
+    AddError(identifier_names_result.error());
+    return;
+  }
+  identifier_field_names_ = identifier_names_result.value() |
+                            std::ranges::to<std::unordered_set<std::string>>();
+}
+
+// Destructor is declared in the header and defaulted here, out of line.
+UpdateSchema::~UpdateSchema() = default;
+
+// Turns on the allow_incompatible_changes_ flag and returns *this so calls can
+// be chained. Nothing in this method turns the flag back off.
+UpdateSchema& UpdateSchema::AllowIncompatibleChanges() {
+  allow_incompatible_changes_ = true;
+  return *this;
+}
+
+// Stores the requested case sensitivity in case_sensitive_ and returns *this
+// so calls can be chained.
+UpdateSchema& UpdateSchema::CaseSensitive(bool case_sensitive) {
+  case_sensitive_ = case_sensitive;
+  return *this;
+}
+
+// Adds an optional top-level column: after the "." check on `name`, forwards
+// to AddColumnInternal() with no parent and is_optional=true.
+// NOTE(review): ICEBERG_BUILDER_CHECK is defined elsewhere; presumably it
+// records the formatted error and returns *this when the condition is false —
+// confirm against the macro definition.
+UpdateSchema& UpdateSchema::AddColumn(std::string_view name, 
std::shared_ptr<Type> type,
+                                      std::string_view doc) {
+  // Check for "." in top-level name
+  ICEBERG_BUILDER_CHECK(!name.contains('.'),
+                        "Cannot add column with ambiguous name: {}, use "
+                        "AddColumn(parent, name, type, doc)",
+                        name);
+  return AddColumnInternal(std::nullopt, name, /*is_optional=*/true, 
std::move(type),
+                           doc);
+}
+
+// Optional-column overload that takes an explicit parent. Everything is
+// forwarded to AddColumnInternal() with is_optional=true; unlike the
+// top-level overload, no "." check is applied to `name` here.
+UpdateSchema& UpdateSchema::AddColumn(std::optional<std::string_view> parent,
+                                      std::string_view name,
+                                      std::shared_ptr<Type> type,
+                                      std::string_view doc) {
+  // `parent` is a trivially copyable view, so it is simply passed by copy.
+  constexpr bool kOptional = true;
+  return AddColumnInternal(parent, name, kOptional, std::move(type), doc);
+}
+
+// Adds a required top-level column: after the "." check on `name`, forwards
+// to AddColumnInternal() with no parent and is_optional=false.
+// NOTE(review): ICEBERG_BUILDER_CHECK is defined elsewhere; presumably it
+// records the formatted error and returns *this when the condition is false —
+// confirm against the macro definition.
+UpdateSchema& UpdateSchema::AddRequiredColumn(std::string_view name,
+                                              std::shared_ptr<Type> type,
+                                              std::string_view doc) {
+  // Check for "." in top-level name
+  ICEBERG_BUILDER_CHECK(!name.contains('.'),
+                        "Cannot add column with ambiguous name: {}, use "
+                        "AddRequiredColumn(parent, name, type, doc)",
+                        name);
+  return AddColumnInternal(std::nullopt, name, /*is_optional=*/false, 
std::move(type),
+                           doc);
+}
+
+// Required-column overload that takes an explicit parent. Everything is
+// forwarded to AddColumnInternal() with is_optional=false; unlike the
+// top-level overload, no "." check is applied to `name` here.
+UpdateSchema& UpdateSchema::AddRequiredColumn(
+    std::optional<std::string_view> parent, std::string_view name,
+    std::shared_ptr<Type> type, std::string_view doc) {
+  // `parent` is a trivially copyable view, so it is simply passed by copy.
+  constexpr bool kOptional = false;
+  return AddColumnInternal(parent, name, kOptional, std::move(type), doc);
+}
+
+// Stub: ignores its arguments, passes a NotImplemented error to AddError(),
+// and returns *this so call chains still work.
+UpdateSchema& UpdateSchema::UpdateColumn(std::string_view name,
+                                         std::shared_ptr<PrimitiveType> 
new_type) {
+  // TODO(Guotao Yu): Implement UpdateColumn
+  AddError(NotImplemented("UpdateSchema::UpdateColumn not implemented"));
+  return *this;
+}
+
+// Stub: ignores its arguments, passes a NotImplemented error to AddError(),
+// and returns *this so call chains still work.
+UpdateSchema& UpdateSchema::UpdateColumnDoc(std::string_view name,
+                                            std::string_view new_doc) {
+  // TODO(Guotao Yu): Implement UpdateColumnDoc
+  AddError(NotImplemented("UpdateSchema::UpdateColumnDoc not implemented"));
+  return *this;
+}
+
+// Shared backend of every AddColumn()/AddRequiredColumn() overload. Currently
+// a stub: it ignores its arguments, passes a NotImplemented error to
+// AddError(), and returns *this, so all four public overloads end up here.
+UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> 
parent,
+                                              std::string_view name, bool 
is_optional,
+                                              std::shared_ptr<Type> type,
+                                              std::string_view doc) {
+  // TODO(Guotao Yu): Implement AddColumnInternal logic
+  // This is where the real work happens - finding parent, validating, etc.
+  AddError(NotImplemented("UpdateSchema::AddColumnInternal not implemented"));
+  return *this;
+}
+
+// Stub: ignores its arguments, passes a NotImplemented error to AddError(),
+// and returns *this so call chains still work.
+UpdateSchema& UpdateSchema::RenameColumn(std::string_view name,
+                                         std::string_view new_name) {
+  // TODO(Guotao Yu): Implement RenameColumn
+  AddError(NotImplemented("UpdateSchema::RenameColumn not implemented"));
+  return *this;
+}
+
+// Stub: ignores `name`, passes a NotImplemented error to AddError(), and
+// returns *this so call chains still work.
+UpdateSchema& UpdateSchema::MakeColumnOptional(std::string_view name) {
+  // TODO(Guotao Yu): Implement MakeColumnOptional
+  AddError(NotImplemented("UpdateSchema::MakeColumnOptional not implemented"));
+  return *this;
+}
+
+// Stub: ignores `name`, passes a NotImplemented error to AddError(), and
+// returns *this so call chains still work.
+UpdateSchema& UpdateSchema::RequireColumn(std::string_view name) {
+  // TODO(Guotao Yu): Implement RequireColumn
+  AddError(NotImplemented("UpdateSchema::RequireColumn not implemented"));
+  return *this;
+}
+
+// Stub: ignores `name`, passes a NotImplemented error to AddError(), and
+// returns *this so call chains still work.
+UpdateSchema& UpdateSchema::DeleteColumn(std::string_view name) {
+  // TODO(Guotao Yu): Implement DeleteColumn
+  AddError(NotImplemented("UpdateSchema::DeleteColumn not implemented"));
+  return *this;
+}
+
+// Stub: ignores `name`, passes a NotImplemented error to AddError(), and
+// returns *this so call chains still work.
+UpdateSchema& UpdateSchema::MoveFirst(std::string_view name) {
+  // TODO(Guotao Yu): Implement MoveFirst
+  AddError(NotImplemented("UpdateSchema::MoveFirst not implemented"));
+  return *this;
+}
+
+// Stub: ignores its arguments, passes a NotImplemented error to AddError(),
+// and returns *this so call chains still work.
+UpdateSchema& UpdateSchema::MoveBefore(std::string_view name,
+                                       std::string_view before_name) {
+  // TODO(Guotao Yu): Implement MoveBefore
+  AddError(NotImplemented("UpdateSchema::MoveBefore not implemented"));
+  return *this;
+}
+
+// Stub: ignores its arguments, passes a NotImplemented error to AddError(),
+// and returns *this so call chains still work.
+UpdateSchema& UpdateSchema::MoveAfter(std::string_view name,
+                                      std::string_view after_name) {
+  // TODO(Guotao Yu): Implement MoveAfter
+  AddError(NotImplemented("UpdateSchema::MoveAfter not implemented"));
+  return *this;
+}
+
+// Stub: ignores `new_schema`, passes a NotImplemented error to AddError(), and
+// returns *this so call chains still work.
+UpdateSchema& UpdateSchema::UnionByNameWith(std::shared_ptr<Schema> 
new_schema) {
+  // TODO(Guotao Yu): Implement UnionByNameWith
+  AddError(NotImplemented("UpdateSchema::UnionByNameWith not implemented"));
+  return *this;
+}
+
+// Replaces (does not extend) identifier_field_names_ with the given names,
+// copying each view into an owned std::string; duplicates collapse because
+// the destination is a set. The names are not checked against the schema
+// here.
+// NOTE(review): taking `const std::span<std::string_view>&` prevents passing
+// a span over const elements; `std::span<const std::string_view>` by value
+// would be the usual signature — needs a matching change in the header.
+UpdateSchema& UpdateSchema::SetIdentifierFields(
+    const std::span<std::string_view>& names) {
+  identifier_field_names_ = names | 
std::ranges::to<std::unordered_set<std::string>>();
+  return *this;
+}
+
+// Stub: always returns a NotImplemented error; no ApplyResult is produced
+// yet.
+Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
+  // TODO(Guotao Yu): Implement Apply
+  return NotImplemented("UpdateSchema::Apply not implemented");
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/update/update_schema.h 
b/src/iceberg/update/update_schema.h
new file mode 100644
index 00000000..bed2bfeb
--- /dev/null
+++ b/src/iceberg/update/update_schema.h
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+/// \file iceberg/update/update_schema.h
+/// API for schema evolution.
+
+#include <memory>
+#include <optional>
+#include <span>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
+#include "iceberg/update/pending_update.h"
+
+namespace iceberg {
+
+/// \brief API for schema evolution.
+///
+/// When committing, these changes will be applied to the current table 
metadata.
+/// Commit conflicts will not be resolved and will result in a CommitFailed 
error.
+///
+/// TODO(Guotao Yu): Add support for V3 default values when adding columns. 
Currently, all
+/// added columns use null as the default value, but Iceberg V3 supports custom
+/// default values for new columns.
+class ICEBERG_EXPORT UpdateSchema : public PendingUpdate {
+ public:
+  static Result<std::shared_ptr<UpdateSchema>> Make(
+      std::shared_ptr<Transaction> transaction);
+
+  ~UpdateSchema() override;
+
+  /// \brief Allow incompatible changes to the schema.
+  ///
+  /// Incompatible changes can cause failures when attempting to read older 
data files.
+  /// For example, adding a required column and attempting to read data files 
without
+  /// that column will cause a failure. However, if there are no data files 
that are
+  /// not compatible with the change, it can be allowed.
+  ///
+  /// This option allows incompatible changes to be made to a schema. This 
should be
+  /// used when the caller has validated that the change will not break. For 
example,
+  /// if a column is added as optional but always populated and data older 
than the
+  /// column addition has been deleted from the table, this can be used with
+  /// RequireColumn() to mark the column required.
+  ///
+  /// \return Reference to this for method chaining.
+  UpdateSchema& AllowIncompatibleChanges();
+
+  /// \brief Add a new optional top-level column with documentation.
+  ///
+  /// Because "." may be interpreted as a column path separator or may be used 
in
+  /// field names, it is not allowed in names passed to this method. To add to 
nested
+  /// structures or to add fields with names that contain ".", use 
AddColumn(parent,
+  /// name, type, doc).
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// The added column will be optional with a null default value.
+  ///
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name contains ".".
+  UpdateSchema& AddColumn(std::string_view name, std::shared_ptr<Type> type,
+                          std::string_view doc = "");
+
+  /// \brief Add a new optional column to a nested struct with documentation.
+  ///
+  /// The parent name is used to find the parent using 
Schema::FindFieldByName(). If
+  /// the parent name is null or empty, the new column will be added to the 
root as a
+  /// top-level column. If parent identifies a struct, a new column is added 
to that
+  /// struct. If it identifies a list, the column is added to the list element 
struct,
+  /// and if it identifies a map, the new column is added to the map's value 
struct.
+  ///
+  /// The given name is used to name the new column and names containing "." 
are not
+  /// handled differently.
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// The added column will be optional with a null default value.
+  ///
+  /// \param parent Name of the parent struct to which the column will be 
added.
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if parent doesn't identify a 
struct.
+  UpdateSchema& AddColumn(std::optional<std::string_view> parent, 
std::string_view name,
+                          std::shared_ptr<Type> type, std::string_view doc = 
"");
+
+  /// \brief Add a new required top-level column with documentation.
+  ///
+  /// Adding a required column without a default is an incompatible change
+  /// that can break reading older data. To suppress the error reported when
+  /// an incompatible change is detected, call AllowIncompatibleChanges().
+  ///
+  /// Because "." may be interpreted as a column path separator or may be used 
in
+  /// field names, it is not allowed in names passed to this method. To add to 
nested
+  /// structures or to add fields with names that contain ".", use
+  /// AddRequiredColumn(parent, name, type, doc).
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name contains ".".
+  UpdateSchema& AddRequiredColumn(std::string_view name, std::shared_ptr<Type> 
type,
+                                  std::string_view doc = "");
+
+  /// \brief Add a new required column to a nested struct with documentation.
+  ///
+  /// Adding a required column without a default is an incompatible change
+  /// that can break reading older data. To suppress the error reported when
+  /// an incompatible change is detected, call AllowIncompatibleChanges().
+  ///
+  /// The parent name is used to find the parent using 
Schema::FindFieldByName(). If
+  /// the parent name is null or empty, the new column will be added to the 
root as a
+  /// top-level column. If parent identifies a struct, a new column is added 
to that
+  /// struct. If it identifies a list, the column is added to the list element 
struct,
+  /// and if it identifies a map, the new column is added to the map's value 
struct.
+  ///
+  /// The given name is used to name the new column and names containing "." 
are not
+  /// handled differently.
+  ///
+  /// If type is a nested type, its field IDs are reassigned when added to the
+  /// existing schema.
+  ///
+  /// \param parent Name of the parent struct to which the column will be 
added.
+  /// \param name Name for the new column.
+  /// \param type Type for the new column.
+  /// \param doc Documentation string for the new column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if parent doesn't identify a 
struct.
+  UpdateSchema& AddRequiredColumn(std::optional<std::string_view> parent,
+                                  std::string_view name, std::shared_ptr<Type> 
type,
+                                  std::string_view doc = "");
+
+  /// \brief Rename a column in the schema.
+  ///
+  /// The name is used to find the column to rename using 
Schema::FindFieldByName().
+  ///
+  /// The new name may contain "." and such names are not parsed or handled
+  /// differently.
+  ///
+  /// Columns may be updated and renamed in the same schema update.
+  ///
+  /// \param name Name of the column to rename.
+  /// \param new_name Replacement name for the column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column 
in the
+  /// schema or if
+  ///       this change conflicts with other additions, renames, or updates.
+  UpdateSchema& RenameColumn(std::string_view name, std::string_view new_name);
+
+  /// \brief Update a column in the schema to a new primitive type.
+  ///
+  /// The name is used to find the column to update using 
Schema::FindFieldByName().
+  ///
+  /// Only updates that widen types are allowed.
+  ///
+  /// Columns may be updated and renamed in the same schema update.
+  ///
+  /// \param name Name of the column to update.
+  /// \param new_type Replacement type for the column (must be primitive).
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column 
in the
+  /// schema or if
+  ///       this change introduces a type incompatibility or if it conflicts 
with
+  ///       other additions, renames, or updates.
+  UpdateSchema& UpdateColumn(std::string_view name,
+                             std::shared_ptr<PrimitiveType> new_type);
+
+  /// \brief Update the documentation string for a column.
+  ///
+  /// The name is used to find the column to update using 
Schema::FindFieldByName().
+  ///
+  /// \param name Name of the column to update the documentation string for.
+  /// \param new_doc Replacement documentation string for the column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column 
in the
+  /// schema or if
+  ///       the column will be deleted.
+  UpdateSchema& UpdateColumnDoc(std::string_view name, std::string_view 
new_doc);
+
+  /// \brief Update a column to be optional.
+  ///
+  /// \param name Name of the column to mark optional.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& MakeColumnOptional(std::string_view name);
+
+  /// \brief Update a column to be required.
+  ///
+  /// This is an incompatible change that can break reading older data. An
+  /// error will be reported unless AllowIncompatibleChanges() has been called.
+  ///
+  /// \param name Name of the column to mark required.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& RequireColumn(std::string_view name);
+
+  /// \brief Delete a column in the schema.
+  ///
+  /// The name is used to find the column to delete using 
Schema::FindFieldByName().
+  ///
+  /// \param name Name of the column to delete.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column 
in the
+  /// schema or if
+  ///       this change conflicts with other additions, renames, or updates.
+  UpdateSchema& DeleteColumn(std::string_view name);
+
+  /// \brief Move a column from its current position to the start of the 
schema or its
+  /// parent struct.
+  ///
+  /// \param name Name of the column to move.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column 
in the
+  /// schema or if
+  ///       this change conflicts with other changes.
+  UpdateSchema& MoveFirst(std::string_view name);
+
+  /// \brief Move a column from its current position to directly before a 
reference
+  /// column.
+  ///
+  /// The name is used to find the column to move using 
Schema::FindFieldByName(). If
+  /// the name identifies a nested column, it can only be moved within the 
nested
+  /// struct that contains it.
+  ///
+  /// \param name Name of the column to move.
+  /// \param before_name Name of the reference column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column 
in the
+  /// schema or if
+  ///       this change conflicts with other changes.
+  UpdateSchema& MoveBefore(std::string_view name, std::string_view 
before_name);
+
+  /// \brief Move a column from its current position to directly after a 
reference
+  /// column.
+  ///
+  /// The name is used to find the column to move using 
Schema::FindFieldByName(). If
+  /// the name identifies a nested column, it can only be moved within the 
nested
+  /// struct that contains it.
+  ///
+  /// \param name Name of the column to move.
+  /// \param after_name Name of the reference column.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidArgument will be reported if name doesn't identify a column 
in the
+  /// schema or if
+  ///       this change conflicts with other changes.
+  UpdateSchema& MoveAfter(std::string_view name, std::string_view after_name);
+
+  /// \brief Applies all field additions and updates from the provided new 
schema to
+  /// the existing schema to create a union schema.
+  ///
+  /// For fields with the same canonical name in both schemas, the type change
+  /// must be a widening that UpdateColumn() supports. Differences in type are
+  /// ignored if the new type is narrower than the existing type (e.g. long to
+  /// int, double to float).
+  ///
+  /// Only supports turning a previously required field into an optional one 
if it is
+  /// marked optional in the provided new schema using MakeColumnOptional().
+  ///
+  /// Only supports updating existing field docs with field docs from the
+  /// provided new schema using UpdateColumnDoc().
+  ///
+  /// \param new_schema A schema used in conjunction with the existing schema 
to
+  ///        create a union schema.
+  /// \return Reference to this for method chaining.
+  /// \note InvalidState will be reported if it encounters errors during
+  ///       provided schema traversal.
+  /// \note InvalidArgument will be reported if name doesn't identify a column
+  ///       in the schema, if this change introduces a type incompatibility,
+  ///       or if it conflicts with other additions, renames, or updates.
+  UpdateSchema& UnionByNameWith(std::shared_ptr<Schema> new_schema);
+
+  /// \brief Set the identifier fields given a set of field names.
+  ///
+  /// Because identifier fields are unique, duplicated names will be ignored. 
See
+  /// Schema::identifier_field_ids() to learn more about Iceberg identifier.
+  ///
+  /// \param names Names of the columns to set as identifier fields.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& SetIdentifierFields(const std::span<std::string_view>& names);
+
+  /// \brief Set whether letter case is considered when comparing column
+  /// names.
+  ///
+  /// \param case_sensitive When false case is not considered in column name
+  ///        comparisons.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& CaseSensitive(bool case_sensitive);
+
+  Kind kind() const final { return Kind::kUpdateSchema; }
+
+  struct ApplyResult {
+    std::shared_ptr<Schema> schema;
+    int32_t new_last_column_id;
+  };
+
+  /// \brief Apply the pending changes to the original schema and return the 
result.
+  ///
+  /// This does not result in a permanent update.
+  ///
+  /// \return The result Schema and last column id when all pending updates 
are applied.
+  Result<ApplyResult> Apply();
+
+ private:
+  explicit UpdateSchema(std::shared_ptr<Transaction> transaction);
+
+  /// \brief Internal implementation for adding a column with full control.
+  ///
+  /// \param parent Optional parent field name (nullopt for top-level).
+  /// \param name Name for the new column.
+  /// \param is_optional Whether the column is optional.
+  /// \param type Type for the new column.
+  /// \param doc Optional documentation string.
+  /// \return Reference to this for method chaining.
+  UpdateSchema& AddColumnInternal(std::optional<std::string_view> parent,
+                                  std::string_view name, bool is_optional,
+                                  std::shared_ptr<Type> type, std::string_view 
doc);
+
+  // Internal state
+  std::shared_ptr<Schema> schema_;
+  int32_t last_column_id_;
+  bool allow_incompatible_changes_{false};
+  bool case_sensitive_{true};
+  std::unordered_set<std::string> identifier_field_names_;
+};
+
+}  // namespace iceberg

Reply via email to