This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 4b52dbd89b Core,Open-API: Don't expose the `last-column-id` (#11514)
4b52dbd89b is described below
commit 4b52dbd89bf583f1bbdbf0b7a73b62404812ee11
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Nov 25 10:25:56 2024 +0100
Core,Open-API: Don't expose the `last-column-id` (#11514)
* Core,Open-API: Don't expose the `last-column-id`
Okay, I added this to the spec a while ago:
https://github.com/apache/iceberg/pull/7445
But I think this was a mistake, and we should not expose this
to the public APIs, as it is much better to track this internally.
I noticed this while reviewing
https://github.com/apache/iceberg-rust/pull/587
Removing this from the Java APIs, together with the Open-API
update, makes it much more resilient and doesn't require the
clients to compute this value. For example, when there are two conflicting
schema changes, the last-column-id must be recomputed correctly when doing
the retry operation.
* Update the tests as well
* Add `deprecation` flag
* Wording
Co-authored-by: Eduard Tudenhoefner <[email protected]>
* Wording
Co-authored-by: Eduard Tudenhoefner <[email protected]>
* Wording
* Thanks Ryan!
* Remove `LOG`
---------
Co-authored-by: Eduard Tudenhoefner <[email protected]>
---
.../aws/glue/TestIcebergToGlueConverter.java | 2 +-
.../java/org/apache/iceberg/MetadataUpdate.java | 10 +++++++++
.../main/java/org/apache/iceberg/SchemaUpdate.java | 2 +-
.../java/org/apache/iceberg/TableMetadata.java | 25 +++++++++++++++++++++-
.../apache/iceberg/rest/RESTSessionCatalog.java | 2 +-
.../java/org/apache/iceberg/view/ViewMetadata.java | 7 +-----
.../apache/iceberg/TestMetadataUpdateParser.java | 18 ++--------------
.../java/org/apache/iceberg/TestTableMetadata.java | 16 +++++++-------
.../org/apache/iceberg/TestUpdateRequirements.java | 18 ++++++++--------
open-api/rest-catalog-open-api.py | 2 +-
open-api/rest-catalog-open-api.yaml | 8 ++++++-
.../spark/source/TestSparkMetadataColumns.java | 5 +----
.../spark/source/TestSparkReadProjection.java | 3 +--
13 files changed, 67 insertions(+), 51 deletions(-)
diff --git
a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java
b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java
index 1136ad63b4..edebfd3420 100644
---
a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java
+++
b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java
@@ -238,7 +238,7 @@ public class TestIcebergToGlueConverter {
Schema newSchema =
new Schema(Types.NestedField.required(1, "x", Types.StringType.get(),
"comment1"));
- tableMetadata = tableMetadata.updateSchema(newSchema, 3);
+ tableMetadata = tableMetadata.updateSchema(newSchema);
IcebergToGlueConverter.setTableInputInformation(actualTableInputBuilder,
tableMetadata);
TableInput actualTableInput = actualTableInputBuilder.build();
diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
index 49fb1fe01c..ba038c196e 100644
--- a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
+++ b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
@@ -86,6 +86,16 @@ public interface MetadataUpdate extends Serializable {
private final Schema schema;
private final int lastColumnId;
+ public AddSchema(Schema schema) {
+ this(schema, schema.highestFieldId());
+ }
+
+ /**
+ * Set the schema
+ *
+ * @deprecated since 1.8.0, will be removed 1.9.0 or 2.0.0, use
AddSchema(schema).
+ */
+ @Deprecated
public AddSchema(Schema schema, int lastColumnId) {
this.schema = schema;
this.lastColumnId = lastColumnId;
diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
index 0690977786..2b541080ac 100644
--- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
+++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java
@@ -444,7 +444,7 @@ class SchemaUpdate implements UpdateSchema {
@Override
public void commit() {
- TableMetadata update = applyChangesToMetadata(base.updateSchema(apply(),
lastColumnId));
+ TableMetadata update = applyChangesToMetadata(base.updateSchema(apply()));
ops.commit(base, update);
}
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java
b/core/src/main/java/org/apache/iceberg/TableMetadata.java
index 0e323bca1c..9f6ffbcc87 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java
@@ -563,10 +563,23 @@ public class TableMetadata implements Serializable {
return new Builder(this).assignUUID().build();
}
+ /**
+ * Updates the schema
+ *
+ * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use
updateSchema(schema).
+ */
+ @Deprecated
public TableMetadata updateSchema(Schema newSchema, int newLastColumnId) {
return new Builder(this).setCurrentSchema(newSchema,
newLastColumnId).build();
}
+ /** Updates the schema */
+ public TableMetadata updateSchema(Schema newSchema) {
+ return new Builder(this)
+ .setCurrentSchema(newSchema, Math.max(this.lastColumnId,
newSchema.highestFieldId()))
+ .build();
+ }
+
// The caller is responsible to pass a newPartitionSpec with correct
partition field IDs
public TableMetadata updatePartitionSpec(PartitionSpec newPartitionSpec) {
return new Builder(this).setDefaultPartitionSpec(newPartitionSpec).build();
@@ -1082,8 +1095,18 @@ public class TableMetadata implements Serializable {
return this;
}
+ public Builder addSchema(Schema schema) {
+ addSchemaInternal(schema, Math.max(lastColumnId,
schema.highestFieldId()));
+ return this;
+ }
+
+ /**
+ * Add a new schema.
+ *
+ * @deprecated since 1.8.0, will be removed in 1.9.0 or 2.0.0, use
AddSchema(schema).
+ */
+ @Deprecated
public Builder addSchema(Schema schema, int newLastColumnId) {
- // TODO: remove requirement for newLastColumnId
addSchemaInternal(schema, newLastColumnId);
return this;
}
diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java
b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java
index b895956353..1bf57dd13c 100644
--- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java
+++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java
@@ -954,7 +954,7 @@ public class RESTSessionCatalog extends
BaseViewSessionCatalog
changes.add(new MetadataUpdate.UpgradeFormatVersion(meta.formatVersion()));
Schema schema = meta.schema();
- changes.add(new MetadataUpdate.AddSchema(schema, schema.highestFieldId()));
+ changes.add(new MetadataUpdate.AddSchema(schema));
changes.add(new MetadataUpdate.SetCurrentSchema(-1));
PartitionSpec spec = meta.spec();
diff --git a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java
b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java
index ae837ff968..94f3a56ba9 100644
--- a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java
@@ -372,20 +372,15 @@ public interface ViewMetadata extends Serializable {
newSchema = schema;
}
- int highestFieldId = Math.max(highestFieldId(),
newSchema.highestFieldId());
schemas.add(newSchema);
schemasById.put(newSchema.schemaId(), newSchema);
- changes.add(new MetadataUpdate.AddSchema(newSchema, highestFieldId));
+ changes.add(new MetadataUpdate.AddSchema(newSchema));
this.lastAddedSchemaId = newSchemaId;
return newSchemaId;
}
- private int highestFieldId() {
- return
schemas.stream().map(Schema::highestFieldId).max(Integer::compareTo).orElse(0);
- }
-
private int reuseOrCreateNewSchemaId(Schema newSchema) {
// if the schema already exists, use its id; otherwise use the highest
id + 1
int newSchemaId = INITIAL_SCHEMA_ID;
diff --git
a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java
b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java
index bfed6ebebe..cae19fece4 100644
--- a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java
@@ -112,23 +112,9 @@ public class TestMetadataUpdateParser {
public void testAddSchemaFromJson() {
String action = MetadataUpdateParser.ADD_SCHEMA;
Schema schema = ID_DATA_SCHEMA;
- int lastColumnId = schema.highestFieldId();
- String json =
- String.format(
- "{\"action\":\"add-schema\",\"schema\":%s,\"last-column-id\":%d}",
- SchemaParser.toJson(schema), lastColumnId);
- MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema,
lastColumnId);
- assertEquals(action, actualUpdate, MetadataUpdateParser.fromJson(json));
- }
-
- @Test
- public void testAddSchemaFromJsonWithoutLastColumnId() {
- String action = MetadataUpdateParser.ADD_SCHEMA;
- Schema schema = ID_DATA_SCHEMA;
- int lastColumnId = schema.highestFieldId();
String json =
String.format("{\"action\":\"add-schema\",\"schema\":%s}",
SchemaParser.toJson(schema));
- MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema,
lastColumnId);
+ MetadataUpdate actualUpdate = new MetadataUpdate.AddSchema(schema);
assertEquals(action, actualUpdate, MetadataUpdateParser.fromJson(json));
}
@@ -140,7 +126,7 @@ public class TestMetadataUpdateParser {
String.format(
"{\"action\":\"add-schema\",\"schema\":%s,\"last-column-id\":%d}",
SchemaParser.toJson(schema), lastColumnId);
- MetadataUpdate update = new MetadataUpdate.AddSchema(schema, lastColumnId);
+ MetadataUpdate update = new MetadataUpdate.AddSchema(schema);
String actual = MetadataUpdateParser.toJson(update);
assertThat(actual)
.as("Add schema should convert to the correct JSON value")
diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
index c9a8eb75a9..64c410b464 100644
--- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
+++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java
@@ -1425,7 +1425,7 @@ public class TestTableMetadata {
new Schema(
Lists.newArrayList(Types.NestedField.required(1, "x",
Types.StringType.get())),
Sets.newHashSet(1));
- TableMetadata newMeta = meta.updateSchema(newSchema, 1);
+ TableMetadata newMeta = meta.updateSchema(newSchema);
assertThat(newMeta.schemas()).hasSize(2);
assertThat(newMeta.schema().identifierFieldIds()).containsExactly(1);
}
@@ -1447,7 +1447,7 @@ public class TestTableMetadata {
new Schema(
Types.NestedField.required(1, "y", Types.LongType.get(),
"comment"),
Types.NestedField.required(2, "x", Types.StringType.get()));
- TableMetadata twoSchemasTable = freshTable.updateSchema(schema2, 2);
+ TableMetadata twoSchemasTable = freshTable.updateSchema(schema2);
assertThat(twoSchemasTable.currentSchemaId()).isEqualTo(1);
assertSameSchemaList(
ImmutableList.of(schema, new Schema(1, schema2.columns())),
twoSchemasTable.schemas());
@@ -1459,26 +1459,26 @@ public class TestTableMetadata {
new Schema(
Types.NestedField.required(1, "y", Types.LongType.get(),
"comment"),
Types.NestedField.required(2, "x", Types.StringType.get()));
- TableMetadata sameSchemaTable = twoSchemasTable.updateSchema(sameSchema2,
2);
+ TableMetadata sameSchemaTable = twoSchemasTable.updateSchema(sameSchema2);
assertThat(sameSchemaTable).isSameAs(twoSchemasTable);
// update schema with the same schema and different last column ID as
current should create
// a new table
- TableMetadata differentColumnIdTable =
sameSchemaTable.updateSchema(sameSchema2, 3);
+ TableMetadata differentColumnIdTable =
sameSchemaTable.updateSchema(sameSchema2);
assertThat(differentColumnIdTable.currentSchemaId()).isEqualTo(1);
assertSameSchemaList(
ImmutableList.of(schema, new Schema(1, schema2.columns())),
differentColumnIdTable.schemas());
assertThat(differentColumnIdTable.schema().asStruct()).isEqualTo(schema2.asStruct());
- assertThat(differentColumnIdTable.lastColumnId()).isEqualTo(3);
+ assertThat(differentColumnIdTable.lastColumnId()).isEqualTo(2);
// update schema with old schema does not change schemas
- TableMetadata revertSchemaTable =
differentColumnIdTable.updateSchema(schema, 3);
+ TableMetadata revertSchemaTable =
differentColumnIdTable.updateSchema(schema);
assertThat(revertSchemaTable.currentSchemaId()).isEqualTo(0);
assertSameSchemaList(
ImmutableList.of(schema, new Schema(1, schema2.columns())),
revertSchemaTable.schemas());
assertThat(revertSchemaTable.schema().asStruct()).isEqualTo(schema.asStruct());
- assertThat(revertSchemaTable.lastColumnId()).isEqualTo(3);
+ assertThat(revertSchemaTable.lastColumnId()).isEqualTo(2);
// create new schema will use the largest schema id + 1
Schema schema3 =
@@ -1486,7 +1486,7 @@ public class TestTableMetadata {
Types.NestedField.required(2, "y", Types.LongType.get(),
"comment"),
Types.NestedField.required(4, "x", Types.StringType.get()),
Types.NestedField.required(6, "z", Types.IntegerType.get()));
- TableMetadata threeSchemaTable = revertSchemaTable.updateSchema(schema3,
6);
+ TableMetadata threeSchemaTable = revertSchemaTable.updateSchema(schema3);
assertThat(threeSchemaTable.currentSchemaId()).isEqualTo(2);
assertSameSchemaList(
ImmutableList.of(
diff --git a/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java
b/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java
index 1a6c289ea2..e5b3428508 100644
--- a/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java
+++ b/core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java
@@ -223,9 +223,9 @@ public class TestUpdateRequirements {
UpdateRequirements.forUpdateTable(
metadata,
ImmutableList.of(
- new MetadataUpdate.AddSchema(new Schema(), lastColumnId),
- new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 1),
- new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 2)));
+ new MetadataUpdate.AddSchema(new Schema()),
+ new MetadataUpdate.AddSchema(new Schema()),
+ new MetadataUpdate.AddSchema(new Schema())));
requirements.forEach(req -> req.validate(metadata));
assertThat(requirements)
@@ -253,9 +253,9 @@ public class TestUpdateRequirements {
UpdateRequirements.forUpdateTable(
metadata,
ImmutableList.of(
- new MetadataUpdate.AddSchema(new Schema(), 1),
- new MetadataUpdate.AddSchema(new Schema(), 2),
- new MetadataUpdate.AddSchema(new Schema(), 3)));
+ new MetadataUpdate.AddSchema(new Schema()),
+ new MetadataUpdate.AddSchema(new Schema()),
+ new MetadataUpdate.AddSchema(new Schema())));
assertThatThrownBy(() -> requirements.forEach(req ->
req.validate(updated)))
.isInstanceOf(CommitFailedException.class)
@@ -269,9 +269,9 @@ public class TestUpdateRequirements {
UpdateRequirements.forReplaceView(
viewMetadata,
ImmutableList.of(
- new MetadataUpdate.AddSchema(new Schema(), lastColumnId),
- new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 1),
- new MetadataUpdate.AddSchema(new Schema(), lastColumnId + 2)));
+ new MetadataUpdate.AddSchema(new Schema()),
+ new MetadataUpdate.AddSchema(new Schema()),
+ new MetadataUpdate.AddSchema(new Schema())));
requirements.forEach(req -> req.validate(viewMetadata));
assertThat(requirements)
diff --git a/open-api/rest-catalog-open-api.py
b/open-api/rest-catalog-open-api.py
index c3372544ef..d63e9bfe54 100644
--- a/open-api/rest-catalog-open-api.py
+++ b/open-api/rest-catalog-open-api.py
@@ -1152,7 +1152,7 @@ class AddSchemaUpdate(BaseUpdate):
last_column_id: Optional[int] = Field(
None,
alias='last-column-id',
- description='The highest assigned column ID for the table. This is
used to ensure columns are always assigned an unused ID when evolving schemas.
When omitted, it will be computed on the server side.',
+ description="This optional field is **DEPRECATED for REMOVAL** since
it more safe to handle this internally, and shouldn't be exposed to the
clients.\nThe highest assigned column ID for the table. This is used to ensure
columns are always assigned an unused ID when evolving schemas. When omitted,
it will be computed on the server side.",
)
diff --git a/open-api/rest-catalog-open-api.yaml
b/open-api/rest-catalog-open-api.yaml
index 9635af96c1..a154ce97b5 100644
--- a/open-api/rest-catalog-open-api.yaml
+++ b/open-api/rest-catalog-open-api.yaml
@@ -2692,7 +2692,13 @@ components:
$ref: '#/components/schemas/Schema'
last-column-id:
type: integer
- description: The highest assigned column ID for the table. This is
used to ensure columns are always assigned an unused ID when evolving schemas.
When omitted, it will be computed on the server side.
+ deprecated: true
+ description:
+ This optional field is **DEPRECATED for REMOVAL** since it more
safe to handle this internally,
+ and shouldn't be exposed to the clients.
+
+ The highest assigned column ID for the table. This is used to
ensure columns are always
+ assigned an unused ID when evolving schemas. When omitted, it will
be computed on the server side.
SetCurrentSchemaUpdate:
allOf:
diff --git
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java
index 230a660c01..93f3929911 100644
---
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java
+++
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkMetadataColumns.java
@@ -189,10 +189,7 @@ public class TestSparkMetadataColumns extends TestBase {
TableOperations ops = ((HasTableOperations) table).operations();
TableMetadata base = ops.current();
- ops.commit(
- base,
- base.updateSchema(manyColumnsSchema,
manyColumnsSchema.highestFieldId())
- .updatePartitionSpec(spec));
+ ops.commit(base,
base.updateSchema(manyColumnsSchema).updatePartitionSpec(spec));
Dataset<Row> df =
spark
diff --git
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java
index 99a327402d..becf6a064d 100644
---
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java
+++
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java
@@ -152,8 +152,7 @@ public class TestSparkReadProjection extends
TestReadProjection {
Schema expectedSchema = reassignIds(readSchema, idMapping);
// Set the schema to the expected schema directly to simulate the table
schema evolving
- TestTables.replaceMetadata(
- desc, TestTables.readMetadata(desc).updateSchema(expectedSchema,
100));
+ TestTables.replaceMetadata(desc,
TestTables.readMetadata(desc).updateSchema(expectedSchema));
Dataset<Row> df =
spark