nastra commented on code in PR #10755:
URL: https://github.com/apache/iceberg/pull/10755#discussion_r1852353595
##########
api/src/main/java/org/apache/iceberg/ExpireSnapshots.java:
##########
@@ -118,4 +118,16 @@ public interface ExpireSnapshots extends
PendingUpdate<List<Snapshot>> {
* @return this for method chaining
*/
ExpireSnapshots cleanExpiredFiles(boolean clean);
+
+ /**
+ * Allows expiration of unreachable metadata, such as partition specs as
part of the operation.
+ *
+ * @param clean setting this to true will remove metadata(such as partition
spec) that are no
+ * longer reachable by any snapshot
Review Comment:
```suggestion
* @param clean setting this to true will remove metadata (such as
partition spec) that is no
* longer reachable by any snapshot
```
##########
core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java:
##########
@@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() {
.hasMessage("Requirement failed: default partition spec changed:
expected id 0 != 1");
}
+ @Test
+ public void testRemovePartitionSpec() {
+ int defaultSpecId = 3;
+ when(metadata.defaultSpecId()).thenReturn(defaultSpecId);
+ // empty refs
+ when(metadata.refs()).thenReturn(ImmutableMap.of());
+
+ List<UpdateRequirement> requirements =
+ UpdateRequirements.forUpdateTable(
+ metadata,
+ ImmutableList.of(new
MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2))));
+
+ assertThat(requirements)
+ .hasSize(2)
+ .hasOnlyElementsOfTypes(
+ UpdateRequirement.AssertTableUUID.class,
UpdateRequirement.AssertDefaultSpecID.class);
+
+ assertTableUUID(requirements);
+
+ assertThat(requirements)
+ .element(1)
+
.asInstanceOf(InstanceOfAssertFactories.type(UpdateRequirement.AssertDefaultSpecID.class))
+ .extracting(UpdateRequirement.AssertDefaultSpecID::specId)
+ .isEqualTo(defaultSpecId);
+ }
+
+ @Test
+ public void testRemovePartitionSpecsWithRefs() {
+ int defaultSpecId = 3;
+ long snapshotId = 42L;
+ when(metadata.defaultSpecId()).thenReturn(defaultSpecId);
+
+ String branch = "branch";
+ SnapshotRef snapshotRef = mock(SnapshotRef.class);
+ when(snapshotRef.snapshotId()).thenReturn(snapshotId);
+ when(snapshotRef.isBranch()).thenReturn(true);
+ when(metadata.refs()).thenReturn(ImmutableMap.of(branch, snapshotRef));
+
+ List<UpdateRequirement> requirements =
+ UpdateRequirements.forUpdateTable(
+ metadata,
+ ImmutableList.of(new
MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2))));
+
+ assertThat(requirements)
Review Comment:
this is also missing a call to `requirements.forEach(req ->
req.validate(metadata));` before this line
##########
core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java:
##########
@@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() {
.hasMessage("Requirement failed: default partition spec changed:
expected id 0 != 1");
}
+ @Test
+ public void testRemovePartitionSpec() {
Review Comment:
```suggestion
public void removePartitionSpec() {
```
test methods in this class don't have a `test` prefix, so would be good to
keep it that way in this test class
##########
core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java:
##########
@@ -1284,6 +1284,34 @@ public void testUpdateTableSpecThenRevert() {
assertThat(table.spec()).as("Loaded table should have expected
spec").isEqualTo(TABLE_SPEC);
}
+ @Test
+ public void testRemoveUnusedSpec() {
+ C catalog = catalog();
+
+ if (requiresNamespaceCreate()) {
+ catalog.createNamespace(NS);
+ }
+
+ Table table =
+ catalog
+ .buildTable(TABLE, SCHEMA)
+ .withPartitionSpec(SPEC)
+ .withProperty(TableProperties.GC_ENABLED, "true")
+ .create();
+ PartitionSpec spec = table.spec();
+ // added some file to trigger expire snapshot
Review Comment:
```suggestion
// add a file to trigger snapshot expiration
```
##########
core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java:
##########
@@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() {
.hasMessage("Requirement failed: default partition spec changed:
expected id 0 != 1");
}
+ @Test
+ public void testRemovePartitionSpec() {
+ int defaultSpecId = 3;
+ when(metadata.defaultSpecId()).thenReturn(defaultSpecId);
+ // empty refs
+ when(metadata.refs()).thenReturn(ImmutableMap.of());
+
+ List<UpdateRequirement> requirements =
+ UpdateRequirements.forUpdateTable(
+ metadata,
+ ImmutableList.of(new
MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2))));
+
+ assertThat(requirements)
Review Comment:
this is missing a call to `requirements.forEach(req ->
req.validate(metadata));` before this line
##########
core/src/main/java/org/apache/iceberg/TableMetadata.java:
##########
@@ -1108,6 +1108,24 @@ public Builder setDefaultPartitionSpec(int specId) {
return this;
}
+ Builder removeUnusedSpecs(Iterable<Integer> specIds) {
+ Set<Integer> specIdsToRemove = Sets.newHashSet();
+ for (Integer specId : specIds) {
+ Preconditions.checkArgument(
+ specId != defaultSpecId, "Cannot remove default partition spec");
+ PartitionSpec toBeRemoved = specsById.remove(specId);
+ if (toBeRemoved != null) {
+ specIdsToRemove.add(specId);
+ }
+ }
Review Comment:
nit: newline after }
##########
core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java:
##########
@@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() {
.hasMessage("Requirement failed: default partition spec changed:
expected id 0 != 1");
}
+ @Test
+ public void testRemovePartitionSpec() {
+ int defaultSpecId = 3;
+ when(metadata.defaultSpecId()).thenReturn(defaultSpecId);
+ // empty refs
+ when(metadata.refs()).thenReturn(ImmutableMap.of());
Review Comment:
this isn't being needed here
##########
core/src/test/java/org/apache/iceberg/TestUpdateRequirements.java:
##########
@@ -424,6 +424,89 @@ public void setDefaultPartitionSpecFailure() {
.hasMessage("Requirement failed: default partition spec changed:
expected id 0 != 1");
}
+ @Test
+ public void testRemovePartitionSpec() {
+ int defaultSpecId = 3;
+ when(metadata.defaultSpecId()).thenReturn(defaultSpecId);
+ // empty refs
+ when(metadata.refs()).thenReturn(ImmutableMap.of());
+
+ List<UpdateRequirement> requirements =
+ UpdateRequirements.forUpdateTable(
+ metadata,
+ ImmutableList.of(new
MetadataUpdate.RemovePartitionSpecs(Sets.newHashSet(1, 2))));
+
+ assertThat(requirements)
+ .hasSize(2)
+ .hasOnlyElementsOfTypes(
+ UpdateRequirement.AssertTableUUID.class,
UpdateRequirement.AssertDefaultSpecID.class);
+
+ assertTableUUID(requirements);
+
+ assertThat(requirements)
+ .element(1)
+
.asInstanceOf(InstanceOfAssertFactories.type(UpdateRequirement.AssertDefaultSpecID.class))
+ .extracting(UpdateRequirement.AssertDefaultSpecID::specId)
+ .isEqualTo(defaultSpecId);
+ }
+
+ @Test
+ public void testRemovePartitionSpecsWithRefs() {
+ int defaultSpecId = 3;
+ long snapshotId = 42L;
+ when(metadata.defaultSpecId()).thenReturn(defaultSpecId);
+
+ String branch = "branch";
+ SnapshotRef snapshotRef = mock(SnapshotRef.class);
+ when(snapshotRef.snapshotId()).thenReturn(snapshotId);
+ when(snapshotRef.isBranch()).thenReturn(true);
+ when(metadata.refs()).thenReturn(ImmutableMap.of(branch, snapshotRef));
Review Comment:
this also requires mocking this:
`when(metadata.ref(branch)).thenReturn(snapshotRef);` (it wasn't failing
because the call to `validate()` was missing
##########
core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java:
##########
@@ -1284,6 +1284,34 @@ public void testUpdateTableSpecThenRevert() {
assertThat(table.spec()).as("Loaded table should have expected
spec").isEqualTo(TABLE_SPEC);
}
+ @Test
+ public void testRemoveUnusedSpec() {
Review Comment:
I think it would make sense to also add a test with a branch (in order to
make sure that a spec used on some branch isn't accidentally removed)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]