amogh-jahagirdar commented on code in PR #11565:
URL: https://github.com/apache/iceberg/pull/11565#discussion_r1857166857
##########
gcp/src/test/java/org/apache/iceberg/gcp/gcs/GCSFileIOTest.java:
##########
@@ -270,4 +283,136 @@ public void
refreshCredentialsEndpointSetButRefreshDisabled() {
assertThat(client.getOptions().getCredentials()).isInstanceOf(OAuth2Credentials.class);
}
+
+ @Test
+ void recoverSoftDeletedObject_WhenSoftDeleteDisabled_ReturnsFalse() {
+ BlobId blobId = BlobId.of(TEST_BUCKET, "object");
+ Bucket bucket = mock(Bucket.class);
+ BucketInfo.SoftDeletePolicy policy =
mock(BucketInfo.SoftDeletePolicy.class);
+
+ when(storage.get(TEST_BUCKET)).thenReturn(bucket);
+ when(bucket.getSoftDeletePolicy()).thenReturn(policy);
+ when(policy.getRetentionDuration()).thenReturn(Duration.ofSeconds(0)); //
soft-delete disabled
+
+ assertThat(io.recoverSoftDeletedObject(blobId)).isFalse();
+ }
+
+ @Test
+ void recoverSoftDeletedObject_WhenSoftDeletedBlobExists_ReturnsTrue() {
Review Comment:
I feel like we can shrink the method names here and remove the underscore
separators as part of that, just so it fits the existing naming pattern in
these tests.
e.g. `recoverSoftDeletedObject`
##########
gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java:
##########
@@ -242,4 +250,116 @@ private void internalDeleteFiles(Stream<BlobId>
blobIdsToDelete) {
Streams.stream(Iterators.partition(blobIdsToDelete.iterator(),
gcpProperties.deleteBatchSize()))
.forEach(batch -> client().delete(batch));
}
+
+ @Override
+ public boolean recoverFile(String path) {
+ Preconditions.checkArgument(
+ !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be
null or empty");
+
+ try {
+ BlobId blobId = BlobId.fromGsUtilUri(path);
+
+ // first attempt to restore with soft-delete
+ if (recoverSoftDeletedObject(blobId)) {
+ return true;
+ }
+
+ // fallback to restoring by copying the latest version
+ if (recoverLatestVersion(blobId)) {
+ return true;
+ }
+
+ } catch (IllegalArgumentException e) {
+ LOG.warn("Invalid GCS path format: {}", path, e);
+ }
+
+ return false;
+ }
+
+ /**
+ * Attempts to restore a soft-deleted object.
+ *
+ * <p>Requires {@code storage.objects.restore} permission
+ *
+ * <p>See <a
+ *
href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a>
+ *
+ * @param blobId the blob identifier
+ * @return {@code true} if blob was recovered, {@code false} if not
+ */
+ protected boolean recoverSoftDeletedObject(BlobId blobId) {
+ try {
+ BucketInfo.SoftDeletePolicy policy =
client().get(blobId.getBucket()).getSoftDeletePolicy();
+ if (Duration.ofSeconds(0).equals(policy.getRetentionDuration())) {
Review Comment:
Actually do we need the soft delete policy check to begin with? Is it not
possible to jump directly to attempting the listing of versions with
softDeleted(true) as done below, and if nothing gets returned we know that
recovering via soft delete wasn't effective? Or will the list versions call
fail?
##########
gcp/src/test/java/org/apache/iceberg/gcp/gcs/GCSFileIOTest.java:
##########
@@ -270,4 +283,136 @@ public void
refreshCredentialsEndpointSetButRefreshDisabled() {
assertThat(client.getOptions().getCredentials()).isInstanceOf(OAuth2Credentials.class);
}
+
+ @Test
+ void recoverSoftDeletedObject_WhenSoftDeleteDisabled_ReturnsFalse() {
+ BlobId blobId = BlobId.of(TEST_BUCKET, "object");
+ Bucket bucket = mock(Bucket.class);
+ BucketInfo.SoftDeletePolicy policy =
mock(BucketInfo.SoftDeletePolicy.class);
+
+ when(storage.get(TEST_BUCKET)).thenReturn(bucket);
+ when(bucket.getSoftDeletePolicy()).thenReturn(policy);
+ when(policy.getRetentionDuration()).thenReturn(Duration.ofSeconds(0)); //
soft-delete disabled
+
+ assertThat(io.recoverSoftDeletedObject(blobId)).isFalse();
+ }
+
+ @Test
+ void recoverSoftDeletedObject_WhenSoftDeletedBlobExists_ReturnsTrue() {
+ BlobId blobId = BlobId.of(TEST_BUCKET, "object");
+ Bucket bucket = mock(Bucket.class);
+ BucketInfo.SoftDeletePolicy policy =
mock(BucketInfo.SoftDeletePolicy.class);
+ Page<Blob> page = mock(Page.class);
+ Blob softDeletedBlob = mock(Blob.class);
+
+ when(storage.get(TEST_BUCKET)).thenReturn(bucket);
+ when(bucket.getSoftDeletePolicy()).thenReturn(policy);
+ when(policy.getRetentionDuration()).thenReturn(Duration.ofDays(7)); //
soft-delete enabled
+ doAnswer(invoke -> page).when(storage).list(eq(TEST_BUCKET), any(), any());
+ when(page.streamAll()).thenReturn(Stream.of(softDeletedBlob));
+ when(softDeletedBlob.getName()).thenReturn("object");
+ when(softDeletedBlob.getSoftDeleteTime()).thenReturn(OffsetDateTime.now());
+ when(softDeletedBlob.getBlobId()).thenReturn(blobId);
+ doAnswer(invocation -> softDeletedBlob).when(storage).restore(any(),
any());
+
+ assertThat(io.recoverSoftDeletedObject(blobId)).isTrue();
+ }
+
+ @Test
+ void recoverSoftDeletedObject_WhenNoSoftDeletedBlobExists_ReturnsFalse() {
Review Comment:
`cannotRecoverSoftDeletedObjectWhenNonExistant`
##########
gcp/src/main/java/org/apache/iceberg/gcp/gcs/GCSFileIO.java:
##########
@@ -242,4 +250,116 @@ private void internalDeleteFiles(Stream<BlobId>
blobIdsToDelete) {
Streams.stream(Iterators.partition(blobIdsToDelete.iterator(),
gcpProperties.deleteBatchSize()))
.forEach(batch -> client().delete(batch));
}
+
+ @Override
+ public boolean recoverFile(String path) {
+ Preconditions.checkArgument(
+ !Strings.isNullOrEmpty(path), "Cannot recover file: path must not be
null or empty");
+
+ try {
+ BlobId blobId = BlobId.fromGsUtilUri(path);
+
+ // first attempt to restore with soft-delete
+ if (recoverSoftDeletedObject(blobId)) {
+ return true;
+ }
+
+ // fallback to restoring by copying the latest version
+ if (recoverLatestVersion(blobId)) {
+ return true;
+ }
+
+ } catch (IllegalArgumentException e) {
+ LOG.warn("Invalid GCS path format: {}", path, e);
+ }
+
+ return false;
+ }
+
+ /**
+ * Attempts to restore a soft-deleted object.
+ *
+ * <p>Requires {@code storage.objects.restore} permission
+ *
+ * <p>See <a
+ *
href="https://cloud.google.com/storage/docs/use-soft-deleted-objects#restore">docs</a>
+ *
+ * @param blobId the blob identifier
+ * @return {@code true} if blob was recovered, {@code false} if not
+ */
+ protected boolean recoverSoftDeletedObject(BlobId blobId) {
+ try {
+ BucketInfo.SoftDeletePolicy policy =
client().get(blobId.getBucket()).getSoftDeletePolicy();
+ if (Duration.ofSeconds(0).equals(policy.getRetentionDuration())) {
Review Comment:
I think we'll need a null check for policy == null before so I think it
should be
```
if (policy == null ||
Duration.ofSeconds(0).equals(policy.getRetentionDuration())) {
return null
}
```
The GCP sdk
https://github.com/googleapis/google-api-java-client-services/blob/350652f6480c1dddf1c7dc2a966434deaeaa4964/clients/google-api-services-storage/v1/2.0.0/com/google/api/services/storage/model/Bucket.java#L911
API docs indicate it's that the soft delete policy will be null in case it
doesn't exist. Looks like Beam does the same check (inverted) as well
https://github.com/apache/beam/blob/78630d15445fdf54935d6ba99c2fd9d0930b6af8/sdks/java/extensions/google-cloud-platform-core/src/main/java/org/apache/beam/sdk/extensions/gcp/options/GcpOptions.java#L419
##########
gcp/src/test/java/org/apache/iceberg/gcp/gcs/GCSFileIOTest.java:
##########
@@ -270,4 +283,136 @@ public void
refreshCredentialsEndpointSetButRefreshDisabled() {
assertThat(client.getOptions().getCredentials()).isInstanceOf(OAuth2Credentials.class);
}
+
+ @Test
+ void recoverSoftDeletedObject_WhenSoftDeleteDisabled_ReturnsFalse() {
+ BlobId blobId = BlobId.of(TEST_BUCKET, "object");
+ Bucket bucket = mock(Bucket.class);
+ BucketInfo.SoftDeletePolicy policy =
mock(BucketInfo.SoftDeletePolicy.class);
+
+ when(storage.get(TEST_BUCKET)).thenReturn(bucket);
+ when(bucket.getSoftDeletePolicy()).thenReturn(policy);
+ when(policy.getRetentionDuration()).thenReturn(Duration.ofSeconds(0)); //
soft-delete disabled
+
+ assertThat(io.recoverSoftDeletedObject(blobId)).isFalse();
+ }
+
+ @Test
+ void recoverSoftDeletedObject_WhenSoftDeletedBlobExists_ReturnsTrue() {
+ BlobId blobId = BlobId.of(TEST_BUCKET, "object");
+ Bucket bucket = mock(Bucket.class);
+ BucketInfo.SoftDeletePolicy policy =
mock(BucketInfo.SoftDeletePolicy.class);
+ Page<Blob> page = mock(Page.class);
+ Blob softDeletedBlob = mock(Blob.class);
+
+ when(storage.get(TEST_BUCKET)).thenReturn(bucket);
+ when(bucket.getSoftDeletePolicy()).thenReturn(policy);
+ when(policy.getRetentionDuration()).thenReturn(Duration.ofDays(7)); //
soft-delete enabled
+ doAnswer(invoke -> page).when(storage).list(eq(TEST_BUCKET), any(), any());
+ when(page.streamAll()).thenReturn(Stream.of(softDeletedBlob));
+ when(softDeletedBlob.getName()).thenReturn("object");
+ when(softDeletedBlob.getSoftDeleteTime()).thenReturn(OffsetDateTime.now());
+ when(softDeletedBlob.getBlobId()).thenReturn(blobId);
+ doAnswer(invocation -> softDeletedBlob).when(storage).restore(any(),
any());
+
+ assertThat(io.recoverSoftDeletedObject(blobId)).isTrue();
+ }
+
+ @Test
+ void recoverSoftDeletedObject_WhenNoSoftDeletedBlobExists_ReturnsFalse() {
+ BlobId blobId = BlobId.of(TEST_BUCKET, "object");
+ Bucket bucket = mock(Bucket.class);
+ BucketInfo.SoftDeletePolicy policy =
mock(BucketInfo.SoftDeletePolicy.class);
+ Page<Blob> page = mock(Page.class);
+
+ when(storage.get(TEST_BUCKET)).thenReturn(bucket);
+ when(bucket.getSoftDeletePolicy()).thenReturn(policy);
+ when(policy.getRetentionDuration()).thenReturn(Duration.ofDays(7)); //
soft-delete enabled
+ when(page.streamAll()).thenReturn(Stream.empty());
+
+ assertThat(io.recoverSoftDeletedObject(blobId)).isFalse();
+ }
+
+ @Test
+ void recoverSoftDelete_WhenStorageException_ReturnsFalse() {
Review Comment:
`cannotRecoverSoftDeletedWhenStorageExceptionThrown`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]