>From Ritik Raj <[email protected]>:

Ritik Raj has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20570?usp=email )


Change subject: [NO ISSUE][CLOUD] Remove "if-not-exists" in multipart upload
......................................................................

[NO ISSUE][CLOUD] Remove "if-not-exists" in multipart upload

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
During multipart uploads performed by AzBlobStorageBufferedWriter,
a transient 500 error from Azure may trigger a retry of the
block-list commit. On retry, the commit can fail with a 409
Conflict error, indicating that the target blob was already
created during the previous (partially successful) attempt.

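To make multipart uploads retry-safe, the if-not-exists
condition (If-None-Match: *) is removed from the block-list
commit. This ensures that retries can successfully overwrite
or commit the blob when a previous attempt partially succeeded,
preventing 409 conflicts and improving upload robustness.
The hand-rolled retry loop is also replaced with
CloudRetryableRequestUtil, which now catches
BlobStorageException as well.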

Ext-ref: MB-69374
Change-Id: I003570868b27643a1897e45ccb394c18771ad588
---
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
M hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
M 
hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
M hyracks-fullstack/pom.xml
4 files changed, 24 insertions(+), 29 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/70/20570/1

diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
index d2054c6d..5568433 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
@@ -28,13 +28,13 @@
 import java.util.Base64;
 import java.util.List;
 import java.util.UUID;
-import java.util.concurrent.TimeUnit;

 import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
 import org.apache.asterix.cloud.clients.ICloudGuardian;
 import org.apache.asterix.cloud.clients.profiler.IRequestProfilerLimiter;
 import org.apache.commons.io.IOUtils;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.cloud.util.CloudRetryableRequestUtil;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;

@@ -43,10 +43,8 @@
 import com.azure.storage.blob.BlobContainerClient;
 import com.azure.storage.blob.models.AccessTier;
 import com.azure.storage.blob.models.BlobRequestConditions;
-import com.azure.storage.blob.models.BlobStorageException;
 import com.azure.storage.blob.options.BlobParallelUploadOptions;
 import com.azure.storage.blob.specialized.BlockBlobClient;
-import com.azure.storage.common.implementation.Constants;

 public class AzBlobStorageBufferedWriter implements ICloudBufferedWriter {
     private static final String PUT_UPLOAD_ID = "putUploadId";
@@ -101,6 +99,7 @@
     private void initBlockBlobUploads(String blockID) {
         if (this.uploadID == null) {
             this.uploadID = blockID;
+            log("STARTED");
         }
     }

@@ -137,32 +136,16 @@
         } else if (PUT_UPLOAD_ID.equals(uploadID)) {
             return;
         }
-        int currRetryAttempt = 0;
         BlockBlobClient blockBlobClient = 
blobContainerClient.getBlobClient(path).getBlockBlobClient();
-        while (true) {
-            try {
-                guardian.checkWriteAccess(bucket, path);
-                profiler.objectMultipartUpload();
-                blockBlobClient.commitBlockListWithResponse(blockIDArrayList, 
null, null, accessTier,
-                        new 
BlobRequestConditions().setIfNoneMatch(Constants.HeaderConstants.ETAG_WILDCARD),
 null,
-                        Context.NONE);
-                break;
-            } catch (BlobStorageException e) {
-                currRetryAttempt++;
-                if (currRetryAttempt == MAX_RETRIES) {
-                    throw HyracksDataException.create(e);
-                }
-                LOGGER.info(() -> "AzBlob storage write retry, encountered: " 
+ e.getMessage());
+        CloudRetryableRequestUtil.runWithNoRetryOnInterruption(() -> 
completeMultipartUpload(blockBlobClient));
+        log("FINISHED");
+    }

-                // Backoff for 1 sec for the first 2 retries, and 2 seconds 
from there onward
-                try {
-                    Thread.sleep(TimeUnit.SECONDS.toMillis(currRetryAttempt < 
2 ? 1 : 2));
-                } catch (InterruptedException ex) {
-                    Thread.currentThread().interrupt();
-                    throw HyracksDataException.create(ex);
-                }
-            }
-        }
+    private void completeMultipartUpload(BlockBlobClient blockBlobClient) {
+        guardian.checkWriteAccess(bucket, path);
+        profiler.objectMultipartUpload();
+        blockBlobClient.commitBlockListWithResponse(blockIDArrayList, null, 
null, accessTier,
+                new BlobRequestConditions(), null, Context.NONE);
     }

     @Override
@@ -171,4 +154,10 @@
         // https://github.com/Azure/azure-sdk-for-java/issues/31150
         LOGGER.warn("Multipart upload for {} was aborted", path);
     }
+
+    private void log(String op) {
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("{} multipart upload for {}", op, path);
+        }
+    }
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml 
b/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
index 300c633..55b7bfd 100644
--- a/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
@@ -97,5 +97,10 @@
       <artifactId>sdk-core</artifactId>
       <version>${awsjavasdk.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.azure</groupId>
+      <artifactId>azure-storage-blob</artifactId>
+      <version>${azureblobjavasdk.version}</version>
+    </dependency>
   </dependencies>
 </project>
\ No newline at end of file
diff --git 
a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
 
b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
index 5461ed5..423589e 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
@@ -33,6 +33,7 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;

+import com.azure.storage.blob.models.BlobStorageException;
 import com.google.cloud.BaseServiceException;

 import software.amazon.awssdk.core.exception.SdkException;
@@ -184,7 +185,7 @@
         while (true) {
             try {
                 return request.call();
-            } catch (IOException | BaseServiceException | SdkException e) {
+            } catch (IOException | BaseServiceException | BlobStorageException 
| SdkException e) {
                 if (!shouldRetry.test(e)) {
                     throw HyracksDataException.create(e);
                 }
diff --git a/hyracks-fullstack/pom.xml b/hyracks-fullstack/pom.xml
index 2d871ba..b07f28f 100644
--- a/hyracks-fullstack/pom.xml
+++ b/hyracks-fullstack/pom.xml
@@ -79,7 +79,7 @@
     <asm.version>9.3</asm.version>
     <awsjavasdk.version>2.29.27</awsjavasdk.version>
     <gcsjavasdk.version>2.45.0</gcsjavasdk.version>
-
+    <azureblobjavasdk.version>12.31.1</azureblobjavasdk.version>
     <implementation.title>Apache Hyracks and Algebricks - 
${project.name}</implementation.title>
     <implementation.url>https://asterixdb.apache.org/</implementation.url>
     <implementation.version>${project.version}</implementation.version>

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20570?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: I003570868b27643a1897e45ccb394c18771ad588
Gerrit-Change-Number: 20570
Gerrit-PatchSet: 1
Gerrit-Owner: Ritik Raj <[email protected]>

Reply via email to