From Ritik Raj <[email protected]>:
Ritik Raj has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20570?usp=email )
Change subject: [NO ISSUE][CLOUD] Remove "if-not-exists" in multipart upload
......................................................................
[NO ISSUE][CLOUD] Remove "if-not-exists" in multipart upload
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
During multipart uploads performed by AzBlobStorageBufferedWriter,
a transient 500 error from Azure may trigger a retry. On
retry, the upload can fail with a 409 Conflict error,
indicating that the target blob was already created during
the previous (partially successful) attempt.
To make multipart uploads retry-safe, the if-not-exists
condition (the If-None-Match: * precondition set on the
block-list commit) should be removed. This ensures that retries can
successfully overwrite or commit the blob when a previous
attempt partially succeeded, preventing 409 conflicts and
improving upload robustness.
Ext-ref: MB-69374
Change-Id: I003570868b27643a1897e45ccb394c18771ad588
---
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
M hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
M
hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
M hyracks-fullstack/pom.xml
4 files changed, 24 insertions(+), 29 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/70/20570/1
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
index d2054c6d..5568433 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageBufferedWriter.java
@@ -28,13 +28,13 @@
import java.util.Base64;
import java.util.List;
import java.util.UUID;
-import java.util.concurrent.TimeUnit;
import org.apache.asterix.cloud.clients.ICloudBufferedWriter;
import org.apache.asterix.cloud.clients.ICloudGuardian;
import org.apache.asterix.cloud.clients.profiler.IRequestProfilerLimiter;
import org.apache.commons.io.IOUtils;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.cloud.util.CloudRetryableRequestUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -43,10 +43,8 @@
import com.azure.storage.blob.BlobContainerClient;
import com.azure.storage.blob.models.AccessTier;
import com.azure.storage.blob.models.BlobRequestConditions;
-import com.azure.storage.blob.models.BlobStorageException;
import com.azure.storage.blob.options.BlobParallelUploadOptions;
import com.azure.storage.blob.specialized.BlockBlobClient;
-import com.azure.storage.common.implementation.Constants;
public class AzBlobStorageBufferedWriter implements ICloudBufferedWriter {
private static final String PUT_UPLOAD_ID = "putUploadId";
@@ -101,6 +99,7 @@
private void initBlockBlobUploads(String blockID) {
if (this.uploadID == null) {
this.uploadID = blockID;
+ log("STARTED");
}
}
@@ -137,32 +136,16 @@
} else if (PUT_UPLOAD_ID.equals(uploadID)) {
return;
}
- int currRetryAttempt = 0;
BlockBlobClient blockBlobClient =
blobContainerClient.getBlobClient(path).getBlockBlobClient();
- while (true) {
- try {
- guardian.checkWriteAccess(bucket, path);
- profiler.objectMultipartUpload();
- blockBlobClient.commitBlockListWithResponse(blockIDArrayList,
null, null, accessTier,
- new
BlobRequestConditions().setIfNoneMatch(Constants.HeaderConstants.ETAG_WILDCARD),
null,
- Context.NONE);
- break;
- } catch (BlobStorageException e) {
- currRetryAttempt++;
- if (currRetryAttempt == MAX_RETRIES) {
- throw HyracksDataException.create(e);
- }
- LOGGER.info(() -> "AzBlob storage write retry, encountered: "
+ e.getMessage());
+ CloudRetryableRequestUtil.runWithNoRetryOnInterruption(() ->
completeMultipartUpload(blockBlobClient));
+ log("FINISHED");
+ }
- // Backoff for 1 sec for the first 2 retries, and 2 seconds
from there onward
- try {
- Thread.sleep(TimeUnit.SECONDS.toMillis(currRetryAttempt <
2 ? 1 : 2));
- } catch (InterruptedException ex) {
- Thread.currentThread().interrupt();
- throw HyracksDataException.create(ex);
- }
- }
- }
+ private void completeMultipartUpload(BlockBlobClient blockBlobClient) {
+ guardian.checkWriteAccess(bucket, path);
+ profiler.objectMultipartUpload();
+ blockBlobClient.commitBlockListWithResponse(blockIDArrayList, null,
null, accessTier,
+ new BlobRequestConditions(), null, Context.NONE);
}
@Override
@@ -171,4 +154,10 @@
// https://github.com/Azure/azure-sdk-for-java/issues/31150
LOGGER.warn("Multipart upload for {} was aborted", path);
}
+
+ private void log(String op) {
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("{} multipart upload for {}", op, path);
+ }
+ }
}
diff --git a/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
b/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
index 300c633..55b7bfd 100644
--- a/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-cloud/pom.xml
@@ -97,5 +97,10 @@
<artifactId>sdk-core</artifactId>
<version>${awsjavasdk.version}</version>
</dependency>
+ <dependency>
+ <groupId>com.azure</groupId>
+ <artifactId>azure-storage-blob</artifactId>
+ <version>${azureblobjavasdk.version}</version>
+ </dependency>
</dependencies>
</project>
\ No newline at end of file
diff --git
a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
index 5461ed5..423589e 100644
---
a/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
+++
b/hyracks-fullstack/hyracks/hyracks-cloud/src/main/java/org/apache/hyracks/cloud/util/CloudRetryableRequestUtil.java
@@ -33,6 +33,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import com.azure.storage.blob.models.BlobStorageException;
import com.google.cloud.BaseServiceException;
import software.amazon.awssdk.core.exception.SdkException;
@@ -184,7 +185,7 @@
while (true) {
try {
return request.call();
- } catch (IOException | BaseServiceException | SdkException e) {
+ } catch (IOException | BaseServiceException | BlobStorageException
| SdkException e) {
if (!shouldRetry.test(e)) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/pom.xml b/hyracks-fullstack/pom.xml
index 2d871ba..b07f28f 100644
--- a/hyracks-fullstack/pom.xml
+++ b/hyracks-fullstack/pom.xml
@@ -79,7 +79,7 @@
<asm.version>9.3</asm.version>
<awsjavasdk.version>2.29.27</awsjavasdk.version>
<gcsjavasdk.version>2.45.0</gcsjavasdk.version>
-
+ <azureblobjavasdk.version>12.31.1</azureblobjavasdk.version>
<implementation.title>Apache Hyracks and Algebricks -
${project.name}</implementation.title>
<implementation.url>https://asterixdb.apache.org/</implementation.url>
<implementation.version>${project.version}</implementation.version>
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20570?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: I003570868b27643a1897e45ccb394c18771ad588
Gerrit-Change-Number: 20570
Gerrit-PatchSet: 1
Gerrit-Owner: Ritik Raj <[email protected]>