smiroslav commented on code in PR #2604:
URL: https://github.com/apache/jackrabbit-oak/pull/2604#discussion_r2487115180
##########
oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java:
##########
@@ -336,6 +331,73 @@ private void copyBlob(BlobItem blob, String newParent)
throws IOException {
}
+ private void batchCopyBlobs(List<BlobItem> from, String to) {
+ String newParent = getDirectory(to);
+
+ log.info("Start tp copy {} blobs to {}", from.size(), newParent);
Review Comment:
```suggestion
log.info("Start to copy {} blobs to {}", from.size(), newParent);
```
##########
oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java:
##########
@@ -336,6 +331,73 @@ private void copyBlob(BlobItem blob, String newParent)
throws IOException {
}
+ private void batchCopyBlobs(List<BlobItem> from, String to) {
+ String newParent = getDirectory(to);
+
+ log.info("Start tp copy {} blobs to {}", from.size(), newParent);
+
+ int batches = from.size() / COPY_BATCH;
+ int start = 0;
+ int end = COPY_BATCH;
+
+ for (int i = 0; i < batches; i++) {
+ log.info("Start batch {}/{}: {} to {}", i + 1, batches, start,
end);
+ List<BlobItem> blobItemsBatch = from.subList(start, end);
+ copyBlobs(blobItemsBatch, newParent);
+ start = end;
+ end += COPY_BATCH;
+ }
+
+ log.info("Copy {} to {}", start, from.size());
+ copyBlobs(from.subList(start, from.size()), newParent);
Review Comment:
This call is not necessary when blob count is an exact multiple of
COPY_BATCH (e.g., 2000). Code makes an unnecessary call with an empty list.
When this method is called, if we have a remainder, it will not be displayed
in the logs.
For example, if the list is 2500 blobs, we will see in the log "Start batch
2/2 ..." (from the for loop), but the method will be invoked once more,
##########
oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java:
##########
@@ -336,6 +331,73 @@ private void copyBlob(BlobItem blob, String newParent)
throws IOException {
}
+ private void batchCopyBlobs(List<BlobItem> from, String to) {
+ String newParent = getDirectory(to);
+
+ log.info("Start tp copy {} blobs to {}", from.size(), newParent);
+
+ int batches = from.size() / COPY_BATCH;
+ int start = 0;
+ int end = COPY_BATCH;
+
+ for (int i = 0; i < batches; i++) {
+ log.info("Start batch {}/{}: {} to {}", i + 1, batches, start,
end);
+ List<BlobItem> blobItemsBatch = from.subList(start, end);
+ copyBlobs(blobItemsBatch, newParent);
+ start = end;
+ end += COPY_BATCH;
+ }
+
+ log.info("Copy {} to {}", start, from.size());
+ copyBlobs(from.subList(start, from.size()), newParent);
+ }
+
+ private void copyBlobs(List<BlobItem> blobs, String newParent) {
+ List<CopyBlob> copyBlobs = new ArrayList<>();
+ for (BlobItem blob : blobs) {
+ String destinationBlob = AzureUtilities.asAzurePrefix(newParent) +
AzureUtilities.getName(blob);
+ try {
+ BlockBlobClient blobClient =
readBlobContainerClient.getBlobClient(blob.getName()).getBlockBlobClient();
+
+ BlockBlobClient destinationBlobClient =
writeBlobContainerClient.getBlobClient(destinationBlob).getBlockBlobClient();
+
+ SyncPoller<BlobCopyInfo, Void> copy =
destinationBlobClient.beginCopy(blobClient.getBlobUrl(), null);
+
+ copyBlobs.add(new CopyBlob(copy, destinationBlob));
+ } catch (Exception e) {
+ log.error("Failed to start copying of blob {} to {}",
blob.getName(), destinationBlob, e);
Review Comment:
The issue here is that if the copy fails, it will not in any way disturb the
caller.
The original code was also not better.
How about throwing IOException? The method ```backup``` has it in its
signature.
##########
oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java:
##########
@@ -336,6 +331,73 @@ private void copyBlob(BlobItem blob, String newParent)
throws IOException {
}
+ private void batchCopyBlobs(List<BlobItem> from, String to) {
+ String newParent = getDirectory(to);
+
+ log.info("Start tp copy {} blobs to {}", from.size(), newParent);
+
+ int batches = from.size() / COPY_BATCH;
+ int start = 0;
+ int end = COPY_BATCH;
+
+ for (int i = 0; i < batches; i++) {
+ log.info("Start batch {}/{}: {} to {}", i + 1, batches, start,
end);
+ List<BlobItem> blobItemsBatch = from.subList(start, end);
+ copyBlobs(blobItemsBatch, newParent);
+ start = end;
+ end += COPY_BATCH;
+ }
+
+ log.info("Copy {} to {}", start, from.size());
+ copyBlobs(from.subList(start, from.size()), newParent);
+ }
+
+ private void copyBlobs(List<BlobItem> blobs, String newParent) {
+ List<CopyBlob> copyBlobs = new ArrayList<>();
+ for (BlobItem blob : blobs) {
+ String destinationBlob = AzureUtilities.asAzurePrefix(newParent) +
AzureUtilities.getName(blob);
+ try {
+ BlockBlobClient blobClient =
readBlobContainerClient.getBlobClient(blob.getName()).getBlockBlobClient();
+
+ BlockBlobClient destinationBlobClient =
writeBlobContainerClient.getBlobClient(destinationBlob).getBlockBlobClient();
+
+ SyncPoller<BlobCopyInfo, Void> copy =
destinationBlobClient.beginCopy(blobClient.getBlobUrl(), null);
+
+ copyBlobs.add(new CopyBlob(copy, destinationBlob));
+ } catch (Exception e) {
+ log.error("Failed to start copying of blob {} to {}",
blob.getName(), destinationBlob, e);
+ }
+ }
+
+ processBeginCopy(copyBlobs);
+ }
+
+ private void processBeginCopy(List<CopyBlob> copyBlobs) {
+ for (CopyBlob copy : copyBlobs) {
+ try {
+ CopyStatusType statusType =
readBlobContainerClient.getBlobClient(copy.blobName).getBlockBlobClient().getProperties().getCopyStatus();
+ if (statusType == CopyStatusType.PENDING) {
+ statusType =
copy.poller.waitForCompletion().getValue().getCopyStatus();
+ }
+ if (statusType != CopyStatusType.SUCCESS) {
+ log.warn("Failed to copy blob {}, status {}",
copy.blobName, statusType.toString());
Review Comment:
Like the previous comment, in case of an issue, the caller won't be
notified.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]