This is an automated email from the ASF dual-hosted git repository.

jsedding pushed a commit to branch jsedding/OAK-11857-improve-azure-path-handling
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit b8fa0977c930b8bcc66420dcf5d91893a7e2e583 Author: Julian Sedding <[email protected]> AuthorDate: Wed Aug 13 16:37:57 2025 +0200 OAK-11857 - Improve path handling in oak-segment-azure --- .../oak/segment/azure/AzureArchiveManager.java | 26 +++++++---------- .../segment/azure/AzureSegmentArchiveReader.java | 12 ++++---- .../segment/azure/AzureSegmentArchiveWriter.java | 10 +++---- .../oak/segment/azure/AzureUtilities.java | 34 ++++++++++++++++++---- .../segment/azure/v8/AzureArchiveManagerV8.java | 9 ++---- .../oak/segment/azure/v8/AzurePersistenceV8.java | 3 +- .../oak/segment/azure/v8/AzureUtilitiesV8.java | 6 ++-- 7 files changed, 57 insertions(+), 43 deletions(-) diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java index a2d6c4e908..d44cbd94fd 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureArchiveManager.java @@ -36,8 +36,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; @@ -67,12 +65,13 @@ public class AzureArchiveManager implements SegmentArchiveManager { protected final IOMonitor ioMonitor; protected final FileStoreMonitor monitor; + private WriteAccessController writeAccessController; public AzureArchiveManager(BlobContainerClient readBlobContainerClient, BlobContainerClient writeBlobContainerClient, String rootPrefix, IOMonitor ioMonitor, FileStoreMonitor fileStoreMonitor, WriteAccessController writeAccessController) { this.readBlobContainerClient = readBlobContainerClient; this.writeBlobContainerClient = writeBlobContainerClient; - this.rootPrefix = rootPrefix; + 
this.rootPrefix = AzureUtilities.asAzurePrefix(rootPrefix); this.ioMonitor = ioMonitor; this.monitor = fileStoreMonitor; this.writeAccessController = writeAccessController; @@ -81,13 +80,10 @@ public class AzureArchiveManager implements SegmentArchiveManager { @Override public List<String> listArchives() throws IOException { try { - List<String> archiveNames = readBlobContainerClient.listBlobsByHierarchy(rootPrefix + "/").stream() + List<String> archiveNames = readBlobContainerClient.listBlobsByHierarchy(rootPrefix).stream() .filter(BlobItem::isPrefix) - .filter(blobItem -> blobItem.getName().endsWith(".tar") || blobItem.getName().endsWith(".tar/")) - .map(BlobItem::getName) - .map(Paths::get) - .map(Path::getFileName) - .map(Path::toString) + .map(AzureUtilities::getName) + .filter(blobName -> blobName.endsWith(".tar")) .collect(Collectors.toList()); Iterator<String> it = archiveNames.iterator(); @@ -110,7 +106,7 @@ public class AzureArchiveManager implements SegmentArchiveManager { * @return true if the archive is empty (no 0000.* segment) */ private boolean isArchiveEmpty(String archiveName) throws BlobStorageException { - String fullBlobPrefix = String.format("%s/%s", getDirectory(archiveName), "0000."); + String fullBlobPrefix = getDirectory(archiveName) + "0000."; ListBlobsOptions listBlobsOptions = new ListBlobsOptions(); listBlobsOptions.setPrefix(fullBlobPrefix); return !readBlobContainerClient.listBlobs(listBlobsOptions, null).iterator().hasNext(); @@ -119,7 +115,7 @@ public class AzureArchiveManager implements SegmentArchiveManager { @Override public SegmentArchiveReader open(String archiveName) throws IOException { try { - String closedBlob = String.format("%s/%s", getDirectory(archiveName), "closed"); + String closedBlob = getDirectory(archiveName) + "closed"; if (!readBlobContainerClient.getBlobClient(closedBlob).exists()) { return null; } @@ -242,7 +238,7 @@ public class AzureArchiveManager implements SegmentArchiveManager { } private void 
delete(String archiveName, Set<UUID> recoveredEntries) throws IOException { - getBlobs(archiveName + "/") + getBlobs(archiveName) .forEach(blobItem -> { if (!recoveredEntries.contains(RemoteUtilities.getSegmentUUID(getName(blobItem)))) { try { @@ -266,7 +262,7 @@ public class AzureArchiveManager implements SegmentArchiveManager { } protected String getDirectory(String archiveName) { - return String.format("%s/%s", rootPrefix, archiveName); + return AzureUtilities.asAzurePrefix(rootPrefix, archiveName); } private List<BlobItem> getBlobs(String archiveName) throws IOException { @@ -287,11 +283,11 @@ public class AzureArchiveManager implements SegmentArchiveManager { } private void copyBlob(BlobItem blob, String newParent) throws IOException { - checkArgument(blob.getProperties().getBlobType() == BLOCK_BLOB, "Only page blobs are supported for the rename"); + checkArgument(blob.getProperties().getBlobType() == BLOCK_BLOB, "Only page blobs are supported for the rename"); BlockBlobClient sourceBlobClient = readBlobContainerClient.getBlobClient(blob.getName()).getBlockBlobClient(); - String destinationBlob = String.format("%s/%s", newParent, AzureUtilities.getName(blob)); + String destinationBlob = AzureUtilities.asAzurePrefix(newParent) + AzureUtilities.getName(blob); BlockBlobClient destinationBlobClient = writeBlobContainerClient.getBlobClient(destinationBlob).getBlockBlobClient(); PollResponse<BlobCopyInfo> response = destinationBlobClient.beginCopy(sourceBlobClient.getBlobUrl(), Duration.ofMillis(100)).waitForCompletion(); diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java index b5566f2f8c..18ca18f0d6 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java +++ 
b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveReader.java @@ -41,13 +41,13 @@ public class AzureSegmentArchiveReader extends AbstractRemoteSegmentArchiveReade private final String archiveName; - private final String archivePath; + private final String archivePathPrefix; AzureSegmentArchiveReader(BlobContainerClient blobContainerClient, String rootPrefix, String archiveName, IOMonitor ioMonitor) throws IOException { super(ioMonitor); this.blobContainerClient = blobContainerClient; - this.archiveName = archiveName; - this.archivePath = String.format("%s/%s", rootPrefix, archiveName); + this.archiveName = AzureUtilities.ensureNoTrailingSlash(archiveName); + this.archivePathPrefix = AzureUtilities.asAzurePrefix(rootPrefix, archiveName); this.length = computeArchiveIndexAndLength(); } @@ -65,7 +65,7 @@ public class AzureSegmentArchiveReader extends AbstractRemoteSegmentArchiveReade protected long computeArchiveIndexAndLength() throws IOException { long length = 0; ListBlobsOptions listBlobsOptions = new ListBlobsOptions(); - listBlobsOptions.setPrefix(archivePath + "/"); + listBlobsOptions.setPrefix(archivePathPrefix); for (BlobItem blob : AzureUtilities.getBlobs(blobContainerClient, listBlobsOptions)) { Map<String, String> metadata = blob.getMetadata(); if (AzureBlobMetadata.isSegment(metadata)) { @@ -90,12 +90,12 @@ public class AzureSegmentArchiveReader extends AbstractRemoteSegmentArchiveReade @Override protected File archivePathAsFile() { - return new File(archivePath); + return new File(archivePathPrefix); } private BlockBlobClient getBlobClient(String name) throws IOException { try { - String fullName = String.format("%s/%s", archivePath, name); + String fullName = archivePathPrefix + name; return blobContainerClient.getBlobClient(fullName).getBlockBlobClient(); } catch (BlobStorageException e) { throw new IOException(e); diff --git 
a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveWriter.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveWriter.java index 05614221c0..b558143b6c 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveWriter.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureSegmentArchiveWriter.java @@ -42,10 +42,10 @@ public class AzureSegmentArchiveWriter extends AbstractRemoteSegmentArchiveWrite private final BlobContainerClient blobContainerClient; - private final String rootPrefix; - private final String archiveName; + private final String archivePathPrefix; + private final Retrier retrier = Retrier.withParams( Integer.getInteger("azure.segment.archive.writer.retries.max", 16), Integer.getInteger("azure.segment.archive.writer.retries.intervalMs", 5000) @@ -54,8 +54,8 @@ public class AzureSegmentArchiveWriter extends AbstractRemoteSegmentArchiveWrite public AzureSegmentArchiveWriter(BlobContainerClient blobContainerClient, String rootPrefix, String archiveName, IOMonitor ioMonitor, FileStoreMonitor monitor, WriteAccessController writeAccessController) { super(ioMonitor, monitor); this.blobContainerClient = blobContainerClient; - this.rootPrefix = rootPrefix; - this.archiveName = archiveName; + this.archiveName = AzureUtilities.ensureNoTrailingSlash(archiveName); + this.archivePathPrefix = AzureUtilities.asAzurePrefix(rootPrefix, archiveName); this.writeAccessController = writeAccessController; } @@ -128,7 +128,7 @@ public class AzureSegmentArchiveWriter extends AbstractRemoteSegmentArchiveWrite } private BlockBlobClient getBlockBlobClient(String name) throws IOException { - String blobFullName = String.format("%s/%s/%s", rootPrefix, archiveName, name); + String blobFullName = archivePathPrefix + name; try { return blobContainerClient.getBlobClient(blobFullName).getBlockBlobClient(); } catch 
(BlobStorageException e) { diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java index c3a7349a0e..25f65a288c 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/AzureUtilities.java @@ -24,6 +24,7 @@ import com.azure.storage.blob.models.ListBlobsOptions; import com.azure.storage.blob.specialized.AppendBlobClient; import com.azure.storage.blob.specialized.BlockBlobClient; import org.apache.jackrabbit.oak.commons.Buffer; +import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.segment.spi.RepositoryNotReachableException; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; @@ -32,9 +33,9 @@ import org.slf4j.LoggerFactory; import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; -import java.nio.file.Paths; import java.util.List; import java.util.stream.Collectors; +import java.util.stream.Stream; public final class AzureUtilities { @@ -49,16 +50,18 @@ public final class AzureUtilities { private AzureUtilities() { } + public static String getName(String path) { + return PathUtils.getName(ensureNoTrailingSlash(path)); + } + public static String getName(BlobItem blob) { - return Paths.get(blob.getName()).getFileName().toString(); + return getName(blob.getName()); } public static String getName(AppendBlobClient blob) { - return Paths.get(blob.getBlobName()).getFileName().toString(); + return getName(blob.getBlobName()); } - - public static List<BlobItem> getBlobs(BlobContainerClient blobContainerClient, ListBlobsOptions listOptions) { if (listOptions != null) { listOptions.setDetails(new BlobListDetails().setRetrieveMetadata(true)); @@ -89,6 +92,27 @@ public final class AzureUtilities { }); } + static @NotNull String 
asAzurePrefix(@NotNull String... pathSegments) { + return Stream.of(pathSegments) + .map(AzureUtilities::ensureTrailingSlash) + .map(AzureUtilities::ensureNoLeadingSlash) + .collect(Collectors.joining("")); + } + + private static @NotNull String ensureTrailingSlash(@NotNull String path) { + int len = path.length(); + return len == 0 || path.charAt(len - 1) == '/' ? path : path + '/'; + } + + private static @NotNull String ensureNoLeadingSlash(@NotNull String path) { + return !path.isEmpty() && path.charAt(0) == '/' ? ensureNoLeadingSlash(path.substring(1)) : path; + } + + static @NotNull String ensureNoTrailingSlash(@NotNull String path) { + int lastPos = path.length() - 1; + return lastPos > 0 && path.charAt(lastPos) == '/' ? ensureNoTrailingSlash(path.substring(0, lastPos)) : path; + } + private static class ByteBufferOutputStream extends OutputStream { @NotNull diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureArchiveManagerV8.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureArchiveManagerV8.java index d33c95b06f..ea10290000 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureArchiveManagerV8.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureArchiveManagerV8.java @@ -35,8 +35,6 @@ import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.URISyntaxException; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; @@ -79,11 +77,8 @@ public class AzureArchiveManagerV8 implements SegmentArchiveManager { .spliterator(), false) .filter(i -> i instanceof CloudBlobDirectory) .map(i -> (CloudBlobDirectory) i) - .filter(i -> getName(i).endsWith(".tar")) - .map(CloudBlobDirectory::getPrefix) - .map(Paths::get) - .map(Path::getFileName) - .map(Path::toString) + .map(AzureUtilitiesV8::getName) + .filter(name -> 
name.endsWith(".tar")) .collect(Collectors.toList()); Iterator<String> it = archiveNames.iterator(); diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzurePersistenceV8.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzurePersistenceV8.java index f8d5de9ed3..0b056f768f 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzurePersistenceV8.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzurePersistenceV8.java @@ -18,7 +18,6 @@ package org.apache.jackrabbit.oak.segment.azure.v8; import java.io.IOException; import java.net.URISyntaxException; -import java.nio.file.Paths; import java.util.Date; import java.util.EnumSet; import java.util.concurrent.TimeUnit; @@ -71,7 +70,7 @@ public class AzurePersistenceV8 implements SegmentNodeStorePersistence { for (ListBlobItem i : segmentstoreDirectory.listBlobs(null, false, EnumSet.noneOf(BlobListingDetails.class), null, null)) { if (i instanceof CloudBlobDirectory) { CloudBlobDirectory dir = (CloudBlobDirectory) i; - String name = Paths.get(dir.getPrefix()).getFileName().toString(); + String name = AzureUtilitiesV8.getName(dir); if (name.endsWith(".tar")) { return true; } diff --git a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureUtilitiesV8.java b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureUtilitiesV8.java index 382eef83ac..2a95c41d0d 100644 --- a/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureUtilitiesV8.java +++ b/oak-segment-azure/src/main/java/org/apache/jackrabbit/oak/segment/azure/v8/AzureUtilitiesV8.java @@ -29,6 +29,7 @@ import com.microsoft.azure.storage.blob.CloudBlobDirectory; import com.microsoft.azure.storage.blob.LeaseStatus; import com.microsoft.azure.storage.blob.ListBlobItem; import org.apache.jackrabbit.oak.commons.Buffer; +import 
org.apache.jackrabbit.oak.segment.azure.AzureUtilities; import org.apache.jackrabbit.oak.segment.spi.RepositoryNotReachableException; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; @@ -39,7 +40,6 @@ import java.io.IOException; import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; -import java.nio.file.Paths; import java.security.InvalidKeyException; import java.util.ArrayList; import java.util.EnumSet; @@ -59,11 +59,11 @@ public final class AzureUtilitiesV8 { } public static String getName(CloudBlob blob) { - return Paths.get(blob.getName()).getFileName().toString(); + return AzureUtilities.getName(blob.getName()); } public static String getName(CloudBlobDirectory directory) { - return Paths.get(directory.getUri().getPath()).getFileName().toString(); + return AzureUtilities.getName(directory.getPrefix()); } public static List<CloudBlob> getBlobs(CloudBlobDirectory directory) throws IOException {
