This is an automated email from the ASF dual-hosted git repository. jsedding pushed a commit to branch jsedding/OAK-11861-segment-parallel-initialization in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 9e62147c8d244210ea85cbda49fc2bdea3c19d21 Author: Julian Sedding <[email protected]> AuthorDate: Thu Aug 14 16:40:56 2025 +0200 OAK-11861 - segment store initialization should run in parallel --- .../jackrabbit/oak/segment/file/tar/TarFiles.java | 38 ++++++++++++++-------- .../jackrabbit/oak/segment/file/tar/TarReader.java | 7 ++++ 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java index 63936be274..3d03c4550d 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarFiles.java @@ -25,17 +25,17 @@ import static java.util.Collections.emptySet; import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.Set; import java.util.UUID; import java.util.concurrent.locks.ReadWriteLock; @@ -44,6 +44,8 @@ import java.util.function.Consumer; import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.jackrabbit.oak.api.IllegalRepositoryStateException; import org.apache.jackrabbit.oak.commons.Buffer; @@ -411,18 +413,28 @@ public class TarFiles implements Closeable { // iterates the indices in ascending order, but prepends - instead of // appending - the corresponding TAR readers to the linked list. This // results in a properly ordered linked list. + Stream.of(indices) + .parallel() + .map(index -> { + try { + if (readOnly) { + return TarReader.openRO(map.get(index), tarRecovery, archiveManager); + } else { + return TarReader.open(map.get(index), tarRecovery, archiveManager); + } + } catch (IOException e) { + log.warn("Unable to open TAR file: {}", map.get(index), e); + throw new UncheckedIOException(e); + } + }) + .filter(Objects::nonNull) + .collect(Collectors.toUnmodifiableList()) + .forEach(reader -> { + segmentCount.inc(getSegmentCount(reader)); + readers = new Node(reader, readers); + readerCount.inc(); + }); - for (Integer index : indices) { - TarReader r; - if (readOnly) { - r = TarReader.openRO(map.get(index), tarRecovery, archiveManager); - } else { - r = TarReader.open(map.get(index), tarRecovery, archiveManager); - } - segmentCount.inc(getSegmentCount(r)); - readers = new Node(r, readers); - readerCount.inc(); - } if (!readOnly) { int writeNumber = 0; if (indices.length > 0) { diff --git a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java index c17a4caa32..b2129fd62e 100644 --- a/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java +++ b/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java @@ -100,6 +100,13 @@ public class TarReader implements Closeable { LinkedHashMap<UUID, byte[]> entries = new LinkedHashMap<>(); for (String file : sorted.values()) { collectFileEntries(file, entries, true, archiveManager); + if (entries.isEmpty()) { + archiveManager.delete(file); + } + } + + if (entries.isEmpty()) { + return null; } // regenerate the first generation based on the recovered data
