This is an automated email from the ASF dual-hosted git repository.

stefanegli pushed a commit to branch OAK-10347 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit fbf842d6b12853d873634a0a1a1f929695488c82 Author: stefan-egli <stefane...@apache.org> AuthorDate: Thu Jul 13 15:57:24 2023 +0200 OAK-10347 : incorporated review feedback --- .../document/flatfile/SimpleFlatFileUtil.java | 63 ++++++++-------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/SimpleFlatFileUtil.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/SimpleFlatFileUtil.java index b67cebf1c1..3284b6493d 100644 --- a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/SimpleFlatFileUtil.java +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/SimpleFlatFileUtil.java @@ -18,12 +18,13 @@ */ package org.apache.jackrabbit.oak.index.indexer.document.flatfile; +import static org.apache.jackrabbit.guava.common.collect.ImmutableList.copyOf; +import static org.apache.jackrabbit.oak.commons.PathUtils.elements; + import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; -import java.util.ArrayList; -import java.util.function.Predicate; import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry; import org.apache.jackrabbit.oak.plugins.document.DocumentNodeState; @@ -43,67 +44,49 @@ public class SimpleFlatFileUtil { private static final String LINE_SEPARATOR = System.getProperty("line.separator"); - private final FileWriter fw; private final BufferedWriter bw; - private final ArrayList<StateInBytesHolder> entryBatch = new ArrayList<>(); - private final Predicate<String> pathPredicate = path -> true; private final NodeStateEntryWriter entryWriter; private long totalLines = 0; - private SimpleFlatFileUtil(File f) throws IOException { + private SimpleFlatFileUtil(BufferedWriter bw) throws IOException { // blobStore is only used for deserialization - so pass null here: entryWriter = 
new NodeStateEntryWriter(null); - fw = new FileWriter(f); - bw = new BufferedWriter(fw); + this.bw = bw; } public static void createFlatFileFor(NodeState ns, File f) throws IOException { - final SimpleFlatFileUtil h = new SimpleFlatFileUtil(f); log.info("createFlatFileFor : writing to {}", f.getCanonicalPath()); - h.addEntryAndTraverseChildren(ns); - h.close(); - log.info("createFlatFileFor : done. wrote {} lines in total.", h.totalLines); - } - - private void close() throws IOException { - flush(); - bw.close(); - fw.close(); - } - - private void flush() throws IOException { - for (StateInBytesHolder nsh : entryBatch) { - String line = entryWriter.toString(nsh.getPathElements(), nsh.getLine()); - bw.append(line); - bw.append(LINE_SEPARATOR); + try (FileWriter fw = new FileWriter(f); + BufferedWriter bw = new BufferedWriter(fw)) { + SimpleFlatFileUtil h = new SimpleFlatFileUtil(bw); + h.addEntryAndTraverseChildren(ns); + log.info("createFlatFileFor : done. wrote {} lines in total.", h.totalLines); } - log.info("flush : wrote another {} nodes, total so far: {} lines.", - entryBatch.size(), totalLines); - totalLines += entryBatch.size(); - entryBatch.clear(); } private void addEntryAndTraverseChildren(NodeState ns) throws IOException { addEntry(ns); - if (entryBatch.size() > 999) { - flush(); - } for (ChildNodeEntry e : ns.getChildNodeEntries()) { addEntryAndTraverseChildren(e.getNodeState()); } } - private void addEntry(NodeState ns) { + private void addEntry(NodeState ns) throws IOException { DocumentNodeState dns = (DocumentNodeState) ns; NodeStateEntry e = new NodeStateEntry.NodeStateEntryBuilder(dns, dns.getPath().toString()).build(); String path = e.getPath(); - if (!NodeStateUtils.isHiddenPath(path) && pathPredicate.test(path)) { - String jsonText = entryWriter.asJson(e.getNodeState()); - // Here logic differs from NodeStateEntrySorter in sense that - // Holder line consist only of json and not 'path|json' - StateInBytesHolder h = new StateInBytesHolder(path, 
jsonText); - entryBatch.add(h); + if (NodeStateUtils.isHiddenPath(path)) { + // skip + return; + } + String jsonText = entryWriter.asJson(e.getNodeState()); + String line = entryWriter.toString(copyOf(elements(path)), jsonText); + bw.append(line); + bw.append(LINE_SEPARATOR); + totalLines++; + if (totalLines % 10000 == 0) { + log.info("addEntry : wrote {} lines so far.", totalLines); } } -} +} \ No newline at end of file