This is an automated email from the ASF dual-hosted git repository. stefanegli pushed a commit to branch OAK-10347 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/OAK-10347 by this push: new 5cd9475f4c OAK-10347 : Adding small util class for exporting a tree as flat file 5cd9475f4c is described below commit 5cd9475f4cdcf79e9163490d578fbf9b467765e6 Author: stefan-egli <stefane...@apache.org> AuthorDate: Wed Jul 12 18:17:17 2023 +0200 OAK-10347 : Adding small util class for exporting a tree as flat file --- .../document/flatfile/SimpleFlatFileUtil.java | 109 +++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/SimpleFlatFileUtil.java b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/SimpleFlatFileUtil.java new file mode 100644 index 0000000000..59bbcfc131 --- /dev/null +++ b/oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/SimpleFlatFileUtil.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.index.indexer.document.flatfile; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.function.Predicate; + +import org.apache.jackrabbit.oak.index.indexer.document.NodeStateEntry; +import org.apache.jackrabbit.oak.plugins.document.DocumentNodeState; +import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry; +import org.apache.jackrabbit.oak.spi.state.NodeState; +import org.apache.jackrabbit.oak.spi.state.NodeStateUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This util class can be used to export a tree (eg entire repository) to a flat + * file, without index dependency/involvement. + */ +public class SimpleFlatFileUtil { + + private static final Logger log = LoggerFactory.getLogger(SimpleFlatFileUtil.class); + + private static final String LINE_SEPARATOR = System.getProperty("line.separator"); + + private final FileWriter fw; + private final BufferedWriter bw; + private final ArrayList<StateInBytesHolder> entryBatch = new ArrayList<>(); + private final Predicate<String> pathPredicate = path -> true; + private final NodeStateEntryWriter entryWriter; + private long totalLines = 0; + + private SimpleFlatFileUtil(File f) throws IOException { + // blobStore is only used for deserialization - so pass null here: + entryWriter = new NodeStateEntryWriter(null); + fw = new FileWriter(f); + bw = new BufferedWriter(fw); + } + + public static void createFlatFileFor(NodeState ns, File f) throws IOException { + final SimpleFlatFileUtil h = new SimpleFlatFileUtil(f); + log.info("createFlatFileFor : writing to {}", f.getCanonicalPath()); + h.addEntryAndTraverseChildren(ns); + h.close(); + log.info("createFlatFileFor : done. wrote {} lines in total.", h.totalLines); + } + + private void close() throws IOException { + flush(); + bw.close(); + fw.close(); + } + + private void flush() throws IOException { + for (StateInBytesHolder nsh : entryBatch) { + String line = entryWriter.toString(nsh.getPathElements(), nsh.getLine()); + bw.append(line); + bw.append(LINE_SEPARATOR); + } + log.info("flush : wroter another {} nodes, total so far: {} lines.", + entryBatch.size(), totalLines); + totalLines += entryBatch.size(); + entryBatch.clear(); + } + + private void addEntryAndTraverseChildren(NodeState ns) throws IOException { + addEntry(ns); + if (entryBatch.size() > 999) { + flush(); + } + for (ChildNodeEntry e : ns.getChildNodeEntries()) { + addEntryAndTraverseChildren(e.getNodeState()); + } + } + + private void addEntry(NodeState ns) { + DocumentNodeState dns = (DocumentNodeState) ns; + NodeStateEntry e = new NodeStateEntry.NodeStateEntryBuilder(dns, + dns.getPath().toString()).build(); + String path = e.getPath(); + if (!NodeStateUtils.isHiddenPath(path) && pathPredicate.test(path)) { + String jsonText = entryWriter.asJson(e.getNodeState()); + // Here logic differs from NodeStateEntrySorter in sense that + // Holder line consist only of json and not 'path|json' + StateInBytesHolder h = new StateInBytesHolder(path, jsonText); + entryBatch.add(h); + } + } +} \ No newline at end of file