This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 6e3f3ace0e OAK-11735 Index merge: merge aggregation definitions (#2302)
6e3f3ace0e is described below
commit 6e3f3ace0ea0071c453d7857e46fb319e8cbb96c
Author: Thomas Mueller <[email protected]>
AuthorDate: Thu May 22 10:06:34 2025 +0200
OAK-11735 Index merge: merge aggregation definitions (#2302)
* OAK-11735 Index merge: merge aggregation definitions
* OAK-11735 Index merge: merge aggregation definitions
---
.../oak/index/merge/IndexDefMergerUtils.java | 87 +++++++++++++++++
.../index/merge/IndexDefMergerScenariosTest.java | 1 +
.../oak/index/merge/merge-aggregates.json | 103 +++++++++++++++++++++
3 files changed, 191 insertions(+)
diff --git
a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java
b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java
index 39ed699b76..6a107ee01d 100644
---
a/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java
+++
b/oak-run/src/main/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerUtils.java
@@ -28,6 +28,8 @@ import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.jackrabbit.oak.commons.json.JsonObject;
@@ -35,12 +37,18 @@ import org.apache.jackrabbit.oak.commons.json.JsopBuilder;
import org.apache.jackrabbit.oak.commons.json.JsopReader;
import org.apache.jackrabbit.oak.commons.json.JsopTokenizer;
import org.apache.jackrabbit.oak.plugins.index.IndexName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import jline.internal.Log;
/**
* Utility that allows to merge index definitions.
*/
public class IndexDefMergerUtils {
+ private final static Logger LOG =
LoggerFactory.getLogger(IndexDefMergerUtils.class);
+
private static HashSet<String> IGNORE_LEVEL_0 = new
HashSet<>(Arrays.asList(
"reindex",
"refresh",
@@ -270,6 +278,9 @@ public class IndexDefMergerUtils {
private static JsonObject mergeChild(String path, String child, int level,
JsonObject ancestor, JsonObject custom, JsonObject product,
ArrayList<String> conflicts) {
+ if (level == 1 && path.indexOf("/aggregates/") >= 0) {
+ return mergeAggregates(path, child, level, ancestor, custom,
product, conflicts);
+ }
JsonObject a = ancestor.getChildren().get(child);
JsonObject c = custom.getChildren().get(child);
JsonObject p = product.getChildren().get(child);
@@ -287,6 +298,82 @@ public class IndexDefMergerUtils {
}
}
+    /**
+     * Merge one child of an "aggregates" node, and resolve conflicts between
+     * "include" + number entries by re-numbering the custom entries.
+     *
+     * The child is first merged normally. If that reports conflicts, the
+     * conflicting elements are removed from the custom child, the merge is
+     * repeated, and the removed elements are appended to a copy of the merge
+     * result under new names "include" + id, where the ids are larger than
+     * any id used by the ancestor, custom, or product child.
+     *
+     * Note: the conflicting elements are removed from the passed
+     * {@code custom} object, and are not put back.
+     *
+     * @param path the path passed through to mergeChild
+     * @param child the name of the child node to merge
+     * @param level the current level; mergeChild is called with level + 1
+     * @param ancestor the parent node in the ancestor index definition
+     * @param custom the parent node in the customized index definition
+     * @param product the parent node in the product index definition
+     * @param conflicts the list that receives the conflicts of the final merge
+     * @return the merged child
+     */
+    private static JsonObject mergeAggregates(String path, String child, int level,
+            JsonObject ancestor, JsonObject custom, JsonObject product,
+            ArrayList<String> conflicts) {
+
+        // merge, with level + 1 so that we don't recurse into this function again
+        // conflicts are redirected to a new, temporary list
+        ArrayList<String> aggregateConflicts = new ArrayList<>();
+        JsonObject merged = mergeChild(path, child, level + 1,
+                ancestor, custom, product, aggregateConflicts);
+        if (aggregateConflicts.isEmpty()) {
+            return merged;
+        }
+
+        // there were conflicts: resolve them
+
+        // which is the next id for "include" (eg. 12);
+        // a side that does not contain the child is skipped.
+        // this needs to be done before elements are removed from custom
+        JsonObject customChild = custom.getChildren().get(child);
+        long nextIncludeId = 1;
+        for (JsonObject side : Arrays.asList(
+                ancestor.getChildren().get(child),
+                customChild,
+                product.getChildren().get(child))) {
+            if (side != null) {
+                nextIncludeId = Math.max(nextIncludeId, getNextIncludeId(side));
+            }
+        }
+
+        // list of "include" elements to move to the end
+        List<JsonObject> elementsToMove = new ArrayList<>();
+
+        // loop over conflicts, and find + remove these
+        // the aggregateConflicts will contain entries that look like this:
+        // "Could not merge value; path=/oak:index/assets-11/aggregates/asset/include11
+        // property=path; ancestor=null; custom=...; product=..."
+        // and we need to extract the path
+        Pattern conflictPathPattern = Pattern.compile("path=([^\\s]+)\\sproperty=");
+        for (String message : aggregateConflicts) {
+            Matcher matcher = conflictPathPattern.matcher(message);
+            if (customChild == null || !matcher.find()) {
+                continue;
+            }
+            // the path of the conflicting aggregation node
+            String[] elements = matcher.group(1).split("/");
+            if (elements.length == 0) {
+                continue;
+            }
+            String conflictElement = elements[elements.length - 1];
+
+            // remove from the custom list
+            JsonObject conflict = customChild.getChildren().remove(conflictElement);
+
+            // remember the element, to put it back later.
+            // the element is null if it was already removed (one conflict is
+            // reported per conflicting property), or if it is not in custom
+            if (conflict != null) {
+                elementsToMove.add(conflict);
+            }
+        }
+
+        // merge again, with conflicts resolved now
+        // (if there are other conflicts unrelated to aggregation,
+        // those will not be resolved)
+        merged = mergeChild(path, child, level + 1, ancestor, custom, product, conflicts);
+
+        // add the aggregation conflict at the end, with new ids
+        // first we need to clone the merged object,
+        // because it might be the same object as the product currently
+        merged = JsonObject.fromJson(merged.toString(), true);
+        for (JsonObject json : elementsToMove) {
+            merged.getChildren().put("include" + nextIncludeId, json);
+            nextIncludeId++;
+        }
+        return merged;
+    }
+
+    /**
+     * Get the next unused id for child nodes named "include" + number.
+     *
+     * @param json the node whose children are inspected (may be null)
+     * @return the largest id found plus one, or 1 if the node is null or
+     *         has no child named "include" + number with an id above zero
+     */
+    private static long getNextIncludeId(JsonObject json) {
+        long max = 0;
+        if (json == null) {
+            // nothing to inspect: same result as for a node without children
+            return max + 1;
+        }
+        for (String name : json.getChildren().keySet()) {
+            if (name.startsWith("include")) {
+                String suffix = name.substring("include".length());
+                try {
+                    max = Math.max(max, Long.parseLong(suffix));
+                } catch (NumberFormatException e) {
+                    // ignore: it will probably not result in a conflict
+                    LOG.warn("Expected 'include' + number, got {}", name);
+                }
+            }
+        }
+        return max + 1;
+    }
+
private static boolean isSameJson(JsonObject a, JsonObject b) {
if (a == null || b == null) {
return a == null && b == null;
diff --git
a/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java
b/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java
index 961f9b07d4..ac11a2aa00 100644
---
a/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java
+++
b/oak-run/src/test/java/org/apache/jackrabbit/oak/index/merge/IndexDefMergerScenariosTest.java
@@ -48,6 +48,7 @@ public class IndexDefMergerScenariosTest extends
ParameterizedMergingTestBase {
public static Collection<Object[]> data() {
return Arrays.asList(new Object[][] {
testCase("should merge tags fully; and override type",
"merge-override-tags-type.json"),
+ testCase("should merge aggregates", "merge-aggregates.json"),
testCase("should merge custom into new base index",
"basic.json"),
testCase("should use the latest base version for the base in
merges", "merges-base.json"),
testCase(
diff --git
a/oak-run/src/test/resources/org/apache/jackrabbit/oak/index/merge/merge-aggregates.json
b/oak-run/src/test/resources/org/apache/jackrabbit/oak/index/merge/merge-aggregates.json
new file mode 100644
index 0000000000..e0a8780bb3
--- /dev/null
+++
b/oak-run/src/test/resources/org/apache/jackrabbit/oak/index/merge/merge-aggregates.json
@@ -0,0 +1,103 @@
+{
+ "build": {
+ "/oak:index/lucene-2": {
+ "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+ ":version": 2,
+ "tags": ["similarity", "asset", "fragments"],
+ "type": "elasticsearch",
+ "async": "elastic-async",
+ "reindex": false,
+ "reindexCount": 1,
+ "aggregates": {
+ "asset": {
+ "include0": {
+ "path": "a"
+ },
+ "include1": {
+ "path": "b"
+ }
+ }
+ }
+ }
+ },
+
+ "run": {
+ "/oak:index/lucene-1": {
+ "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+ ":version": 2,
+ "tags": ["similarity", "asset"],
+ "type": "disabled",
+ "async": ["async", "nrt"],
+ "reindex": false,
+ "reindexCount": 1,
+ "aggregates": {
+ "asset": {
+ "include0": {
+ "path": "a"
+ }
+ }
+ }
+ },
+ "/oak:index/lucene-1-custom-1": {
+ "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+ ":version": 2,
+ "tags": "custom",
+ "type": "lucene",
+ "async": "async",
+ "reindex": false,
+ "reindexCount": 1,
+ "aggregates": {
+ "asset": {
+ "include0": {
+ "path": "a"
+ },
+ "include1": {
+ "path": "xzy"
+ }
+ }
+ }
+ }
+ },
+
+ "expected": {
+ "/oak:index/lucene-2": {
+ "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+ ":version": 2,
+ "tags": ["similarity", "asset", "fragments"],
+ "type": "elasticsearch",
+ "async": "elastic-async",
+ "reindex": false,
+ "reindexCount": 1,
+ "aggregates": {
+ "asset": {
+ "include0": {
+ "path": "a"
+ },
+ "include1": {
+ "path": "b"
+ }
+ }
+ }
+ },
+ "/oak:index/lucene-2-custom-1": {
+ "jcr:primaryType": "nam:oak:QueryIndexDefinition",
+ "tags": ["asset", "custom", "fragments", "similarity"],
+ "type": "elasticsearch",
+ "async": "elastic-async",
+ "merges": ["/oak:index/lucene-2", "/oak:index/lucene-1-custom-1"],
+ "aggregates": {
+ "asset": {
+ "include0": {
+ "path": "a"
+ },
+ "include1": {
+ "path": "b"
+ },
+ "include2": {
+ "path": "xzy"
+ }
+ }
+ }
+ }
+ }
+}