thomasmueller commented on code in PR #671: URL: https://github.com/apache/jackrabbit-oak/pull/671#discussion_r950112445
########## oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticResponseHandler.java: ########## @@ -41,14 +43,22 @@ public class ElasticResponseHandler { private final PlanResult planResult; private final Filter filter; + private final Set<String> seenPaths = Sets.newHashSet(); ElasticResponseHandler(@NotNull FulltextIndexPlanner.PlanResult planResult, @NotNull Filter filter) { this.planResult = planResult; this.filter = filter; } public String getPath(Hit<? extends JsonNode> hit) { - return transformPath(hit.source().get(FieldNames.PATH).asText()); + String originalPath = hit.source().get(FieldNames.PATH).asText(); + + // Check here for path transformation to avoid maintaining seenPaths set in case of non transformed queries + if (planResult.isPathTransformed()) { Review Comment: What about moving the isPathTransformed() condition to transformPath(String path)? Like it is done in FulltextIndexPlanner.transformPath. Something like: ``` private String transformPath(String path) { if (!planResult.isPathTransformed()) { return path; } ``` ########## oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticResponseHandler.java: ########## @@ -59,6 +69,13 @@ private String transformPath(String path) { return null; } + // avoid duplicate entries + // (This reduces number of entries passed on to QueryEngine for post processing in case of transformed path field queries) + if (seenPaths.contains(transformedPath)) { Review Comment: "add" returns "true if this set did not already contain the specified element" So a bit shorter (one line saving) and a bit faster would be: ``` if (!seenPaths.add(transformedPath)) { ... } ``` ########## oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java: ########## @@ -112,6 +113,67 @@ public void testFullTextTermWithUnescapedBraces() throws Exception { } } + @Test + public void pathTransformationsWithNoPathRestrictions() throws Exception { + Tree test = setup(); + test.addChild("a").addChild("j:c").setProperty("analyzed_field", "bar"); + test.addChild("b").setProperty("analyzed_field", "bar"); + test.addChild("c").addChild("d").addChild("j:c").setProperty("analyzed_field", "bar"); + + root.commit(); + + assertEventually(() -> { + assertQuery("//*[j:c/@analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a", "/test/c/d")); + assertQuery("//*[d/*/@analyzed_field = 'bar']", XPATH, Collections.singletonList("/test/c")); + }); + } + + @Test + public void pathTransformationsWithPathRestrictions() throws Exception { + Tree test = setup(); + + test.addChild("a").addChild("j:c").setProperty("analyzed_field", "bar"); + test.addChild("b").setProperty("analyzed_field", "bar"); + test.addChild("c").addChild("d").addChild("j:c").setProperty("analyzed_field", "bar"); + test.addChild("e").addChild("temp:c").setProperty("analyzed_field", "bar"); + test.addChild("f").addChild("d").addChild("temp:c").setProperty("analyzed_field", "bar"); + test.addChild("g").addChild("e").addChild("temp:c").setProperty("analyzed_field", "bar"); + + + Tree temp = root.getTree("/").addChild("tmp"); + + temp.addChild("a").addChild("j:c").setProperty("analyzed_field", "bar"); + temp.addChild("b").setProperty("analyzed_field", "bar"); + temp.addChild("c").addChild("d").addChild("j:c").setProperty("analyzed_field", "bar"); + root.commit(); + + assertEventually(() -> { + // ALL CHILDREN + assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a", "/test/c/d")); + assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e")); + assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/c", "/test/f")); + assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a/j:c","/test/b","/test/c/d/j:c", + "/test/e/temp:c", "/test/f/d/temp:c","/test/g/e/temp:c")); + + // DIRECT CHILDREN + assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a")); + assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a", "/test/e")); + assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/c", "/test/f")); + assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, Arrays.asList("/test/b")); + + // EXACT + assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a")); + assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a")); + assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, Arrays.asList("/test/c")); + assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, Arrays.asList("/test/a/j:c")); + + // TODO : We don't have assertions for PARENT path filter here. Review Comment: Parent path filters are (sometimes) used for joins, depending on the join order... I think we would need to use SQL-2 queries; not sure if it is better to test it? If you think it's useful, I can have a look... -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org