This is an automated email from the ASF dual-hosted git repository. dsmiley pushed a commit to branch branch_10x in repository https://gitbox.apache.org/repos/asf/solr.git
commit 285e05d7041d7e146002eddeb9837ea57b3a13ae Author: David Smiley <[email protected]> AuthorDate: Sat Apr 18 00:30:29 2026 -0400 SOLR-14687: {!parent} and {!child} qparsers now use NestPathField (#4206) The {!parent} and {!child} query parsers now support a parentPath local param that automatically derives the correct parent filter using the _nest_path_ field, making nested document queries easier to write correctly. childPath is also added as an optional param to limit the child-side of both queries. Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: dsmiley <[email protected]> --- .../unreleased/SOLR-14687-parentPath-param.yml | 9 + .../solr/search/join/BlockJoinChildQParser.java | 64 ++++- .../solr/search/join/BlockJoinParentQParser.java | 271 +++++++++++++++++++-- .../apache/solr/search/join/FiltersQParser.java | 15 +- .../org/apache/solr/search/QueryEqualityTest.java | 25 ++ .../solr/update/TestNestedUpdateProcessor.java | 147 ++++++++++- .../query-guide/pages/block-join-query-parser.adoc | 85 ++++++- .../pages/searching-nested-documents.adoc | 51 ++-- 8 files changed, 588 insertions(+), 79 deletions(-) diff --git a/changelog/unreleased/SOLR-14687-parentPath-param.yml b/changelog/unreleased/SOLR-14687-parentPath-param.yml new file mode 100644 index 00000000000..424deee0c42 --- /dev/null +++ b/changelog/unreleased/SOLR-14687-parentPath-param.yml @@ -0,0 +1,9 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: The {!parent} and {!child} query parsers now support a parentPath local param that automatically derives the correct parent filter using the _nest_path_ field, making nested document queries easier to write correctly. childPath is also added. 
+type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: David Smiley + - name: hossman +links: + - name: SOLR-14687 + url: https://issues.apache.org/jira/browse/SOLR-14687 diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinChildQParser.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinChildQParser.java index bb6c80db07a..0d7383dfc4f 100644 --- a/solr/core/src/java/org/apache/solr/search/join/BlockJoinChildQParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinChildQParser.java @@ -25,6 +25,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.SyntaxError; +/** Matches child documents based on parent doc criteria. */ public class BlockJoinChildQParser extends BlockJoinParentQParser { public BlockJoinChildQParser( @@ -33,18 +34,18 @@ public class BlockJoinChildQParser extends BlockJoinParentQParser { } @Override - protected Query createQuery(Query parentListQuery, Query query, String scoreMode) { - return new ToChildBlockJoinQuery(query, getBitSetProducer(parentListQuery)); + protected Query createQuery(Query parentListQuery, Query fromQuery, String scoreMode) { + return new ToChildBlockJoinQuery(fromQuery, getBitSetProducer(parentListQuery)); } @Override - protected String getParentFilterLocalParamName() { + protected String getLegacyParentFilterParamName() { return "of"; } @Override - protected Query noClausesQuery() throws SyntaxError { - final Query parents = parseParentFilter(); + protected Query noClausesQueryLegacy() throws SyntaxError { + final Query parents = parseLegacyParentFilter(); final BooleanQuery notParents = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), Occur.MUST) @@ -52,4 +53,57 @@ public class BlockJoinChildQParser extends BlockJoinParentQParser { .build(); return new BitSetProducerQuery(getBitSetProducer(notParents)); } + + /** + * Parses the query 
using the {@code parentPath} local-param for the child parser. + * + * <p>For the {@code child} parser with {@code parentPath="/a/b/c"}: + * + * <pre>NEW: q={!child parentPath="/a/b/c"}p_title:dad + * + * OLD: q={!child of=$ff v=$vv} + * ff=(*:* -{!prefix f="_nest_path_" v="/a/b/c/"}) + * vv=(+p_title:dad +{!field f="_nest_path_" v="/a/b/c"})</pre> + * + * <p>For {@code parentPath="/"}: + * + * <pre>NEW: q={!child parentPath="/"}p_title:dad + * + * OLD: q={!child of=$ff v=$vv} + * ff=(*:* -_nest_path_:*) + * vv=(+p_title:dad -_nest_path_:*)</pre> + * + * <p>The optional {@code childPath} local-param narrows the returned children to docs at exactly + * {@code parentPath/childPath}. + * + * @param parentPath the normalized parent path (starts with "/", no trailing slash except for + * root "/") + * @param childPath optional path constraining the children relative to parentPath + */ + @Override + protected Query parseUsingParentPath(String parentPath, String childPath) throws SyntaxError { + final BooleanQuery parsedParentQuery = parseImpl(); + + if (parsedParentQuery.clauses().isEmpty()) { // i.e. 
match all parents + // no block-join needed; just filter to certain children + return wrapWithChildPathConstraint(parentPath, childPath, new MatchAllDocsQuery()); + } + + // allParents filter: (*:* -{!prefix f="_nest_path_" v="<parentPath>/"}) + // For root: (*:* -_nest_path_:*) + final Query allParentsFilter = buildAllParentsFilterFromPath(parentPath); + + // constrain the parent query to only match docs at exactly parentPath + // (+<original_parent> +{!field f="_nest_path_" v="<parentPath>"}) + // For root: (+<original_parent> -_nest_path_:*) + Query constrainedParentQuery = wrapWithParentPathConstraint(parentPath, parsedParentQuery); + + Query joinQuery = createQuery(allParentsFilter, constrainedParentQuery, null); + // matches all children of matching parents + if (childPath == null) { + return joinQuery; + } + // need to constrain to certain children + return wrapWithChildPathConstraint(parentPath, childPath, joinQuery); + } } diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java index 1d73bbd78aa..6e7dbaf4c63 100644 --- a/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinParentQParser.java @@ -18,15 +18,23 @@ package org.apache.solr.search.join; import java.io.IOException; import java.io.UncheckedIOException; +import java.util.Locale; import java.util.Objects; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import 
org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.Weight; import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.search.join.QueryBitSetProducer; @@ -34,21 +42,41 @@ import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.search.join.ToParentBlockJoinQuery; import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BitSetIterator; +import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; import org.apache.solr.search.ExtendedQueryBase; -import org.apache.solr.search.QParser; import org.apache.solr.search.SolrCache; import org.apache.solr.search.SyntaxError; import org.apache.solr.util.SolrDefaultScorerSupplier; +/** Matches parent documents based on child doc criteria. */ public class BlockJoinParentQParser extends FiltersQParser { /** implementation detail subject to change */ public static final String CACHE_NAME = "perSegFilter"; - protected String getParentFilterLocalParamName() { - return "which"; - } + /** + * Optional local-param that, when specified, makes this parser natively aware of the {@link + * IndexSchema#NEST_PATH_FIELD_NAME} field to automatically derive the parent filter (the {@code + * which} param). The value must be an absolute path starting with {@code /} using {@code /} as + * separator, e.g. {@code /} for root-level parents or {@code /skus} for parents nested at that + * path. When specified, the {@code which} param must not also be specified. 
+ * + * @see <a href="https://issues.apache.org/jira/browse/SOLR-14687">SOLR-14687</a> + */ + public static final String PARENT_PATH_PARAM = "parentPath"; + + /** + * Optional local-param, only valid together with {@link #PARENT_PATH_PARAM}. When specified, the + * child side of the block join is constrained to docs at exactly the path formed by concatenating + * {@code parentPath + "/" + childPath}, instead of the default behavior of matching all + * descendants: for the {@code parent} parser this constrains the subordinate (child) query; for + * the {@code child} parser it narrows the returned children. For example, {@code + * parentPath="/skus" childPath="manuals"} constrains children to docs whose {@code _nest_path_} + * is exactly {@code /skus/manuals}. + */ + public static final String CHILD_PATH_PARAM = "childPath"; @Override protected String getFiltersParamName() { @@ -60,35 +88,241 @@ public class BlockJoinParentQParser extends FiltersQParser { super(qstr, localParams, params, req); } - protected Query parseParentFilter() throws SyntaxError { - String filter = localParams.get(getParentFilterLocalParamName()); - QParser parentParser = subQuery(filter, null); - Query parentQ = parentParser.getQuery(); - return parentQ; + @Override + public Query parse() throws SyntaxError { + // Dispatch based on parentPath or none (DIY/legacy) + + String parentPath = localParams.get(PARENT_PATH_PARAM); + if (parentPath != null) { + if (localParams.get(getLegacyParentFilterParamName()) != null) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, + PARENT_PATH_PARAM + + " and " + + getLegacyParentFilterParamName() + + " local params are mutually exclusive"); + } + if (!parentPath.startsWith("/")) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, PARENT_PATH_PARAM + " must start with '/'"); + } + // strip trailing slash (except for root "/") + if (parentPath.length() > 1 && parentPath.endsWith("/")) { + parentPath = parentPath.substring(0, parentPath.length() - 1); + } + + String childPath = localParams.get(CHILD_PATH_PARAM); + if (childPath != null) { + if
(childPath.startsWith("/")) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, CHILD_PATH_PARAM + " must not start with '/'"); + } + if (childPath.isEmpty()) { + childPath = null; // treat empty as not specified + } + } + return parseUsingParentPath(parentPath, childPath); + } + + // NO parentPath; use legacy/advanced/DIY code path: + + if (localParams.get(CHILD_PATH_PARAM) != null) { + throw new SolrException( + SolrException.ErrorCode.BAD_REQUEST, CHILD_PATH_PARAM + " requires " + PARENT_PATH_PARAM); + } + if (localParams.get(getLegacyParentFilterParamName()) == null) { + throw new SyntaxError( + String.format( + Locale.ROOT, + "'%s' or '%s' is required for the '%s' query parser", + getLegacyParentFilterParamName(), + PARENT_PATH_PARAM, + localParams.get("type", "parent"))); + } + return parseWithLegacyParam(); + } + + /** + * Parses the query using the {@code parentPath} localparam to automatically derive the parent + * filter and child query constraints from {@link IndexSchema#NEST_PATH_FIELD_NAME}. 
+ * + * <p>For the {@code parent} parser with {@code parentPath="/a/b/c"}: + * + * <pre>NEW: q={!parent parentPath="/a/b/c"}c_title:son + * + * OLD: q=(+{!field f="_nest_path_" v="/a/b/c"} +{!parent which=$ff v=$vv}) + * ff=(*:* -{!prefix f="_nest_path_" v="/a/b/c/"}) + * vv=(+c_title:son +{!prefix f="_nest_path_" v="/a/b/c/"})</pre> + * + * <p>For {@code parentPath="/"}: + * + * <pre>NEW: q={!parent parentPath="/"}c_title:son + * + * OLD: q=(+(*:* -_nest_path_:*) +{!parent which=$ff v=$vv}) + * ff=(*:* -_nest_path_:*) + * vv=(+c_title:son +_nest_path_:*)</pre> + * + * @param parentPath the normalized parent path (starts with "/", no trailing slash except for + * root "/") + * @param childPath optional path constraining the children relative to parentPath + */ + protected Query parseUsingParentPath(String parentPath, String childPath) throws SyntaxError { + final BooleanQuery parsedChildQuery = parseImpl(); + + if (parsedChildQuery.clauses().isEmpty()) { // i.e. all children + // no block-join needed; just return all "parent" docs at this level + return wrapWithParentPathConstraint(parentPath, new MatchAllDocsQuery()); + } + + // allParents filter: (*:* -{!prefix f="_nest_path_" v="<parentPath>/"}) + // For root: (*:* -_nest_path_:*) + final Query allParentsFilter = buildAllParentsFilterFromPath(parentPath); + + // constrain child query: (+<original_child> +{!prefix f="_nest_path_" v="<parentPath>/"}) + // For root: (+<original_child> +_nest_path_:*) + // If childPath specified: (+<original_child> +{!term f="_nest_path_" + // v="<parentPath>/<childPath>"}) + final Query constrainedChildQuery = + wrapWithChildPathConstraint(parentPath, childPath, parsedChildQuery); + + final String scoreMode = localParams.get("score", ScoreMode.None.name()); + final Query parentJoinQuery = createQuery(allParentsFilter, constrainedChildQuery, scoreMode); + + // wrap result: (+<parent_join> +{!field f="_nest_path_" v="<parentPath>"}) + // For root: (+<parent_join> -_nest_path_:*) + 
return wrapWithParentPathConstraint(parentPath, parentJoinQuery); } - @Override - protected Query wrapSubordinateClause(Query subordinate) throws SyntaxError { + /** + * Builds the "all parents" filter query from the given {@code parentPath}. This query matches all + * documents that are NOT strictly below (nested inside) the given path. This includes: + * + * <ul> + * <li>documents without any {@code _nest_path_} (root-level, non-nested docs) + * <li>documents at the same level as {@code parentPath} (i.e. with exactly that path) + * <li>documents at levels above {@code parentPath} + * <li>documents at completely orthogonal paths (e.g. {@code /x/y/z} when parentPath is {@code + * /a/b/c}) + * </ul> + * + * <p>Equivalent to: {@code (*:* -{!prefix f="_nest_path_" v="<parentPath>/"})} For root ({@code + * /}): {@code (*:* -_nest_path_:*)} + */ + protected Query buildAllParentsFilterFromPath(String parentPath) { + final Query excludeQuery; + if (parentPath.equals("/")) { + excludeQuery = newNestPathExistsQuery(); + } else { + excludeQuery = new PrefixQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, parentPath + "/")); + } + return new BooleanQuery.Builder() + .add(new MatchAllDocsQuery(), Occur.MUST) + .add(excludeQuery, Occur.MUST_NOT) + .build(); + } + + /** + * Wraps the given query with a constraint ensuring only docs at exactly {@code parentPath} are + * matched. + */ + protected Query wrapWithParentPathConstraint(String parentPath, Query query) { + final BooleanQuery.Builder builder = new BooleanQuery.Builder().add(query, Occur.MUST); + if (parentPath.equals("/")) { + builder.add(newNestPathExistsQuery(), Occur.MUST_NOT); + } else { + final Query constraint = + new TermQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, parentPath)); + if (query instanceof MatchAllDocsQuery) { + return isFilter() ? 
constraint : new ConstantScoreQuery(constraint); + } + builder.add(constraint, Occur.FILTER); + } + return builder.build(); + } + + protected Query newNestPathExistsQuery() { + final SchemaField nestPathField = req.getSchema().getField(IndexSchema.NEST_PATH_FIELD_NAME); + return nestPathField.getType().getExistenceQuery(this, nestPathField); + } + + /** + * Wraps the sub-query with a constraint ensuring only docs that are descendants of {@code + * parentPath} are matched. If {@code childPath} is non-null, further narrows to docs at exactly + * {@code parentPath/childPath}. + */ + protected Query wrapWithChildPathConstraint(String parentPath, String childPath, Query subQuery) { + final Query nestPathConstraint; + if (childPath != null) { + String effectiveChildPath = + parentPath.equals("/") ? "/" + childPath : parentPath + "/" + childPath; + nestPathConstraint = + new TermQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, effectiveChildPath)); + } else if (parentPath.equals("/")) { + nestPathConstraint = newNestPathExistsQuery(); + } else { + nestPathConstraint = + new PrefixQuery(new Term(IndexSchema.NEST_PATH_FIELD_NAME, parentPath + "/")); + } + if (subQuery instanceof MatchAllDocsQuery) { + return new ConstantScoreQuery(nestPathConstraint); + } + return new BooleanQuery.Builder() + .add(subQuery, Occur.MUST) + .add(nestPathConstraint, Occur.FILTER) + .build(); + } + + // + // Advanced/DIY parsing follows + // + + protected Query parseWithLegacyParam() throws SyntaxError { + BooleanQuery subordinateQuery = parseImpl(); + + if (subordinateQuery.clauses().isEmpty()) { // i.e. 
all children + return noClausesQueryLegacy(); + } + String scoreMode = localParams.get("score", ScoreMode.None.name()); - Query parentQ = parseParentFilter(); - return createQuery(parentQ, subordinate, scoreMode); + Query parentQ = parseLegacyParentFilter(); + return createQuery(parentQ, subordinateQuery, scoreMode); } - @Override - protected Query noClausesQuery() throws SyntaxError { - return new BitSetProducerQuery(getBitSetProducer(parseParentFilter())); + protected Query parseLegacyParentFilter() throws SyntaxError { + return subQuery(localParams.get(getLegacyParentFilterParamName()), null).getQuery(); + } + + protected Query noClausesQueryLegacy() throws SyntaxError { + return new BitSetProducerQuery(getBitSetProducer(parseLegacyParentFilter())); + } + + protected String getLegacyParentFilterParamName() { + return "which"; } - protected Query createQuery(final Query parentList, Query query, String scoreMode) + /** + * Create the block-join query, the core Query of the QParser. + * + * @param parentList the "parent" query. The result will internally be cached. + * @param fromQuery source/from query. For {!parent}, this is a child, otherwise it's a parent + * @param scoreMode see {@link ScoreMode} + * @return non-null + * @throws SyntaxError Only if scoreMode doesn't parse + */ + protected Query createQuery(final Query parentList, Query fromQuery, String scoreMode) throws SyntaxError { return new AllParentsAware( - query, getBitSetProducer(parentList), ScoreModeParser.parse(scoreMode), parentList); + fromQuery, getBitSetProducer(parentList), ScoreModeParser.parse(scoreMode), parentList); } BitSetProducer getBitSetProducer(Query query) { return getCachedBitSetProducer(req, query); } + /** + * Returns a Lucene {@link BitSetProducer}, typically cached by query. Note that BSP itself + * internally caches a per-segment {@link BitSet}. 
+ */ public static BitSetProducer getCachedBitSetProducer( final SolrQueryRequest request, Query query) { @SuppressWarnings("unchecked") @@ -105,6 +339,7 @@ public class BlockJoinParentQParser extends FiltersQParser { } } + /** A {@link ToParentBlockJoinQuery} exposing the query underlying the {@link BitSetProducer}. */ static final class AllParentsAware extends ToParentBlockJoinQuery { private final Query parentQuery; diff --git a/solr/core/src/java/org/apache/solr/search/join/FiltersQParser.java b/solr/core/src/java/org/apache/solr/search/join/FiltersQParser.java index b031f136566..451de63337a 100644 --- a/solr/core/src/java/org/apache/solr/search/join/FiltersQParser.java +++ b/solr/core/src/java/org/apache/solr/search/join/FiltersQParser.java @@ -48,9 +48,10 @@ public class FiltersQParser extends QParser { @Override public Query parse() throws SyntaxError { BooleanQuery query = parseImpl(); - return !query.clauses().isEmpty() ? wrapSubordinateClause(query) : noClausesQuery(); + return !query.clauses().isEmpty() ? query : new MatchAllDocsQuery(); } + /** Parses the subQuery, applying filters and exclusions. Caller must check if empty. 
*/ protected BooleanQuery parseImpl() throws SyntaxError { Map<QParser, Occur> clauses = clauses(); @@ -72,14 +73,6 @@ public class FiltersQParser extends QParser { return query; } - protected Query wrapSubordinateClause(Query subordinate) throws SyntaxError { - return subordinate; - } - - protected Query noClausesQuery() throws SyntaxError { - return new MatchAllDocsQuery(); - } - protected void exclude(Collection<QParser> clauses) { Set<String> tagsToExclude = new HashSet<>(); String excludeTags = localParams.get("excludeTags"); @@ -126,14 +119,14 @@ public class FiltersQParser extends QParser { private Collection<QParser> excludeSet(Map<?, ?> tagMap, Set<String> tagsToExclude) { - IdentityHashMap<QParser, Boolean> excludeSet = new IdentityHashMap<>(); + IdentityHashMap<QParser, Object> excludeSet = new IdentityHashMap<>(); for (String excludeTag : tagsToExclude) { Object olst = tagMap.get(excludeTag); // tagMap has entries of List<String,List<QParser>>, but subject to change in the future if (!(olst instanceof Collection)) continue; for (Object o : (Collection<?>) olst) { if (!(o instanceof QParser qp)) continue; - excludeSet.put(qp, Boolean.TRUE); + excludeSet.put(qp, null); // dummy value } } return excludeSet.keySet(); diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index 817458569bf..b687d826077 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -733,6 +733,31 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { + parent_path.replace("/", "\\/") + ")"); } + + // Test parentPath with no subordinate query: {!parent parentPath=/ v=''} returns all root-level + // docs. This is a useful trick to query docs at a specific nest path without using _nest_path_ + // directly. 
+ try (SolrQueryRequest req = req()) { + Query q = + assertQueryEqualsAndReturn( + "parent", + req, + "{!parent parentPath=/ v=''}", + "{!parent parentPath=/}"); // omitting v is equivalent to empty v + assertEquals("+*:* -FieldExistsQuery [field=_nest_path_]", q.toString()); + + q = + assertQueryEqualsAndReturn( + "parent", + req, + "{!parent parentPath=/aa/bb v=}", + "{!parent parentPath=/aa/bb}"); // omitting v is equivalent to empty v + assertEquals("ConstantScore(_nest_path_:/aa/bb)", q.toString()); + } + + // Test that {!parent} and {!child} without required 'which'/'of' or 'parentPath' throw error + expectThrows(SyntaxError.class, () -> QParser.getParser("{!parent}", req()).getQuery()); + expectThrows(SyntaxError.class, () -> QParser.getParser("{!child}", req()).getQuery()); } public void testFilters() throws Exception { diff --git a/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java index 98c847ec12e..fca48d5765a 100644 --- a/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java +++ b/solr/core/src/test/org/apache/solr/update/TestNestedUpdateProcessor.java @@ -277,6 +277,44 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 { assertU(commit()); } + /** Test the {@code filters} local-param works with {@code parentPath}. 
*/ + @Test + public void testFiltersWithParentPath() { + // integer IDs required: schema copies id→id_i (int) so auto-generated "p1/items#0" would fail + SolrInputDocument p1 = sdoc("id", "1", "name_s", "p1"); + p1.addField("items", sdoc("id", "11", "status_s", "active")); + p1.addField("items", sdoc("id", "12", "status_s", "inactive")); + assertU(adoc(p1)); + + SolrInputDocument p2 = sdoc("id", "2", "name_s", "p2"); + p2.addField("items", sdoc("id", "21", "status_s", "inactive")); + assertU(adoc(p2)); + + SolrInputDocument p3 = sdoc("id", "3", "name_s", "p3"); + p3.addField("items", sdoc("id", "31", "status_s", "active")); + assertU(adoc(p3)); + + assertU(commit()); + + // {!parent parentPath=/}: parents with children matching v AND filters + assertQ( + req( + "q", "{!parent parentPath=/ filters=$cf}*:*", + "cf", "status_s:active", + "fl", "id", + "sort", "id asc"), + "//*[@numFound='2']", + "//doc[1]/str[@name='id']='1'", + "//doc[2]/str[@name='id']='3'"); + + // {!child parentPath=/}: children of parents matching v AND filters + assertQ( + req( + "q", "{!child parentPath=/ filters=$pf}*:*", + "pf", "name_s:p1"), + "//*[@numFound='2']"); + } + /** * Randomized test to look for flaws in the documented approach for building "safe" values of the * <code>of</code> / <code>which</code> params in the <code>child</code> / <code>parent</code> @@ -438,6 +476,97 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 { "//result/@numFound=1", "//doc/str[@name='id'][.='" + ancestorId + "']"); + // additionally test childPath: find the immediate parent of descendentId and use + // childPath to constrain to that exact child path level + final String directParentPath = + doc_path.contains("/") + ? (doc_path.lastIndexOf("/") == 0 + ? 
"/" + : doc_path.substring(0, doc_path.lastIndexOf("/"))) + : "/"; + final String childSegment = doc_path.substring(doc_path.lastIndexOf("/") + 1); + // find the ancestor ID whose path is directParentPath + for (Object candAncestorId : allAncestorIds) { + final String candPath = + allDocs.get(candAncestorId.toString()).getFieldValue("test_path_s").toString(); + if (candPath.equals(directParentPath)) { + // childPath constrains the child query to exactly doc_path, so we should find + // the direct parent + assertQ( + req( + params( + "q", + "{!parent parentPath='" + + directParentPath + + "' childPath='" + + childSegment + + "'}id:" + + descendentId), + "_trace_childPath_tested", + directParentPath + "/" + childSegment, + "fl", + "id", + "indent", + "true"), + "//result/@numFound=1", + "//doc/str[@name='id'][.='" + candAncestorId + "']"); + // a childPath that doesn't match descendentId's path should return 0 results + assertQ( + req( + params( + "q", + "{!parent parentPath='" + + directParentPath + + "' childPath='xxx_yyy'}id:" + + descendentId), + "_trace_childPath_tested", + directParentPath + "/xxx_yyy", + "fl", + "id", + "indent", + "true"), + "//result/@numFound=0"); + // childPath for {!child}: constrain returned children to exactly doc_path + assertQ( + req( + params( + "q", + "{!child parentPath='" + + directParentPath + + "' childPath='" + + childSegment + + "'}id:" + + candAncestorId), + "_trace_child_childPath_tested", + directParentPath + "/" + childSegment, + "rows", + "9999", + "fl", + "id", + "indent", + "true"), + "count(//doc)>=1", + "//doc/str[@name='id'][.='" + descendentId + "']"); + // a childPath that doesn't match should return 0 results + assertQ( + req( + params( + "q", + "{!child parentPath='" + + directParentPath + + "' childPath='xxx_yyy'}id:" + + candAncestorId), + "_trace_child_childPath_tested", + directParentPath + "/xxx_yyy", + "fl", + "id", + "indent", + "true"), + "//result/@numFound=0"); + break; + } + } + // meanwhile, a 'child' 
query wrapped around a query for the ancestorId, using the // ancestor_path, should match all of its descendents (for simplicity we'll check just // the numFound and the 'descendentId' we started with) @@ -544,7 +673,14 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 { */ private SolrParams parentQueryMaker(String parent_path, String inner_child_query) { assertValidPathSyntax(parent_path); - final boolean verbose = random().nextBoolean(); + final int variant = random().nextInt(3); + + if (variant == 2) { + // new parentPath sugar + return params("q", "{!parent parentPath='" + parent_path + "'}" + inner_child_query); + } // else old-style with explicit which/of... + + final boolean verbose = variant == 1; if (parent_path.equals("/")) { if (verbose) { @@ -632,7 +768,14 @@ public class TestNestedUpdateProcessor extends SolrTestCaseJ4 { */ private SolrParams childQueryMaker(String parent_path, String inner_parent_query) { assertValidPathSyntax(parent_path); - final boolean verbose = random().nextBoolean(); + final int variant = random().nextInt(3); + + if (variant == 2) { + // new parentPath sugar + return params("q", "{!child parentPath='" + parent_path + "'}" + inner_parent_query); + } // else old-style with explicit which/of... + + final boolean verbose = variant == 1; if (parent_path.equals("/")) { if (verbose) { diff --git a/solr/solr-ref-guide/modules/query-guide/pages/block-join-query-parser.adoc b/solr/solr-ref-guide/modules/query-guide/pages/block-join-query-parser.adoc index 2eed9721639..205fce03025 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/block-join-query-parser.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/block-join-query-parser.adoc @@ -51,10 +51,40 @@ The example usage of the query parsers below assumes the following documents hav This parser wraps a query that matches some parent documents and returns the children of those documents. -The syntax for this parser is: `q={!child of=<blockMask>}<someParents>`. 
+=== Using `parentPath` -* The inner subordinate query string (`someParents`) must be a query that will match some parent documents -* The `of` parameter must be a query string to use as a <<block-mask,Block Mask>> -- typically a query that matches the set of all possible parent documents +If your schema supports xref:indexing-guide:indexing-nested-documents.adoc[nested documents], you _should_ specify `parentPath`. +Specify the path at which the parent documents live: + +[source,text] +q={!child parentPath=<path>}<someParents> + +Key points about `parentPath`: + +* Must start with `/`. +* Use `parentPath="/"` to treat root-level documents as the parents. +* A trailing `/` is stripped automatically (e.g., `"/skus/"` is treated as `"/skus"`). +* `parentPath` and `of` are mutually exclusive; specifying both returns a `400 Bad Request` error. +* Optionally, use `childPath` to narrow the returned children to docs at exactly `parentPath/childPath`. Without `childPath`, all descendants of parents at `parentPath` are returned. + +For example, using the deeply nested documents described in xref:searching-nested-documents.adoc[], the following query returns all children of root-level product documents that match a description query: + +[source,text] +q={!child parentPath="/"}description_t:staplers + +To return only `skus` children of root documents matching a description query (excluding other child types): + +[source,text] +q={!child parentPath="/" childPath="skus"}description_t:staplers + +=== Using the `of` Parameter + +This approach is used with anonymous child documents (schemas without `_nest_path_`). +It is more verbose and has some <<block-mask,gotchas>>. +The syntax is: `q={!child of=<blockMask>}<someParents>`. + +* The inner subordinate query string (`someParents`) must be a query that will match some parent documents. 
+* The `of` parameter must be a query string to use as a <<block-mask,Block Mask>> -- typically a query that matches the set of all possible parent documents. The resulting query will match all documents which do _not_ match the `<blockMask>` query and are children (or descendents) of the documents matched by `<someParents>`. @@ -111,10 +141,53 @@ More precisely, `q={!child of=<blockMask>}` is equivalent to `q=\*:* -<blockMask This parser takes a query that matches child documents and returns their parents. -The syntax for this parser is similar to the `child` parser: `q={!parent which=<blockMask>}<someChildren>`. +=== Using `parentPath` + +If your schema supports xref:indexing-guide:indexing-nested-documents.adoc[nested documents], you _should_ specify `parentPath`. +Specify the path at which the parent documents live: + +[source,text] +q={!parent parentPath=<path>}<someChildren> + +Key points about `parentPath`: + +* Must start with `/`. +* Use `parentPath="/"` to treat root-level documents as the parents. +* A trailing `/` is stripped automatically (e.g., `"/skus/"` is treated as `"/skus"`). +* `parentPath` and `which` are mutually exclusive; specifying both returns a `400 Bad Request` error. +* Optionally, use `childPath` to constrain the child query to docs at exactly `parentPath/childPath`. Without `childPath`, all descendants of `parentPath` are eligible as children. 
+ +For example, using the deeply nested documents described in xref:searching-nested-documents.adoc[], the following query returns the root-level product documents that are ancestors of manuals with exactly one page: + +[source,text] +q={!parent parentPath="/"}pages_i:1 + +To instead return the `skus` that are ancestors of one-page _manuals_ (only manuals, not other sku children): + +[source,text] +q={!parent parentPath="/skus" childPath="manuals"}pages_i:1 + +==== Filtering to a Specific Nest Path + +When the subordinate query is omitted (empty), `{!parent parentPath=<path>}` is a convenient way to filter documents to exactly a specific nest path without needing to reference `\_nest_path_` directly: + +[source,text] +---- +# Return all root-level documents (no _nest_path_): +q={!parent parentPath=/} + +# Return all documents at exactly /skus (not deeper descendants like /skus/manuals): +q={!parent parentPath=/skus} +---- + +=== Using the `which` Parameter + +This approach is used with anonymous child documents (schemas without `\_nest_path_`). +It is more verbose and has some <<block-mask,gotchas>>. +The syntax is: `q={!parent which=<blockMask>}<someChildren>`. -* The inner subordinate query string (`someChildren`) must be a query that will match some child documents -* The `which` parameter must be a query string to use as a <<block-mask,Block Mask>> -- typically a query that matches the set of all possible parent documents +* The inner subordinate query string (`someChildren`) must be a query that will match some child documents. +* The `which` parameter must be a query string to use as a <<block-mask,Block Mask>> -- typically a query that matches the set of all possible parent documents. The resulting query will match all documents which _do_ match the `<blockMask>` query and are parents (or ancestors) of the documents matched by `<someChildren>`. 
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc b/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc index 83b2e35f54c..15979f8ac32 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/searching-nested-documents.adoc @@ -108,11 +108,11 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=descr The `{!child}` query parser can be used to search for the _descendent_ documents of parent documents matching a wrapped query. For a detailed explanation of this parser, see the section xref:block-join-query-parser.adoc#block-join-children-query-parser[Block Join Children Query Parser]. -Let's consider again the `description_t:staplers` query used above -- if we wrap that query in a `{!child}` query parser then instead of "matching" & returning the product level documents, we instead match all of the _descendent_ child documents of the original query: +Let's consider again the `description_t:staplers` query used above -- if we wrap that query in a `{!child}` query parser with `parentPath="/"` then instead of "matching" & returning the product level documents, we instead match all of the _descendent_ child documents of the original query: [source,text] ---- -$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -d 'q={!child of="*:* -_nest_path_:*"}description_t:staplers' +$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child parentPath="/"}description_t:staplers' { "response":{"numFound":5,"start":0,"maxScore":0.30136836,"numFoundExact":true,"docs":[ { @@ -146,14 +146,14 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' - }} ---- -In this example we've used `\*:* -\_nest_path_:*` as our xref:block-join-query-parser.adoc#block-mask[`of` parameter] to indicate we want to consider all 
documents which don't have a nest path -- i.e., all "root" level document -- as the set of possible parents. +In this example `parentPath="/"` indicates we want to consider all root-level documents as the set of possible parents. -By changing the `of` parameter to match ancestors at specific `\_nest_path_` levels, we can narrow down the list of children we return. -In the query below, we search for all descendants of `skus` (using an `of` parameter that identifies all documents that do _not_ have a `\_nest_path_` with the prefix `/skus/*`) with a `price_i` less than `50`: +By changing the `parentPath` to a specific `\_nest_path_` level, we can narrow down the list of children we return. +In the query below, we search for all descendants of those `skus` that have a `price_i` less than `50`: [source,text] ---- -$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child of="*:* -_nest_path_:\\/skus\\/*"}(+price_i:[* TO 50] +_nest_path_:\/skus)' +$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child parentPath="/skus"}price_i:[* TO 50]' { "response":{"numFound":1,"start":0,"maxScore":1.0,"numFoundExact":true,"docs":[ { @@ -165,25 +165,6 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' - }} ---- -[#double-escaping-nest-path-slashes] -[CAUTION] -.Double Escaping `\_nest_path_` slashes in `of` -==== -Note that in the above example, the `/` characters in the `\_nest_path_` were "double escaped" in the `of` parameter: - -* One level of `\` escaping is necessary to prevent the `/` from being interpreted as a {lucene-javadocs}/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Regexp_Searches[Regex Query] -* An additional level of "escaping the escape character" is necessary because the `of` local parameter is a quoted string; so we need a second `\` to ensure the first `\` is preserved and passed as is to the query parser. 
- -(You can see that only a single level of `\` escaping is needed in the body of the query string -- to prevent the Regex syntax -- because it's not a quoted string local param). - -You may find it more convenient to use xref:local-params.adoc#parameter-dereferencing[parameter references] in conjunction with xref:other-parsers.adoc[other parsers] that do not treat `/` as a special character to express the same query in a more verbose form: - -[source,text] ----- -$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!child of=$block_mask}(+price_i:[* TO 50] +{!field f="_nest_path_" v="/skus"})' --data-urlencode 'block_mask=(*:* -{!prefix f="_nest_path_" v="/skus/"})' ----- -==== - === Parent Query Parser The inverse of the `{!child}` query parser is the `{!parent}` query parser, which lets you search for the _ancestor_ documents of some child documents matching a wrapped query. @@ -217,11 +198,11 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select?omitHeader=true&q=pages }} ---- -We can wrap that query in a `{!parent}` query to return the details of all products that are ancestors of these manuals: +We can wrap that query in a `{!parent}` query with `parentPath="/"` to return the details of all root-level products that are ancestors of these manuals: [source,text] ---- -$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent which="*:* -_nest_path_:*"}(+_nest_path_:\/skus\/manuals +pages_i:1)' +$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent parentPath="/"}pages_i:1' { "response":{"numFound":2,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[ { @@ -237,14 +218,15 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' - }} ---- -In this example we've used `\*:* -\_nest_path_:*` as our xref:block-join-query-parser.adoc#block-mask[`which` parameter] to indicate we want 
to consider all documents which don't have a nest path -- i.e., all "root" level document -- as the set of possible parents. +In this example `parentPath="/"` indicates we want root-level documents to be the parents. -By changing the `which` parameter to match ancestors at specific `\_nest_path_` levels, we can change the type of ancestors we return. -In the query below, we search for `skus` (using an `which` parameter that identifies all documents that do _not_ have a `\_nest_path_` with the prefix `/skus/*`) that are the ancestors of `manuals` with exactly `1` page: +By changing `parentPath` to a specific path, we can change the type of ancestors we return. +In the query below, we search for the `skus` that are the ancestors of `manuals` with exactly `1` page. +Adding `childPath="manuals"` constrains the child query to only docs nested at `/skus/manuals`, preventing pages from other child types from matching: [source,text] ---- -$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent which="*:* -_nest_path_:\\/skus\\/*"}(+_nest_path_:\/skus\/manuals +pages_i:1)' +$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' --data-urlencode 'q={!parent parentPath="/skus" childPath="manuals"}pages_i:1' { "response":{"numFound":2,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[ { @@ -260,11 +242,6 @@ $ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' - }} ---- -[CAUTION] -==== -Note that in the above example, the `/` characters in the `\_nest_path_` were "double escaped" in the `which` parameter, for the <<double-escaping-nest-path-slashes,same reasons discussed above>> regarding the `{!child} pasers `of` parameter. -==== - === Combining Block Join Query Parsers with Child Doc Transformer The combination of these two parsers with the `[child]` transformer enables seamless creation of very powerful queries. 
@@ -279,7 +256,7 @@ Here for example is a query where: [source,text] ---- -$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -d 'fq=color_s:RED' --data-urlencode 'q={!child of="*:* -_nest_path_:*" filters=$parent_fq}' --data-urlencode 'parent_fq={!parent which="*:* -_nest_path_:*"}(+_nest_path_:"/manuals" +content_t:"lifetime guarantee")' -d 'fl=*,[child]' +$ curl 'http://localhost:8983/solr/gettingstarted/select' -d 'omitHeader=true' -d 'fq=color_s:RED' --data-urlencode 'q={!child parentPath="/" filters=$parent_fq}' --data-urlencode 'parent_fq={!parent parentPath="/"}content_t:"lifetime guarantee"' -d 'fl=*,[child]' { "response":{"numFound":1,"start":0,"maxScore":1.4E-45,"numFoundExact":true,"docs":[ {
