Github user moshebla commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/416#discussion_r205960639 --- Diff: solr/core/src/java/org/apache/solr/response/transform/DeeplyNestedChildDocTransformer.java --- @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.response.transform; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.ToChildBlockJoinQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.DocsStreamer; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; + +import static org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR; +import static org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR; +import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME; + +class DeeplyNestedChildDocTransformer extends DocTransformer { + + private final String name; + protected final SchemaField idField; + protected final SolrQueryRequest req; + protected final IndexSchema schema; + private BitSetProducer parentsFilter; + protected int limit; + private final static Sort docKeySort = new Sort(new SortField(null, SortField.Type.DOC, false)); + private Query childFilterQuery; + + public 
DeeplyNestedChildDocTransformer(String name, final BitSetProducer parentsFilter, + final SolrQueryRequest req, final Query childFilterQuery, int limit) { + this.name = name; + this.schema = req.getSchema(); + this.idField = this.schema.getUniqueKeyField(); + this.req = req; + this.parentsFilter = parentsFilter; + this.limit = limit; + this.childFilterQuery = childFilterQuery; + } + + @Override + public String getName() { + return name; + } + + @Override + public String[] getExtraRequestFields() { + // we always need the idField (of the parent) in order to fill out it's children + return new String[] { idField.getName() }; + } + + @Override + public void transform(SolrDocument rootDoc, int rootDocId) { + + FieldType idFt = idField.getType(); + + String rootIdExt = getSolrFieldString(rootDoc.getFirstValue(idField.getName()), idFt); + + try { + Query parentQuery = idFt.getFieldQuery(null, idField, rootIdExt); + Query query = new ToChildBlockJoinQuery(parentQuery, parentsFilter); + SolrIndexSearcher searcher = context.getSearcher(); + DocList children = searcher.getDocList(query, childFilterQuery, docKeySort, 0, limit); + long segAndId = searcher.lookupId(new BytesRef(rootIdExt)); + final int seg = (int) (segAndId >> 32); + final LeafReaderContext leafReaderContext = searcher.getIndexReader().leaves().get(seg); + final SortedDocValues segPathDocValues = DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME); + + Map<String, Multimap<String, SolrDocument>> pendingParentPathsToChildren = new HashMap<>(); + + if(children.matches() > 0) { + SolrDocumentFetcher docFetcher = searcher.getDocFetcher(); + Set<String> dvFieldsToReturn = docFetcher.getNonStoredDVs(true); + boolean shouldDecorateWithDVs = dvFieldsToReturn.size() > 0; + DocIterator i = children.iterator(); + final int segBaseId = leafReaderContext.docBase; + final int firstChildDocId = i.nextDoc(); + assert firstChildDocId < rootDocId; + + for (int docId = firstChildDocId; docId < rootDocId; ++docId) 
{ + // get the path + String fullDocPath = getPathByDocId(docId - segBaseId, segPathDocValues); + + // Is this doc a direct ancestor of another doc we've seen? + boolean isAncestor = pendingParentPathsToChildren.containsKey(fullDocPath); + + // Do we need to do anything with this doc (either ancestor or a matched the child query) + if (isAncestor || children.exists(docId)) { + // load the doc + SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc(docFetcher.doc(docId), + schema, new SolrReturnFields()); + if (shouldDecorateWithDVs) { + docFetcher.decorateDocValueFields(doc, docId, dvFieldsToReturn); + } + // get parent path + // put into pending + String parentDocPath = lookupParentPath(fullDocPath); + + if(isAncestor) { + // if this path has pending child docs, add them. + addChildrenToParent(doc, pendingParentPathsToChildren.remove(fullDocPath)); // no longer pending + } + // trim path if the doc was inside array, see DeeplyNestedChildDocTransformer#trimPathIfArrayDoc + // e.g. toppings#1/ingredients#1 -> outer map key toppings#1 + // -> inner MultiMap key ingredients + // or lonely#/lonelyGrandChild# -> outer map key lonely# + // -> inner MultiMap key lonelyGrandChild# + pendingParentPathsToChildren.computeIfAbsent(parentDocPath, x -> ArrayListMultimap.create()) + .put(trimPathIfArrayDoc(getLastPath(fullDocPath)), doc); // multimap add (won't replace) + } + } + + // only children of parent remain + assert pendingParentPathsToChildren.keySet().size() == 1; + + addChildrenToParent(rootDoc, pendingParentPathsToChildren.remove(null)); + } + } catch (IOException e) { + rootDoc.put(getName(), "Could not fetch child Documents"); + } + } + + void addChildrenToParent(SolrDocument parent, Multimap<String, SolrDocument> children) { + for(String childLabel: children.keySet()) { + addChildrenToParent(parent, children.get(childLabel), childLabel); + } + } + + void addChildrenToParent(SolrDocument parent, Collection<SolrDocument> children, String cDocsPath) { + // lookup 
leaf key for these children using path + // depending on the label, add to the parent at the right key/label + String trimmedPath = trimLastPound(cDocsPath); + // if the child doc's path does not end with #, it is an array(same string is returned by DeeplyNestedChildDocTransformer#trimLastPound) + if (!parent.containsKey(trimmedPath) && (trimmedPath == cDocsPath)) { + List<SolrDocument> list = new ArrayList<>(children); + parent.setField(trimmedPath, list); + return; + } + // is single value + parent.setField(trimmedPath, ((List)children).get(0)); + } + + private String getLastPath(String path) { + if(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) == -1) { + return path; + } + return path.substring(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) + 1); + } + + private String trimPathIfArrayDoc(String path) { + // remove index after last pound sign and if there is an array index e.g. toppings#1 -> toppings + // or return original string if child doc is not in an array ingredients# -> ingredients# + int lastIndex = path.length() - 1; + boolean singleDocVal = path.charAt(lastIndex) == NUM_SEP_CHAR.charAt(0); + return singleDocVal ? path: path.substring(0, path.lastIndexOf(NUM_SEP_CHAR.charAt(0))); + } + + private String trimLastPound(String path) { + // remove index after last pound sign and index from e.g. toppings#1 -> toppings + int lastIndex = path.lastIndexOf('#'); + return lastIndex == -1 ? path: path.substring(0, lastIndex); + } + + /** + * Returns the *parent* path for this document. + * Children of the root will yield null. + */ + String lookupParentPath(String currDocPath) { + // chop off leaf (after last '/') + // if child of leaf then return null (special value) + int lastPathIndex = currDocPath.lastIndexOf(PATH_SEP_CHAR); + return lastPathIndex == -1 ? 
null: currDocPath.substring(0, lastPathIndex); + } + + private String getPathByDocId(int segDocId, SortedDocValues segPathDocValues) throws IOException { + int numToAdvance = segPathDocValues.docID()==-1?segDocId: segDocId - (segPathDocValues.docID()); --- End diff -- Yes — if the iterator has not yet been positioned (docID() returns -1), then advancing by segDocId - (-1) would move the iterator one document too far, so that case must advance by segDocId only.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org For additional commands, e-mail: dev-help@lucene.apache.org