Github user dsmiley commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/416#discussion_r204766704 --- Diff: solr/core/src/java/org/apache/solr/response/transform/DeeplyNestedChildDocTransformer.java --- @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.response.transform; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.ToChildBlockJoinQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.DocsStreamer; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; + +import static org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR; +import static org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR; +import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME; + +class DeeplyNestedChildDocTransformer extends DocTransformer { + + private final String name; + protected final SchemaField idField; + protected final SolrQueryRequest req; + protected final IndexSchema schema; + private BitSetProducer parentsFilter; + protected int limit; + private final static Sort docKeySort = new Sort(new SortField(null, SortField.Type.DOC, false)); + private Query childFilterQuery; + + public 
DeeplyNestedChildDocTransformer(String name, final BitSetProducer parentsFilter, + final SolrQueryRequest req, final Query childFilterQuery, int limit) { + this.name = name; + this.schema = req.getSchema(); + this.idField = this.schema.getUniqueKeyField(); + this.req = req; + this.parentsFilter = parentsFilter; + this.limit = limit; + this.childFilterQuery = childFilterQuery; + } + + @Override + public String getName() { + return name; + } + + @Override + public String[] getExtraRequestFields() { + // we always need the idField (of the parent) in order to fill out it's children + return new String[] { idField.getName() }; + } + + protected static String getSolrFieldString(Object fieldVal, FieldType fieldType) { + return fieldVal instanceof IndexableField + ? fieldType.toExternal((IndexableField)fieldVal) + : fieldVal.toString(); + } + + @Override + public void transform(SolrDocument rootDoc, int rootDocId) { + + FieldType idFt = idField.getType(); + + String rootIdExt = getSolrFieldString(rootDoc.getFirstValue(idField.getName()), idFt); + + try { + Query parentQuery = idFt.getFieldQuery(null, idField, rootIdExt); + Query query = new ToChildBlockJoinQuery(parentQuery, parentsFilter); + SolrIndexSearcher searcher = context.getSearcher(); + DocList children = searcher.getDocList(query, childFilterQuery, docKeySort, 0, limit); + long segAndId = searcher.lookupId(new BytesRef(rootIdExt)); + final int seg = (int) (segAndId >> 32); + final LeafReaderContext leafReaderContext = searcher.getIndexReader().leaves().get(seg); + final SortedDocValues segPathDocValues = DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME); + + Multimap<String,SolrDocument> pendingParentPathsToChildren = ArrayListMultimap.create(); + + if(children.matches() > 0) { + SolrDocumentFetcher docFetcher = searcher.getDocFetcher(); + Set<String> dvFieldsToReturn = docFetcher.getNonStoredDVs(true).stream() + .filter(name -> !NEST_PATH_FIELD_NAME.equals(name)).collect(Collectors.toSet()); 
+ boolean shouldDecorateWithDVs = dvFieldsToReturn.size() > 0; + DocIterator i = children.iterator(); + final int segBaseId = leafReaderContext.docBase; + final int firstChildDocId = i.nextDoc(); + assert firstChildDocId < rootDocId; + + for (int docId = firstChildDocId; docId < rootDocId; ++docId) { + // get the path + String fullDocPath = getPathByDocId(docId - segBaseId, segPathDocValues); + + // Is this doc a direct ancestor of another doc we've seen? + boolean isAncestor = pendingParentPathsToChildren.containsKey(fullDocPath); + + // Do we need to do anything with this doc (either ancestor or a matched the child query) + if (isAncestor || children.exists(docId)) { + // load the doc + SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc(docFetcher.doc(docId), + schema, new SolrReturnFields()); + doc.setField(NEST_PATH_FIELD_NAME, fullDocPath); + if (shouldDecorateWithDVs) { + docFetcher.decorateDocValueFields(doc, docId, dvFieldsToReturn); + } + // get parent path + // put into pending + String parentDocPath = lookupParentPath(fullDocPath); + pendingParentPathsToChildren.put(parentDocPath, doc); // multimap add (won't replace) + + // if this path has pending child docs, add them. + if (isAncestor) { + addChildrenToParent(doc, pendingParentPathsToChildren.get(fullDocPath)); + pendingParentPathsToChildren.removeAll(fullDocPath); // no longer pending + } + } + } + + // only children of parent remain + assert pendingParentPathsToChildren.keySet().size() == 1; + + addChildrenToParent(rootDoc, pendingParentPathsToChildren.get(null)); + } + } catch (IOException e) { + rootDoc.put(getName(), "Could not fetch child Documents"); + } + } + + void addChildToParent(SolrDocument parent, SolrDocument child, String label) { + // lookup leaf key for these children using path + // depending on the label, add to the parent at the right key/label + // TODO: unfortunately this is the 2nd time we grab the paths for these docs. resolve how? 
+ String trimmedPath = trimSuffixFromPaths(getLastPath(label)); + if (!parent.containsKey(trimmedPath) && (label.contains(NUM_SEP_CHAR) && !label.endsWith(NUM_SEP_CHAR))) { + List<SolrDocument> list = new ArrayList<>(); + parent.setField(trimmedPath, list); + } + parent.addField(trimmedPath, child); + } + + void addChildToParent(SolrDocument parent, SolrDocument child) { + String docPath = getSolrFieldString(child.getFirstValue(NEST_PATH_FIELD_NAME), schema.getFieldType(NEST_PATH_FIELD_NAME)); + addChildToParent(parent, child, docPath); + } + + void addChildrenToParent(SolrDocument parent, Collection<SolrDocument> children) { + for(SolrDocument child: children) { + addChildToParent(parent, child); + } + } + + private String getLastPath(String path) { + + if(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) == -1) { + return path; + } + return path.substring(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) + 1); + } + + private String trimSuffixFromPaths(String path) { --- End diff -- Maybe we don't need this method; we'll see. I believe the goal of this method is only to trim off the trailing pound sign ('#') and the number that follows it? I'd rather you use String.lastIndexOf-type calls than a regexp for this simple task. Assume '#' is a special char, and thus simply assume that whatever follows it is the child index. Also, remember to add a comment on the method with an example that clearly indicates what it's doing.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org