Github user moshebla commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/416#discussion_r205960639 --- Diff: solr/core/src/java/org/apache/solr/response/transform/DeeplyNestedChildDocTransformer.java --- @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.response.transform; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Multimap; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.ToChildBlockJoinQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.DocsStreamer; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.DocIterator; +import org.apache.solr.search.DocList; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.SolrReturnFields; + +import static org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR; +import static org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR; +import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME; + +class DeeplyNestedChildDocTransformer extends DocTransformer { + + private final String name; + protected final SchemaField idField; + protected final SolrQueryRequest req; + protected final IndexSchema schema; + private BitSetProducer parentsFilter; + protected int limit; + private final static Sort docKeySort = new Sort(new SortField(null, SortField.Type.DOC, false)); + private Query childFilterQuery; + + public 
DeeplyNestedChildDocTransformer(String name, final BitSetProducer parentsFilter, + final SolrQueryRequest req, final Query childFilterQuery, int limit) { + this.name = name; + this.schema = req.getSchema(); + this.idField = this.schema.getUniqueKeyField(); + this.req = req; + this.parentsFilter = parentsFilter; + this.limit = limit; + this.childFilterQuery = childFilterQuery; + } + + @Override + public String getName() { + return name; + } + + @Override + public String[] getExtraRequestFields() { + // we always need the idField (of the parent) in order to fill out it's children + return new String[] { idField.getName() }; + } + + @Override + public void transform(SolrDocument rootDoc, int rootDocId) { + + FieldType idFt = idField.getType(); + + String rootIdExt = getSolrFieldString(rootDoc.getFirstValue(idField.getName()), idFt); + + try { + Query parentQuery = idFt.getFieldQuery(null, idField, rootIdExt); + Query query = new ToChildBlockJoinQuery(parentQuery, parentsFilter); + SolrIndexSearcher searcher = context.getSearcher(); + DocList children = searcher.getDocList(query, childFilterQuery, docKeySort, 0, limit); + long segAndId = searcher.lookupId(new BytesRef(rootIdExt)); + final int seg = (int) (segAndId >> 32); + final LeafReaderContext leafReaderContext = searcher.getIndexReader().leaves().get(seg); + final SortedDocValues segPathDocValues = DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME); + + Map<String, Multimap<String, SolrDocument>> pendingParentPathsToChildren = new HashMap<>(); + + if(children.matches() > 0) { + SolrDocumentFetcher docFetcher = searcher.getDocFetcher(); + Set<String> dvFieldsToReturn = docFetcher.getNonStoredDVs(true); + boolean shouldDecorateWithDVs = dvFieldsToReturn.size() > 0; + DocIterator i = children.iterator(); + final int segBaseId = leafReaderContext.docBase; + final int firstChildDocId = i.nextDoc(); + assert firstChildDocId < rootDocId; + + for (int docId = firstChildDocId; docId < rootDocId; ++docId) 
{ + // get the path + String fullDocPath = getPathByDocId(docId - segBaseId, segPathDocValues); + + // Is this doc a direct ancestor of another doc we've seen? + boolean isAncestor = pendingParentPathsToChildren.containsKey(fullDocPath); + + // Do we need to do anything with this doc (either ancestor or a matched the child query) + if (isAncestor || children.exists(docId)) { + // load the doc + SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc(docFetcher.doc(docId), + schema, new SolrReturnFields()); + if (shouldDecorateWithDVs) { + docFetcher.decorateDocValueFields(doc, docId, dvFieldsToReturn); + } + // get parent path + // put into pending + String parentDocPath = lookupParentPath(fullDocPath); + + if(isAncestor) { + // if this path has pending child docs, add them. + addChildrenToParent(doc, pendingParentPathsToChildren.remove(fullDocPath)); // no longer pending + } + // trim path if the doc was inside array, see DeeplyNestedChildDocTransformer#trimPathIfArrayDoc + // e.g. toppings#1/ingredients#1 -> outer map key toppings#1 + // -> inner MultiMap key ingredients + // or lonely#/lonelyGrandChild# -> outer map key lonely# + // -> inner MultiMap key lonelyGrandChild# + pendingParentPathsToChildren.computeIfAbsent(parentDocPath, x -> ArrayListMultimap.create()) + .put(trimPathIfArrayDoc(getLastPath(fullDocPath)), doc); // multimap add (won't replace) + } + } + + // only children of parent remain + assert pendingParentPathsToChildren.keySet().size() == 1; + + addChildrenToParent(rootDoc, pendingParentPathsToChildren.remove(null)); + } + } catch (IOException e) { + rootDoc.put(getName(), "Could not fetch child Documents"); + } + } + + void addChildrenToParent(SolrDocument parent, Multimap<String, SolrDocument> children) { + for(String childLabel: children.keySet()) { + addChildrenToParent(parent, children.get(childLabel), childLabel); + } + } + + void addChildrenToParent(SolrDocument parent, Collection<SolrDocument> children, String cDocsPath) { + // lookup 
leaf key for these children using path + // depending on the label, add to the parent at the right key/label + String trimmedPath = trimLastPound(cDocsPath); + // if the child doc's path does not end with #, it is an array(same string is returned by DeeplyNestedChildDocTransformer#trimLastPound) + if (!parent.containsKey(trimmedPath) && (trimmedPath == cDocsPath)) { + List<SolrDocument> list = new ArrayList<>(children); + parent.setField(trimmedPath, list); + return; + } + // is single value + parent.setField(trimmedPath, ((List)children).get(0)); + } + + private String getLastPath(String path) { + if(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) == -1) { + return path; + } + return path.substring(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) + 1); + } + + private String trimPathIfArrayDoc(String path) { + // remove index after last pound sign and if there is an array index e.g. toppings#1 -> toppings + // or return original string if child doc is not in an array ingredients# -> ingredients# + int lastIndex = path.length() - 1; + boolean singleDocVal = path.charAt(lastIndex) == NUM_SEP_CHAR.charAt(0); + return singleDocVal ? path: path.substring(0, path.lastIndexOf(NUM_SEP_CHAR.charAt(0))); + } + + private String trimLastPound(String path) { + // remove index after last pound sign and index from e.g. toppings#1 -> toppings + int lastIndex = path.lastIndexOf('#'); + return lastIndex == -1 ? path: path.substring(0, lastIndex); + } + + /** + * Returns the *parent* path for this document. + * Children of the root will yield null. + */ + String lookupParentPath(String currDocPath) { + // chop off leaf (after last '/') + // if child of leaf then return null (special value) + int lastPathIndex = currDocPath.lastIndexOf(PATH_SEP_CHAR); + return lastPathIndex == -1 ? 
null: currDocPath.substring(0, lastPathIndex); + } + + private String getPathByDocId(int segDocId, SortedDocValues segPathDocValues) throws IOException { + int numToAdvance = segPathDocValues.docID()==-1?segDocId: segDocId - (segPathDocValues.docID()); --- End diff -- Yes — if the iterator has not yet been positioned (docID() returns -1), then advancing by segDocId - (-1) would move the iterator one document too far, so that case must advance by segDocId only.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org For additional commands, e-mail: dev-help@lucene.apache.org