[GitHub] lucene-solr pull request #416: WIP: SOLR-12519

dsmiley Tue, 24 Jul 2018 07:16:45 -0700

Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/416#discussion_r204766704
  
    --- Diff: 
solr/core/src/java/org/apache/solr/response/transform/DeeplyNestedChildDocTransformer.java
 ---
    @@ -0,0 +1,214 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.solr.response.transform;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.Collection;
    +import java.util.List;
    +import java.util.Set;
    +import java.util.stream.Collectors;
    +
    +import com.google.common.collect.ArrayListMultimap;
    +import com.google.common.collect.Multimap;
    +import org.apache.lucene.index.DocValues;
    +import org.apache.lucene.index.IndexableField;
    +import org.apache.lucene.index.LeafReaderContext;
    +import org.apache.lucene.index.SortedDocValues;
    +import org.apache.lucene.search.Query;
    +import org.apache.lucene.search.Sort;
    +import org.apache.lucene.search.SortField;
    +import org.apache.lucene.search.join.BitSetProducer;
    +import org.apache.lucene.search.join.ToChildBlockJoinQuery;
    +import org.apache.lucene.util.BytesRef;
    +import org.apache.solr.common.SolrDocument;
    +import org.apache.solr.request.SolrQueryRequest;
    +import org.apache.solr.response.DocsStreamer;
    +import org.apache.solr.schema.FieldType;
    +import org.apache.solr.schema.IndexSchema;
    +import org.apache.solr.schema.SchemaField;
    +import org.apache.solr.search.DocIterator;
    +import org.apache.solr.search.DocList;
    +import org.apache.solr.search.SolrDocumentFetcher;
    +import org.apache.solr.search.SolrIndexSearcher;
    +import org.apache.solr.search.SolrReturnFields;
    +
    +import static 
org.apache.solr.response.transform.ChildDocTransformerFactory.NUM_SEP_CHAR;
    +import static 
org.apache.solr.response.transform.ChildDocTransformerFactory.PATH_SEP_CHAR;
    +import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
    +
    +class DeeplyNestedChildDocTransformer extends DocTransformer {
    +
    +  private final String name;
    +  protected final SchemaField idField;
    +  protected final SolrQueryRequest req;
    +  protected final IndexSchema schema;
    +  private BitSetProducer parentsFilter;
    +  protected int limit;
    +  private final static Sort docKeySort = new Sort(new SortField(null, 
SortField.Type.DOC, false));
    +  private Query childFilterQuery;
    +
    +  public DeeplyNestedChildDocTransformer(String name, final BitSetProducer 
parentsFilter,
    +                                         final SolrQueryRequest req, final 
Query childFilterQuery, int limit) {
    +    this.name = name;
    +    this.schema = req.getSchema();
    +    this.idField = this.schema.getUniqueKeyField();
    +    this.req = req;
    +    this.parentsFilter = parentsFilter;
    +    this.limit = limit;
    +    this.childFilterQuery = childFilterQuery;
    +  }
    +
    +  @Override
    +  public String getName()  {
    +    return name;
    +  }
    +
    +  @Override
    +  public String[] getExtraRequestFields() {
    +    // we always need the idField (of the parent) in order to fill out 
it's children
    +    return new String[] { idField.getName() };
    +  }
    +
    +  protected static String getSolrFieldString(Object fieldVal, FieldType 
fieldType) {
    +    return fieldVal instanceof IndexableField
    +        ? fieldType.toExternal((IndexableField)fieldVal)
    +        : fieldVal.toString();
    +  }
    +
    +  @Override
    +  public void transform(SolrDocument rootDoc, int rootDocId) {
    +
    +    FieldType idFt = idField.getType();
    +
    +    String rootIdExt = 
getSolrFieldString(rootDoc.getFirstValue(idField.getName()), idFt);
    +
    +    try {
    +      Query parentQuery = idFt.getFieldQuery(null, idField, rootIdExt);
    +      Query query = new ToChildBlockJoinQuery(parentQuery, parentsFilter);
    +      SolrIndexSearcher searcher = context.getSearcher();
    +      DocList children = searcher.getDocList(query, childFilterQuery, 
docKeySort, 0, limit);
    +      long segAndId = searcher.lookupId(new BytesRef(rootIdExt));
    +      final int seg = (int) (segAndId >> 32);
    +      final LeafReaderContext leafReaderContext = 
searcher.getIndexReader().leaves().get(seg);
    +      final SortedDocValues segPathDocValues = 
DocValues.getSorted(leafReaderContext.reader(), NEST_PATH_FIELD_NAME);
    +
    +      Multimap<String,SolrDocument> pendingParentPathsToChildren = 
ArrayListMultimap.create();
    +
    +      if(children.matches() > 0) {
    +        SolrDocumentFetcher docFetcher = searcher.getDocFetcher();
    +        Set<String> dvFieldsToReturn = 
docFetcher.getNonStoredDVs(true).stream()
    +            .filter(name -> 
!NEST_PATH_FIELD_NAME.equals(name)).collect(Collectors.toSet());
    +        boolean shouldDecorateWithDVs = dvFieldsToReturn.size() > 0;
    +        DocIterator i = children.iterator();
    +        final int segBaseId = leafReaderContext.docBase;
    +        final int firstChildDocId = i.nextDoc();
    +        assert firstChildDocId < rootDocId;
    +
    +        for (int docId = firstChildDocId; docId < rootDocId; ++docId) {
    +          // get the path
    +          String fullDocPath = getPathByDocId(docId - segBaseId, 
segPathDocValues);
    +
    +          // Is this doc a direct ancestor of another doc we've seen?
    +          boolean isAncestor = 
pendingParentPathsToChildren.containsKey(fullDocPath);
    +
    +          // Do we need to do anything with this doc (either ancestor or a 
matched the child query)
    +          if (isAncestor || children.exists(docId)) {
    +            // load the doc
    +            SolrDocument doc = 
DocsStreamer.convertLuceneDocToSolrDoc(docFetcher.doc(docId),
    +                schema, new SolrReturnFields());
    +            doc.setField(NEST_PATH_FIELD_NAME, fullDocPath);
    +            if (shouldDecorateWithDVs) {
    +              docFetcher.decorateDocValueFields(doc, docId, 
dvFieldsToReturn);
    +            }
    +            // get parent path
    +            // put into pending
    +            String parentDocPath = lookupParentPath(fullDocPath);
    +            pendingParentPathsToChildren.put(parentDocPath, doc); // 
multimap add (won't replace)
    +
    +            // if this path has pending child docs, add them.
    +            if (isAncestor) {
    +              addChildrenToParent(doc, 
pendingParentPathsToChildren.get(fullDocPath));
    +              pendingParentPathsToChildren.removeAll(fullDocPath); // no 
longer pending
    +            }
    +          }
    +        }
    +
    +        // only children of parent remain
    +        assert pendingParentPathsToChildren.keySet().size() == 1;
    +
    +        addChildrenToParent(rootDoc, 
pendingParentPathsToChildren.get(null));
    +      }
    +    } catch (IOException e) {
    +      rootDoc.put(getName(), "Could not fetch child Documents");
    +    }
    +  }
    +
    +  void addChildToParent(SolrDocument parent, SolrDocument child, String 
label) {
    +    // lookup leaf key for these children using path
    +    // depending on the label, add to the parent at the right key/label
    +    // TODO: unfortunately this is the 2nd time we grab the paths for 
these docs. resolve how?
    +    String trimmedPath = trimSuffixFromPaths(getLastPath(label));
    +    if (!parent.containsKey(trimmedPath) && (label.contains(NUM_SEP_CHAR) 
&& !label.endsWith(NUM_SEP_CHAR))) {
    +      List<SolrDocument> list = new ArrayList<>();
    +      parent.setField(trimmedPath, list);
    +    }
    +    parent.addField(trimmedPath, child);
    +  }
    +
    +  void addChildToParent(SolrDocument parent, SolrDocument child) {
    +    String docPath = 
getSolrFieldString(child.getFirstValue(NEST_PATH_FIELD_NAME), 
schema.getFieldType(NEST_PATH_FIELD_NAME));
    +    addChildToParent(parent, child, docPath);
    +  }
    +
    +  void addChildrenToParent(SolrDocument parent, Collection<SolrDocument> 
children) {
    +    for(SolrDocument child: children) {
    +      addChildToParent(parent, child);
    +    }
    +  }
    +
    +  private String getLastPath(String path) {
    +
    +    if(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) == -1) {
    +      return path;
    +    }
    +    return path.substring(path.lastIndexOf(PATH_SEP_CHAR.charAt(0)) + 1);
    +  }
    +
    +  private String trimSuffixFromPaths(String path) {
    --- End diff --
    
    Maybe we don't need this method; we'll see.  I believe the goal of this 
method is only to trim off the trailing pound sign ('#') and the number that 
follows it?  I'd rather you use String.lastIndexOf-style calls than a regexp 
for this simple task.  Assume '#' is a special case, and thus simply assume 
that whatever follows it is the child index.  Also, remember to add a comment 
on the method with an example that clearly indicates what it's doing.



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] lucene-solr pull request #416: WIP: SOLR-12519

Reply via email to