Help retrieving BinaryDocValues

Xavier Sanchez Loro Wed, 05 Feb 2014 12:02:18 -0800

Hi,

I have some problems working with BinaryDocValues. The code below works well with a few thousand documents, but with more than 65000 documents it does not return the correct BinaryDocValues after the docId (with docBase rebasing) reaches a certain id. From this point on, it cycles, returning the BinaryDocValues of the first docs. I'm working with Lucene/Solr 4.3.

I tested this code by indexing 100000 documents, each with a "binary_ids_campaigns" value equal to its docId. After docId 65500, approximately, it returns the BinaryDocValues corresponding to the first doc ids. I have followed the API instructions on how to rebase the docId, but I guess I'm missing something. If someone could point me in the right direction, I would really appreciate it.


Best regards,
Xavier

/**
 * Runs the request's query through a {@link CpcCollector} and stores the collected
 * results on this component.
 *
 * <p>After the search, {@code maxOrganicScore} and {@code maxCpc} are updated from the
 * collector. When the core name contains {@code "ppc"}, the documents gathered by the
 * collector are additionally appended to the request's filter list as a
 * {@link FilteredQuery} over the original query.
 *
 * @param rb       response builder supplying the query and the current filter list;
 *                 its filter list is replaced when the core is a "ppc" core
 * @param core     core whose newest searcher is used; the searcher reference is always
 *                 released before this method returns
 * @param observer campaign observer handed to the collector
 * @throws SolrException with {@code SERVER_ERROR} wrapping any failure raised while
 *                       loading the doc values or running the search
 */
public void computeVals(ResponseBuilder rb, SolrCore core, final CampaignObserver observer) {

  RefCounted<SolrIndexSearcher> searchHolder = null;
  try {
    searchHolder = core.getNewestSearcher(false);
    SolrIndexSearcher searcher = searchHolder.get();
    AtomicReader reader = searcher.getAtomicReader();

    // The collector looks these values up with (doc + docBase), so they are expected
    // to be addressed by index-wide doc id rather than by per-segment doc id.
    idsCampaigns = reader.getBinaryDocValues("binary_ids_campaigns");
    final float[] topscore = new float[]{Float.NEGATIVE_INFINITY};

    CpcCollector delegate =
        new CpcCollector(reader, topscore, observer, maxCpc, idsCampaigns, maxDocCpc);

    // Only filter in ppc, not for search; in search only sorting is applied.
    DocSet filter = null;
    SolrIndexSearcher.ProcessedFilter pf =
        searcher.getProcessedFilter(filter, rb.getFilters());

    // Check for existing filters and apply them.
    if (pf != null && pf.filter != null) {
      searcher.search(rb.getQuery(), pf.filter, delegate);
    } else {
      searcher.search(rb.getQuery(), delegate);
    }

    float[] collectedTopscore = delegate.getTopscore();
    maxOrganicScore = collectedTopscore[0];
    maxCpc = delegate.getMaxCpc();

    if (core.getName().contains("ppc")) {
      filter = delegate.getDocSet();
      List<Query> filters = rb.getFilters();
      if (filters == null) {
        filters = new ArrayList<Query>();
      }
      filters.add(new FilteredQuery(rb.getQuery(), filter.getTopFilter()));
      rb.setFilters(filters);
    }
  } catch (Exception e) {
    // This block reads BinaryDocValues and runs a search; it never touches FieldCache,
    // so the message names what actually failed.
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "Error computing CPC values from binary_ids_campaigns doc values", e);
  } finally {
    if (searchHolder != null) {
      searchHolder.decref();
    }
  }
}

  ----------------- Collector code -----------------------

  public class CpcCollector extends Collector {
  private static Logger log = LoggerFactory.getLogger(CpcCollector.class);
  private SortedIntDocSet docSet = null;
  Scorer scorer;
  private final float[] topscore;
  private CampaignObserver observer;
  private float maxCpc;
  private com.carrotsearch.hppc.IntArrayList idDocs;
  private BinaryDocValues values;

private com.carrotsearch.hppc.IntFloatOpenHashMap maxDocCpc;//Maximumcpc per document

  private int docBase = 0;

  /**
   *
   * @param reader
   * @param topscore
   * @param observer
   * @param ids
   * @param maxCpc
   * @param values
   */

public CpcCollector(IndexReader reader, final float[] topscore,CampaignObserver observer, float maxCpc, BinaryDocValues values,com.carrotsearch.hppc.IntFloatOpenHashMap maxDocCpc) {


    this.topscore = topscore;
    this.observer = observer;
    this.maxCpc = maxCpc;
    idDocs = new com.carrotsearch.hppc.IntArrayList();
    this.maxDocCpc = maxDocCpc;
    this.values = values;
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    this.scorer = scorer;
  }

  @Override
  public void collect(int doc) throws IOException {
    float score = scorer.score();
    if (score > getTopscore()[0]) {
      topscore[0] = score;
    }
    BytesRef term = new BytesRef();
    values.get(doc + docBase, term);
    int size = (int) term.bytes[term.offset] * 4 + 1;
    byte[] docValues = new byte[size];
    ByteBuffer.wrap(term.bytes, term.offset, size).get(docValues, 0, size);
    int[] campIds = observer.parseBinaryIdsOldSkoolWayArray(docValues);
    if (campIds != null) {
      float cpc = observer.getMaxActiveCpc(campIds);
      getMaxDocCpc().put(doc + docBase, cpc);
      if (cpc > 0) {
        if (cpc > getMaxCpc()) {
          maxCpc = cpc;
        }
        //active campaign
        idDocs.add(doc + docBase);
      }
    }
  }

  @Override
  public boolean acceptsDocsOutOfOrder() {
    return true;//podria ser tru
  }

  @Override

public void setNextReader(AtomicReaderContext context) throwsIOException {

    this.docBase = context.docBase;
  }

  /**
   * @return the topscore
   */
  public float[] getTopscore() {
    return topscore;
  }

  /**
   * @return the maxCpc
   */
  public float getMaxCpc() {
    return maxCpc;
  }

  /**
   * @return the docSet
   */
  public SortedIntDocSet getDocSet() {
    docSet = new SortedIntDocSet(idDocs.toArray());
    return docSet;
  }

  /**
   * @return the maxDocCpc
   */
  public com.carrotsearch.hppc.IntFloatOpenHashMap getMaxDocCpc() {
    return maxDocCpc;
  }

}







---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org

Help retrieving BinaryDocValues

Reply via email to