Hey there. I needed functionality similar to adjacent field collapsing, but instead of making the collapsed docs disappear I just wanted to move them to the end of the list (the ids array).
At the moment, I am just experimenting the way to obtain the shortests reponse time. provably will not be able to use my solution as it's a pretty big core hack, just would like to hear advices of "cleaner" ways to do this or about what do you think. I don't want this algorithm to be applyed in the whole index as it makes responses slower and have no interest in results after page 30, for example. I just want it to be applyed for the first 3000 or 5000 results. Due to performance issues (speed request and index size) couldn't use the collapsing patch so what I have done is to apply the algorithm straight away in getDocListAndSetNC and getDocListNC. Basically what I do is... if the user asks for less than "considerHowMany" docs I will ask for this number or if there are less I will ask for all of them (when topCollector.topDocs... is called). then I will apply the adjacent field collapse algorithm but instead of making the docs desapear I will send them to the end of the cue. I meam, let's say a query has 1.357.534. I just want to apply the algorithm to the 5000 results. So, if the 2nd results must be collapsed, it will go to the position 5000, if the 3rd must be collapse will go to 4999... After the 5000th the pseudo-collapse algorithm will stop being applyied. I have added to parameters to the QueryCommand use to decide if the algorithm has to be applyed and for how many documents must be applyied. I repeat it, it's just testing, I now it's not good to modify this classes... just want to hear any advice that could help me to do something similar without messing the code that much or what people think. I leave here my getDocListAndSetNC.java (have done the same for getDocListNC): private DocSet getDocListAndSetNC(QueryResult qr,QueryCommand cmd) throws IOException { int len = cmd.getSupersetMaxDoc(); DocSet filter = cmd.getFilter()!=null ? 
cmd.getFilter() : getDocSet(cmd.getFilterList()); int last = len; if (last < 0 || last > maxDoc()) last=maxDoc(); final int lastDocRequested = last; int nDocsReturned; int totalHits; float maxScore; int[] ids; float[] scores; DocSet set; //extra vars boolean considerMoreDocs = cmd.getConsiderMoreDocs() ; int considerHowMany = cmd.getConsiderHowMany() ; boolean needScores = (cmd.getFlags() & GET_SCORES) != 0; int maxDoc = maxDoc(); int smallSetSize = maxDoc>>6; Query query = QueryUtils.makeQueryable(cmd.getQuery()); final long timeAllowed = cmd.getTimeAllowed(); final Filter luceneFilter = filter==null ? null : filter.getTopFilter(); // handle zero case... if (lastDocRequested<=0) { final float[] topscore = new float[] { Float.NEGATIVE_INFINITY }; Collector collector; DocSetCollector setCollector; if (!needScores) { collector = setCollector = new DocSetCollector(smallSetSize, maxDoc); } else { collector = setCollector = new DocSetDelegateCollector(smallSetSize, maxDoc, new Collector() { Scorer scorer; public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; } public void collect(int doc) throws IOException { float score = scorer.score(); if (score > topscore[0]) topscore[0]=score; } public void setNextReader(IndexReader reader, int docBase) throws IOException { } }); } if( timeAllowed > 0 ) { collector = new TimeLimitingCollector(collector, timeAllowed); } try { super.search(query, luceneFilter, collector); } catch( TimeLimitingCollector.TimeExceededException x ) { log.warn( "Query: " + query + "; " + x.getMessage() ); qr.setPartialResults(true); } set = setCollector.getDocSet(); nDocsReturned = 0; ids = new int[nDocsReturned]; scores = new float[nDocsReturned]; totalHits = set.size(); maxScore = totalHits>0 ? 
topscore[0] : 0.0f; } else { TopDocsCollector topCollector; //This is how it was: /*****************/ /* if (cmd.getSort() == null) { topCollector = TopScoreDocCollector.create(len, true); } else { topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true); } **/ if (cmd.getSort() == null) { if(len < considerHowMany && considerMoreDocs){ topCollector = TopScoreDocCollector.create(considerHowMany, true); }else{ topCollector = TopScoreDocCollector.create(len, true); } } else { if(len < considerHowMany && considerMoreDocs){ topCollector = TopFieldCollector.create(cmd.getSort(), considerHowMany, false, needScores, needScores, true); }else{ topCollector = TopFieldCollector.create(cmd.getSort(), len, false, needScores, needScores, true); } } /******************/ DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector); Collector collector = setCollector; if( timeAllowed > 0 ) { collector = new TimeLimitingCollector(collector, timeAllowed ); } try { super.search(query, luceneFilter, collector); } catch( TimeLimitingCollector.TimeExceededException x ) { log.warn( "Query: " + query + "; " + x.getMessage() ); qr.setPartialResults(true); } set = setCollector.getDocSet(); totalHits = topCollector.getTotalHits(); assert(totalHits == set.size()); int collapseLen ; boolean not_enough = false ; //This is how it was... /****************/ /* TopDocs topDocs = topCollector.topDocs(0, len); */ if(len < considerHowMany && considerMoreDocs) { collapseLen = considerHowMany ; not_enough = true ; }else { collapseLen = len ; } TopDocs topDocs = topCollector.topDocs(0, collapseLen); /****************/ maxScore = totalHits>0 ? topDocs.getMaxScore() : 0.0f; nDocsReturned = topDocs.scoreDocs.length; ids = new int[nDocsReturned]; scores = (cmd.getFlags()&GET_SCORES)!=0 ? 
new float[nDocsReturned] : null; //This is how it was: /*****************/ /* for (int i=0; i<nDocsReturned; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[i] = scoreDoc.doc; if (scores != null) scores[i] = scoreDoc.score; } */ if(cmd.getSort() == null && considerMoreDocs) //there's no sort specified so I apply the algorithm { //collapse final String[] values = FieldCache.DEFAULT.getStrings(this.getReader(), "id_source"); String collapseValue = null ; int collapseId = -1; int up = 0; int down = 0; if(nDocsReturned < considerHowMany) { down = nDocsReturned-1 ; }else{ down=considerHowMany-1; } if(nDocsReturned < considerHowMany) { for (int i=0; i<nDocsReturned; i++) { int currentId = topDocs.scoreDocs[i].doc ; String currentValue = values[currentId] ; if (collapseValue == null) { //the first doc is always good collapseId = currentId; collapseValue = currentValue; ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[up] = scoreDoc.doc; if (scores != null) scores[up] = scoreDoc.score; up++; }else { if (!collapseValue.equals(currentValue)) { //the good docs ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[up] = scoreDoc.doc; up++ ; if (scores != null) scores[up] = scoreDoc.score; collapseId = currentId; collapseValue = currentValue; }else { //the bad docs ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[down] = scoreDoc.doc; down-- ; if (scores != null) scores[down] = scoreDoc.score; } } } }else{ for (int i=0; i<nDocsReturned; i++) { if(i < considerHowMany) { int currentId = topDocs.scoreDocs[i].doc ; String currentValue = values[currentId] ; if (collapseValue == null) { //the first doc collapseId = currentId; collapseValue = currentValue; ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[up] = scoreDoc.doc; if (scores != null) scores[up] = scoreDoc.score; up++; }else { if (!collapseValue.equals(currentValue)) { //the good docs ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[up] = scoreDoc.doc; up++ ; if (scores != null) scores[up] = scoreDoc.score; collapseId = currentId; 
collapseValue = currentValue; }else { //the bad docs ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[down] = scoreDoc.doc; down-- ; if (scores != null) scores[down] = scoreDoc.score; } } }else{ //after considerHowMany stop applying the algorithm ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[i] = scoreDoc.doc; if (scores != null) scores[i] = scoreDoc.score; } } } }else //there is a sorter, don't wnat to apply new algorithm //or the requests specifically ask not for it { for (int i=0; i<nDocsReturned; i++) { ScoreDoc scoreDoc = topDocs.scoreDocs[i]; ids[i] = scoreDoc.doc; if (scores != null) scores[i] = scoreDoc.score; } } /********************/ } int sliceLen = Math.min(lastDocRequested,nDocsReturned); if (sliceLen < 0) sliceLen=0; qr.setDocList(new DocSlice(0,sliceLen,ids,scores,totalHits,maxScore)); // TODO: if we collect results before the filter, we just need to intersect with // that filter to generate the DocSet for qr.setDocSet() qr.setDocSet(set); // TODO: currently we don't generate the DocSet for the base query, // but the QueryDocSet == CompleteDocSet if filter==null. return filter==null ? qr.getDocSet() : null; } -- View this message in context: http://www.nabble.com/Custom-funcionality-in-SolrIndexSearcher-tp24475706p24475706.html Sent from the Solr - User mailing list archive at Nabble.com.