https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109821
Revision: 109821 Author: oren Date: 2012-01-23 14:17:00 +0000 (Mon, 23 Jan 2012) Log Message: ----------- Upgrade to Lucene 2.9.4 * The API does not support tokenStream(String,String), only tokenStream(String,Reader), so the second String argument is now wrapped in a StringReader * Removed some redundant methods * Other API adjustments Modified Paths: -------------- trunk/lucene-search-3/src/main/java/org/apache/lucene/search/ArticleQueryWrap.java trunk/lucene-search-3/src/main/java/org/apache/lucene/search/CustomBoostQuery.java trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalMultiQuery.java trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalQuery.java trunk/lucene-search-3/src/main/java/org/apache/lucene/search/RelevanceQuery.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/Aggregate.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/KeywordsAnalyzer.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/LowercaseAnalyzer.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/ReusableLanguageAnalyzer.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/SplitAnalyzer.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/WikiQueryParser.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/frontend/HttpMonitor.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/AnalysisTest.java trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/ExtTokenTest.java trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/index/WikiIndexModifierTest.java Modified: trunk/lucene-search-3/src/main/java/org/apache/lucene/search/ArticleQueryWrap.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/apache/lucene/search/ArticleQueryWrap.java 2012-01-23 
12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/apache/lucene/search/ArticleQueryWrap.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -65,7 +65,7 @@ return exp; } - protected Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(Searcher searcher) throws IOException { return new ArticleQueryWeight(searcher); } Modified: trunk/lucene-search-3/src/main/java/org/apache/lucene/search/CustomBoostQuery.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/apache/lucene/search/CustomBoostQuery.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/apache/lucene/search/CustomBoostQuery.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -313,7 +313,7 @@ } /*(non-Javadoc) @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher) */ - protected Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(Searcher searcher) throws IOException { return new CustomWeight(searcher); } Modified: trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalMultiQuery.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalMultiQuery.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalMultiQuery.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -90,7 +90,7 @@ return "(P "+buffer.toString()+")"; } - protected Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(Searcher searcher) throws IOException { return new PositionalMultiWeight(searcher); } Modified: trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalQuery.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalQuery.java 2012-01-23 
12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/apache/lucene/search/PositionalQuery.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -9,6 +9,8 @@ import org.apache.lucene.util.ToStringUtils; /** + * based on lucene's PhraseQuery + * * Phrase query with * 1) extra boost for different position ranges within the document * 2) ability to use aggregate positional information if available @@ -128,7 +130,7 @@ - protected Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(Searcher searcher) throws IOException { return new PositionalWeight(searcher); } @@ -321,5 +323,9 @@ return true; } + /** Returns the set of terms in this phrase. */ + public Term[] getTerms() { + return (Term[])terms.toArray(new Term[0]); + } } Modified: trunk/lucene-search-3/src/main/java/org/apache/lucene/search/RelevanceQuery.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/apache/lucene/search/RelevanceQuery.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/apache/lucene/search/RelevanceQuery.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -273,7 +273,7 @@ } /*(non-Javadoc) @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher) */ - protected Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(Searcher searcher) throws IOException { return new RelevanceWeight(searcher); } Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/Aggregate.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/Aggregate.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/Aggregate.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -1,6 +1,7 @@ package org.wikimedia.lsearch.analyzers; import java.io.IOException; +import 
java.io.StringReader; import java.util.ArrayList; import java.util.HashSet; @@ -28,7 +29,7 @@ * @throws IOException */ public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer, String field, HashSet<String> stopWords, Flags flags) throws IOException{ - setTokens(toTokenArray(analyzer.tokenStream(field,text)),stopWords); + setTokens(toTokenArray(analyzer.tokenStream(field,new StringReader(text))),stopWords); this.boost = boost; this.flags = flags; @@ -60,7 +61,7 @@ * @throws IOException */ public Aggregate(String text, float boost, IndexId iid, Analyzer analyzer, String field, Flags flags) throws IOException{ - this.tokens = toTokenArray(analyzer.tokenStream(field,text)); + this.tokens = toTokenArray(analyzer.tokenStream(field,new StringReader(text))); this.boost = boost; this.noStopWordsLength = noAliasLength(); this.flags = flags; Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/KeywordsAnalyzer.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/KeywordsAnalyzer.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/KeywordsAnalyzer.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -2,6 +2,7 @@ import java.io.IOException; import java.io.Reader; +import java.io.StringReader; import java.util.ArrayList; import java.util.HashSet; @@ -92,11 +93,8 @@ return null; } } - @Override - public TokenStream tokenStream(String fieldName, String text) { - return tokenStream(fieldName,(Reader)null); - } + class KeywordsTokenStream extends TokenStream { protected Analyzer analyzer; protected ArrayList<String> keywords; @@ -145,7 +143,7 @@ do{ // next keyword title keyword = keywords.get(index++); - tokens = analyzer.tokenStream("",keyword); + tokens = analyzer.tokenStream("",new StringReader(keyword)); // try to tokenize t = tokens.next(); if(t == null && index == keywords.size()) Modified: 
trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/LowercaseAnalyzer.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/LowercaseAnalyzer.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/LowercaseAnalyzer.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -31,11 +31,8 @@ } + @Override - public TokenStream tokenStream(String fieldName, String text) { - return new LowercaseTokenizer(text); - } - @Override public TokenStream tokenStream(String fieldName, Reader reader) { throw new UnsupportedOperationException("Use tokenStream(String,String)"); } Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/ReusableLanguageAnalyzer.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/ReusableLanguageAnalyzer.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/ReusableLanguageAnalyzer.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -26,16 +26,7 @@ } - /** - * Used in {@link WikiQueryParser} to parse parts of the query. - */ @Override - public TokenStream tokenStream(String fieldName, String text) { - wikitokenizer = new WikiTokenizer(text,filters.getIndexId(),options); - return super.tokenStream(fieldName,(Reader)null); - } - - @Override public TokenStream tokenStream(String fieldName, Reader reader) { Thread.dumpStack(); log.error("Invalid usage of QueryLanguageAnalyzer.tokenStream(String,Reader). Use tokenStream(String,String). Probably bug in the software. 
"); Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/SplitAnalyzer.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/SplitAnalyzer.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/SplitAnalyzer.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -63,7 +63,6 @@ this.splitPhrases = splitPhrases; } - @Override public TokenStream tokenStream(String fieldName, String text) { return new SplitTokenStream(text); } Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/WikiQueryParser.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/WikiQueryParser.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/WikiQueryParser.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -1,6 +1,7 @@ package org.wikimedia.lsearch.analyzers; import java.io.IOException; +import java.io.StringReader; import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; @@ -769,8 +770,8 @@ String analysisField = defaultField; if (defaultField.equals("contents") && isInTitle) analysisField = "title"; - tokenStream = analyzer.tokenStream(analysisField, new String(buffer, 0, - length)); + tokenStream = analyzer.tokenStream(analysisField, new StringReader( + new String(buffer, 0,length))); Token token; tokens.clear(); @@ -788,7 +789,7 @@ * storage attributes) */ private ArrayList<Token> analyzeString(String input) { - tokenStream = analyzer.tokenStream("contents", input); + tokenStream = analyzer.tokenStream("contents", new StringReader(input)); ArrayList<Token> ret = new ArrayList<Token>(); Token token; Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/frontend/HttpMonitor.java 
=================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/frontend/HttpMonitor.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/frontend/HttpMonitor.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -68,7 +68,6 @@ Hashtable<HttpHandler,Long> times = (Hashtable<HttpHandler, Long>) startTimes.clone(); // clone to avoid sync ArrayList<Entry<HttpHandler, Long>> sorted = new ArrayList<Entry<HttpHandler,Long>>(times.entrySet()); Collections.sort(sorted, new Comparator<Entry<HttpHandler,Long>>() { - @Override public int compare(Entry<HttpHandler, Long> o1, Entry<HttpHandler, Long> o2) { return (int) (o2.getValue() - o1.getValue()); Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -6,6 +6,7 @@ import java.io.File; import java.io.IOException; +import java.io.StringReader; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; @@ -743,7 +744,8 @@ Analyzer contentAnalyzer = new LanguageAnalyzer(filters,tokenizer); // contents - doc.add(new Field(fields.contents(),contentAnalyzer.tokenStream(fields.contents(),""))); + //TODO: try passing null + doc.add(new Field(fields.contents(),contentAnalyzer.tokenStream(fields.contents(),new StringReader("")))); /* if(contentNamespaces.contains(article.getNamespace())){ @@ -760,7 +762,7 @@ // category if(!bs.isExactCase()){ // each token is one category (category names themself are not tokenized) - doc.add(new Field("category", new CategoryAnalyzer(tokenizer.getCategories(),false).tokenStream("category",""))); + 
doc.add(new Field("category", new CategoryAnalyzer(tokenizer.getCategories(),false).tokenStream("category",new StringReader("")))); } // reverse title for wildcard searches @@ -826,7 +828,7 @@ Analyzer analyzer = Analyzers.getHighlightAnalyzer(filters,fields,exactCase); ReusableLanguageAnalyzer contentAnalyzer = Analyzers.getReusableHighlightAnalyzer(filters,exactCase); - doc.add(new Field(fields.hl_text(),ExtToken.serialize(contentAnalyzer.tokenStream(fields.contents(),article.getContents())),Store.COMPRESS)); + doc.add(new Field(fields.hl_text(),ExtToken.serialize(contentAnalyzer.tokenStream(fields.contents(),new StringReader(article.getContents()))),Store.COMPRESS)); ArrayList<String> sections = contentAnalyzer.getWikiTokenizer().getHeadingText(); doc.add(new Field(fields.hl_alttitle(),Alttitles.serializeAltTitle(article,iid,sections,analyzer,fields.alttitle()),Store.COMPRESS)); } @@ -879,7 +881,7 @@ sb.append(" "); } // get individual words - TokenStream ts = Analyzers.getIndexerAnalyzer(new FieldBuilder(iid,false)).tokenStream("title",sb.toString()); + TokenStream ts = Analyzers.getIndexerAnalyzer(new FieldBuilder(iid,false)).tokenStream("title",new StringReader(sb.toString())); Token t; HashSet<String> tokenized = new HashSet<String>(); while((t = ts.next()) != null){ @@ -970,7 +972,9 @@ protected static void makeAggregate(Document doc, String prefix, ArrayList<Aggregate> items){ if(items.size() == 0) return; // don't add aggregate fields if they are empty - doc.add(new Field(prefix,new AggregateAnalyzer(items).tokenStream(prefix,""))); + + //TODO: try passing null + doc.add(new Field(prefix,new AggregateAnalyzer(items).tokenStream(prefix,new StringReader("")))); doc.add(new Field(prefix+"_meta",Aggregate.serializeAggregate(items),Field.Store.YES)); } Modified: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/AnalysisTest.java =================================================================== --- 
trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/AnalysisTest.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/AnalysisTest.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -1,6 +1,7 @@ package org.wikimedia.lsearch.analyzers; import java.io.IOException; +import java.io.StringReader; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -89,7 +90,7 @@ } public static Token[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException { - TokenStream stream = analyzer.tokenStream(field, text); + TokenStream stream = analyzer.tokenStream(field, new StringReader(text)); ArrayList<Token> tokenList = new ArrayList<Token>(); while (true) { Token token = stream.next(); @@ -301,7 +302,7 @@ for(int i = 0 ; i<total; i++ ){ for(TestArticle article : articles){ count++; - byte[] b = ExtToken.serialize(analyzer.tokenStream("",article.content)); + byte[] b = ExtToken.serialize(analyzer.tokenStream("",new StringReader(article.content))); if(i == 0) size += b.length; else Modified: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/ExtTokenTest.java =================================================================== --- trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/ExtTokenTest.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/analyzers/ExtTokenTest.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -1,6 +1,7 @@ package org.wikimedia.lsearch.analyzers; import java.io.IOException; +import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -20,7 +21,7 @@ Analyzer analyzer = Analyzers.getHighlightAnalyzer(IndexId.get("enwiki"),false); String text = "Some extremely [[simple]] example text. 
With two sentences, by Šostakovič."; - byte[] serialized = ExtToken.serialize(analyzer.tokenStream("",text)); + byte[] serialized = ExtToken.serialize(analyzer.tokenStream("",new StringReader(text))); HashMap<Integer,Position> posMap = new HashMap<Integer,Position>(); for(Position p : Position.values()) posMap.put(p.ordinal(),p); @@ -42,7 +43,7 @@ break; } } - byte[] serialized = ExtToken.serialize(analyzer.tokenStream("",article.content)); + byte[] serialized = ExtToken.serialize(analyzer.tokenStream("",new StringReader(article.content))); long start = System.currentTimeMillis(); HashMap<Integer,Position> posMap = new HashMap<Integer,Position>(); for(Position p : Position.values()) Modified: trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/index/WikiIndexModifierTest.java =================================================================== --- trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/index/WikiIndexModifierTest.java 2012-01-23 12:53:38 UTC (rev 109820) +++ trunk/lucene-search-3/src/test/java/org/wikimedia/lsearch/index/WikiIndexModifierTest.java 2012-01-23 14:17:00 UTC (rev 109821) @@ -1,6 +1,7 @@ package org.wikimedia.lsearch.index; import java.io.IOException; +import java.io.StringReader; import java.util.ArrayList; import java.util.Date; import java.util.HashSet; @@ -188,10 +189,10 @@ return t.getPositionIncrement() + " ["+t.termText()+"]"; } TokenStream ts = f.tokenStreamValue(); - if(ts == null && f.stringValue()!=null) - ts = analyzer.tokenStream(field, f.stringValue()); if(ts == null && f.readerValue()!=null) ts = analyzer.tokenStream(field, f.readerValue()); + if(ts == null && f.stringValue()!=null) + ts = analyzer.tokenStream(field, new StringReader(f.stringValue())); if(ts == null) fail("No token stream for field "+field); Token t = null; _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs