https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109911
Revision: 109911 Author: oren Date: 2012-01-24 10:11:26 +0000 (Tue, 24 Jan 2012) Log Message: ----------- Replaced the old Field.Index constant names with their new equivalents: * TOKENIZED --> ANALYZED * UN_TOKENIZED --> NOT_ANALYZED * NO_NORMS --> NOT_ANALYZED_NO_NORMS (the new names are clearer; the old ones became deprecated in version 3.0) Modified Paths: -------------- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/ranks/Links.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/CleanIndexWriter.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/NgramIndexer.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/SpellCheckIndexer.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/storage/RelatedStorage.java Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java 2012-01-24 09:59:10 UTC (rev 109910) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/index/WikiIndexModifier.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -682,10 +682,10 @@ NamespaceFilter contentNamespaces = iid.getContentNamespaces(); // page_id from database, used to look up and replace entries on index updates - doc.add(new Field("key", article.getIndexKey(), Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("key", article.getIndexKey(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // namespace, returned with results - doc.add(new Field("namespace", article.getNamespace(), Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("namespace", article.getNamespace(), Field.Store.YES, Field.Index.NOT_ANALYZED)); 
// raw rank value doc.add(new Field("rank",Integer.toString(article.getRank()), @@ -694,7 +694,7 @@ // redirect namespace if(article.isRedirect()){ doc.add(new Field("redirect_namespace",Integer.toString(article.getRedirectTargetNamespace()), - Field.Store.NO, Field.Index.UN_TOKENIZED)); + Field.Store.NO, Field.Index.NOT_ANALYZED)); } if(contentNamespaces.contains(article.getNamespace())){ @@ -710,7 +710,7 @@ float rankBoost = transformRank(article.getRank()); // prefix title for prefix: searches - Field prefix = new Field("prefix", article.getNsTitleKey().toLowerCase(), Field.Store.NO, Field.Index.UN_TOKENIZED); + Field prefix = new Field("prefix", article.getNsTitleKey().toLowerCase(), Field.Store.NO, Field.Index.NOT_ANALYZED); prefix.setBoost(rankBoost); doc.add(prefix); @@ -737,7 +737,7 @@ tokenizer.tokenize(); // title - Field title = new Field(fields.title(), article.getTitle(), Field.Store.YES, Field.Index.TOKENIZED); + Field title = new Field(fields.title(), article.getTitle(), Field.Store.YES, Field.Index.ANALYZED); title.setBoost(rankBoost); doc.add(title); @@ -766,7 +766,7 @@ } // reverse title for wildcard searches - Field rtitle = new Field(fields.reverse_title(), StringUtils.reverseString(article.getTitle()), Field.Store.NO, Field.Index.TOKENIZED); + Field rtitle = new Field(fields.reverse_title(), StringUtils.reverseString(article.getTitle()), Field.Store.NO, Field.Index.ANALYZED); rtitle.setBoost(rankBoost); doc.add(rtitle); @@ -775,7 +775,7 @@ while (e.hasMoreElements()) { String key = (String)e.nextElement(); String value = article.DiscussionThreadingInfo.get(key); - doc.add( new Field( key, value, Store.YES, Index.UN_TOKENIZED) ); + doc.add( new Field( key, value, Store.YES, Index.NOT_ANALYZED) ); } // extra info (for spellcheck indexes) @@ -819,8 +819,8 @@ SimpleDateFormat isoDate = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); isoDate.setTimeZone(TimeZone.getTimeZone("GMT")); Document doc = new Document(); - doc.add(new 
Field("pageid",article.getPageIdStr(),Store.NO,Index.UN_TOKENIZED)); - doc.add(new Field("key",key,Store.NO,Index.UN_TOKENIZED)); + doc.add(new Field("pageid",article.getPageIdStr(),Store.NO,Index.NOT_ANALYZED)); + doc.add(new Field("key",key,Store.NO,Index.NOT_ANALYZED)); for(FieldBuilder.BuilderSet bs : builder.getBuilders()){ FieldNameFactory fields = bs.getFields(); FilterFactory filters = bs.getFilters(); @@ -845,15 +845,15 @@ float rankBoost = transformRank(article.getRank()); Document doc = new Document(); log.debug("Adding interwiki title pageid="+suffix+":"+article.getPageIdStr()+", key="+suffix+":"+key); - doc.add(new Field("pageid",suffix+":"+article.getPageIdStr(),Store.NO,Index.UN_TOKENIZED)); - doc.add(new Field("key",suffix+":"+key,Store.NO,Index.UN_TOKENIZED)); - doc.add(new Field("suffix",suffix,Store.YES,Index.UN_TOKENIZED)); - doc.add(new Field("dbname",dbname,Store.NO,Index.UN_TOKENIZED)); - doc.add(new Field("namespace",article.getNamespace(),Store.YES,Index.UN_TOKENIZED)); + doc.add(new Field("pageid",suffix+":"+article.getPageIdStr(),Store.NO,Index.NOT_ANALYZED)); + doc.add(new Field("key",suffix+":"+key,Store.NO,Index.NOT_ANALYZED)); + doc.add(new Field("suffix",suffix,Store.YES,Index.NOT_ANALYZED)); + doc.add(new Field("dbname",dbname,Store.NO,Index.NOT_ANALYZED)); + doc.add(new Field("namespace",article.getNamespace(),Store.YES,Index.NOT_ANALYZED)); // redirect namespace if(article.isRedirect()){ doc.add(new Field("redirect_namespace",Integer.toString(article.getRedirectTargetNamespace()), - Field.Store.NO, Field.Index.UN_TOKENIZED)); + Field.Store.NO, Field.Index.NOT_ANALYZED)); } Field title = new Field("title",article.getTitle(),Store.YES, Index.NO); title.setBoost(rankBoost); Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java 
2012-01-24 09:59:10 UTC (rev 109910) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -245,7 +245,7 @@ } } Document d = new Document(); - d.add(new Field("prefix",prefix,Field.Store.NO,Field.Index.NO_NORMS)); + d.add(new Field("prefix",prefix,Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS)); d.add(new Field("articles",new StringList(selected).toString(),Field.Store.YES,Field.Index.NO)); setOmitNorms(d); writer.addDocument(d); @@ -268,7 +268,7 @@ d.add(new Field("article",serialize(key,ref,redirect),Field.Store.YES,Field.Index.NO)); ArrayList<Token> canonized = canonize(key,iid,filters); for(Token t : canonized){ - d.add(new Field("key",t.termText(),Field.Store.NO,Field.Index.TOKENIZED)); + d.add(new Field("key",t.termText(),Field.Store.NO,Field.Index.ANALYZED)); } setOmitNorms(d); writer.addDocument(d); @@ -387,11 +387,11 @@ return; // ignore redirects like byzantine -> byzantine empire // add to index Document d = new Document(); - d.add(new Field("pageid",pageId,Field.Store.NO,Field.Index.UN_TOKENIZED)); - d.add(new Field("key",key,Field.Store.YES,Field.Index.UN_TOKENIZED)); + d.add(new Field("pageid",pageId,Field.Store.NO,Field.Index.NOT_ANALYZED)); + d.add(new Field("key",key,Field.Store.YES,Field.Index.NOT_ANALYZED)); ArrayList<Token> canonized = canonize(key,iid,filters); for(Token t : canonized){ - d.add(new Field("key",t.termText(),Field.Store.NO,Field.Index.TOKENIZED)); + d.add(new Field("key",t.termText(),Field.Store.NO,Field.Index.ANALYZED)); } if(redirect!=null && !redirect.equals("")){ // redirect target and its rank d.add(new Field("redirect",redirect,Field.Store.YES,Field.Index.NO)); Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/ranks/Links.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/ranks/Links.java 2012-01-24 09:59:10 UTC (rev 109910) +++ 
trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/ranks/Links.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -361,16 +361,16 @@ StringList ak = new StringList(anchors); Analyzer an = new SplitAnalyzer(1,false); Document doc = new Document(); - doc.add(new Field("article_pageid",pageId,Field.Store.YES,Field.Index.UN_TOKENIZED)); + doc.add(new Field("article_pageid",pageId,Field.Store.YES,Field.Index.NOT_ANALYZED)); // ns:title - doc.add(new Field("article_key",t.getKey(),Field.Store.YES,Field.Index.UN_TOKENIZED)); + doc.add(new Field("article_key",t.getKey(),Field.Store.YES,Field.Index.NOT_ANALYZED)); if(redirectsTo != null) // redirect_ns:title|target_ns:title - doc.add(new Field("redirect",redirectsTo+"|"+t.getKey(),Field.Store.YES,Field.Index.UN_TOKENIZED)); + doc.add(new Field("redirect",redirectsTo+"|"+t.getKey(),Field.Store.YES,Field.Index.NOT_ANALYZED)); else{ // a list of all links/anchors - doc.add(new Field("links",lk.toString(),Field.Store.NO,Field.Index.TOKENIZED)); - doc.add(new Field("anchors",ak.toString(),Field.Store.NO,Field.Index.TOKENIZED)); + doc.add(new Field("links",lk.toString(),Field.Store.NO,Field.Index.ANALYZED)); + doc.add(new Field("anchors",ak.toString(),Field.Store.NO,Field.Index.ANALYZED)); } writer.addDocument(doc,an); Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/CleanIndexWriter.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/CleanIndexWriter.java 2012-01-24 09:59:10 UTC (rev 109910) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/CleanIndexWriter.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -158,9 +158,9 @@ /** Add title/redirect with ranks information only */ protected void addTitleOnly(Article article) { Document doc = new Document(); - doc.add(new Field("key",article.getIndexKey(),Store.NO,Index.UN_TOKENIZED)); - doc.add(new 
Field("ns_title",article.getTitle(),Store.YES,Index.TOKENIZED)); - doc.add(new Field("ns_namespace",article.getNamespace(),Store.YES,Index.UN_TOKENIZED)); + doc.add(new Field("key",article.getIndexKey(),Store.NO,Index.NOT_ANALYZED)); + doc.add(new Field("ns_title",article.getTitle(),Store.YES,Index.ANALYZED)); + doc.add(new Field("ns_namespace",article.getNamespace(),Store.YES,Index.NOT_ANALYZED)); doc.add(new Field("ns_rank",Integer.toString(article.getReferences()),Store.YES,Index.NO)); if(article.isRedirect()) doc.add(new Field("ns_redirect",article.getRedirectTarget(),Store.YES,Index.NO)); @@ -202,7 +202,7 @@ sb.append(val); } Document doc = new Document(); - doc.add(new Field("metadata_key",key, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("metadata_key",key, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("metadata_value",sb.toString(), Field.Store.YES, Field.Index.NO)); try { Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/NgramIndexer.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/NgramIndexer.java 2012-01-24 09:59:10 UTC (rev 109910) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/NgramIndexer.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -246,8 +246,8 @@ for(int j=0 ; j<ngrams.length ; j++){ String ngram = ngrams[j]; if(j==0) - doc.add(new Field(startField+i, ngram, Field.Store.NO, Field.Index.UN_TOKENIZED)); - doc.add(new Field(field, ngram, Field.Store.NO, Field.Index.UN_TOKENIZED)); + doc.add(new Field(startField+i, ngram, Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field(field, ngram, Field.Store.NO, Field.Index.NOT_ANALYZED)); } } } Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/SpellCheckIndexer.java =================================================================== --- 
trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/SpellCheckIndexer.java 2012-01-24 09:59:10 UTC (rev 109910) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/SpellCheckIndexer.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -299,9 +299,9 @@ String normalized = FastWikiTokenizerEngine.normalize(title.toLowerCase()); String decomposed = FastWikiTokenizerEngine.decompose(normalized); // doc.add(new Field("title", ns+":"+title, Field.Store.YES, Field.Index.NO)); - doc.add(new Field("title", normalized, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("title", normalized, Field.Store.YES, Field.Index.NOT_ANALYZED)); if(decomposed != normalized) - doc.add(new Field("title", decomposed, Field.Store.NO, Field.Index.UN_TOKENIZED)); + doc.add(new Field("title", decomposed, Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.add(new Field("rank", rank, Field.Store.YES, Field.Index.NO)); if(redirect!=null){ String redirectNormalized = FastWikiTokenizerEngine.normalize(redirect.substring(redirect.indexOf(':')+1).toLowerCase()); @@ -320,10 +320,10 @@ String normalized = FastWikiTokenizerEngine.normalize(title.toLowerCase()); String decomposed = FastWikiTokenizerEngine.decompose(normalized); //doc.add(new Field("ns_title", ns+":"+title, Field.Store.YES, Field.Index.NO)); - doc.add(new Field("ns_title", ns+":"+normalized, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("ns_title", ns+":"+normalized, Field.Store.YES, Field.Index.NOT_ANALYZED)); if(decomposed != normalized) - doc.add(new Field("ns_title", ns+":"+decomposed, Field.Store.NO, Field.Index.UN_TOKENIZED)); - doc.add(new Field("ns_namespace", ns, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("ns_title", ns+":"+decomposed, Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(new Field("ns_namespace", ns, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("ns_rank", rank, Field.Store.YES, Field.Index.NO)); if(redirect!=null && 
redirect.substring(0,redirect.indexOf(':')).equals(ns)){ String redirectNormalized = FastWikiTokenizerEngine.normalize(redirect.substring(redirect.indexOf(':')+1).toLowerCase()); @@ -374,13 +374,13 @@ HashMap<String,SimpleInt> freq = getFrequencies(phrase,ir); Document doc = new Document(); - doc.add(new Field("ns_phrase", phrase, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("ns_phrase", phrase, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("ns_namespace", new StringTokenStream(freq.keySet()))); for(Entry<String,SimpleInt> e : freq.entrySet()){ doc.add(new Field("ns_freq_"+e.getKey(), Integer.toString(e.getValue().count), Field.Store.YES, Field.Index.NO)); } if(inTitle){ - doc.add(new Field("ns_intitle","1", Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("ns_intitle","1", Field.Store.YES, Field.Index.NOT_ANALYZED)); } setOmitNorms(doc); ngramWriter.addDocument(doc); @@ -397,9 +397,9 @@ Document doc = new Document(); String decomposed = FastWikiTokenizerEngine.decompose(word); ngramWriter.createNgramFields(doc,"ns_word",decomposed,NgramIndexer.Type.WORDS); - doc.add(new Field("ns_word",word, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("ns_word",word, Field.Store.YES, Field.Index.NOT_ANALYZED)); if(decomposed != word) - doc.add(new Field("ns_word",decomposed, Field.Store.NO, Field.Index.UN_TOKENIZED)); + doc.add(new Field("ns_word",decomposed, Field.Store.NO, Field.Index.NOT_ANALYZED)); for(Entry<String,SimpleInt> e : freq.entrySet()) doc.add(new Field("ns_freq_"+e.getKey(), Integer.toString(e.getValue().count), Field.Store.YES, Field.Index.NO)); doc.add(new Field("ns_freq",Integer.toString(freqSum),Field.Store.YES, Field.Index.NO)); @@ -424,10 +424,10 @@ } Document doc = new Document(); //ngramWriter.createNgramFields(doc,"phrase",phrase); - doc.add(new Field("phrase",phrase, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("phrase",phrase, Field.Store.YES, 
Field.Index.NOT_ANALYZED)); doc.add(new Field("freq",Integer.toString(freq), Field.Store.YES, Field.Index.NO)); if(inTitle){ - doc.add(new Field("intitle","1", Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("intitle","1", Field.Store.YES, Field.Index.NOT_ANALYZED)); } if(corrected != null){ doc.add(new Field("misspell",corrected, Field.Store.YES, Field.Index.NO)); @@ -451,7 +451,7 @@ sb.append(val); } Document doc = new Document(); - doc.add(new Field("metadata_key",key, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("metadata_key",key, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("metadata_value",sb.toString(), Field.Store.YES, Field.Index.NO)); setOmitNorms(doc); @@ -470,9 +470,9 @@ Document doc = new Document(); String decomposed = FastWikiTokenizerEngine.decompose(word); ngramWriter.createNgramFields(doc,"word",decomposed,NgramIndexer.Type.WORDS); - doc.add(new Field("word",word, Field.Store.YES, Field.Index.UN_TOKENIZED)); + doc.add(new Field("word",word, Field.Store.YES, Field.Index.NOT_ANALYZED)); if(decomposed != word) - doc.add(new Field("word",decomposed, Field.Store.NO, Field.Index.UN_TOKENIZED)); + doc.add(new Field("word",decomposed, Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.add(new Field("freq",Integer.toString(freq), Field.Store.YES, Field.Index.NO)); doc.add(new Field("meta1",dmeta.doubleMetaphone(decomposed), Field.Store.YES, Field.Index.NO)); doc.add(new Field("meta2",dmeta.doubleMetaphone(decomposed,true), Field.Store.YES, Field.Index.NO)); @@ -485,7 +485,7 @@ if(context == null) return; Document doc = new Document(); - doc.add(new Field("context_key",key, Field.Store.NO, Field.Index.UN_TOKENIZED)); + doc.add(new Field("context_key",key, Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.add(new Field("context", context, Field.Store.YES, Field.Index.NO)); setOmitNorms(doc); ngramWriter.addDocument(doc); Modified: 
trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java 2012-01-24 09:59:10 UTC (rev 109910) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/spell/api/TitleNgramIndexer.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -97,9 +97,9 @@ Document doc = new Document(); // pageId is primary key - doc.add(new Field("pageid", pageId, Field.Store.NO, Field.Index.UN_TOKENIZED)); + doc.add(new Field("pageid", pageId, Field.Store.NO, Field.Index.NOT_ANALYZED)); if(!ns.equals("0")) - doc.add(new Field("namespace", ns, Field.Store.NO, Field.Index.UN_TOKENIZED)); + doc.add(new Field("namespace", ns, Field.Store.NO, Field.Index.NOT_ANALYZED)); doc.add(new Field("key", ns+":"+title, Field.Store.YES, Field.Index.NO)); doc.add(new Field(field, decomposed, Field.Store.YES, Field.Index.NO)); if(redirectTo != null) Modified: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/storage/RelatedStorage.java =================================================================== --- trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/storage/RelatedStorage.java 2012-01-24 09:59:10 UTC (rev 109910) +++ trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/storage/RelatedStorage.java 2012-01-24 10:11:26 UTC (rev 109911) @@ -34,7 +34,7 @@ ensureWrite(); StringList sl = new StringList(CompactRelated.convertToStringList(rel)); Document doc = new Document(); - doc.add(new Field("key",key,Field.Store.YES,Field.Index.UN_TOKENIZED)); + doc.add(new Field("key",key,Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("related",sl.toString(),Field.Store.COMPRESS,Field.Index.NO)); writer.addDocument(doc); } @@ -43,7 +43,7 @@ ensureWrite(); StringList sl = new StringList(Related.convertToStringList(rel)); Document doc = new Document(); - doc.add(new 
Field("key",key,Field.Store.YES,Field.Index.UN_TOKENIZED)); + doc.add(new Field("key",key,Field.Store.YES,Field.Index.NOT_ANALYZED)); doc.add(new Field("related",sl.toString(),Field.Store.COMPRESS,Field.Index.NO)); writer.addDocument(doc); } _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs