Hi, Thanks for your response. However, the source code is correct; the file content follows.
Regards, /** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.search; import java.io.Reader; import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.ISOLatin1AccentFilter; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.PorterStemFilter; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.StopwordAnalyzerBase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.util.Version; import org.dspace.core.ConfigurationManager; /** * Custom Lucene Analyzer that combines the standard filter, lowercase filter, * stemming and stopword filters. */ public class DSAnalyzer extends StopwordAnalyzerBase { protected final Version matchVersion; /* * An array containing some common words that are not usually useful for * searching. 
*/ protected static final String[] STOP_WORDS = { // new stopwords (per MargretB) "a", "am", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "the", "to", "was" // old stopwords (Lucene default) /* * "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", * "into", "is", "it", "no", "not", "of", "on", "or", "s", "such", "t", * "that", "the", "their","then", "there","these", "they", "this", "to", * "was", "will", "with" */ }; /* * Stop table */ protected final Set stopSet; /** * Builds an analyzer * @param matchVersion Lucene version to match */ public DSAnalyzer(Version matchVersion) { super(matchVersion, StopFilter.makeStopSet(matchVersion, STOP_WORDS)); this.stopSet = StopFilter.makeStopSet(matchVersion, STOP_WORDS); this.matchVersion = matchVersion; } @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = new DSTokenizer(matchVersion, reader); TokenStream result = new StandardFilter(matchVersion, source); result = new StandardFilter(result); result = new LowerCaseFilter(matchVersion, result); result = new StopFilter(matchVersion, result, stopSet); result = new PorterStemFilter(result); result = new ISOLatin1AccentFilter(result); return new TokenStreamComponents(source, result); return result; } @Override public int getPositionIncrementGap(String fieldName) { // If it is the default field, or bounded fields is turned off in the config, return the default value if ("default".equalsIgnoreCase(fieldName) || !ConfigurationManager.getBooleanProperty("search.boundedfields", false)) { return super.getPositionIncrementGap(fieldName); } // Not the default field, and we want bounded fields, so return an large gap increment return 10; } } -----Message d'origine----- De : Brian Freels-Stendel [mailto:bfre...@unm.edu] Envoyé : 8 août 2012 14:41 À : DSpace-tech@lists.sourceforge.net; Hatem Jlassi Objet : Re: [Dspace-tech] Searching : 
Diacritics & Indexing Hi, I think the problem may lie in the first line. It should be import org.apache.lucene.analysis.ISOLatin1AccentFilter; and be included at the top of the file with the rest of the imports. The second line looks fine, and goes with the rest of the filter statements. B-- >>> On 8/8/2012 at 12:14 PM, in message <85c980bb1085994793231c3abd13a5629ee...@xmbx03.sti.usherbrooke.ca>, Hatem Jlassi <hatem.jla...@usherbrooke.ca> wrote: > Hi all, > > We are running a bilingual (French/English) instance of the latest version > of DSpace (1.8.2). We have some problems with searching for words with > diacritics. DSpace's search doesn't find words with accented > characters when the search doesn't include these accents. > We modified > (\dspace-1.8.2-src-release\dspace-api\src\main\java\org\dspace\search > \DSAnalyzer.java) and we added the following two lines: > ISOLatin1AccentFilter; > result = new ISOLatin1AccentFilter(result); We then rebuilt and re-indexed DSpace. > But the problem was not resolved. > > If anyone has solved this problem - Please Help!!! Thank You > > Regards, ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ DSpace-tech mailing list DSpace-tech@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/dspace-tech