Can I do this at search time rather than at index time? Below is the code that handles the searching; where would I use such a filter?
Thanks for the help!

package search.lucene.search;

import org.apache.lucene.document.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.ISOLatin1AccentFilter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import search.lucene.index.IndexManager;

/**
 * Searches the Lucene index and returns the matching documents as
 * {@link SearchResultBean} instances.
 *
 * The query string is parsed against the "content" field with a
 * {@link StandardAnalyzer}.
 *
 * NOTE(review): query-time analysis normally has to match the analysis used
 * when the index was built; if accent folding (ISOLatin1AccentFilter) is
 * added here, confirm the index was built with the same filter chain.
 */
public class SearchManager {

    /** Raw query string as entered by the user. */
    private final String searchWord;

    /** Supplies the location of the index to search. */
    private final IndexManager indexManager;

    /** Analyzer used to tokenize the query string. */
    private final Analyzer analyzer;

    /**
     * Creates a search for the given query string.
     *
     * @param searchWord the query text, in QueryParser syntax
     */
    public SearchManager(String searchWord) {
        this.searchWord = searchWord;
        this.indexManager = new IndexManager();
        this.analyzer = new StandardAnalyzer();
    }

    /**
     * Runs the query against the index.
     *
     * Failures (unparsable query, I/O error while opening or reading the
     * index) are reported via printStackTrace and yield whatever results had
     * been collected so far, so callers never receive null.
     *
     * @return a list of SearchResultBean, one per hit in hit order; empty if
     *         the query is null/blank, cannot be parsed, or the index cannot
     *         be read
     */
    public List search() {
        List searchResult = new ArrayList();

        // Nothing to parse: avoid handing null/blank text to the QueryParser.
        if (searchWord == null || searchWord.trim().length() == 0) {
            return searchResult;
        }

        Query query;
        try {
            query = new QueryParser("content", analyzer).parse(searchWord);
        } catch (ParseException e) {
            e.printStackTrace();
            return searchResult;
        }

        IndexSearcher indexSearcher = null;
        try {
            indexSearcher = new IndexSearcher(indexManager.getIndexDir());
            Hits hits = indexSearcher.search(query);
            for (int i = 0; i < hits.length(); i++) {
                // Load the stored document once and reuse it for every field.
                Document doc = hits.doc(i);
                System.out.println(doc.get("filename"));

                SearchResultBean resultBean = new SearchResultBean();
                resultBean.setXMLId(doc.get("id"));
                resultBean.setXMLTitle(doc.get("title"));
                resultBean.setXMLAuthor(doc.get("author"));
                resultBean.setXMLAbstract(doc.get("abstract"));
                resultBean.setScore(hits.score(i));
                searchResult.add(resultBean);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // All hits have been copied into beans above, so the searcher
            // can be released now instead of being leaked.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return searchResult;
    }
}

thomas arni-2 wrote: > > You
can extend the DefaultAnalyzer. > The only thing you have to do, is to rewrite the method tokenStream like > this: > > /** Constructs a {@link StandardTokenizer} filtered by a {@link > StandardFilter}, a {@link LowerCaseFilter} and a {@link > StopFilter}. */ > public TokenStream tokenStream(String fieldName, Reader reader) { > TokenStream result = new StandardTokenizer(reader); > result = new StandardFilter(result); > result = new LowerCaseFilter(result); > result = new StopFilter(result, stopSet); > result = new ISOLatin1AccentFilter(result); > return result; > } > > > anorman wrote: >> This looks like exactly what I want. Would I implement this along with >> another analyzer such as the standard or stand alone? Does anyone have >> any >> code examples of implementing such a thing? >> >> Thanks, >> Albert >> >> >> >> >> karl wettin-3 wrote: >> >>> 27 aug 2007 kl. 16.03 skrev anorman: >>> >>> >>>> I have a searchable index of documents which contain french and >>>> spanish >>>> diacritics (è, é, À) etc. I would like to make the content >>>> searchable so >>>> that when a user searches for a word such as "Amèrique" or "Amerique" >>>> (without diacritic) then it returns the same results. >>>> >>>> Has anyone set up something similar? >>>> >>> ISOLatin1AccentFilter >>> >>> -- >>> karl >>> --------------------------------------------------------------------- >>> To unsubscribe, e-mail: [EMAIL PROTECTED] >>> For additional commands, e-mail: [EMAIL PROTECTED] >>> >>> >>> >>> >> >> > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: [EMAIL PROTECTED] > For additional commands, e-mail: [EMAIL PROTECTED] > > > -- View this message in context: http://www.nabble.com/Searching-Diacritics-tf4335454.html#a12353022 Sent from the Lucene - Java Users mailing list archive at Nabble.com. 
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]