Hi Erik, here is what I used: cd /opt/dynamo/prod/hww-doc/hww && java org.apache.lucene.demo.IndexHTML -create -index help/index help
-----Original Message----- From: Erik Hatcher [mailto:[EMAIL PROTECTED] Sent: Wednesday, March 30, 2005 4:01 PM To: java-user@lucene.apache.org Subject: Re: HTML pages highlighter How did you index "contents"? If you did not use a stored field type, then that is the issue. Erik On Mar 30, 2005, at 12:31 PM, Yagnesh Shah wrote: > Hello Lucene-User, > Has anyone tried to do highlighting with HTML pages? > > I am trying to do this using the demo example from Keld H. Hansen's article > "Unweaving a Tangled Web HTMLParser and Lucene" but I am getting a > "null" value for text at line #47. Any ideas? > > 1 package org.apache.lucene.search.highlight; > 2 > 3 import java.io.StringReader; > 4 > 5 import org.apache.lucene.analysis.Analyzer; > 6 import org.apache.lucene.analysis.TokenStream; > 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; > 8 import org.apache.lucene.queryParser.QueryParser; > 9 import org.apache.lucene.search.Hits; > 10 import org.apache.lucene.search.IndexSearcher; > 11 import org.apache.lucene.search.Query; > 12 import org.apache.lucene.search.highlight.Formatter; > 13 import org.apache.lucene.search.highlight.Highlighter; > 14 import org.apache.lucene.search.highlight.QueryScorer; > 15 import org.apache.lucene.search.highlight.SimpleFragmenter; > 16 > 17 public class Searcher { > 18 > 19 static Query query; > 20 static Hits hits; > 21 > 22 private static final String FIELD_NAME = "contents"; > 23 private static final String indexDir = > "/opt/dynamo/prod/hww-doc/hww/help/index"; > 24 > 25 private static Analyzer analyzer = new StandardAnalyzer(); > 26 > 27 public static void main(String[] args) throws Exception { > 28 > 29 IndexSearcher is = new IndexSearcher(indexDir); > 30 String searchCriteria = "scholarly"; > 31 query = QueryParser.parse(searchCriteria, "contents", > analyzer); > 32 > 33 hits = is.search(query); > 34 System.out.println("found in: " + query > +"\nhits-length:" +hits.length()); > 35 > 36 doStandardHighlights(); > 37 > 38 is.close(); 
> 39 } > 40 > 41 static void doStandardHighlights() throws Exception { > 42 Highlighter highlighter = new Highlighter(new > MyBolder(), new QueryScorer(query)); > 43 System.out.println("Highlighter: " + highlighter > +"\nhits-length:" +hits.length()); > 44 highlighter.setTextFragmenter(new SimpleFragmenter(20)); > 45 for (int i = 0; i < hits.length(); i++) { > 46 System.out.println("URL " + (i + 1) + ": " + > hits.doc(i).getField("path").stringValue()); > 47 String text = hits.doc(i).get("FIELD_NAME"); > 48 int maxNumFragmentsRequired = 2; > 49 String fragmentSeparator = "..."; > 50 TokenStream tokenStream = > analyzer.tokenStream(FIELD_NAME, new StringReader(text)); > 51 > 52 String result = > 53 highlighter.getBestFragments( > 54 tokenStream, > 55 text, > 56 maxNumFragmentsRequired, > 57 fragmentSeparator); > 58 System.out.println("\tfound in: " + result); > 59 } > 60 } > 61 > 62 private static class MyBolder implements Formatter { > 63 public String highlightTerm(String originalText , > TokenGroup group) > 64 { > 65 if(group.getTotalScore()<=0) > 66 { > 67 return originalText; > 68 } > 69 return "<b>" + originalText + "</b>"; > 70 } > 71 } > 72 > 73 } > > Yagnesh N. Shah > Senior Technology Engineer > CS Dept., 4th Floor > H. W. Wilson > 950 University Avenue, > Bronx NY 10452 > (718) 588 8400 x2721 > http://www.hwwilson.com > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: [EMAIL PROTECTED] > For additional commands, e-mail: [EMAIL PROTECTED] --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED] --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]