How did you index "contents"? If you did not use a stored field type, then that is the issue.

        Erik

On Mar 30, 2005, at 12:31 PM, Yagnesh Shah wrote:

Hello Lucene-User,
        Has anyone tried to do highlighting with HTML pages?

I am trying to do this using the demo example from Keld H. Hansen's article "Unweaving a Tangled Web HTMLParser and Lucene", but I am getting a "null" value for text at line #47. Any ideas?

1 package org.apache.lucene.search.highlight;
2
3 import java.io.StringReader;
4
5 import org.apache.lucene.analysis.Analyzer;
6 import org.apache.lucene.analysis.TokenStream;
7 import org.apache.lucene.analysis.standard.StandardAnalyzer;
8 import org.apache.lucene.queryParser.QueryParser;
9 import org.apache.lucene.search.Hits;
10 import org.apache.lucene.search.IndexSearcher;
11 import org.apache.lucene.search.Query;
12 import org.apache.lucene.search.highlight.Formatter;
13 import org.apache.lucene.search.highlight.Highlighter;
14 import org.apache.lucene.search.highlight.QueryScorer;
15 import org.apache.lucene.search.highlight.SimpleFragmenter;
16
/**
 * Command-line demo that searches a Lucene index and prints highlighted
 * fragments for each hit.
 *
 * main opens the index at indexDir, parses the fixed search term "scholarly"
 * against the "contents" field, stores the resulting query and hits in the
 * static fields below, and then calls doStandardHighlights() to print each
 * hit's "path" field followed by its highlighted fragments.
 */
17 public class Searcher {
18
// Assigned in main and read by doStandardHighlights; shared via static state.
19 static Query query;
20 static Hits hits;
21
// Name of the field whose text is analyzed for highlighting.
22 private static final String FIELD_NAME = "contents";
// Location of the index handed to IndexSearcher.
23 private static final String indexDir = "/opt/dynamo/prod/hww-doc/hww/help/index";
24
// Same analyzer instance is used for query parsing and for re-tokenizing hit text.
25 private static Analyzer analyzer = new StandardAnalyzer();
26
/**
 * Runs the search and prints the results.
 *
 * @param args not used
 * @throws Exception anything thrown by the search or highlighting calls is propagated
 */
27 public static void main(String[] args) throws Exception {
28
29 IndexSearcher is = new IndexSearcher(indexDir);
30 String searchCriteria = "scholarly";
31 query = QueryParser.parse(searchCriteria, "contents", analyzer); // repeats the literal "contents" rather than using FIELD_NAME
32
33 hits = is.search(query);
34 System.out.println("found in: " + query +"\nhits-length:" +hits.length());
35
36 doStandardHighlights();
37
38 is.close(); // not reached if anything above throws; there is no try/finally
39 }
40
/**
 * For every hit, prints its "path" field and then the fragments returned by
 * the highlighter for that hit's text, joined by "...".
 *
 * Reads the static query and hits fields, so main must have populated them first.
 *
 * @throws Exception anything thrown by the Lucene or highlighter calls is propagated
 */
41 static void doStandardHighlights() throws Exception {
42 Highlighter highlighter = new Highlighter(new MyBolder(), new QueryScorer(query));
43 System.out.println("Highlighter: " + highlighter +"\nhits-length:" +hits.length());
44 highlighter.setTextFragmenter(new SimpleFragmenter(20)); // 20 is presumably the fragment size; confirm against SimpleFragmenter docs
45 for (int i = 0; i < hits.length(); i++) {
46 System.out.println("URL " + (i + 1) + ": " + hits.doc(i).getField("path").stringValue());
47 String text = hits.doc(i).get("FIELD_NAME"); // NOTE(review): "FIELD_NAME" is a quoted string literal, not the FIELD_NAME constant ("contents"), so this asks for a field literally named FIELD_NAME; likely why text is null. Confirm.
48 int maxNumFragmentsRequired = 2;
49 String fragmentSeparator = "...";
50 TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text)); // NOTE(review): text is not null-checked before being wrapped in a StringReader
51
52 String result =
53 highlighter.getBestFragments(
54 tokenStream,
55 text,
56 maxNumFragmentsRequired,
57 fragmentSeparator);
58 System.out.println("\tfound in: " + result);
59 }
60 }
61
/**
 * Formatter that wraps text in &lt;b&gt;...&lt;/b&gt; when its token group has a
 * total score above zero, and returns the text unchanged otherwise.
 */
62 private static class MyBolder implements Formatter {
63 public String highlightTerm(String originalText , TokenGroup group)
64 {
65 if(group.getTotalScore()<=0) // a total score of zero or less means no highlighting is applied
66 {
67 return originalText;
68 }
69 return "<b>" + originalText + "</b>";
70 }
71 }
72
73 }


Yagnesh N. Shah
Senior Technology Engineer
CS Dept., 4th Floor
H. W. Wilson
950 University Avenue,
Bronx NY 10452
(718) 588 8400 x2721
http://www.hwwilson.com



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]



Reply via email to