Hi all, I am trying to write a program that highlights a searched term and writes the result into a demo.html file. As of now, demo.html shows only a few pages of the book. Is there any way I can make it show the whole book? (Would increasing the fragment size up to the file size help?)
I have attached the program I am using (please use a large XML file, as it runs fine with shorter ones). F1 F1 F1 Thanks in advance.
import org.apache.lucene.search.*; import org.apache.lucene.analysis.*; import org.apache.lucene.document.*; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.search.highlight.*; import org.apache.lucene.search.spans.SpanTermQuery; import java.io.File; import java.io.FileInputStream; import java.io.FileWriter; import java.io.InputStream; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; public class PhraseTestFile { private IndexSearcher searcher; private RAMDirectory directory; @SuppressWarnings("deprecation") public PhraseTestFile() throws Exception { directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer() { public TokenStream tokenStream(String fieldName, Reader reader) { return new LowerCaseTokenizer(reader); } public int getPositionIncrementGap(String fieldName) { return 100; } }; IndexWriter writer = new IndexWriter(directory, analyzer, true); Document doc = new Document(); String text= convertXMLFileToString("/root/Desktop/alldaisybuks/Ten_Days_That_Shook_the_World/Ten_Days_That_Shook_the_World_f1.xml"); System.out.println("The file size is : "+ text.length() ); doc.add(new Field("contents", text, Field.Store.YES,Field.Index.TOKENIZED)); writer.addDocument(doc); writer.optimize(); writer.close(); searcher = new IndexSearcher(directory); String searchit="Committee"; Term t= new Term("contents",searchit); //trying fuzzy query Query query= new 
FuzzyQuery(t); // Try a parsed query Query parsedQuery = new QueryParser("contents", analyzer).parse(searchit); Hits hits = searcher.search(parsedQuery); System.out.println("We found " + hits.length() + " hits."); // Highlight the results CachingTokenFilter tokenStream = new CachingTokenFilter(analyzer.tokenStream( "contents", new StringReader(text))); SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>"); SpanScorer sc = new SpanScorer(parsedQuery, "contents", tokenStream,"contents"); Highlighter highlighter = new Highlighter(formatter, sc); highlighter.setTextFragmenter(new SimpleFragmenter(text.length()));//(new SimpleSpanFragmenter(sc)); tokenStream.reset(); String rv = highlighter.getBestFragments(tokenStream, text, text.length(), "..."); String rv1=highlighter.getBestFragment(tokenStream,text); FileWriter fwriter = new FileWriter("/root/Desktop/demo"); fwriter.write("<html>"); fwriter.write("<style>\n" + ".highlight {\n" + " background: orange;\n" + "}\n" + "</style>"); fwriter.write("<body>"); fwriter.write(rv); fwriter.write("</body></html>"); fwriter.close(); System.out.println(rv1); } public String convertXMLFileToString(String fileName) { try{ DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); InputStream inputStream = new FileInputStream(new File(fileName)); org.w3c.dom.Document doc = documentBuilderFactory.newDocumentBuilder().parse(inputStream); StringWriter stw = new StringWriter(); Transformer serializer = TransformerFactory.newInstance().newTransformer(); serializer.transform(new DOMSource(doc), new StreamResult(stw)); return stw.toString(); } catch (Exception e) { e.printStackTrace(); } return null; } public static void main(String[] args) { System.out.println("Starting..."); try { new PhraseTestFile(); } catch(Exception ex) { ex.printStackTrace(); } } }
--------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org