Hi all,
 I am writing a program that highlights a searched term and writes
the result into a demo.html file. As of now this demo.html can show
only a few pages of the book. Is there any way I can use it to show the
whole book? (Can increasing the fragment size up to the file size help?)

        I have attached the program I am using (please use a large XML
file, as it runs fine with shorter ones).

Thanks in advance.
import org.apache.lucene.search.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.spans.SpanTermQuery;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

public class PhraseTestFile {

    /** Searches the in-memory index; opened (and closed) by the constructor. */
    private IndexSearcher searcher;

    /** In-memory index holding the single XML document. */
    private RAMDirectory directory;

    /**
     * Indexes one XML file into a {@link RAMDirectory}, searches it for a
     * fixed term, highlights every hit across the WHOLE document and writes
     * the highlighted text as an HTML page to {@code /root/Desktop/demo}.
     *
     * @throws Exception if reading the XML file, indexing, searching,
     *                   highlighting or writing the output file fails
     */
    @SuppressWarnings("deprecation")
    public PhraseTestFile() throws Exception {
        directory = new RAMDirectory();

        // Anonymous analyzer: tokenizes with a plain lower-casing tokenizer
        // and inserts a large position gap between field instances so phrase
        // queries do not match across value boundaries.
        Analyzer analyzer = new StandardAnalyzer() {
            public TokenStream tokenStream(String fieldName, Reader reader) {
                return new LowerCaseTokenizer(reader);
            }

            public int getPositionIncrementGap(String fieldName) {
                return 100;
            }
        };

        IndexWriter writer = new IndexWriter(directory, analyzer, true);

        String text = convertXMLFileToString(
                "/root/Desktop/alldaisybuks/Ten_Days_That_Shook_the_World/Ten_Days_That_Shook_the_World_f1.xml");
        // convertXMLFileToString returns null on failure; fail fast with a
        // clear message instead of a NullPointerException on text.length().
        if (text == null) {
            throw new IllegalStateException("Could not read or parse the XML input file");
        }
        System.out.println("The file size is : " + text.length());

        Document doc = new Document();
        doc.add(new Field("contents", text, Field.Store.YES, Field.Index.TOKENIZED));
        writer.addDocument(doc);

        writer.optimize();
        writer.close();

        searcher = new IndexSearcher(directory);
        String searchit = "Committee";

        // Parse the query with the same analyzer that was used for indexing.
        Query parsedQuery = new QueryParser("contents", analyzer).parse(searchit);

        Hits hits = searcher.search(parsedQuery);
        System.out.println("We found " + hits.length() + " hits.");

        // Re-analyze the full document text for highlighting; the caching
        // filter lets the scorer and the highlighter share one token stream.
        CachingTokenFilter tokenStream = new CachingTokenFilter(
                analyzer.tokenStream("contents", new StringReader(text)));

        SimpleHTMLFormatter formatter =
                new SimpleHTMLFormatter("<span class=\"highlight\">", "</span>");
        SpanScorer sc = new SpanScorer(parsedQuery, "contents", tokenStream, "contents");

        Highlighter highlighter = new Highlighter(formatter, sc);
        // One fragment as large as the whole document, so nothing is cut off.
        highlighter.setTextFragmenter(new SimpleFragmenter(text.length()));
        // FIX: by default the Highlighter only analyzes roughly the first
        // 50 KB of a document, which is why only a few pages of the book
        // were highlighted.  Raise the limit to cover the whole text.
        // (On Lucene versions before 2.9 this method is called
        // setMaxDocBytesToAnalyze.)
        highlighter.setMaxDocCharsToAnalyze(text.length());
        tokenStream.reset();

        String rv = highlighter.getBestFragments(tokenStream, text, text.length(), "...");
        String rv1 = highlighter.getBestFragment(tokenStream, text);

        // Always close the output file, even if one of the write calls fails.
        FileWriter fwriter = new FileWriter("/root/Desktop/demo");
        try {
            fwriter.write("<html>");
            fwriter.write("<style>\n"
                    + ".highlight {\n"
                    + " background: orange;\n"
                    + "}\n"
                    + "</style>");
            fwriter.write("<body>");
            fwriter.write(rv);
            fwriter.write("</body></html>");
        } finally {
            fwriter.close();
        }
        System.out.println(rv1);

        searcher.close();
    }

    /**
     * Parses an XML file with DOM and serializes it back into a single
     * string (the identity transform).
     *
     * @param fileName absolute path of the XML file to read
     * @return the serialized document, or {@code null} if reading or
     *         parsing failed (the error is printed to stderr)
     */
    public String convertXMLFileToString(String fileName) {
        InputStream inputStream = null;
        try {
            DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
            // NOTE(review): external entity resolution is left enabled
            // because DAISY book XML typically relies on DTDs; if untrusted
            // XML is ever fed in, disable external entities here to prevent
            // XXE -- confirm against the actual input files.
            inputStream = new FileInputStream(new File(fileName));
            org.w3c.dom.Document doc = documentBuilderFactory.newDocumentBuilder().parse(inputStream);
            StringWriter stw = new StringWriter();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.transform(new DOMSource(doc), new StreamResult(stw));
            return stw.toString();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close the file handle even when parsing fails.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (Exception ignored) {
                    // nothing sensible to do if close itself fails
                }
            }
        }
        return null;
    }

    /**
     * Entry point: runs the index/search/highlight demo once and prints
     * any failure to stderr.
     */
    public static void main(String[] args) {
        System.out.println("Starting...");
        try {
            new PhraseTestFile();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org

Reply via email to