Hi, I'm trying to use Lucene in my Android project. To start with, I've created a small demo app. It works with .txt files, but I need to work with .pdf files. Analyzing my code, I understand that it will have some issues with PDFs due to memory management. However, the question I want to ask here is not related to memory but to hit highlighting. It works now, but using the `Highlighter` class with PDFs is not what I want. To implement my own highlighting, I need to know some kind of coordinates of the found words in the text. How can I get them? I'm using Lucene 4.4.0, while all of the examples, like here, are for much older versions. Here is my code:
public class MainActivity extends Activity { //----------------------------------------------------------------------------------------------------- // // Constants // //----------------------------------------------------------------------------------------------------- public static final String FIELD_PATH = "path"; public static final String FIELD_CONTENTS = "contents"; //----------------------------------------------------------------------------------------------------- // // Fields // //----------------------------------------------------------------------------------------------------- private EditText mEditText; private TextView mTextView; //----------------------------------------------------------------------------------------------------- // // Methods // //----------------------------------------------------------------------------------------------------- @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); findViews(); initViews(); createIndex(); } private void findViews() { mEditText = (EditText) findViewById(R.id.activity_main_edittext); mTextView = (TextView) findViewById(R.id.activity_main_textview); } private void initViews() { mEditText.setOnEditorActionListener(mEditorActionListener); } private void performSearch(String searchString) { try { Directory directory = NIOFSDirectory.open(getExternalFilesDir(null)); DirectoryReader ireader = DirectoryReader.open(directory); IndexSearcher isearcher = new IndexSearcher(ireader); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); QueryParser queryParser = new AnalyzingQueryParser(Version.LUCENE_44, FIELD_CONTENTS, analyzer); Query query = queryParser.parse(searchString); TopDocs topDocs = isearcher.search(query, null, 1000); ScoreDoc[] docs = topDocs.scoreDocs; StringBuilder result = new StringBuilder(); StringBuilder debugInfo = new StringBuilder(); debugInfo.append("Number of hits: "); 
debugInfo.append(docs.length); debugInfo.append("\n"); // Iterate through the results: for (int i = 0; i < docs.length; i++) { Document hitDoc = isearcher.doc(docs[i].doc); String path = hitDoc.get(FIELD_PATH); debugInfo.append("Path: "); debugInfo.append(path); debugInfo.append("\n"); result.append("-------------------------------------------------------"); result.append("File: "); result.append(path); result.append("-------------------------------------------------------"); result.append("<br>"); String content = hitDoc.get(FIELD_CONTENTS); QueryScorer scorer = new QueryScorer(query); Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("", ""), scorer); highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, Integer.MAX_VALUE)); String highlighted = highlighter.getBestFragment(analyzer, FIELD_CONTENTS, content); result.append("-------------------------------------------------------"); result.append("Contents: "); result.append("-------------------------------------------------------"); result.append("<br>"); result.append(highlighted); result.append("<br><br><br>"); } //not working /*PostingsHighlighter highlighter = new PostingsHighlighter(); String highlights[] = highlighter.highlight(FIELD_CONTENTS, query, isearcher, topDocs);*/ mTextView.setText(Html.fromHtml(result.toString())); Log.d(getClass().getSimpleName(), debugInfo.toString()); } catch (Exception e) { e.printStackTrace(); Log.e(getClass().getSimpleName(), e.getMessage()); } } private void createIndex() { try { //Create directory for index. Directory indexDirectory = new NIOFSDirectory(getExternalFilesDir(null)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44, analyzer); config.setOpenMode(OpenMode.CREATE); IndexWriter indexWriter = new IndexWriter(indexDirectory, config); //Loop through files in specified directory and adding them to index. 
File dir = new File(Environment.getExternalStorageDirectory() + "/lucene"); File[] files = dir.listFiles(); for (File file : files) { Document document = new Document(); { FieldType fieldType = new FieldType(TextField.TYPE_STORED); fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); String path = file.getCanonicalPath(); document.add(new Field(FIELD_PATH, path, fieldType)); } { FieldType fieldType = new FieldType(TextField.TYPE_STORED); fieldType.setIndexed(true); fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); fieldType.setStored(true); fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); fieldType.setStoreTermVectorOffsets(true); String content = readFully(new FileReader(file)); //we can't store Reader objects but we need to be able to access the content for highlighting document.add(new Field(FIELD_CONTENTS, content, fieldType)); } indexWriter.addDocument(document); } indexWriter.close(); } catch (Exception e) { e.printStackTrace(); } } public static String readFully(Reader reader) throws IOException { char[] arr = new char[8*1024]; // 8K at a time StringBuffer buf = new StringBuffer(); int numChars; while ((numChars = reader.read(arr, 0, arr.length)) > 0) { buf.append(arr, 0, numChars); } return buf.toString(); } @Override public boolean onCreateOptionsMenu(Menu menu) { getMenuInflater().inflate(R.menu.main, menu); return true; } //----------------------------------------------------------------------------------------------------- // // Listeners // //----------------------------------------------------------------------------------------------------- private OnEditorActionListener mEditorActionListener = new OnEditorActionListener() { @Override public boolean onEditorAction(TextView v, int actionId, KeyEvent event) { if (actionId == EditorInfo.IME_ACTION_SEARCH) { performSearch(v.getText().toString()); return true; } return false; } }; } So how can I get hit coordinates and maybe 
you have any other advice about what I'm doing wrong? This is a rather common task, I think, so it must be rather simple. -- View this message in context: http://lucene.472066.n3.nabble.com/How-to-get-hits-coordinates-in-Lucene-4-4-0-tp4083913.html Sent from the Lucene - Java Users mailing list archive at Nabble.com. --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org