Repository: incubator-zeppelin Updated Branches: refs/heads/master d4396887d -> 82de508d7
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java new file mode 100644 index 0000000..7f9cbbd --- /dev/null +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java @@ -0,0 +1,391 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.zeppelin.search; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.search.highlight.TokenSources; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.zeppelin.notebook.Note; +import org.apache.zeppelin.notebook.Paragraph; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; + +/** + * Search (both, indexing and query) the notebooks using Lucene. 
+ * + * Query is thread-safe, as creates new IndexReader every time. + * Index is thread-safe, as re-uses single IndexWriter, which is thread-safe. + */ +public class LuceneSearch implements SearchService { + private static final Logger LOG = LoggerFactory.getLogger(LuceneSearch.class); + + private static final String SEARCH_FIELD = "contents"; + static final String PARAGRAPH = "paragraph"; + static final String ID_FIELD = "id"; + + Directory ramDirectory; + Analyzer analyzer; + IndexWriterConfig iwc; + IndexWriter writer; + + public LuceneSearch() { + ramDirectory = new RAMDirectory(); + analyzer = new StandardAnalyzer(); + iwc = new IndexWriterConfig(analyzer); + try { + writer = new IndexWriter(ramDirectory, iwc); + } catch (IOException e) { + LOG.error("Failed to reate new IndexWriter", e); + } + } + + /* (non-Javadoc) + * @see org.apache.zeppelin.search.Search#query(java.lang.String) + */ + @Override + public List<Map<String, String>> query(String queryStr) { + if (null == ramDirectory) { + throw new IllegalStateException( + "Something went wrong on instance creation time, index dir is null"); + } + List<Map<String, String>> result = Collections.emptyList(); + try (IndexReader indexReader = DirectoryReader.open(ramDirectory)) { + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + Analyzer analyzer = new StandardAnalyzer(); + QueryParser parser = new QueryParser(SEARCH_FIELD, analyzer); + + Query query = parser.parse(queryStr); + LOG.debug("Searching for: " + query.toString(SEARCH_FIELD)); + + SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); + Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); + + result = doSearch(indexSearcher, query, analyzer, highlighter); + indexReader.close(); + } catch (IOException e) { + LOG.error("Failed to open index dir {}, make sure indexing finished OK", ramDirectory, e); + } catch (ParseException e) { + LOG.error("Failed to parse query " + queryStr, e); + } + return result; 
+ } + + private List<Map<String, String>> doSearch(IndexSearcher searcher, Query query, + Analyzer analyzer, Highlighter highlighter) { + List<Map<String, String>> matchingParagraphs = Lists.newArrayList(); + ScoreDoc[] hits; + try { + hits = searcher.search(query, 20).scoreDocs; + for (int i = 0; i < hits.length; i++) { + LOG.debug("doc={} score={}", hits[i].doc, hits[i].score); + + int id = hits[i].doc; + Document doc = searcher.doc(id); + String path = doc.get(ID_FIELD); + if (path != null) { + LOG.debug((i + 1) + ". " + path); + String title = doc.get("title"); + if (title != null) { + LOG.debug(" Title: {}", doc.get("title")); + } + + String text = doc.get(SEARCH_FIELD); + TokenStream tokenStream = TokenSources.getTokenStream(searcher.getIndexReader(), id, + SEARCH_FIELD, analyzer); + TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, true, 3); + LOG.debug(" {} fragments found for query '{}'", frag.length, query); + for (int j = 0; j < frag.length; j++) { + if ((frag[j] != null) && (frag[j].getScore() > 0)) { + LOG.debug(" Fragment: {}", frag[j].toString()); + } + } + String fragment = (frag != null && frag.length > 0) ? frag[0].toString() : ""; + + matchingParagraphs.add(ImmutableMap.of("id", path, // <noteId>/paragraph/<paragraphId> + "name", title, "snippet", fragment, "text", text)); + } else { + LOG.info("{}. 
No {} for this document", i + 1, ID_FIELD); + } + } + } catch (IOException | InvalidTokenOffsetsException e) { + LOG.error("Exception on searching for {}", query, e); + } + return matchingParagraphs; + } + + /* (non-Javadoc) + * @see org.apache.zeppelin.search.Search#updateIndexDoc(org.apache.zeppelin.notebook.Note) + */ + @Override + public void updateIndexDoc(Note note) throws IOException { + updateIndexNoteName(note); + for (Paragraph p: note.getParagraphs()) { + updateIndexParagraph(note, p); + } + } + + private void updateIndexNoteName(Note note) throws IOException { + String noteName = note.getName(); + String noteId = note.getId(); + LOG.debug("Indexing Notebook {}, '{}'", noteId, noteName); + if (null == noteName || noteName.isEmpty()) { + LOG.debug("Skipping empty notebook name"); + return; + } + updateDoc(noteId, noteName, null); + } + + private void updateIndexParagraph(Note note, Paragraph p) throws IOException { + if (p.getText() == null) { + LOG.debug("Skipping empty paragraph"); + return; + } + updateDoc(note.getId(), note.getName(), p); + } + + /** + * Updates index for the given note: either note.name or a paragraph. If + * paragraph is <code>null</code> - updates only for the note.name + * + * @param noteId + * @param noteName + * @param p + * @throws IOException + */ + private void updateDoc(String noteId, String noteName, Paragraph p) throws IOException { + String id = formatId(noteId, p); + Document doc = newDocument(id, noteName, p); + try { + writer.updateDocument(new Term(ID_FIELD, id), doc); + writer.commit(); + } catch (IOException e) { + LOG.error("Failed to updaet index of notebook {}", noteId, e); + } + } + + /** + * If paragraph is not null, id is <noteId>/paragraph/<paragraphId>, + * otherwise it's just <noteId>. 
+ */ + static String formatId(String noteId, Paragraph p) { + String id = noteId; + if (null != p) { + id = Joiner.on('/').join(id, PARAGRAPH, p.getId()); + } + return id; + } + + static String formatDeleteId(String noteId, Paragraph p) { + String id = noteId; + if (null != p) { + id = Joiner.on('/').join(id, PARAGRAPH, p.getId()); + } else { + id = id + "*"; + } + return id; + } + + /** + * If paragraph is not null, indexes code in the paragraph, otherwise indexes + * the notebook name. + * + * @param id id of the document, different for Note name and paragraph + * @param noteName name of the note + * @param p paragraph + * @return + */ + private Document newDocument(String id, String noteName, Paragraph p) { + Document doc = new Document(); + + Field pathField = new StringField(ID_FIELD, id, Field.Store.YES); + doc.add(pathField); + doc.add(new StringField("title", noteName, Field.Store.YES)); + + if (null != p) { + doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES)); + Date date = p.getDateStarted() != null ? 
p.getDateStarted() : p.getDateCreated(); + doc.add(new LongField("modified", date.getTime(), Field.Store.NO)); + } else { + doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES)); + } + return doc; + } + + /* (non-Javadoc) + * @see org.apache.zeppelin.search.Search#addIndexDocs(java.util.Collection) + */ + @Override + public void addIndexDocs(Collection<Note> collection) { + int docsIndexed = 0; + long start = System.nanoTime(); + try { + for (Note note : collection) { + addIndexDocAsync(note); + docsIndexed++; + } + } catch (IOException e) { + LOG.error("Failed to index all Notebooks", e); + } finally { + try { // save what's been indexed, even if not full collection + writer.commit(); + } catch (IOException e) { + LOG.error("Failed to save index", e); + } + long end = System.nanoTime(); + LOG.info("Indexing {} notebooks took {}ms", docsIndexed, + TimeUnit.NANOSECONDS.toMillis(end - start)); + } + } + + /* (non-Javadoc) + * @see org.apache.zeppelin.search.Search#addIndexDoc(org.apache.zeppelin.notebook.Note) + */ + @Override + public void addIndexDoc(Note note) { + try { + addIndexDocAsync(note); + writer.commit(); + } catch (IOException e) { + LOG.error("Failed to add note {} to index", note, e); + } + } + + /** + * Indexes the given notebook, but does not commit changes. 
+ * + * @param note + * @throws IOException + */ + private void addIndexDocAsync(Note note) throws IOException { + indexNoteName(writer, note.getId(), note.getName()); + for (Paragraph doc : note.getParagraphs()) { + if (doc.getText() == null) { + LOG.debug("Skipping empty paragraph"); + continue; + } + indexDoc(writer, note.getId(), note.getName(), doc); + } + } + + /* (non-Javadoc) + * @see org.apache.zeppelin.search.Search#deleteIndexDocs(org.apache.zeppelin.notebook.Note) + */ + @Override + public void deleteIndexDocs(Note note) { + deleteDoc(note, null); + } + + /* (non-Javadoc) + * @see org.apache.zeppelin.search.Search + * #deleteIndexDoc(org.apache.zeppelin.notebook.Note, org.apache.zeppelin.notebook.Paragraph) + */ + @Override + public void deleteIndexDoc(Note note, Paragraph p) { + deleteDoc(note, p); + } + + private void deleteDoc(Note note, Paragraph p) { + if (null == note) { + LOG.error("Trying to delete note by reference to NULL"); + return; + } + String fullNoteOrJustParagraph = formatDeleteId(note.getId(), p); + LOG.debug("Deleting note {}, out of: {}", note.getId(), writer.numDocs()); + try { + writer.deleteDocuments(new WildcardQuery(new Term(ID_FIELD, fullNoteOrJustParagraph))); + writer.commit(); + } catch (IOException e) { + LOG.error("Failed to delete {} from index by '{}'", note, fullNoteOrJustParagraph, e); + } + LOG.debug("Done, index contains {} docs now" + writer.numDocs()); + } + + /* (non-Javadoc) + * @see org.apache.zeppelin.search.Search#close() + */ + @Override + public void close() { + try { + writer.close(); + } catch (IOException e) { + LOG.error("Failed to .close() the notebook index", e); + } + } + + /** + * Indexes a notebook name + * + * @throws IOException + */ + private void indexNoteName(IndexWriter w, String noteId, String noteName) throws IOException { + LOG.debug("Indexing Notebook {}, '{}'", noteId, noteName); + if (null == noteName || noteName.isEmpty()) { + LOG.debug("Skipping empty notebook name"); + return; + } + 
indexDoc(w, noteId, noteName, null); + } + + /** + * Indexes a single document: + * - code of the paragraph (if non-null) + * - or just a note name + */ + private void indexDoc(IndexWriter w, String noteId, String noteName, Paragraph p) + throws IOException { + String id = formatId(noteId, p); + Document doc = newDocument(id, noteName, p); + w.addDocument(doc); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java new file mode 100644 index 0000000..64f2b75 --- /dev/null +++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.zeppelin.search; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.apache.zeppelin.notebook.Note; +import org.apache.zeppelin.notebook.Paragraph; + +/** + * Search (both, indexing and query) the notebooks. + * + * Intended to have multiple implementation, i.e: + * - local Lucene (in-memory, on-disk) + * - remote Elasticsearch + */ +public interface SearchService { + + /** + * Full-text search in all the notebooks + * + * @param queryStr a query + * @return A list of matching paragraphs (id, text, snippet w/ highlight) + */ + public List<Map<String, String>> query(String queryStr); + + /** + * Updates all documents in index for the given note: + * - name + * - all paragraphs + * + * @param note a Note to update index for + * @throws IOException + */ + public void updateIndexDoc(Note note) throws IOException; + + /** + * Indexes full collection of notes: all the paragraphs + Note names + * + * @param collection of Notes + */ + public void addIndexDocs(Collection<Note> collection); + + /** + * Indexes the given notebook. 
+ * + * @throws IOException If there is a low-level I/O error + */ + public void addIndexDoc(Note note); + + /** + * Deletes all docs of the given Note from the index + */ + public void deleteIndexDocs(Note note); + + /** + * Deletes the doc for a given paragraph of the note + * + * @param note + * @param p + * @throws IOException + */ + public void deleteIndexDoc(Note note, Paragraph p); + + /** + * Frees the resources used by the index + */ + public void close(); + +} http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java index ee35773..e2d1aac 100644 --- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; import java.io.File; import java.io.IOException; @@ -45,6 +46,8 @@ import org.apache.zeppelin.scheduler.Job; import org.apache.zeppelin.scheduler.Job.Status; import org.apache.zeppelin.scheduler.JobListener; import org.apache.zeppelin.scheduler.SchedulerFactory; +import org.apache.zeppelin.search.SearchService; +import org.apache.zeppelin.search.LuceneSearch; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -84,8 +87,9 @@ public class NotebookTest implements JobListenerFactory{ factory = new InterpreterFactory(conf, new InterpreterOption(false), null); + SearchService search = mock(SearchService.class); notebookRepo = new VFSNotebookRepo(conf); - notebook = new Notebook(conf, notebookRepo, 
schedulerFactory, factory, this); + notebook = new Notebook(conf, notebookRepo, schedulerFactory, factory, this, search); } @After @@ -170,7 +174,8 @@ public class NotebookTest implements JobListenerFactory{ p1.setText("hello world"); note.persist(); - Notebook notebook2 = new Notebook(conf, notebookRepo, schedulerFactory, new InterpreterFactory(conf, null), this); + Notebook notebook2 = new Notebook( + conf, notebookRepo, schedulerFactory, new InterpreterFactory(conf, null), this, null); assertEquals(1, notebook2.getAllNotes().size()); } http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java index 64d9b32..6d8c50d 100644 --- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java @@ -19,6 +19,7 @@ package org.apache.zeppelin.notebook.repo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; import java.io.File; import java.io.IOException; @@ -39,6 +40,8 @@ import org.apache.zeppelin.scheduler.Job; import org.apache.zeppelin.scheduler.Job.Status; import org.apache.zeppelin.scheduler.JobListener; import org.apache.zeppelin.scheduler.SchedulerFactory; +import org.apache.zeppelin.search.SearchService; +import org.apache.zeppelin.search.LuceneSearch; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -86,8 +89,9 @@ public class NotebookRepoSyncTest implements JobListenerFactory { factory = new InterpreterFactory(conf, new InterpreterOption(false), null); + SearchService 
search = mock(SearchService.class); notebookRepoSync = new NotebookRepoSync(conf); - notebookSync = new Notebook(conf, notebookRepoSync, schedulerFactory, factory, this); + notebookSync = new Notebook(conf, notebookRepoSync, schedulerFactory, factory, this, search); } @After http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java index c3bb3a0..80d1174 100644 --- a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java @@ -18,6 +18,7 @@ package org.apache.zeppelin.notebook.repo; import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; import java.io.File; import java.io.IOException; @@ -32,20 +33,19 @@ import org.apache.zeppelin.interpreter.mock.MockInterpreter1; import org.apache.zeppelin.notebook.JobListenerFactory; import org.apache.zeppelin.notebook.Note; import org.apache.zeppelin.notebook.Notebook; -import org.apache.zeppelin.notebook.NotebookTest; import org.apache.zeppelin.notebook.Paragraph; import org.apache.zeppelin.scheduler.JobListener; import org.apache.zeppelin.scheduler.SchedulerFactory; -import org.apache.zeppelin.scheduler.Job.Status; +import org.apache.zeppelin.search.SearchService; +import org.apache.zeppelin.search.LuceneSearch; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class VFSNotebookRepoTest implements JobListenerFactory{ - private static final Logger logger = LoggerFactory.getLogger(NotebookTest.class); - +public class 
VFSNotebookRepoTest implements JobListenerFactory { + private static final Logger LOG = LoggerFactory.getLogger(VFSNotebookRepoTest.class); private ZeppelinConfiguration conf; private SchedulerFactory schedulerFactory; private Notebook notebook; @@ -53,16 +53,15 @@ public class VFSNotebookRepoTest implements JobListenerFactory{ private InterpreterFactory factory; private File mainZepDir; - private File mainNotebookDir; @Before public void setUp() throws Exception { - String zpath = System.getProperty("java.io.tmpdir")+"/ZeppelinLTest_"+System.currentTimeMillis(); + String zpath = System.getProperty("java.io.tmpdir") + "/ZeppelinLTest_" + System.currentTimeMillis(); mainZepDir = new File(zpath); mainZepDir.mkdirs(); new File(mainZepDir, "conf").mkdirs(); - String mainNotePath = zpath+"/notebook"; + String mainNotePath = zpath + "/notebook"; mainNotebookDir = new File(mainNotePath); mainNotebookDir.mkdirs(); @@ -79,15 +78,15 @@ public class VFSNotebookRepoTest implements JobListenerFactory{ this.schedulerFactory = new SchedulerFactory(); factory = new InterpreterFactory(conf, new InterpreterOption(false), null); + SearchService search = mock(SearchService.class); notebookRepo = new VFSNotebookRepo(conf); - notebook = new Notebook(conf, notebookRepo, schedulerFactory, factory, this); + notebook = new Notebook(conf, notebookRepo, schedulerFactory, factory, this, search); } @After public void tearDown() throws Exception { - //FileUtils.deleteDirectory(mainZepDir); if (!FileUtils.deleteQuietly(mainZepDir)) { - logger.error("Failed to delete {} ", mainZepDir.getName()); + LOG.error("Failed to delete {} ", mainZepDir.getName()); } } @@ -97,7 +96,7 @@ public class VFSNotebookRepoTest implements JobListenerFactory{ note.getNoteReplLoader().setInterpreters(factory.getDefaultInterpreterSettingList()); Paragraph p1 = note.addParagraph(); - Map config = p1.getConfig(); + Map<String, Object> config = p1.getConfig(); config.put("enabled", true); p1.setConfig(config); 
p1.setText("%mock1 hello world"); http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java ---------------------------------------------------------------------- diff --git a/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java b/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java new file mode 100644 index 0000000..f74d95e --- /dev/null +++ b/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.zeppelin.search; + +import static com.google.common.truth.Truth.assertThat; +import static org.mockito.Mockito.*; +import static org.apache.zeppelin.search.LuceneSearch.formatId; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.apache.zeppelin.interpreter.InterpreterSetting; +import org.apache.zeppelin.notebook.Note; +import org.apache.zeppelin.notebook.NoteInterpreterLoader; +import org.apache.zeppelin.notebook.Paragraph; +import org.apache.zeppelin.notebook.repo.NotebookRepo; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableList; + +public class LuceneSearchTest { + + private static NoteInterpreterLoader replLoaderMock; + private static NotebookRepo notebookRepoMock; + private SearchService notebookIndex; + + @BeforeClass + public static void beforeStartUp() { + notebookRepoMock = mock(NotebookRepo.class); + replLoaderMock = mock(NoteInterpreterLoader.class); + + when(replLoaderMock.getInterpreterSettings()) + .thenReturn(ImmutableList.<InterpreterSetting>of()); + } + + @Before + public void startUp() { + notebookIndex = new LuceneSearch(); + } + + @After + public void shutDown() { + notebookIndex.close(); + } + + @Test public void canIndexNotebook() { + //give + Note note1 = newNoteWithParapgraph("Notebook1", "test"); + Note note2 = newNoteWithParapgraph("Notebook2", "not test"); + List<Note> notebook = Arrays.asList(note1, note2); + + //when + notebookIndex.addIndexDocs(notebook); + } + + @Test public void canIndexAndQuery() { + //given + Note note1 = newNoteWithParapgraph("Notebook1", "test"); + Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all"); + notebookIndex.addIndexDocs(Arrays.asList(note1, note2)); + + //when + List<Map<String, String>> results = notebookIndex.query("all"); + + //then + 
assertThat(results).isNotEmpty(); + assertThat(results.size()).isEqualTo(1); + assertThat(results.get(0)) + .containsEntry("id", formatId(note2.getId(), note2.getLastParagraph())); + } + + @Test public void canIndexAndQueryByNotebookName() { + //given + Note note1 = newNoteWithParapgraph("Notebook1", "test"); + Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all"); + notebookIndex.addIndexDocs(Arrays.asList(note1, note2)); + + //when + List<Map<String, String>> results = notebookIndex.query("Notebook1"); + + //then + assertThat(results).isNotEmpty(); + assertThat(results.size()).isEqualTo(1); + assertThat(results.get(0)).containsEntry("id", note1.getId()); + } + + @Test public void indexKeyContract() throws IOException { + //give + Note note1 = newNoteWithParapgraph("Notebook1", "test"); + //when + notebookIndex.addIndexDoc(note1); + //then + String id = resultForQuery("test").get(0).get(LuceneSearch.ID_FIELD); + + assertThat(Splitter.on("/").split(id)) //key structure <noteId>/paragraph/<paragraphId> + .containsAllOf(note1.getId(), LuceneSearch.PARAGRAPH, note1.getLastParagraph().getId()); + } + + @Test //(expected=IllegalStateException.class) + public void canNotSearchBeforeIndexing() { + //given NO notebookIndex.index() was called + //when + List<Map<String, String>> result = notebookIndex.query("anything"); + //then + assertThat(result).isEmpty(); + //assert logs were printed + //"ERROR org.apache.zeppelin.search.SearchService:97 - Failed to open index dir RAMDirectory" + } + + @Test public void canIndexAndReIndex() throws IOException { + //given + Note note1 = newNoteWithParapgraph("Notebook1", "test"); + Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all"); + notebookIndex.addIndexDocs(Arrays.asList(note1, note2)); + + //when + Paragraph p2 = note2.getLastParagraph(); + p2.setText("test indeed"); + notebookIndex.updateIndexDoc(note2); + + //then + List<Map<String, String>> results = 
notebookIndex.query("all"); + assertThat(results).isEmpty(); + + results = notebookIndex.query("indeed"); + assertThat(results).isNotEmpty(); + } + + @Test public void canDeleteNull() throws IOException { + //give + // looks like a bug in web UI: it tries to delete a note twice (after it has just been deleted) + //when + notebookIndex.deleteIndexDocs(null); + } + + @Test public void canDeleteFromIndex() throws IOException { + //given + Note note1 = newNoteWithParapgraph("Notebook1", "test"); + Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all"); + notebookIndex.addIndexDocs(Arrays.asList(note1, note2)); + assertThat(resultForQuery("Notebook2")).isNotEmpty(); + + //when + notebookIndex.deleteIndexDocs(note2); + + //then + assertThat(notebookIndex.query("all")).isEmpty(); + assertThat(resultForQuery("Notebook2")).isEmpty(); + + List<Map<String, String>> results = resultForQuery("test"); + assertThat(results).isNotEmpty(); + assertThat(results.size()).isEqualTo(1); + } + + @Test public void indexParagraphUpdatedOnNoteSave() throws IOException { + //given: total 2 notebooks, 3 paragraphs + Note note1 = newNoteWithParapgraph("Notebook1", "test"); + Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all"); + notebookIndex.addIndexDocs(Arrays.asList(note1, note2)); + assertThat(resultForQuery("test").size()).isEqualTo(3); + + //when + Paragraph p1 = note1.getLastParagraph(); + p1.setText("no no no"); + note1.persist(); + + //then + assertThat(resultForQuery("Notebook1").size()).isEqualTo(1); + + List<Map<String, String>> results = resultForQuery("test"); + assertThat(results).isNotEmpty(); + assertThat(results.size()).isEqualTo(2); + + //does not include Notebook1's paragraph any more + for (Map<String, String> result: results) { + assertThat(result.get("id").startsWith(note1.getId())).isFalse();; + } + } + + @Test public void indexNoteNameUpdatedOnNoteSave() throws IOException { + //given: total 2 notebooks, 3 paragraphs 
+ Note note1 = newNoteWithParapgraph("Notebook1", "test"); + Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at all"); + notebookIndex.addIndexDocs(Arrays.asList(note1, note2)); + assertThat(resultForQuery("test").size()).isEqualTo(3); + + //when + note1.setName("NotebookN"); + note1.persist(); + + //then + assertThat(resultForQuery("Notebook1")).isEmpty(); + assertThat(resultForQuery("NotebookN")).isNotEmpty(); + assertThat(resultForQuery("NotebookN").size()).isEqualTo(1); + } + + private List<Map<String, String>> resultForQuery(String q) { + return notebookIndex.query(q); + } + + /** + * Creates a new Note \w given name, + * adds a new paragraph \w given text + * + * @param noteName name of the note + * @param parText text of the paragraph + * @return Note + */ + private Note newNoteWithParapgraph(String noteName, String parText) { + Note note1 = newNote(noteName); + addParagraphWithText(note1, parText); + return note1; + } + + /** + * Creates a new Note \w given name, + * adds N paragraphs \w given texts + */ + private Note newNoteWithParapgraphs(String noteName, String... parTexts) { + Note note1 = newNote(noteName); + for (String parText : parTexts) { + addParagraphWithText(note1, parText); + } + return note1; + } + + private Paragraph addParagraphWithText(Note note, String text) { + Paragraph p = note.addParagraph(); + p.setText(text); + return p; + } + + private Note newNote(String name) { + Note note = new Note(notebookRepoMock, replLoaderMock, null, notebookIndex); + note.setName(name); + return note; + } + +}
