[1/2] incubator-zeppelin git commit: ZEPPELIN-501 Notebook search

bzz Wed, 23 Dec 2015 02:28:39 -0800

Repository: incubator-zeppelin
Updated Branches:
  refs/heads/master d4396887d -> 82de508d7



http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
----------------------------------------------------------------------
diff --git 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
new file mode 100644
index 0000000..7f9cbbd
--- /dev/null
+++ 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
@@ -0,0 +1,391 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.search;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.search.highlight.TokenSources;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.zeppelin.notebook.Note;
+import org.apache.zeppelin.notebook.Paragraph;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+
+/**
+ * Search (both, indexing and query) the notebooks using Lucene.
+ *
+ * Querying is thread-safe, as it creates a new IndexReader every time.
+ * Indexing is thread-safe, as it re-uses a single IndexWriter, which is itself thread-safe.
+ */
+public class LuceneSearch implements SearchService {
+  private static final Logger LOG = 
LoggerFactory.getLogger(LuceneSearch.class);
+
+  private static final String SEARCH_FIELD = "contents";
+  static final String PARAGRAPH = "paragraph";
+  static final String ID_FIELD = "id";
+
+  Directory ramDirectory;
+  Analyzer analyzer;
+  IndexWriterConfig iwc;
+  IndexWriter writer;
+
+  public LuceneSearch() {
+    ramDirectory = new RAMDirectory();
+    analyzer = new StandardAnalyzer();
+    iwc = new IndexWriterConfig(analyzer);
+    try {
+      writer = new IndexWriter(ramDirectory, iwc);
+    } catch (IOException e) {
+      LOG.error("Failed to reate new IndexWriter", e);
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.zeppelin.search.SearchService#query(java.lang.String)
+   */
+  @Override
+  public List<Map<String, String>> query(String queryStr) {
+    if (null == ramDirectory) {
+      throw new IllegalStateException(
+          "Something went wrong on instance creation time, index dir is null");
+    }
+    List<Map<String, String>> result = Collections.emptyList();
+    try (IndexReader indexReader = DirectoryReader.open(ramDirectory)) {
+      IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+      Analyzer analyzer = new StandardAnalyzer();
+      QueryParser parser = new QueryParser(SEARCH_FIELD, analyzer);
+
+      Query query = parser.parse(queryStr);
+      LOG.debug("Searching for: " + query.toString(SEARCH_FIELD));
+
+      SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter();
+      Highlighter highlighter = new Highlighter(htmlFormatter, new 
QueryScorer(query));
+
+      result = doSearch(indexSearcher, query, analyzer, highlighter);
+      indexReader.close();
+    } catch (IOException e) {
+      LOG.error("Failed to open index dir {}, make sure indexing finished OK", 
ramDirectory, e);
+    } catch (ParseException e) {
+      LOG.error("Failed to parse query " + queryStr, e);
+    }
+    return result;
+  }
+
+  private List<Map<String, String>> doSearch(IndexSearcher searcher, Query 
query,
+      Analyzer analyzer, Highlighter highlighter) {
+    List<Map<String, String>> matchingParagraphs = Lists.newArrayList();
+    ScoreDoc[] hits;
+    try {
+      hits = searcher.search(query, 20).scoreDocs;
+      for (int i = 0; i < hits.length; i++) {
+        LOG.debug("doc={} score={}", hits[i].doc, hits[i].score);
+
+        int id = hits[i].doc;
+        Document doc = searcher.doc(id);
+        String path = doc.get(ID_FIELD);
+        if (path != null) {
+          LOG.debug((i + 1) + ". " + path);
+          String title = doc.get("title");
+          if (title != null) {
+            LOG.debug("   Title: {}", doc.get("title"));
+          }
+
+          String text = doc.get(SEARCH_FIELD);
+          TokenStream tokenStream = 
TokenSources.getTokenStream(searcher.getIndexReader(), id,
+              SEARCH_FIELD, analyzer);
+          TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, 
text, true, 3);
+          LOG.debug("    {} fragments found for query '{}'", frag.length, 
query);
+          for (int j = 0; j < frag.length; j++) {
+            if ((frag[j] != null) && (frag[j].getScore() > 0)) {
+              LOG.debug("    Fragment: {}", frag[j].toString());
+            }
+          }
+          String fragment = (frag != null && frag.length > 0) ? 
frag[0].toString() : "";
+
+          matchingParagraphs.add(ImmutableMap.of("id", path, // 
<noteId>/paragraph/<paragraphId>
+              "name", title, "snippet", fragment, "text", text));
+        } else {
+          LOG.info("{}. No {} for this document", i + 1, ID_FIELD);
+        }
+      }
+    } catch (IOException | InvalidTokenOffsetsException e) {
+      LOG.error("Exception on searching for {}", query, e);
+    }
+    return matchingParagraphs;
+  }
+
+  /* (non-Javadoc)
+   * @see 
org.apache.zeppelin.search.SearchService#updateIndexDoc(org.apache.zeppelin.notebook.Note)
+   */
+  @Override
+  public void updateIndexDoc(Note note) throws IOException {
+    updateIndexNoteName(note);
+    for (Paragraph p: note.getParagraphs()) {
+      updateIndexParagraph(note, p);
+    }
+  }
+
+  private void updateIndexNoteName(Note note) throws IOException {
+    String noteName = note.getName();
+    String noteId = note.getId();
+    LOG.debug("Indexing Notebook {}, '{}'", noteId, noteName);
+    if (null == noteName || noteName.isEmpty()) {
+      LOG.debug("Skipping empty notebook name");
+      return;
+    }
+    updateDoc(noteId, noteName, null);
+  }
+
+  private void updateIndexParagraph(Note note, Paragraph p) throws IOException 
{
+    if (p.getText() == null) {
+      LOG.debug("Skipping empty paragraph");
+      return;
+    }
+    updateDoc(note.getId(), note.getName(), p);
+  }
+
+  /**
+   * Updates the index for the given note: either note.name or a paragraph. If
+   * the paragraph is <code>null</code>, updates only the note.name document.
+   *
+   * @param noteId
+   * @param noteName
+   * @param p
+   * @throws IOException
+   */
+  private void updateDoc(String noteId, String noteName, Paragraph p) throws 
IOException {
+    String id = formatId(noteId, p);
+    Document doc = newDocument(id, noteName, p);
+    try {
+      writer.updateDocument(new Term(ID_FIELD, id), doc);
+      writer.commit();
+    } catch (IOException e) {
+      LOG.error("Failed to updaet index of notebook {}", noteId, e);
+    }
+  }
+
+  /**
+   * If paragraph is not null, id is <noteId>/paragraph/<paragraphId>,
+   * otherwise it's just <noteId>.
+   */
+  static String formatId(String noteId, Paragraph p) {
+    String id = noteId;
+    if (null != p) {
+      id = Joiner.on('/').join(id, PARAGRAPH, p.getId());
+    }
+    return id;
+  }
+
+  static String formatDeleteId(String noteId, Paragraph p) {
+    String id = noteId;
+    if (null != p) {
+      id = Joiner.on('/').join(id, PARAGRAPH, p.getId());
+    } else {
+      id = id + "*";
+    }
+    return id;
+  }
+
+  /**
+   * If paragraph is not null, indexes code in the paragraph, otherwise indexes
+   * the notebook name.
+   *
+   * @param id id of the document, different for Note name and paragraph
+   * @param noteName name of the note
+   * @param p paragraph
+   * @return
+   */
+  private Document newDocument(String id, String noteName, Paragraph p) {
+    Document doc = new Document();
+
+    Field pathField = new StringField(ID_FIELD, id, Field.Store.YES);
+    doc.add(pathField);
+    doc.add(new StringField("title", noteName, Field.Store.YES));
+
+    if (null != p) {
+      doc.add(new TextField(SEARCH_FIELD, p.getText(), Field.Store.YES));
+      Date date = p.getDateStarted() != null ? p.getDateStarted() : 
p.getDateCreated();
+      doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
+    } else {
+      doc.add(new TextField(SEARCH_FIELD, noteName, Field.Store.YES));
+    }
+    return doc;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.zeppelin.search.SearchService#addIndexDocs(java.util.Collection)
+   */
+  @Override
+  public void addIndexDocs(Collection<Note> collection) {
+    int docsIndexed = 0;
+    long start = System.nanoTime();
+    try {
+      for (Note note : collection) {
+        addIndexDocAsync(note);
+        docsIndexed++;
+      }
+    } catch (IOException e) {
+      LOG.error("Failed to index all Notebooks", e);
+    } finally {
+      try { // save what's been indexed, even if not full collection
+        writer.commit();
+      } catch (IOException e) {
+        LOG.error("Failed to save index", e);
+      }
+      long end = System.nanoTime();
+      LOG.info("Indexing {} notebooks took {}ms", docsIndexed,
+          TimeUnit.NANOSECONDS.toMillis(end - start));
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see 
org.apache.zeppelin.search.SearchService#addIndexDoc(org.apache.zeppelin.notebook.Note)
+   */
+  @Override
+  public void addIndexDoc(Note note) {
+    try {
+      addIndexDocAsync(note);
+      writer.commit();
+    } catch (IOException e) {
+      LOG.error("Failed to add note {} to index", note, e);
+    }
+  }
+
+  /**
+   * Indexes the given notebook, but does not commit changes.
+   *
+   * @param note
+   * @throws IOException
+   */
+  private void addIndexDocAsync(Note note) throws IOException {
+    indexNoteName(writer, note.getId(), note.getName());
+    for (Paragraph doc : note.getParagraphs()) {
+      if (doc.getText() == null) {
+        LOG.debug("Skipping empty paragraph");
+        continue;
+      }
+      indexDoc(writer, note.getId(), note.getName(), doc);
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see 
org.apache.zeppelin.search.SearchService#deleteIndexDocs(org.apache.zeppelin.notebook.Note)
+   */
+  @Override
+  public void deleteIndexDocs(Note note) {
+    deleteDoc(note, null);
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.zeppelin.search.SearchService
+   *  #deleteIndexDoc(org.apache.zeppelin.notebook.Note, 
org.apache.zeppelin.notebook.Paragraph)
+   */
+  @Override
+  public void deleteIndexDoc(Note note, Paragraph p) {
+    deleteDoc(note, p);
+  }
+
+  private void deleteDoc(Note note, Paragraph p) {
+    if (null == note) {
+      LOG.error("Trying to delete note by reference to NULL");
+      return;
+    }
+    String fullNoteOrJustParagraph = formatDeleteId(note.getId(), p);
+    LOG.debug("Deleting note {}, out of: {}", note.getId(), writer.numDocs());
+    try {
+      writer.deleteDocuments(new WildcardQuery(new Term(ID_FIELD, 
fullNoteOrJustParagraph)));
+      writer.commit();
+    } catch (IOException e) {
+      LOG.error("Failed to delete {} from index by '{}'", note, 
fullNoteOrJustParagraph, e);
+    }
+    LOG.debug("Done, index contains {} docs now" + writer.numDocs());
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.zeppelin.search.SearchService#close()
+   */
+  @Override
+  public void close() {
+    try {
+      writer.close();
+    } catch (IOException e) {
+      LOG.error("Failed to .close() the notebook index", e);
+    }
+  }
+
+  /**
+   * Indexes a notebook name
+   *
+   * @throws IOException
+   */
+  private void indexNoteName(IndexWriter w, String noteId, String noteName) 
throws IOException {
+    LOG.debug("Indexing Notebook {}, '{}'", noteId, noteName);
+    if (null == noteName || noteName.isEmpty()) {
+      LOG.debug("Skipping empty notebook name");
+      return;
+    }
+    indexDoc(w, noteId, noteName, null);
+  }
+
+  /**
+   * Indexes a single document:
+   *  - code of the paragraph (if non-null)
+   *  - or just a note name
+   */
+  private void indexDoc(IndexWriter w, String noteId, String noteName, 
Paragraph p)
+      throws IOException {
+    String id = formatId(noteId, p);
+    Document doc = newDocument(id, noteName, p);
+    w.addDocument(doc);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java
----------------------------------------------------------------------
diff --git 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java
new file mode 100644
index 0000000..64f2b75
--- /dev/null
+++ 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.search;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.zeppelin.notebook.Note;
+import org.apache.zeppelin.notebook.Paragraph;
+
+/**
+ * Search (both, indexing and query) the notebooks.
+ * 
+ * Intended to have multiple implementations, e.g.:
+ *  - local Lucene (in-memory, on-disk)
+ *  - remote Elasticsearch
+ */
+public interface SearchService {
+
+  /**
+   * Full-text search in all the notebooks
+   *
+   * @param queryStr a query
+   * @return A list of matching paragraphs (id, text, snippet w/ highlight)
+   */
+  public List<Map<String, String>> query(String queryStr);
+
+  /**
+   * Updates all documents in index for the given note:
+   *  - name
+   *  - all paragraphs
+   *
+   * @param note a Note to update index for
+   * @throws IOException
+   */
+  public void updateIndexDoc(Note note) throws IOException;
+
+  /**
+   * Indexes full collection of notes: all the paragraphs + Note names
+   *
+   * @param collection of Notes
+   */
+  public void addIndexDocs(Collection<Note> collection);
+
+  /**
+   * Indexes the given notebook.
+   *
+   * @param note a Note to index
+   */
+  public void addIndexDoc(Note note);
+
+  /**
+   * Deletes all docs on given Note from index
+   */
+  public void deleteIndexDocs(Note note);
+
+  /**
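+   * Deletes the index doc for the given paragraph of a note
+   *
+   * @param note a Note the paragraph belongs to
+   * @param p a Paragraph whose doc is deleted from the index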
+   * @throws IOException
+   */
+  public void deleteIndexDoc(Note note, Paragraph p);
+
+  /**
+   * Frees the resources used by index
+   */
+  public void close();
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java
----------------------------------------------------------------------
diff --git 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java
index ee35773..e2d1aac 100644
--- 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java
+++ 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/NotebookTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
 
 import java.io.File;
 import java.io.IOException;
@@ -45,6 +46,8 @@ import org.apache.zeppelin.scheduler.Job;
 import org.apache.zeppelin.scheduler.Job.Status;
 import org.apache.zeppelin.scheduler.JobListener;
 import org.apache.zeppelin.scheduler.SchedulerFactory;
+import org.apache.zeppelin.search.SearchService;
+import org.apache.zeppelin.search.LuceneSearch;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -84,8 +87,9 @@ public class NotebookTest implements JobListenerFactory{
 
     factory = new InterpreterFactory(conf, new InterpreterOption(false), null);
 
+    SearchService search = mock(SearchService.class);
     notebookRepo = new VFSNotebookRepo(conf);
-    notebook = new Notebook(conf, notebookRepo, schedulerFactory, factory, 
this);
+    notebook = new Notebook(conf, notebookRepo, schedulerFactory, factory, 
this, search);
   }
 
   @After
@@ -170,7 +174,8 @@ public class NotebookTest implements JobListenerFactory{
     p1.setText("hello world");
     note.persist();
 
-    Notebook notebook2 = new Notebook(conf, notebookRepo, schedulerFactory, 
new InterpreterFactory(conf, null), this);
+    Notebook notebook2 = new Notebook(
+        conf, notebookRepo, schedulerFactory, new InterpreterFactory(conf, 
null), this, null);
     assertEquals(1, notebook2.getAllNotes().size());
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java
----------------------------------------------------------------------
diff --git 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java
 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java
index 64d9b32..6d8c50d 100644
--- 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java
+++ 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/NotebookRepoSyncTest.java
@@ -19,6 +19,7 @@ package org.apache.zeppelin.notebook.repo;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
 
 import java.io.File;
 import java.io.IOException;
@@ -39,6 +40,8 @@ import org.apache.zeppelin.scheduler.Job;
 import org.apache.zeppelin.scheduler.Job.Status;
 import org.apache.zeppelin.scheduler.JobListener;
 import org.apache.zeppelin.scheduler.SchedulerFactory;
+import org.apache.zeppelin.search.SearchService;
+import org.apache.zeppelin.search.LuceneSearch;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -86,8 +89,9 @@ public class NotebookRepoSyncTest implements 
JobListenerFactory {
 
     factory = new InterpreterFactory(conf, new InterpreterOption(false), null);
     
+    SearchService search = mock(SearchService.class);
     notebookRepoSync = new NotebookRepoSync(conf);
-    notebookSync = new Notebook(conf, notebookRepoSync, schedulerFactory, 
factory, this);
+    notebookSync = new Notebook(conf, notebookRepoSync, schedulerFactory, 
factory, this, search);
   }
 
   @After

http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java
----------------------------------------------------------------------
diff --git 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java
 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java
index c3bb3a0..80d1174 100644
--- 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java
+++ 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/notebook/repo/VFSNotebookRepoTest.java
@@ -18,6 +18,7 @@
 package org.apache.zeppelin.notebook.repo;
 
 import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.mock;
 
 import java.io.File;
 import java.io.IOException;
@@ -32,20 +33,19 @@ import 
org.apache.zeppelin.interpreter.mock.MockInterpreter1;
 import org.apache.zeppelin.notebook.JobListenerFactory;
 import org.apache.zeppelin.notebook.Note;
 import org.apache.zeppelin.notebook.Notebook;
-import org.apache.zeppelin.notebook.NotebookTest;
 import org.apache.zeppelin.notebook.Paragraph;
 import org.apache.zeppelin.scheduler.JobListener;
 import org.apache.zeppelin.scheduler.SchedulerFactory;
-import org.apache.zeppelin.scheduler.Job.Status;
+import org.apache.zeppelin.search.SearchService;
+import org.apache.zeppelin.search.LuceneSearch;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-public class VFSNotebookRepoTest implements JobListenerFactory{
-  private static final Logger logger = 
LoggerFactory.getLogger(NotebookTest.class);
-
+public class VFSNotebookRepoTest implements JobListenerFactory {
+  private static final Logger LOG = 
LoggerFactory.getLogger(VFSNotebookRepoTest.class);
   private ZeppelinConfiguration conf;
   private SchedulerFactory schedulerFactory;
   private Notebook notebook;
@@ -53,16 +53,15 @@ public class VFSNotebookRepoTest implements 
JobListenerFactory{
   private InterpreterFactory factory;
 
   private File mainZepDir;
-
   private File mainNotebookDir;
 
   @Before
   public void setUp() throws Exception {
-    String zpath = 
System.getProperty("java.io.tmpdir")+"/ZeppelinLTest_"+System.currentTimeMillis();
+    String zpath = System.getProperty("java.io.tmpdir") + "/ZeppelinLTest_" + 
System.currentTimeMillis();
     mainZepDir = new File(zpath);
     mainZepDir.mkdirs();
     new File(mainZepDir, "conf").mkdirs();
-    String mainNotePath = zpath+"/notebook";
+    String mainNotePath = zpath + "/notebook";
     mainNotebookDir = new File(mainNotePath);
     mainNotebookDir.mkdirs();
 
@@ -79,15 +78,15 @@ public class VFSNotebookRepoTest implements 
JobListenerFactory{
     this.schedulerFactory = new SchedulerFactory();
     factory = new InterpreterFactory(conf, new InterpreterOption(false), null);
 
+    SearchService search = mock(SearchService.class);
     notebookRepo = new VFSNotebookRepo(conf);
-    notebook = new Notebook(conf, notebookRepo, schedulerFactory, factory, 
this);
+    notebook = new Notebook(conf, notebookRepo, schedulerFactory, factory, 
this, search);
   }
 
   @After
   public void tearDown() throws Exception {
-    //FileUtils.deleteDirectory(mainZepDir);
     if (!FileUtils.deleteQuietly(mainZepDir)) {
-      logger.error("Failed to delete {} ", mainZepDir.getName());
+      LOG.error("Failed to delete {} ", mainZepDir.getName());
     }
   }
 
@@ -97,7 +96,7 @@ public class VFSNotebookRepoTest implements 
JobListenerFactory{
     
note.getNoteReplLoader().setInterpreters(factory.getDefaultInterpreterSettingList());
 
     Paragraph p1 = note.addParagraph();
-    Map config = p1.getConfig();
+    Map<String, Object> config = p1.getConfig();
     config.put("enabled", true);
     p1.setConfig(config);
     p1.setText("%mock1 hello world");

http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/82de508d/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
----------------------------------------------------------------------
diff --git 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
new file mode 100644
index 0000000..f74d95e
--- /dev/null
+++ 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.zeppelin.search;
+
+import static com.google.common.truth.Truth.assertThat;
+import static org.mockito.Mockito.*;
+import static org.apache.zeppelin.search.LuceneSearch.formatId;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.zeppelin.interpreter.InterpreterSetting;
+import org.apache.zeppelin.notebook.Note;
+import org.apache.zeppelin.notebook.NoteInterpreterLoader;
+import org.apache.zeppelin.notebook.Paragraph;
+import org.apache.zeppelin.notebook.repo.NotebookRepo;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableList;
+
+public class LuceneSearchTest {
+
+  private static NoteInterpreterLoader replLoaderMock;
+  private static NotebookRepo notebookRepoMock;
+  private SearchService notebookIndex;
+
+  @BeforeClass
+  public static void beforeStartUp() {
+    notebookRepoMock = mock(NotebookRepo.class);
+    replLoaderMock = mock(NoteInterpreterLoader.class);
+
+    when(replLoaderMock.getInterpreterSettings())
+      .thenReturn(ImmutableList.<InterpreterSetting>of());
+  }
+
+  @Before
+  public void startUp() {
+    notebookIndex = new LuceneSearch();
+  }
+
+  @After
+  public void shutDown() {
+    notebookIndex.close();
+  }
+
+  @Test public void canIndexNotebook() {
+    //given
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    Note note2 = newNoteWithParapgraph("Notebook2", "not test");
+    List<Note> notebook = Arrays.asList(note1, note2);
+
+    //when
+    notebookIndex.addIndexDocs(notebook);
+  }
+
+  @Test public void canIndexAndQuery() {
+    //given
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at 
all");
+    notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
+
+    //when
+    List<Map<String, String>> results = notebookIndex.query("all");
+
+    //then
+    assertThat(results).isNotEmpty();
+    assertThat(results.size()).isEqualTo(1);
+    assertThat(results.get(0))
+      .containsEntry("id", formatId(note2.getId(), note2.getLastParagraph()));
+  }
+
+  @Test public void canIndexAndQueryByNotebookName() {
+    //given
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at 
all");
+    notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
+
+    //when
+    List<Map<String, String>> results = notebookIndex.query("Notebook1");
+
+    //then
+    assertThat(results).isNotEmpty();
+    assertThat(results.size()).isEqualTo(1);
+    assertThat(results.get(0)).containsEntry("id", note1.getId());
+  }
+
+  @Test public void indexKeyContract() throws IOException {
+    //given
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    //when
+    notebookIndex.addIndexDoc(note1);
+    //then
+    String id = resultForQuery("test").get(0).get(LuceneSearch.ID_FIELD);
+
+    assertThat(Splitter.on("/").split(id)) //key structure 
<noteId>/paragraph/<paragraphId>
+      .containsAllOf(note1.getId(), LuceneSearch.PARAGRAPH, 
note1.getLastParagraph().getId());
+  }
+
+  @Test //(expected=IllegalStateException.class)
+  public void canNotSearchBeforeIndexing() {
+    //given NO notebookIndex.index() was called
+    //when
+    List<Map<String, String>> result = notebookIndex.query("anything");
+    //then
+    assertThat(result).isEmpty();
+    //assert logs were printed
+    //"ERROR org.apache.zeppelin.search.SearchService:97 - Failed to open 
index dir RAMDirectory"
+  }
+
+  @Test public void canIndexAndReIndex() throws IOException {
+    //given
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at 
all");
+    notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
+
+    //when
+    Paragraph p2 = note2.getLastParagraph();
+    p2.setText("test indeed");
+    notebookIndex.updateIndexDoc(note2);
+
+    //then
+    List<Map<String, String>> results = notebookIndex.query("all");
+    assertThat(results).isEmpty();
+
+    results = notebookIndex.query("indeed");
+    assertThat(results).isNotEmpty();
+  }
+
+  @Test public void canDeleteNull() throws IOException {
+    //given
+    // looks like a bug in web UI: it tries to delete a note twice (after it 
has just been deleted)
+    //when
+    notebookIndex.deleteIndexDocs(null);
+  }
+
+  @Test public void canDeleteFromIndex() throws IOException {
+    //given
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at 
all");
+    notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
+    assertThat(resultForQuery("Notebook2")).isNotEmpty();
+
+    //when
+    notebookIndex.deleteIndexDocs(note2);
+
+    //then
+    assertThat(notebookIndex.query("all")).isEmpty();
+    assertThat(resultForQuery("Notebook2")).isEmpty();
+
+    List<Map<String, String>> results = resultForQuery("test");
+    assertThat(results).isNotEmpty();
+    assertThat(results.size()).isEqualTo(1);
+  }
+
+  @Test public void indexParagraphUpdatedOnNoteSave() throws IOException {
+    //given: total 2 notebooks, 3 paragraphs
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at 
all");
+    notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
+    assertThat(resultForQuery("test").size()).isEqualTo(3);
+
+    //when
+    Paragraph p1 = note1.getLastParagraph();
+    p1.setText("no no no");
+    note1.persist();
+
+    //then
+    assertThat(resultForQuery("Notebook1").size()).isEqualTo(1);
+
+    List<Map<String, String>> results = resultForQuery("test");
+    assertThat(results).isNotEmpty();
+    assertThat(results.size()).isEqualTo(2);
+
+    //does not include Notebook1's paragraph any more
+    for (Map<String, String> result: results) {
+      assertThat(result.get("id").startsWith(note1.getId())).isFalse();;
+    }
+  }
+
+  @Test public void indexNoteNameUpdatedOnNoteSave() throws IOException {
+    //given: total 2 notebooks, 3 paragraphs
+    Note note1 = newNoteWithParapgraph("Notebook1", "test");
+    Note note2 = newNoteWithParapgraphs("Notebook2", "not test", "not test at 
all");
+    notebookIndex.addIndexDocs(Arrays.asList(note1, note2));
+    assertThat(resultForQuery("test").size()).isEqualTo(3);
+
+    //when
+    note1.setName("NotebookN");
+    note1.persist();
+
+    //then
+    assertThat(resultForQuery("Notebook1")).isEmpty();
+    assertThat(resultForQuery("NotebookN")).isNotEmpty();
+    assertThat(resultForQuery("NotebookN").size()).isEqualTo(1);
+  }
+
+  private List<Map<String, String>> resultForQuery(String q) {
+    return notebookIndex.query(q);
+  }
+
+  /**
+   * Creates a new Note w/ given name,
+   * adds a new paragraph w/ given text
+   *
+   * @param noteName name of the note
+   * @param parText text of the paragraph
+   * @return Note
+   */
+  private Note newNoteWithParapgraph(String noteName, String parText) {
+    Note note1 = newNote(noteName);
+    addParagraphWithText(note1, parText);
+    return note1;
+  }
+
+  /**
+   * Creates a new Note w/ given name,
+   * adds N paragraphs w/ given texts
+   */
+  private Note newNoteWithParapgraphs(String noteName, String... parTexts) {
+    Note note1 = newNote(noteName);
+    for (String parText : parTexts) {
+      addParagraphWithText(note1, parText);
+    }
+    return note1;
+  }
+
+  private Paragraph addParagraphWithText(Note note, String text) {
+    Paragraph p = note.addParagraph();
+    p.setText(text);
+    return p;
+  }
+
+  private Note newNote(String name) {
+    Note note = new Note(notebookRepoMock, replLoaderMock, null, 
notebookIndex);
+    note.setName(name);
+    return note;
+  }
+
+}

[1/2] incubator-zeppelin git commit: ZEPPELIN-501 Notebook search

Reply via email to