Author: tommaso
Date: Thu Feb 5 10:12:21 2015
New Revision: 1657511
URL: http://svn.apache.org/r1657511
Log:
OAK-2456 - periodic update of suggester from main index data
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/IndexDefinition.java Thu Feb 5 10:12:21 2015
@@ -193,6 +193,8 @@ class IndexDefinition implements Aggrega
private final int maxExtractLength;
+ private final int suggesterUpdateFrequencyMinutes;
+
public IndexDefinition(NodeState root, NodeState defn) {
this(root, defn, null);
}
@@ -251,6 +253,7 @@ class IndexDefinition implements Aggrega
this.analyzer = createAnalyzer();
this.hasCustomTikaConfig = getTikaConfigNode().exists();
this.maxExtractLength = determineMaxExtractLength();
+ this.suggesterUpdateFrequencyMinutes = getOptionalValue(defn, LuceneIndexConstants.SUGGEST_UPDATE_FREQUENCY_MINUTES, 60);
}
public boolean isFullTextEnabled() {
@@ -288,6 +291,10 @@ class IndexDefinition implements Aggrega
return entryCount;
}
+ public int getSuggesterUpdateFrequencyMinutes() {
+ return suggesterUpdateFrequencyMinutes;
+ }
+
public boolean isEntryCountDefined() {
return entryCountDefined;
}
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java Thu Feb 5 10:12:21 2015
@@ -499,7 +499,7 @@ public class LuceneIndex implements Adva
} else if (query.startsWith("suggest?")) {
String suggestQueryString = query.replace("suggest?", "");
if (reader != null) {
- return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString, reader));
+ return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString));
}
} else {
try {
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java Thu Feb 5 10:12:21 2015
@@ -233,6 +233,11 @@ public interface LuceneIndexConstants {
String PROP_USE_IN_SUGGEST = "useInSuggest";
/**
+ * update frequency of the suggester in minutes
+ */
+ String SUGGEST_UPDATE_FREQUENCY_MINUTES = "suggestUpdateFrequencyMinutes";
+
+ /**
* whether use this property values for spellchecking
*/
String PROP_USE_IN_SPELLCHECK = "useInSpellcheck";
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java Thu Feb 5 10:12:21 2015
@@ -62,7 +62,6 @@ import org.apache.lucene.document.LongFi
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.util.BytesRef;
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorContext.java Thu Feb 5 10:12:21 2015
@@ -28,11 +28,14 @@ import java.util.Calendar;
import org.apache.commons.io.IOUtils;
import org.apache.jackrabbit.oak.api.CommitFailedException;
+import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.plugins.index.IndexUpdateCallback;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.util.ISO8601;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SerialMergeScheduler;
@@ -148,6 +151,9 @@ public class LuceneIndexEditorContext {
}
if (writer != null) {
+
+ updateSuggester();
+
writer.close();
//OAK-2029 Record the last updated status so
@@ -159,6 +165,36 @@ public class LuceneIndexEditorContext {
}
}
+ private void updateSuggester() throws IOException {
+
+ // eventually update suggest dictionary
+ boolean updateSuggester = false;
+ NodeBuilder suggesterStatus = definitionBuilder.child(":suggesterStatus");
+ if (suggesterStatus.hasProperty("lastUpdated")) {
+ PropertyState suggesterLastUpdatedValue = suggesterStatus.getProperty("lastUpdated");
+ Calendar suggesterLastUpdatedTime = ISO8601.parse(suggesterLastUpdatedValue.getValue(Type.DATE));
+ int updateFrequency = definition.getSuggesterUpdateFrequencyMinutes();
+ suggesterLastUpdatedTime.add(Calendar.MINUTE, updateFrequency);
+ if (Calendar.getInstance().after(suggesterLastUpdatedTime)) {
+ updateSuggester = true;
+ }
+ } else {
+ updateSuggester = true;
+ }
+
+ if (updateSuggester) {
+ DirectoryReader reader = DirectoryReader.open(writer, false);
+ try {
+ SuggestHelper.updateSuggester(reader);
+ suggesterStatus.setProperty("lastUpdated", ISO8601.format(Calendar.getInstance()), Type.DATE);
+ } catch (Throwable e) {
+ log.warn("could not update suggester", e);
+ } finally {
+ reader.close();
+ }
+ }
+ }
+
public void enableReindexMode(){
reindex = true;
IndexFormatVersion version = IndexDefinition.determineVersionForFreshIndex(definitionBuilder);
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java Thu Feb 5 10:12:21 2015
@@ -497,8 +497,7 @@ public class LucenePropertyIndex impleme
} else if (query.startsWith("suggest?")) {
String suggestQueryString = query.replace("suggest?", "");
if (reader != null) {
- return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString,
- reader));
+ return new LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString));
}
} else {
try {
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java Thu Feb 5 10:12:21 2015
@@ -19,6 +19,7 @@
package org.apache.jackrabbit.oak.plugins.index.lucene.util;
import org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.DirectSpellChecker;
@@ -29,9 +30,11 @@ import org.apache.lucene.search.spell.Su
* under the hood.
*/
public class SpellcheckHelper {
+
+ private static final DirectSpellChecker spellChecker = new DirectSpellChecker();
+
public static SuggestWord[] getSpellcheck(SpellcheckQuery spellcheckQuery) {
try {
- DirectSpellChecker spellChecker = new DirectSpellChecker();
return spellChecker.suggestSimilar(spellcheckQuery.getTerm(), spellcheckQuery.getCount(), spellcheckQuery.getReader());
} catch (Exception e) {
throw new RuntimeException("could not handle Spellcheck query " + spellcheckQuery, e);
Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java Thu Feb 5 10:12:21 2015
@@ -18,6 +18,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.lucene.util;
+import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.List;
@@ -25,6 +26,8 @@ import java.util.List;
import org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
import org.apache.lucene.search.suggest.DocumentDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.FreeTextSuggester;
@@ -47,14 +50,25 @@ public class SuggestHelper {
}
};
+ private static final Lookup suggester = new FreeTextSuggester(analyzer);
+
+ public static void updateSuggester(IndexReader reader) throws IOException {
+// Terms terms = MultiFields.getTerms(reader, FieldNames.SUGGEST);
+// long size = terms.size() * 2;
+// if (size < 0) {
+// size = terms.getDocCount() / 3;
+// }
+// long count = suggester.getCount();
+// if (size > count) {
+ try {
+ suggester.build(new DocumentDictionary(reader, FieldNames.SUGGEST, FieldNames.PATH_DEPTH));
+ } catch (RuntimeException e) {
+ log.debug("could not update the suggester", e);
+ }
+// }
+ }
+
public static List<Lookup.LookupResult> getSuggestions(SuggestQuery suggestQuery) {
- FreeTextSuggester suggester = new FreeTextSuggester(analyzer);
- try {
- DocumentDictionary dictionary = suggestQuery.getDictionary();
- suggester.build(dictionary); // TODO : it should be possible to avoid rebuilding the index every time
- } catch (Exception e) {
- log.warn("could not build suggester from the passed dictionary ", e);
- }
try {
long count = suggester.getCount();
if (count > 0) {
@@ -67,7 +81,7 @@ public class SuggestHelper {
}
}
- public static SuggestQuery getSuggestQuery(String suggestQueryString, IndexReader reader) {
+ public static SuggestQuery getSuggestQuery(String suggestQueryString) {
try {
String text = null;
for (String param : suggestQueryString.split("&")) {
@@ -81,7 +95,7 @@ public class SuggestHelper {
}
}
if (text != null) {
- return new SuggestQuery(new DocumentDictionary(reader, FieldNames.SUGGEST, FieldNames.PATH_DEPTH), text, analyzer);
+ return new SuggestQuery(text);
} else {
return null;
}
@@ -93,33 +107,20 @@ public class SuggestHelper {
public static class SuggestQuery {
- private final DocumentDictionary dictionary;
private final String text;
- private final Analyzer analyzer;
- public SuggestQuery(DocumentDictionary dictionary, String text, Analyzer analyzer) {
- this.dictionary = dictionary;
+ public SuggestQuery(String text) {
this.text = text;
- this.analyzer = analyzer;
- }
-
- public DocumentDictionary getDictionary() {
- return dictionary;
}
public String getText() {
return text;
}
- public Analyzer getAnalyzer() {
- return analyzer;
- }
-
@Override
public String toString() {
return "SuggestQuery{" +
- "dictionary=" + dictionary +
- ", text='" + text + '\'' +
+ "text='" + text + '\'' +
'}';
}
}
Modified: jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java?rev=1657511&r1=1657510&r2=1657511&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java Thu Feb 5 10:12:21 2015
@@ -85,6 +85,7 @@ public class LuceneOakRepositoryStub ext
.setProperty(REINDEX_PROPERTY_NAME, true)
.setProperty(LuceneIndexConstants.TEST_MODE, true)
.setProperty(LuceneIndexConstants.EVALUATE_PATH_RESTRICTION, true)
+ .setProperty(LuceneIndexConstants.SUGGEST_UPDATE_FREQUENCY_MINUTES, 0) // always update suggester
.setProperty(LuceneIndexConstants.COMPAT_MODE, IndexFormatVersion.V2.getVersion());
NodeBuilder props = index.child(LuceneIndexConstants.INDEX_RULES)