Author: rwesten
Date: Wed Sep 18 07:38:08 2013
New Revision: 1524317
URL: http://svn.apache.org/r1524317
Log:
STANBOL-1128: The storage location of the FST models is now configurable. The
default is still data-dir/fst
Modified:
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
Modified:
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java?rev=1524317&r1=1524316&r2=1524317&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
(original)
+++
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/FstLinkingEngineComponent.java
Wed Sep 18 07:38:08 2013
@@ -18,16 +18,10 @@ package org.apache.stanbol.enhancer.engi
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.CASE_SENSITIVE;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
-import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_DEREFERENCE_ENTITIES_STATE;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_MATCHING_LANGUAGE;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_SUGGESTIONS;
-import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEREFERENCE_ENTITIES;
-import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEREFERENCE_ENTITIES_FIELDS;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.ENTITY_TYPES;
-import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.REDIRECT_FIELD;
-import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.REDIRECT_MODE;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.SUGGESTIONS;
-import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.TYPE_FIELD;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.TYPE_MAPPINGS;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE;
import static
org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.PROCESSED_LANGUAGES;
@@ -43,17 +37,18 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
-import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-import java.util.concurrent.ThreadFactory;
+import org.apache.commons.compress.compressors.FileNameUtil;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang.text.StrLookup;
+import org.apache.commons.lang.text.StrSubstitutor;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
@@ -61,13 +56,10 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.PropertyOption;
import org.apache.felix.scr.annotations.Reference;
-import org.apache.felix.scr.annotations.ReferenceCardinality;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
-import org.apache.lucene.util.NamedThreadFactory;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.core.SolrCore;
@@ -81,7 +73,6 @@ import org.apache.stanbol.commons.solr.R
import
org.apache.stanbol.commons.stanboltools.datafileprovider.DataFileProvider;
import
org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
import
org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
-import
org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine;
import
org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.EntityCacheManager;
import
org.apache.stanbol.enhancer.engines.lucenefstlinking.cache.FastLRUCacheManager;
import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration;
@@ -143,6 +134,8 @@ import com.google.common.util.concurrent
name="AtSuffix")
},value="SolrYard"),
@Property(name=FstLinkingEngineComponent.FST_CONFIG,
cardinality=Integer.MAX_VALUE),
+ @Property(name=FstLinkingEngineComponent.FST_FOLDER,
+ value=FstLinkingEngineComponent.DEFAULT_FST_FOLDER),
@Property(name=FstLinkingEngineComponent.SOLR_TYPE_FIELD,
value="rdf:type"),
@Property(name=FstLinkingEngineComponent.SOLR_RANKING_FIELD,
value="entityhub:entityRank"),
// @Property(name=REDIRECT_FIELD,value="rdfs:seeAlso"),
@@ -198,6 +191,18 @@ public class FstLinkingEngineComponent {
*/
public static final String FST_CONFIG =
"enhancer.engines.linking.solrfst.fstconfig";
/**
+ * The folder used to store the FST files. The {@link #DEFAULT_FST_FOLDER
default} is
+ * '<code>${solr-data-dir}/fst</code>' - this is '<code>./fst</code>'
relative to the
+ * {@link SolrCore#getDataDir()} of the current SolrCore.
+ */
+ public static final String FST_FOLDER =
"enhancer.engines.linking.solrfst.fstfolder";
+ /**
+ * The default of the FST folder is '<code>${solr-data-dir}/fst</code>' -
+ * this is '<code>./fst</code>' relative to the {@link
SolrCore#getDataDir()}
+ * of the current SolrCore.
+ */
+ public static final String DEFAULT_FST_FOLDER = "${solr-data-dir}/fst";
+ /**
* The name of the Solr field holding the entity type information
*/
public static final String SOLR_TYPE_FIELD =
"enhancer.engines.linking.solrfst.typeField";
@@ -291,6 +296,12 @@ public class FstLinkingEngineComponent {
*/
private LanguageConfiguration fstConfig;
/**
+ * The configured fstFolder. NOTE that the actual folder is determined in
the
+ * {@link #updateEngineRegistration(ServiceReference, SolrServer)} based on
+ * the SolrCore.
+ */
+ private String fstFolder;
+ /**
* Holds the {@link TextProcessingConfig} parsed from the configuration of
* this engine. <p>
* NOTE: that by far not all configurations are supported. See
documentation
@@ -439,6 +450,19 @@ public class FstLinkingEngineComponent {
fstConfig.setConfiguration(properties);
} //else keep the default
+ value = properties.get(FST_FOLDER);
+ if(value instanceof String){
+ this.fstFolder = ((String)value).trim();
+ if(this.fstFolder.isEmpty()){
+ this.fstFolder = null;
+ }
+ } else if(value == null){
+ this.fstFolder = null;
+ } else {
+ throw new ConfigurationException(FST_FOLDER, "Values MUST BE of
type String"
+ + "(found: "+value.getClass().getName()+")!");
+ }
+
//(5) Create the ThreadPool used for the runtime creation of FST models
value = properties.get(FST_THREAD_POOL_SIZE);
int tpSize;
@@ -602,17 +626,8 @@ public class FstLinkingEngineComponent {
//NOTE: the FST config is processed even if the SolrCore has
not changed
// because there might be config changes and/or new FST
files in the
// FST directory of the SolrCore.
- String dataDir = core.getDataDir();
- File fstDir = new File(dataDir,"fst");
- if(!fstDir.isDirectory()){ //create the FST directory
- try {
- FileUtils.forceMkdir(fstDir);
- } catch (IOException e) {
- unregisterEngine(); //unregister current engine and
clean up
- throw new IllegalStateException("Unable to create
Directory for"
- + "storing the FST files within the SolrCore
data dir.");
- }
- }
+ File fstDir = getFstDirectory(core, fstFolder);
+ //File fstDir = new File(dataDir,"fst");
//now collect the FST configuration
indexConfig = new IndexConfiguration(fstConfig, core);
//set fields parsed in the activate method
@@ -687,6 +702,37 @@ public class FstLinkingEngineComponent {
}
+    /**
+     * Resolves - and if necessary creates - the directory used to store the
+     * FST models based on the parsed {@link #FST_FOLDER} configuration.
+     * Variables in the form '<code>${name}</code>' are substituted by using a
+     * {@link SolrCoreStrLookup} created for the index reference, the parsed
+     * SolrCore and the bundle context of this component.
+     * @param core the SolrCore used to resolve the SolrCore specific variables
+     * @param fstFolderConfig the configured FST folder. If <code>null</code>
+     * the {@link #DEFAULT_FST_FOLDER} is used.
+     * @return the (existing) directory for storing the FST models
+     * @throws IllegalStateException if the directory does not exist and can
+     * not be created. The current engine is unregistered before this
+     * exception is thrown.
+     */
+    private File getFstDirectory(SolrCore core, String fstFolderConfig) {
+        //the configuration parsing sets the folder to null if the property is
+        //missing or empty. Fall back to the default to avoid a NPE below
+        if(fstFolderConfig == null){
+            fstFolderConfig = DEFAULT_FST_FOLDER;
+        }
+        StrSubstitutor substitutor = new StrSubstitutor(new SolrCoreStrLookup(
+            indexReference, core, bundleContext));
+        substitutor.setEnableSubstitutionInVariables(true);
+        String folderStr = substitutor.replace(fstFolderConfig);
+        //a second pass for values that still contain variables. NOTE that the
+        //variable might also be located at index 0
+        if(folderStr.contains("${")){
+            folderStr = substitutor.replace(folderStr);
+        }
+        //convert separators to the current OS
+        folderStr = FilenameUtils.separatorsToSystem(folderStr);
+        File fstDir = new File(folderStr);
+        if(!fstDir.isDirectory()){ //create the FST directory
+            try {
+                FileUtils.forceMkdir(fstDir);
+            } catch (IOException e) {
+                unregisterEngine(); //unregister current engine and clean up
+                //keep the IOException as cause so that the reason is not lost
+                throw new IllegalStateException("Unable to create Directory for "
+                    + "storing the FST files at location '"+fstDir+"'.", e);
+            }
+        }
+        return fstDir;
+    }
/**
* This method combines the {@link #fstConfig} with the data present in the
@@ -1012,4 +1058,40 @@ public class FstLinkingEngineComponent {
bundleContext = null;
}
+    /**
+     * {@link StrLookup} implementation used with the {@link StrSubstitutor}
+     * that resolves the variables of the
+     * {@link FstLinkingEngineComponent#FST_FOLDER} configuration. The keys
+     * '<code>solr-data-dir</code>', '<code>solr-index-dir</code>',
+     * '<code>solr-server-name</code>' and '<code>solr-core-name</code>' are
+     * resolved by using the SolrCore and the IndexReference. All other keys
+     * are looked up as properties of the BundleContext.
+     * @author Rupert Westenthaler
+     */
+    private static class SolrCoreStrLookup extends StrLookup {
+
+        private final IndexReference index;
+        private final SolrCore solrCore;
+        private final BundleContext context;
+
+        public SolrCoreStrLookup(IndexReference indexRef, SolrCore core,
+                BundleContext bc) {
+            this.index = indexRef;
+            this.solrCore = core;
+            this.context = bc;
+        }
+
+        /**
+         * Looks up the value for the parsed variable name.
+         * @param key the name of the variable
+         * @return the value as provided by the SolrCore, the IndexReference
+         * or - for all other keys - by the BundleContext
+         */
+        @Override
+        public String lookup(String key) {
+            if("solr-data-dir".equals(key)){
+                return solrCore.getDataDir();
+            }
+            if("solr-index-dir".equals(key)){
+                return solrCore.getIndexDir();
+            }
+            if("solr-server-name".equals(key)){
+                return index.getServer();
+            }
+            if("solr-core-name".equals(key)){
+                return solrCore.getName();
+            }
+            //no SolrCore specific variable: use the bundle context properties
+            return context.getProperty(key);
+        }
+
+    }
+
}
Modified:
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties?rev=1524317&r1=1524316&r2=1524317&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
(original)
+++
stanbol/trunk/enhancement-engines/lucenefstlinking/src/main/resources/OSGI-INF/metatype/metatype.properties
Wed Sep 18 07:38:08 2013
@@ -61,7 +61,15 @@ none alpha-numeric chars replaced by '_'
Files are located in the 'fst' folder relative to the instance directory of
the \
configured SolrCore. \
'generate' ... Boolean switch that allows to enable runtime generation of FST \
-corpora (default: false) \
+corpora (default: false)
+enhancer.engines.linking.solrfst.fstfolder.name=FST Folder
+enhancer.engines.linking.solrfst.fstfolder.description=The Folder used to
store \
+FST files. This supports property substitution (${property-name}) with all \
+OSGI and System properties. In addition the following properties are
supported: \
+${solr-data-dir} ... the data directory of the configured SolrCore; \
+${solr-index-dir} ... the index directory of the configured SolrCore; \
+${solr-server-name} ... the name of the Referenced/Managed SolrServer of the
SolrCore; \
+${solr-core-name} ... the name of the SolrCore
enhancer.engines.linking.solrfst.typeField.name=Entity Type Field
enhancer.engines.linking.solrfst.typeField.description=The Solr Field holding
the \