Author: ogrisel
Date: Tue Jan 3 18:57:51 2012
New Revision: 1226915
URL: http://svn.apache.org/viewvc?rev=1226915&view=rev
Log:
STANBOL-197: factor out common code for Solr core tracking and configuration, and
add a new Solr-backed training set stub implementation
Added:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1226915&r1=1226914&r2=1226915&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Tue Jan 3 18:57:51 2012
@@ -20,7 +20,6 @@ import static org.apache.stanbol.enhance
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Dictionary;
@@ -51,8 +50,6 @@ import org.apache.solr.common.SolrDocume
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MoreLikeThisParams;
-import org.apache.stanbol.commons.solr.IndexReference;
-import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
import org.apache.stanbol.commons.solr.utils.StreamQueryRequest;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -62,6 +59,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.apache.stanbol.enhancer.topic.ClassifierException;
+import org.apache.stanbol.enhancer.topic.ConfiguredSolrCoreTracker;
import org.apache.stanbol.enhancer.topic.TopicClassifier;
import org.apache.stanbol.enhancer.topic.TopicSuggestion;
import org.apache.stanbol.enhancer.topic.TrainingSet;
@@ -91,7 +89,7 @@ import org.slf4j.LoggerFactory;
@Property(name = TopicClassificationEngine.BROADER_FIELD),
@Property(name =
TopicClassificationEngine.MATERIALIZED_PATH_FIELD),
@Property(name =
TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD)})
-public class TopicClassificationEngine implements EnhancementEngine,
ServiceProperties, TopicClassifier {
+public class TopicClassificationEngine extends ConfiguredSolrCoreTracker
implements EnhancementEngine, ServiceProperties, TopicClassifier {
public static final String ENGINE_ID =
"org.apache.stanbol.enhancer.engine.id";
@@ -115,17 +113,10 @@ public class TopicClassificationEngine i
protected String engineId;
- protected String solrCoreId;
-
protected List<String> acceptedLanguages;
protected Integer order = ORDERING_EXTRACTION_ENHANCEMENT;
- protected RegisteredSolrServerTracker indexTracker;
-
- // instance of solrServer to use if not using the OSGi service tracker
(e.g. for tests)
- protected SolrServer solrServer;
-
protected String similarityField;
protected String topicUriField;
@@ -136,8 +127,6 @@ public class TopicClassificationEngine i
protected String materializedPathField;
- protected ComponentContext context;
-
protected int numTopics = 10;
protected TrainingSet trainingSet;
@@ -162,28 +151,8 @@ public class TopicClassificationEngine i
similarityField = getRequiredStringParam(config, SIMILARTITY_FIELD);
topicUriField = getRequiredStringParam(config, TOPIC_URI_FIELD);
acceptedLanguages = getStringListParan(config, LANGUAGES);
- if (config.get(SOLR_CORE) instanceof SolrServer) {
- // Bind a fixed Solr server client instead of doing dynamic OSGi
lookup using the service tracker.
- // This can be useful both for unit-testing .
- // The Solr server is expected to be configured with the
MoreLikeThisQueryHandler and the matching
- // fields from the configuration.
- solrServer = (SolrServer) config.get(SOLR_CORE);
- } else {
- String solrCoreId = getRequiredStringParam(config, SOLR_CORE);
- if (context == null) {
- throw new ConfigurationException(SOLR_CORE, SOLR_CORE
- + " should be a
SolrServer instance for using"
- + " the engine
without any OSGi context. Got: "
- + solrCoreId);
- }
- try {
- indexTracker = new
RegisteredSolrServerTracker(context.getBundleContext(),
- IndexReference.parse(solrCoreId));
- indexTracker.open();
- } catch (InvalidSyntaxException e) {
- throw new ConfigurationException(SOLR_CORE, e.getMessage(), e);
- }
- }
+ configureSolrCore(config, SOLR_CORE);
+
// optional fields, can be null
broaderField = (String) config.get(BROADER_FIELD);
materializedPathField = (String) config.get(TOPIC_URI_FIELD);
@@ -194,41 +163,6 @@ public class TopicClassificationEngine i
}
}
- protected String getRequiredStringParam(Dictionary<String,Object>
parameters, String paramName) throws ConfigurationException {
- return getRequiredStringParam(parameters, paramName, null);
- }
-
- protected String getRequiredStringParam(Dictionary<String,Object> config,
- String paramName,
- String defaultValue) throws
ConfigurationException {
- Object paramValue = config.get(paramName);
- if (paramValue == null) {
- if (defaultValue == null) {
- throw new ConfigurationException(paramName, paramName + " is a
required parameter.");
- } else {
- return defaultValue;
- }
- }
- return paramValue.toString();
- }
-
- @SuppressWarnings("unchecked")
- protected List<String> getStringListParan(Dictionary<String,Object>
config, String paramName) throws ConfigurationException {
- Object paramValue = config.get(paramName);
- if (paramValue == null) {
- return new ArrayList<String>();
- } else if (paramValue instanceof String) {
- return Arrays.asList(paramValue.toString().split(",\\s*"));
- } else if (paramValue instanceof String[]) {
- return Arrays.asList((String[]) paramValue);
- } else if (paramValue instanceof List) {
- return (List<String>) paramValue;
- } else {
- throw new ConfigurationException(paramName, String.format(
- "Unexpected parameter type for '%s': %s", paramName,
paramValue));
- }
- }
-
@Override
public int canEnhance(ContentItem ci) throws EngineException {
String text = getTextFromContentItem(ci);
@@ -267,13 +201,6 @@ public class TopicClassificationEngine i
}
}
- /**
- * @return the manually bound solrServer instance or the one tracked by
the OSGi service tracker.
- */
- protected SolrServer getActiveSolrServer() {
- return solrServer != null ? solrServer : indexTracker.getService();
- }
-
@Override
public Map<String,Object> getServiceProperties() {
return
Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,
Added:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java?rev=1226915&view=auto
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
(added)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
Tue Jan 3 18:57:51 2012
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.topic;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Dictionary;
+import java.util.List;
+
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.stanbol.commons.solr.IndexReference;
+import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
+import org.osgi.framework.InvalidSyntaxException;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * Helper class that factors out common code for OSGi components tracking a Solr Core.
+ */
+public abstract class ConfiguredSolrCoreTracker {
+
+ protected String solrCoreId;
+
+ protected RegisteredSolrServerTracker indexTracker;
+
+ // instance of solrServer to use if not using the OSGi service tracker (e.g. for tests)
+ protected SolrServer solrServer;
+
+ protected ComponentContext context;
+
+ abstract public void configure(Dictionary<String,Object> config) throws
ConfigurationException;
+
+ protected String getRequiredStringParam(Dictionary<String,Object>
parameters, String paramName) throws ConfigurationException {
+ return getRequiredStringParam(parameters, paramName, null);
+ }
+
+ protected String getRequiredStringParam(Dictionary<String,Object> config,
+ String paramName,
+ String defaultValue) throws
ConfigurationException {
+ Object paramValue = config.get(paramName);
+ if (paramValue == null) {
+ if (defaultValue == null) {
+ throw new ConfigurationException(paramName, paramName + " is a
required parameter.");
+ } else {
+ return defaultValue;
+ }
+ }
+ return paramValue.toString();
+ }
+
+ @SuppressWarnings("unchecked")
+ protected List<String> getStringListParan(Dictionary<String,Object>
config, String paramName) throws ConfigurationException {
+ Object paramValue = config.get(paramName);
+ if (paramValue == null) {
+ return new ArrayList<String>();
+ } else if (paramValue instanceof String) {
+ return Arrays.asList(paramValue.toString().split(",\\s*"));
+ } else if (paramValue instanceof String[]) {
+ return Arrays.asList((String[]) paramValue);
+ } else if (paramValue instanceof List) {
+ return (List<String>) paramValue;
+ } else {
+ throw new ConfigurationException(paramName, String.format(
+ "Unexpected parameter type for '%s': %s", paramName,
paramValue));
+ }
+ }
+
+ /**
+ * @return the manually bound solrServer instance or the one tracked by the OSGi service tracker.
+ */
+ public SolrServer getActiveSolrServer() {
+ return solrServer != null ? solrServer : indexTracker.getService();
+ }
+
+ protected void configureSolrCore(Dictionary<String,Object> config, String
solrCoreProperty) throws ConfigurationException {
+ if (config.get(solrCoreProperty) instanceof SolrServer) {
+ // Bind a fixed Solr server client instead of doing dynamic OSGi lookup using the service tracker.
+ // This can be useful for unit-testing.
+ solrServer = (SolrServer) config.get(solrCoreProperty);
+ } else {
+ String solrCoreId = getRequiredStringParam(config,
solrCoreProperty);
+ if (context == null) {
+ throw new ConfigurationException(solrCoreProperty,
+ solrCoreProperty + " should be a SolrServer instance
for using"
+ + " the engine without any OSGi context. Got:
" + solrCoreId);
+ }
+ try {
+ indexTracker = new
RegisteredSolrServerTracker(context.getBundleContext(),
+ IndexReference.parse(solrCoreId));
+ indexTracker.open();
+ } catch (InvalidSyntaxException e) {
+ throw new ConfigurationException(solrCoreProperty,
e.getMessage(), e);
+ }
+ }
+ }
+
+}
Added:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java?rev=1226915&view=auto
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
(added)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
Tue Jan 3 18:57:51 2012
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.topic;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.osgi.framework.InvalidSyntaxException;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Implementation of the {@code TrainingSet} interface that uses a Solr Core as backend to store and
+ * retrieve the text examples used to train a classifier.
+ */
+@Component(metatype = true, immediate = true, configurationFactory = true,
policy = ConfigurationPolicy.REQUIRE)
+@Service
+@Properties(value = {@Property(name = SolrTrainingSet.TRAINING_SET_ID),
+ @Property(name = SolrTrainingSet.SOLR_CORE),
+ @Property(name = SolrTrainingSet.EXAMPLE_ID_FIELD),
+ @Property(name = SolrTrainingSet.EXAMPLE_TEXT_FIELD),
+ @Property(name = SolrTrainingSet.TOPICS_URI_FIELD),
+ @Property(name =
SolrTrainingSet.MODIFICATION_DATE_FIELD)})
+public class SolrTrainingSet extends ConfiguredSolrCoreTracker implements
TrainingSet {
+
+ public static final String TRAINING_SET_ID =
"org.apache.stanbol.enhancer.topic.trainingset.id";
+
+ public static final String SOLR_CORE =
"org.apache.stanbol.enhancer.engine.topic.solrCore";
+
+ public static final String TOPICS_URI_FIELD =
"org.apache.stanbol.enhancer.engine.topic.topicUriField";
+
+ public static final String EXAMPLE_ID_FIELD =
"org.apache.stanbol.enhancer.engine.topic.exampleIdField";
+
+ public static final String EXAMPLE_TEXT_FIELD =
"org.apache.stanbol.enhancer.engine.topic.exampleTextField";
+
+ public static final String MODIFICATION_DATE_FIELD =
"org.apache.stanbol.enhancer.engine.topic.modificiationDateField";
+
+ @SuppressWarnings("unused")
+ private static final Logger log =
LoggerFactory.getLogger(SolrTrainingSet.class);
+
+ protected String trainingSetId;
+
+ protected String topicUriField;
+
+ protected String modificationDateField;
+
+ // TODO: make me configurable using an OSGi property
+ protected int batchSize = 100;
+
+ @Activate
+ protected void activate(ComponentContext context) throws
ConfigurationException, InvalidSyntaxException {
+ @SuppressWarnings("unchecked")
+ Dictionary<String,Object> config = context.getProperties();
+ this.context = context;
+ configure(config);
+ }
+
+ @Deactivate
+ public void deactivate(ComponentContext context) {
+ if (indexTracker != null) {
+ indexTracker.close();
+ }
+ }
+
+ @Override
+ public void configure(Dictionary<String,Object> config) throws
ConfigurationException {
+ trainingSetId = getRequiredStringParam(config, TRAINING_SET_ID);
+ topicUriField = getRequiredStringParam(config, TOPICS_URI_FIELD);
+ modificationDateField = getRequiredStringParam(config,
MODIFICATION_DATE_FIELD);
+ configureSolrCore(config, SOLR_CORE);
+ }
+
+ public static ConfiguredSolrCoreTracker
fromParameters(Dictionary<String,Object> config) throws ConfigurationException {
+ ConfiguredSolrCoreTracker engine = new SolrTrainingSet();
+ engine.configure(config);
+ return engine;
+ }
+
+ @Override
+ public boolean isUpdatable() {
+ return true;
+ }
+
+ @Override
+ public String registerExample(String exampleId, String text, List<String>
topics) throws TrainingSetException {
+ // TODO
+ return exampleId;
+ }
+
+ @Override
+ public Set<String> getUpdatedTopics(Calendar lastModificationDate) throws
TrainingSetException {
+ // TODO
+ return Collections.emptySet();
+ }
+
+ @Override
+ public Batch<String> getPositiveExamples(List<String> topics, Object
offset) throws TrainingSetException {
+ // TODO
+ return new Batch<String>(new ArrayList<String>(), false, null);
+ }
+
+ @Override
+ public Batch<String> getNegativeExamples(List<String> topics, Object
offset) throws TrainingSetException {
+ // TODO
+ return new Batch<String>(new ArrayList<String>(), false, null);
+ }
+
+ @Override
+ public void setBatchSize(int batchSize) {
+ this.batchSize = batchSize;
+ }
+}
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java?rev=1226915&r1=1226914&r2=1226915&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
Tue Jan 3 18:57:51 2012
@@ -91,6 +91,6 @@ public interface TrainingSet {
/**
* Number of examples to fetch at once.
*/
- public void setBatchSize();
+ void setBatchSize(int batchSize);
}
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1226915&r1=1226914&r2=1226915&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Tue Jan 3 18:57:51 2012
@@ -139,7 +139,7 @@ public class TopicEngineTest {
TopicClassificationEngine engine =
TopicClassificationEngine.fromParameters(config);
assertNotNull(engine);
assertEquals(engine.engineId, "test-engine");
- assertEquals(engine.solrServer, solrServer);
+ assertEquals(engine.getActiveSolrServer(), solrServer);
assertEquals(engine.topicUriField, "topic");
assertEquals(engine.similarityField, "text");
assertEquals(engine.acceptedLanguages, new ArrayList<String>());
@@ -189,7 +189,7 @@ public class TopicEngineTest {
engine.addTopic("http://example.com/topics/node2",
Arrays.asList("http://example.com/topics/root3"));
engine.addTopic("http://example.com/topics/node3",
Arrays.asList("http://example.com/topics/node1",
"http://example.com/topics/node2"));
-
+
// the roots were not impacted
assertEquals(0,
engine.getBroaderTopics("http://example.com/topics/root1").size());
assertEquals(0,
engine.getBroaderTopics("http://example.com/topics/root2").size());