Author: ogrisel
Date: Tue Jan  3 18:57:51 2012
New Revision: 1226915

URL: http://svn.apache.org/viewvc?rev=1226915&view=rev
Log:
STANBOL-197: factor out common code for Solr core tracking and configuration and 
add a new Solr-backed training set stub implementation

Added:
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
Modified:
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
    
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1226915&r1=1226914&r2=1226915&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
 Tue Jan  3 18:57:51 2012
@@ -20,7 +20,6 @@ import static org.apache.stanbol.enhance
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Dictionary;
@@ -51,8 +50,6 @@ import org.apache.solr.common.SolrDocume
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.MoreLikeThisParams;
-import org.apache.stanbol.commons.solr.IndexReference;
-import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
 import org.apache.stanbol.commons.solr.utils.StreamQueryRequest;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
@@ -62,6 +59,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 import org.apache.stanbol.enhancer.topic.ClassifierException;
+import org.apache.stanbol.enhancer.topic.ConfiguredSolrCoreTracker;
 import org.apache.stanbol.enhancer.topic.TopicClassifier;
 import org.apache.stanbol.enhancer.topic.TopicSuggestion;
 import org.apache.stanbol.enhancer.topic.TrainingSet;
@@ -91,7 +89,7 @@ import org.slf4j.LoggerFactory;
                      @Property(name = TopicClassificationEngine.BROADER_FIELD),
                      @Property(name = 
TopicClassificationEngine.MATERIALIZED_PATH_FIELD),
                      @Property(name = 
TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD)})
-public class TopicClassificationEngine implements EnhancementEngine, 
ServiceProperties, TopicClassifier {
+public class TopicClassificationEngine extends ConfiguredSolrCoreTracker 
implements EnhancementEngine, ServiceProperties, TopicClassifier {
 
     public static final String ENGINE_ID = 
"org.apache.stanbol.enhancer.engine.id";
 
@@ -115,17 +113,10 @@ public class TopicClassificationEngine i
 
     protected String engineId;
 
-    protected String solrCoreId;
-
     protected List<String> acceptedLanguages;
 
     protected Integer order = ORDERING_EXTRACTION_ENHANCEMENT;
 
-    protected RegisteredSolrServerTracker indexTracker;
-
-    // instance of solrServer to use if not using the OSGi service tracker 
(e.g. for tests)
-    protected SolrServer solrServer;
-
     protected String similarityField;
 
     protected String topicUriField;
@@ -136,8 +127,6 @@ public class TopicClassificationEngine i
 
     protected String materializedPathField;
 
-    protected ComponentContext context;
-
     protected int numTopics = 10;
 
     protected TrainingSet trainingSet;
@@ -162,28 +151,8 @@ public class TopicClassificationEngine i
         similarityField = getRequiredStringParam(config, SIMILARTITY_FIELD);
         topicUriField = getRequiredStringParam(config, TOPIC_URI_FIELD);
         acceptedLanguages = getStringListParan(config, LANGUAGES);
-        if (config.get(SOLR_CORE) instanceof SolrServer) {
-            // Bind a fixed Solr server client instead of doing dynamic OSGi 
lookup using the service tracker.
-            // This can be useful both for unit-testing .
-            // The Solr server is expected to be configured with the 
MoreLikeThisQueryHandler and the matching
-            // fields from the configuration.
-            solrServer = (SolrServer) config.get(SOLR_CORE);
-        } else {
-            String solrCoreId = getRequiredStringParam(config, SOLR_CORE);
-            if (context == null) {
-                throw new ConfigurationException(SOLR_CORE, SOLR_CORE
-                                                            + " should be a 
SolrServer instance for using"
-                                                            + " the engine 
without any OSGi context. Got: "
-                                                            + solrCoreId);
-            }
-            try {
-                indexTracker = new 
RegisteredSolrServerTracker(context.getBundleContext(),
-                        IndexReference.parse(solrCoreId));
-                indexTracker.open();
-            } catch (InvalidSyntaxException e) {
-                throw new ConfigurationException(SOLR_CORE, e.getMessage(), e);
-            }
-        }
+        configureSolrCore(config, SOLR_CORE);
+
         // optional fields, can be null
         broaderField = (String) config.get(BROADER_FIELD);
         materializedPathField = (String) config.get(TOPIC_URI_FIELD);
@@ -194,41 +163,6 @@ public class TopicClassificationEngine i
         }
     }
 
-    protected String getRequiredStringParam(Dictionary<String,Object> 
parameters, String paramName) throws ConfigurationException {
-        return getRequiredStringParam(parameters, paramName, null);
-    }
-
-    protected String getRequiredStringParam(Dictionary<String,Object> config,
-                                            String paramName,
-                                            String defaultValue) throws 
ConfigurationException {
-        Object paramValue = config.get(paramName);
-        if (paramValue == null) {
-            if (defaultValue == null) {
-                throw new ConfigurationException(paramName, paramName + " is a 
required parameter.");
-            } else {
-                return defaultValue;
-            }
-        }
-        return paramValue.toString();
-    }
-
-    @SuppressWarnings("unchecked")
-    protected List<String> getStringListParan(Dictionary<String,Object> 
config, String paramName) throws ConfigurationException {
-        Object paramValue = config.get(paramName);
-        if (paramValue == null) {
-            return new ArrayList<String>();
-        } else if (paramValue instanceof String) {
-            return Arrays.asList(paramValue.toString().split(",\\s*"));
-        } else if (paramValue instanceof String[]) {
-            return Arrays.asList((String[]) paramValue);
-        } else if (paramValue instanceof List) {
-            return (List<String>) paramValue;
-        } else {
-            throw new ConfigurationException(paramName, String.format(
-                "Unexpected parameter type for '%s': %s", paramName, 
paramValue));
-        }
-    }
-
     @Override
     public int canEnhance(ContentItem ci) throws EngineException {
         String text = getTextFromContentItem(ci);
@@ -267,13 +201,6 @@ public class TopicClassificationEngine i
         }
     }
 
-    /**
-     * @return the manually bound solrServer instance or the one tracked by 
the OSGi service tracker.
-     */
-    protected SolrServer getActiveSolrServer() {
-        return solrServer != null ? solrServer : indexTracker.getService();
-    }
-
     @Override
     public Map<String,Object> getServiceProperties() {
         return 
Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING,

Added: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java?rev=1226915&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
 (added)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ConfiguredSolrCoreTracker.java
 Tue Jan  3 18:57:51 2012
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.topic;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Dictionary;
+import java.util.List;
+
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.stanbol.commons.solr.IndexReference;
+import org.apache.stanbol.commons.solr.RegisteredSolrServerTracker;
+import org.osgi.framework.InvalidSyntaxException;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+
+/**
+ * Helper class that factors out common code for OSGi components tracking a Solr 
core
+ */
+public abstract class ConfiguredSolrCoreTracker {
+
+    protected String solrCoreId;
+
+    protected RegisteredSolrServerTracker indexTracker;
+
+    // instance of solrServer to use if not using the OSGi service tracker 
(e.g. for tests)
+    protected SolrServer solrServer;
+
+    protected ComponentContext context;
+
+    abstract public void configure(Dictionary<String,Object> config) throws 
ConfigurationException;
+
+    protected String getRequiredStringParam(Dictionary<String,Object> 
parameters, String paramName) throws ConfigurationException {
+        return getRequiredStringParam(parameters, paramName, null);
+    }
+
+    protected String getRequiredStringParam(Dictionary<String,Object> config,
+                                            String paramName,
+                                            String defaultValue) throws 
ConfigurationException {
+        Object paramValue = config.get(paramName);
+        if (paramValue == null) {
+            if (defaultValue == null) {
+                throw new ConfigurationException(paramName, paramName + " is a 
required parameter.");
+            } else {
+                return defaultValue;
+            }
+        }
+        return paramValue.toString();
+    }
+
+    @SuppressWarnings("unchecked")
+    protected List<String> getStringListParan(Dictionary<String,Object> 
config, String paramName) throws ConfigurationException {
+        Object paramValue = config.get(paramName);
+        if (paramValue == null) {
+            return new ArrayList<String>();
+        } else if (paramValue instanceof String) {
+            return Arrays.asList(paramValue.toString().split(",\\s*"));
+        } else if (paramValue instanceof String[]) {
+            return Arrays.asList((String[]) paramValue);
+        } else if (paramValue instanceof List) {
+            return (List<String>) paramValue;
+        } else {
+            throw new ConfigurationException(paramName, String.format(
+                "Unexpected parameter type for '%s': %s", paramName, 
paramValue));
+        }
+    }
+
+    /**
+     * @return the manually bound solrServer instance or the one tracked by 
the OSGi service tracker.
+     */
+    public SolrServer getActiveSolrServer() {
+        return solrServer != null ? solrServer : indexTracker.getService();
+    }
+
+    protected void configureSolrCore(Dictionary<String,Object> config, String 
solrCoreProperty) throws ConfigurationException {
+        if (config.get(solrCoreProperty) instanceof SolrServer) {
+            // Bind a fixed Solr server client instead of doing dynamic OSGi 
lookup using the service tracker.
+            // This can be useful for unit-testing.
+            solrServer = (SolrServer) config.get(solrCoreProperty);
+        } else {
+            String solrCoreId = getRequiredStringParam(config, 
solrCoreProperty);
+            if (context == null) {
+                throw new ConfigurationException(solrCoreProperty,
+                        solrCoreProperty + " should be a SolrServer instance 
for using"
+                                + " the engine without any OSGi context. Got: 
" + solrCoreId);
+            }
+            try {
+                indexTracker = new 
RegisteredSolrServerTracker(context.getBundleContext(),
+                        IndexReference.parse(solrCoreId));
+                indexTracker.open();
+            } catch (InvalidSyntaxException e) {
+                throw new ConfigurationException(solrCoreProperty, 
e.getMessage(), e);
+            }
+        }
+    }
+
+}

Added: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java?rev=1226915&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
 (added)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
 Tue Jan  3 18:57:51 2012
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.topic;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Dictionary;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.felix.scr.annotations.Activate;
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Deactivate;
+import org.apache.felix.scr.annotations.Properties;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.osgi.framework.InvalidSyntaxException;
+import org.osgi.service.cm.ConfigurationException;
+import org.osgi.service.component.ComponentContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Implementation of the {@code TrainingSet} interface that uses a Solr Core 
as backend to store and retrieve
+ * the text examples used to train a classifier.
+ */
+@Component(metatype = true, immediate = true, configurationFactory = true, 
policy = ConfigurationPolicy.REQUIRE)
+@Service
+@Properties(value = {@Property(name = SolrTrainingSet.TRAINING_SET_ID),
+                     @Property(name = SolrTrainingSet.SOLR_CORE),
+                     @Property(name = SolrTrainingSet.EXAMPLE_ID_FIELD),
+                     @Property(name = SolrTrainingSet.EXAMPLE_TEXT_FIELD),
+                     @Property(name = SolrTrainingSet.TOPICS_URI_FIELD),
+                     @Property(name = 
SolrTrainingSet.MODIFICATION_DATE_FIELD)})
+public class SolrTrainingSet extends ConfiguredSolrCoreTracker implements 
TrainingSet {
+
+    public static final String TRAINING_SET_ID = 
"org.apache.stanbol.enhancer.topic.trainingset.id";
+
+    public static final String SOLR_CORE = 
"org.apache.stanbol.enhancer.engine.topic.solrCore";
+
+    public static final String TOPICS_URI_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.topicUriField";
+
+    public static final String EXAMPLE_ID_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.exampleIdField";
+
+    public static final String EXAMPLE_TEXT_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.exampleTextField";
+
+    public static final String MODIFICATION_DATE_FIELD = 
"org.apache.stanbol.enhancer.engine.topic.modificiationDateField";
+
+    @SuppressWarnings("unused")
+    private static final Logger log = 
LoggerFactory.getLogger(SolrTrainingSet.class);
+
+    protected String trainingSetId;
+
+    protected String topicUriField;
+
+    protected String modificationDateField;
+
+    // TODO: make me configurable using an OSGi property
+    protected int batchSize = 100;
+
+    @Activate
+    protected void activate(ComponentContext context) throws 
ConfigurationException, InvalidSyntaxException {
+        @SuppressWarnings("unchecked")
+        Dictionary<String,Object> config = context.getProperties();
+        this.context = context;
+        configure(config);
+    }
+
+    @Deactivate
+    public void deactivate(ComponentContext context) {
+        if (indexTracker != null) {
+            indexTracker.close();
+        }
+    }
+
+    @Override
+    public void configure(Dictionary<String,Object> config) throws 
ConfigurationException {
+        trainingSetId = getRequiredStringParam(config, TRAINING_SET_ID);
+        topicUriField = getRequiredStringParam(config, TOPICS_URI_FIELD);
+        modificationDateField = getRequiredStringParam(config, 
MODIFICATION_DATE_FIELD);
+        configureSolrCore(config, SOLR_CORE);
+    }
+
+    public static ConfiguredSolrCoreTracker 
fromParameters(Dictionary<String,Object> config) throws ConfigurationException {
+        ConfiguredSolrCoreTracker engine = new SolrTrainingSet();
+        engine.configure(config);
+        return engine;
+    }
+
+    @Override
+    public boolean isUpdatable() {
+        return true;
+    }
+
+    @Override
+    public String registerExample(String exampleId, String text, List<String> 
topics) throws TrainingSetException {
+        // TODO
+        return exampleId;
+    }
+
+    @Override
+    public Set<String> getUpdatedTopics(Calendar lastModificationDate) throws 
TrainingSetException {
+        // TODO
+        return Collections.emptySet();
+    }
+
+    @Override
+    public Batch<String> getPositiveExamples(List<String> topics, Object 
offset) throws TrainingSetException {
+        // TODO
+        return new Batch<String>(new ArrayList<String>(), false, null);
+    }
+
+    @Override
+    public Batch<String> getNegativeExamples(List<String> topics, Object 
offset) throws TrainingSetException {
+        // TODO
+        return new Batch<String>(new ArrayList<String>(), false, null);
+    }
+
+    @Override
+    public void setBatchSize(int batchSize) {
+        this.batchSize = batchSize;
+    }
+}

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java?rev=1226915&r1=1226914&r2=1226915&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSet.java
 Tue Jan  3 18:57:51 2012
@@ -91,6 +91,6 @@ public interface TrainingSet {
     /**
      * Number of examples to fetch at once.
      */
-    public void setBatchSize();
+    void setBatchSize(int batchSize);
 
 }

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1226915&r1=1226914&r2=1226915&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
 Tue Jan  3 18:57:51 2012
@@ -139,7 +139,7 @@ public class TopicEngineTest {
         TopicClassificationEngine engine = 
TopicClassificationEngine.fromParameters(config);
         assertNotNull(engine);
         assertEquals(engine.engineId, "test-engine");
-        assertEquals(engine.solrServer, solrServer);
+        assertEquals(engine.getActiveSolrServer(), solrServer);
         assertEquals(engine.topicUriField, "topic");
         assertEquals(engine.similarityField, "text");
         assertEquals(engine.acceptedLanguages, new ArrayList<String>());
@@ -189,7 +189,7 @@ public class TopicEngineTest {
         engine.addTopic("http://example.com/topics/node2", 
Arrays.asList("http://example.com/topics/root3"));
         engine.addTopic("http://example.com/topics/node3",
             Arrays.asList("http://example.com/topics/node1", 
"http://example.com/topics/node2"));
-        
+
         // the root where not impacted
         assertEquals(0, 
engine.getBroaderTopics("http://example.com/topics/root1").size());
         assertEquals(0, 
engine.getBroaderTopics("http://example.com/topics/root2").size());


Reply via email to