Author: ogrisel
Date: Thu Jan  5 14:41:50 2012
New Revision: 1227614

URL: http://svn.apache.org/viewvc?rev=1227614&view=rev
Log:
STANBOL-197: working on the impl of SolrTrainingSet: example registration and fetching

Modified:
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java?rev=1227614&r1=1227613&r2=1227614&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java Thu Jan  5 14:41:50 2012
@@ -18,11 +18,15 @@ package org.apache.stanbol.enhancer.topi
 
 import java.util.ArrayList;
 import java.util.Calendar;
+import java.util.Collection;
 import java.util.Collections;
+import java.util.Date;
 import java.util.Dictionary;
 import java.util.List;
 import java.util.Set;
+import java.util.UUID;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.ConfigurationPolicy;
@@ -30,6 +34,11 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Properties;
 import org.apache.felix.scr.annotations.Property;
 import org.apache.felix.scr.annotations.Service;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrInputDocument;
 import org.osgi.framework.InvalidSyntaxException;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.component.ComponentContext;
@@ -67,7 +76,11 @@ public class SolrTrainingSet extends Con
 
     protected String trainingSetId;
 
-    protected String topicUriField;
+    protected String exampleIdField;
+
+    protected String exampleTextField;
+
+    protected String topicUrisField;
 
     protected String modificationDateField;
 
@@ -92,7 +105,9 @@ public class SolrTrainingSet extends Con
     @Override
     public void configure(Dictionary<String,Object> config) throws ConfigurationException {
         trainingSetId = getRequiredStringParam(config, TRAINING_SET_ID);
-        topicUriField = getRequiredStringParam(config, TOPICS_URI_FIELD);
+        exampleIdField = getRequiredStringParam(config, EXAMPLE_ID_FIELD);
+        exampleTextField = getRequiredStringParam(config, EXAMPLE_TEXT_FIELD);
+        topicUrisField = getRequiredStringParam(config, TOPICS_URI_FIELD);
         modificationDateField = getRequiredStringParam(config, MODIFICATION_DATE_FIELD);
         configureSolrCore(config, SOLR_CORE);
     }
@@ -110,7 +125,25 @@ public class SolrTrainingSet extends Con
 
     @Override
     public String registerExample(String exampleId, String text, List<String> topics) throws TrainingSetException {
-        // TODO
+        if (exampleId == null || exampleId.isEmpty()) {
+            exampleId = UUID.randomUUID().toString();
+        }
+        SolrInputDocument doc = new SolrInputDocument();
+        doc.addField(exampleIdField, exampleId);
+        doc.addField(exampleTextField, text);
+        if (topics != null) {
+            doc.addField(topicUrisField, topics);
+        }
+        doc.addField(modificationDateField, new Date());
+        SolrServer server = getActiveSolrServer();
+        try {
+            server.add(doc);
+            server.commit();
+        } catch (Exception e) {
+            String msg = String.format("Could not register example '%s' with topics: ['%s']", exampleId,
+                StringUtils.join(topics, "', '"));
+            throw new TrainingSetException(msg, e);
+        }
         return exampleId;
     }
 
@@ -122,14 +155,51 @@ public class SolrTrainingSet extends Con
 
     @Override
     public Batch<String> getPositiveExamples(List<String> topics, Object offset) throws TrainingSetException {
-        // TODO
-        return new Batch<String>(new ArrayList<String>(), false, null);
+        return getExamples(topics, offset, true);
     }
 
     @Override
     public Batch<String> getNegativeExamples(List<String> topics, Object offset) throws TrainingSetException {
-        // TODO
-        return new Batch<String>(new ArrayList<String>(), false, null);
+        return getExamples(topics, offset, false);
+    }
+
+    protected Batch<String> getExamples(List<String> topics, Object offset, boolean positive) throws TrainingSetException {
+        List<String> items = new ArrayList<String>();
+        SolrServer solrServer = getActiveSolrServer();
+        SolrQuery query = new SolrQuery();
+        List<String> parts = new ArrayList<String>();
+        if (topics.isEmpty()) {
+            query.setQuery("*:*");
+        } else if (positive) {
+            for (String topic : topics) {
+                // use a nested query to avoid string escaping issues with special solr chars
+                parts.add("_query_:\"{!field f=" + topicUrisField + "}" + topic + "\"");
+            }
+            query.setQuery(StringUtils.join(parts, " OR "));
+        } else {
+            for (String topic : topics) {
+                // use a nested query to avoid string escaping issues with special solr chars
+                parts.add("-_query_:\"{!field f=" + topicUrisField + "}" + topic + "\"");
+            }
+            query.setQuery(StringUtils.join(parts, " AND "));
+        }
+        try {
+            for (SolrDocument result : solrServer.query(query).getResults()) {
+                Collection<Object> textValues = result.getFieldValues(exampleTextField);
+                if (textValues == null) {
+                    continue;
+                }
+                for (Object value : textValues) {
+                    items.add(value.toString());
+                }
+            }
+        } catch (SolrServerException e) {
+            String msg = String.format(
+                "Error while fetching positive examples for topics ['%s'] on Solr Core '%s'.",
+                StringUtils.join(topics, "', '"), solrCoreId);
+            throw new TrainingSetException(msg, e);
+        }
+        return new Batch<String>(items, false, null);
     }
 
     @Override

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java?rev=1227614&r1=1227613&r2=1227614&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java Thu Jan  5 14:41:50 2012
@@ -29,4 +29,8 @@ public class TrainingSetException extend
         super(message);
     }
 
+    public TrainingSetException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
 }

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java?rev=1227614&r1=1227613&r2=1227614&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java Thu Jan  5 14:41:50 2012
@@ -76,6 +76,9 @@ public class TrainingSetTest extends Bas
         Batch<String> examples = trainingSet.getPositiveExamples(new ArrayList<String>(), null);
         assertEquals(examples.items.size(), 0);
         assertFalse(examples.hasMore);
+        examples = trainingSet.getNegativeExamples(new ArrayList<String>(), null);
+        assertEquals(examples.items.size(), 0);
+        assertFalse(examples.hasMore);
         examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_1), null);
         assertEquals(examples.items.size(), 0);
         assertFalse(examples.hasMore);
@@ -87,29 +90,29 @@ public class TrainingSetTest extends Bas
         assertFalse(examples.hasMore);
     }
 
-    //@Test
+    @Test
     public void testStoringExamples() throws ConfigurationException, TrainingSetException {
         trainingSet.registerExample("example1", "Text of example1.", Arrays.asList(TOPIC_1));
         trainingSet.registerExample("example2", "Text of example2.", Arrays.asList(TOPIC_1, TOPIC_2));
         trainingSet.registerExample("example3", "Text of example3.", new ArrayList<String>());
 
-        Batch<String> examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_1, TOPIC_3), null);
-        assertEquals(2, examples.items.size());
-        assertEquals(examples.items, Arrays.asList("Text of example1.", "Text of example2."));
-        assertFalse(examples.hasMore);
-
-        examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_2), null);
+        Batch<String> examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_2), null);
         assertEquals(1, examples.items.size());
         assertEquals(examples.items, Arrays.asList("Text of example2."));
         assertFalse(examples.hasMore);
 
+        examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_1, TOPIC_3), null);
+        assertEquals(2, examples.items.size());
+        assertEquals(examples.items, Arrays.asList("Text of example1.", "Text of example2."));
+        assertFalse(examples.hasMore);
+
         examples = trainingSet.getNegativeExamples(Arrays.asList(TOPIC_1), null);
         assertEquals(1, examples.items.size());
         assertEquals(examples.items, Arrays.asList("Text of example3."));
         assertFalse(examples.hasMore);
     }
 
-    //@Test
+    // @Test
     public void testBatchingExamples() throws ConfigurationException, TrainingSetException {
         for (int i = 0; i < 28; i++) {
             trainingSet.registerExample("example" + i, "Text of example" + i + ".", Arrays.asList(TOPIC_1));


Reply via email to