Author: ogrisel
Date: Thu Jan 5 14:41:50 2012
New Revision: 1227614
URL: http://svn.apache.org/viewvc?rev=1227614&view=rev
Log:
STANBOL-197: working on the impl of SolrTrainingSet: example registration and
fetching
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java?rev=1227614&r1=1227613&r2=1227614&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
Thu Jan 5 14:41:50 2012
@@ -18,11 +18,15 @@ package org.apache.stanbol.enhancer.topi
import java.util.ArrayList;
import java.util.Calendar;
+import java.util.Collection;
import java.util.Collections;
+import java.util.Date;
import java.util.Dictionary;
import java.util.List;
import java.util.Set;
+import java.util.UUID;
+import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
@@ -30,6 +34,11 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrInputDocument;
import org.osgi.framework.InvalidSyntaxException;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
@@ -67,7 +76,11 @@ public class SolrTrainingSet extends Con
protected String trainingSetId;
- protected String topicUriField;
+ protected String exampleIdField;
+
+ protected String exampleTextField;
+
+ protected String topicUrisField;
protected String modificationDateField;
@@ -92,7 +105,9 @@ public class SolrTrainingSet extends Con
@Override
public void configure(Dictionary<String,Object> config) throws
ConfigurationException {
trainingSetId = getRequiredStringParam(config, TRAINING_SET_ID);
- topicUriField = getRequiredStringParam(config, TOPICS_URI_FIELD);
+ exampleIdField = getRequiredStringParam(config, EXAMPLE_ID_FIELD);
+ exampleTextField = getRequiredStringParam(config, EXAMPLE_TEXT_FIELD);
+ topicUrisField = getRequiredStringParam(config, TOPICS_URI_FIELD);
modificationDateField = getRequiredStringParam(config,
MODIFICATION_DATE_FIELD);
configureSolrCore(config, SOLR_CORE);
}
@@ -110,7 +125,25 @@ public class SolrTrainingSet extends Con
@Override
public String registerExample(String exampleId, String text, List<String>
topics) throws TrainingSetException {
- // TODO
+ if (exampleId == null || exampleId.isEmpty()) {
+ exampleId = UUID.randomUUID().toString();
+ }
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.addField(exampleIdField, exampleId);
+ doc.addField(exampleTextField, text);
+ if (topics != null) {
+ doc.addField(topicUrisField, topics);
+ }
+ doc.addField(modificationDateField, new Date());
+ SolrServer server = getActiveSolrServer();
+ try {
+ server.add(doc);
+ server.commit();
+ } catch (Exception e) {
+ String msg = String.format("Could not register example '%s' with
topics: ['%s']", exampleId,
+ StringUtils.join(topics, "', '"));
+ throw new TrainingSetException(msg, e);
+ }
return exampleId;
}
@@ -122,14 +155,51 @@ public class SolrTrainingSet extends Con
@Override
public Batch<String> getPositiveExamples(List<String> topics, Object
offset) throws TrainingSetException {
- // TODO
- return new Batch<String>(new ArrayList<String>(), false, null);
+ return getExamples(topics, offset, true);
}
@Override
public Batch<String> getNegativeExamples(List<String> topics, Object
offset) throws TrainingSetException {
- // TODO
- return new Batch<String>(new ArrayList<String>(), false, null);
+ return getExamples(topics, offset, false);
+ }
+
+ protected Batch<String> getExamples(List<String> topics, Object offset,
boolean positive) throws TrainingSetException {
+ List<String> items = new ArrayList<String>();
+ SolrServer solrServer = getActiveSolrServer();
+ SolrQuery query = new SolrQuery();
+ List<String> parts = new ArrayList<String>();
+ if (topics.isEmpty()) {
+ query.setQuery("*:*");
+ } else if (positive) {
+ for (String topic : topics) {
+ // use a nested query to avoid string escaping issues with
special solr chars
+ parts.add("_query_:\"{!field f=" + topicUrisField + "}" +
topic + "\"");
+ }
+ query.setQuery(StringUtils.join(parts, " OR "));
+ } else {
+ for (String topic : topics) {
+ // use a nested query to avoid string escaping issues with
special solr chars
+ parts.add("-_query_:\"{!field f=" + topicUrisField + "}" +
topic + "\"");
+ }
+ query.setQuery(StringUtils.join(parts, " AND "));
+ }
+ try {
+ for (SolrDocument result : solrServer.query(query).getResults()) {
+ Collection<Object> textValues =
result.getFieldValues(exampleTextField);
+ if (textValues == null) {
+ continue;
+ }
+ for (Object value : textValues) {
+ items.add(value.toString());
+ }
+ }
+ } catch (SolrServerException e) {
+ String msg = String.format(
+ "Error while fetching positive examples for topics ['%s'] on
Solr Core '%s'.",
+ StringUtils.join(topics, "', '"), solrCoreId);
+ throw new TrainingSetException(msg, e);
+ }
+ return new Batch<String>(items, false, null);
}
@Override
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java?rev=1227614&r1=1227613&r2=1227614&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TrainingSetException.java
Thu Jan 5 14:41:50 2012
@@ -29,4 +29,8 @@ public class TrainingSetException extend
super(message);
}
+ public TrainingSetException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
}
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java?rev=1227614&r1=1227613&r2=1227614&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
Thu Jan 5 14:41:50 2012
@@ -76,6 +76,9 @@ public class TrainingSetTest extends Bas
Batch<String> examples = trainingSet.getPositiveExamples(new
ArrayList<String>(), null);
assertEquals(examples.items.size(), 0);
assertFalse(examples.hasMore);
+ examples = trainingSet.getNegativeExamples(new ArrayList<String>(),
null);
+ assertEquals(examples.items.size(), 0);
+ assertFalse(examples.hasMore);
examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_1),
null);
assertEquals(examples.items.size(), 0);
assertFalse(examples.hasMore);
@@ -87,29 +90,29 @@ public class TrainingSetTest extends Bas
assertFalse(examples.hasMore);
}
- //@Test
+ @Test
public void testStoringExamples() throws ConfigurationException,
TrainingSetException {
trainingSet.registerExample("example1", "Text of example1.",
Arrays.asList(TOPIC_1));
trainingSet.registerExample("example2", "Text of example2.",
Arrays.asList(TOPIC_1, TOPIC_2));
trainingSet.registerExample("example3", "Text of example3.", new
ArrayList<String>());
- Batch<String> examples =
trainingSet.getPositiveExamples(Arrays.asList(TOPIC_1, TOPIC_3), null);
- assertEquals(2, examples.items.size());
- assertEquals(examples.items, Arrays.asList("Text of example1.", "Text
of example2."));
- assertFalse(examples.hasMore);
-
- examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_2),
null);
+ Batch<String> examples =
trainingSet.getPositiveExamples(Arrays.asList(TOPIC_2), null);
assertEquals(1, examples.items.size());
assertEquals(examples.items, Arrays.asList("Text of example2."));
assertFalse(examples.hasMore);
+ examples = trainingSet.getPositiveExamples(Arrays.asList(TOPIC_1,
TOPIC_3), null);
+ assertEquals(2, examples.items.size());
+ assertEquals(examples.items, Arrays.asList("Text of example1.", "Text
of example2."));
+ assertFalse(examples.hasMore);
+
examples = trainingSet.getNegativeExamples(Arrays.asList(TOPIC_1),
null);
assertEquals(1, examples.items.size());
assertEquals(examples.items, Arrays.asList("Text of example3."));
assertFalse(examples.hasMore);
}
- //@Test
+ // @Test
public void testBatchingExamples() throws ConfigurationException,
TrainingSetException {
for (int i = 0; i < 28; i++) {
trainingSet.registerExample("example" + i, "Text of example" + i +
".", Arrays.asList(TOPIC_1));