Author: ogrisel
Date: Thu Jan 12 16:20:19 2012
New Revision: 1230619
URL: http://svn.apache.org/viewvc?rev=1230619&view=rev
Log:
STANBOL-197: invalidate model of parents when incrementally adding new topics
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1230619&r1=1230618&r2=1230619&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Thu Jan 12 16:20:19 2012
@@ -20,6 +20,7 @@ import static org.apache.stanbol.enhance
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
@@ -36,6 +37,7 @@ import org.apache.clerezza.rdf.core.Trip
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
@@ -44,7 +46,6 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.UpdateRequest;
@@ -376,9 +377,7 @@ public class TopicClassificationEngine e
return broaderTopics;
}
SolrServer solrServer = getActiveSolrServer();
- SolrQuery query = new SolrQuery();
- // use a filter query to avoid string escaping issues with special solr chars
- query.setQuery("{!field f=" + topicUriField + "}" + id);
+ SolrQuery query = new SolrQuery(topicUriField + ":" +
ClientUtils.escapeQueryChars(id));
query.addField(broaderField);
try {
for (SolrDocument result : solrServer.query(query).getResults()) {
@@ -451,6 +450,9 @@ public class TopicClassificationEngine e
modelEntry.addField(entryIdField, modelEntryId);
modelEntry.addField(topicUriField, topicId);
modelEntry.addField(entryTypeField, MODEL_ENTRY);
+ if (broaderTopics != null) {
+ invalidateModelFields(broaderTopics, modelUpdateDateField,
modelEvaluationDateField);
+ }
SolrServer solrServer = getActiveSolrServer();
try {
UpdateRequest request = new UpdateRequest();
@@ -463,9 +465,39 @@ public class TopicClassificationEngine e
solrCoreId);
throw new ClassifierException(msg, e);
}
+ }
- // TODO: invalidate the last_model_update_dt field of the metadata of the broader topics to schedule
- // them for the next coming model updates
+ /*
+ * The commit is the responsibility of the caller.
+ */
+ protected void invalidateModelFields(Collection<String> topicIds,
String... fieldNames) throws ClassifierException {
+ if (topicIds.isEmpty() || fieldNames.length == 0) {
+ return;
+ }
+ SolrServer solrServer = getActiveSolrServer();
+ List<String> invalidatedFields = Arrays.asList(fieldNames);
+ try {
+ UpdateRequest request = new UpdateRequest();
+ for (String topicId : topicIds) {
+ SolrQuery query = new SolrQuery(entryTypeField + ":" +
METADATA_ENTRY + " AND "
+ + topicUriField + ":" +
ClientUtils.escapeQueryChars(topicId));
+ for (SolrDocument result :
solrServer.query(query).getResults()) {
+ // there should be only one (or none: tolerated)
+ SolrInputDocument newEntry = new SolrInputDocument();
+ for (String fieldName : result.getFieldNames()) {
+ if (!invalidatedFields.contains(fieldName)) {
+ newEntry.setField(fieldName,
result.getFieldValues(fieldName));
+ }
+ }
+ request.add(newEntry);
+ }
+ }
+ solrServer.request(request);
+ } catch (Exception e) {
+ String msg = String.format("Error invalidating topics [%s] on Solr
Core '%s'",
+ StringUtils.join(topicIds, ", "), solrCoreId);
+ throw new ClassifierException(msg, e);
+ }
}
@Override
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1230619&r1=1230618&r2=1230619&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Thu Jan 12 16:20:19 2012
@@ -321,6 +321,12 @@ public class TopicEngineTest extends Bas
classifier.addTopic(law, null);
assertEquals(1, classifier.updateModel(true));
assertEquals(0, classifier.updateModel(true));
+
+ // registering new subtopics invalidate the models of the parent as well
+ classifier.addTopic("urn:topics/sportsmafia", Arrays.asList(football,
business));
+ assertEquals(3, classifier.updateModel(true));
+ assertEquals(0, classifier.updateModel(true));
+
}
protected Hashtable<String,Object> getDefaultClassifierConfigParams() {