Author: ogrisel
Date: Thu Jan 12 16:20:19 2012
New Revision: 1230619

URL: http://svn.apache.org/viewvc?rev=1230619&view=rev
Log:
STANBOL-197: invalidate model of parents when incrementally adding new topics

Modified:
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1230619&r1=1230618&r2=1230619&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Thu Jan 12 16:20:19 2012
@@ -20,6 +20,7 @@ import static org.apache.stanbol.enhance
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Date;
@@ -36,6 +37,7 @@ import org.apache.clerezza.rdf.core.Trip
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
 import org.apache.felix.scr.annotations.ConfigurationPolicy;
@@ -44,7 +46,6 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.Property;
 import org.apache.felix.scr.annotations.Service;
 import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.request.UpdateRequest;
@@ -376,9 +377,7 @@ public class TopicClassificationEngine e
             return broaderTopics;
         }
         SolrServer solrServer = getActiveSolrServer();
-        SolrQuery query = new SolrQuery();
-        // use a filter query to avoid string escaping issues with special 
solr chars
-        query.setQuery("{!field f=" + topicUriField + "}" + id);
+        SolrQuery query = new SolrQuery(topicUriField + ":" + 
ClientUtils.escapeQueryChars(id));
         query.addField(broaderField);
         try {
             for (SolrDocument result : solrServer.query(query).getResults()) {
@@ -451,6 +450,9 @@ public class TopicClassificationEngine e
         modelEntry.addField(entryIdField, modelEntryId);
         modelEntry.addField(topicUriField, topicId);
         modelEntry.addField(entryTypeField, MODEL_ENTRY);
+        if (broaderTopics != null) {
+            invalidateModelFields(broaderTopics, modelUpdateDateField, 
modelEvaluationDateField);
+        }
         SolrServer solrServer = getActiveSolrServer();
         try {
             UpdateRequest request = new UpdateRequest();
@@ -463,9 +465,39 @@ public class TopicClassificationEngine e
                 solrCoreId);
             throw new ClassifierException(msg, e);
         }
+    }
 
-        // TODO: invalidate the last_model_update_dt field of the metadata of 
the broader topics to schedule
-        // them for the next coming model updates
+    /*
+     * Re-submit to Solr the metadata entry of each given topic with the listed fields left out. The commit is the responsibility of the caller.
+     */
+    protected void invalidateModelFields(Collection<String> topicIds, 
String... fieldNames) throws ClassifierException {
+        if (topicIds.isEmpty() || fieldNames.length == 0) {
+            return;
+        }
+        SolrServer solrServer = getActiveSolrServer();
+        List<String> invalidatedFields = Arrays.asList(fieldNames);
+        try {
+            UpdateRequest request = new UpdateRequest();
+            for (String topicId : topicIds) {
+                SolrQuery query = new SolrQuery(entryTypeField + ":" + 
METADATA_ENTRY + " AND "
+                                                + topicUriField + ":" + 
ClientUtils.escapeQueryChars(topicId));
+                for (SolrDocument result : 
solrServer.query(query).getResults()) {
+                    // there should be only one (or none: tolerated)
+                    SolrInputDocument newEntry = new SolrInputDocument();
+                    for (String fieldName : result.getFieldNames()) {
+                        if (!invalidatedFields.contains(fieldName)) {
+                            newEntry.setField(fieldName, 
result.getFieldValues(fieldName));
+                        }
+                    }
+                    request.add(newEntry);
+                }
+            }
+            solrServer.request(request);
+        } catch (Exception e) {
+            String msg = String.format("Error invalidating topics [%s] on Solr 
Core '%s'",
+                StringUtils.join(topicIds, ", "), solrCoreId);
+            throw new ClassifierException(msg, e);
+        }
     }
 
     @Override

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1230619&r1=1230618&r2=1230619&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Thu Jan 12 16:20:19 2012
@@ -321,6 +321,12 @@ public class TopicEngineTest extends Bas
         classifier.addTopic(law, null);
         assertEquals(1, classifier.updateModel(true));
         assertEquals(0, classifier.updateModel(true));
+        
+        // registering new subtopics invalidate the models of the parent as 
well
+        classifier.addTopic("urn:topics/sportsmafia", Arrays.asList(football, 
business));
+        assertEquals(3, classifier.updateModel(true));
+        assertEquals(0, classifier.updateModel(true));
+        
     }
 
     protected Hashtable<String,Object> getDefaultClassifierConfigParams() {


Reply via email to