Author: ogrisel
Date: Thu Jan  5 18:13:12 2012
New Revision: 1227729

URL: http://svn.apache.org/viewvc?rev=1227729&view=rev
Log:
STANBOL-197: implement incremental queries on SolrTrainingSet

Modified:
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java?rev=1227729&r1=1227728&r2=1227729&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/SolrTrainingSet.java Thu Jan  5 18:13:12 2012
@@ -16,14 +16,18 @@
  */
 package org.apache.stanbol.enhancer.topic;
 
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Date;
 import java.util.Dictionary;
+import java.util.GregorianCalendar;
 import java.util.List;
 import java.util.Set;
+import java.util.TimeZone;
+import java.util.TreeSet;
 import java.util.UUID;
 
 import org.apache.commons.lang.StringUtils;
@@ -60,6 +64,8 @@ import org.slf4j.LoggerFactory;
                      @Property(name = 
SolrTrainingSet.MODIFICATION_DATE_FIELD)})
 public class SolrTrainingSet extends ConfiguredSolrCoreTracker implements 
TrainingSet {
 
+    protected static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+
     public static final String TRAINING_SET_ID = 
"org.apache.stanbol.enhancer.topic.trainingset.id";
 
     public static final String SOLR_CORE = 
"org.apache.stanbol.enhancer.engine.topic.solrCore";
@@ -124,6 +130,18 @@ public class SolrTrainingSet extends Con
         return true;
     }
 
+    protected String utcIsoString(Date date) {
+        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+        return df.format(date);
+    }
+
+    protected String utcIsoString(Calendar calendar) {
+        if (!calendar.getTimeZone().equals(UTC)) {
+            calendar.setTimeZone(UTC);
+        }
+        return utcIsoString(calendar.getTime());
+    }
+
     @Override
     public String registerExample(String exampleId, String text, List<String> 
topics) throws TrainingSetException {
         if (exampleId == null || exampleId.isEmpty()) {
@@ -135,7 +153,8 @@ public class SolrTrainingSet extends Con
         if (topics != null) {
             doc.addField(topicUrisField, topics);
         }
-        doc.addField(modificationDateField, new Date());
+        String utcIsoDate = utcIsoString(new GregorianCalendar(UTC));
+        doc.addField(modificationDateField, utcIsoDate + "Z");
         SolrServer server = getActiveSolrServer();
         try {
             server.add(doc);
@@ -150,8 +169,48 @@ public class SolrTrainingSet extends Con
 
     @Override
     public Set<String> getUpdatedTopics(Calendar lastModificationDate) throws 
TrainingSetException {
-        // TODO
-        return Collections.emptySet();
+        TreeSet<String> collectedTopics = new TreeSet<String>();
+        SolrQuery query = new SolrQuery();
+        String utcIsoDate = utcIsoString(lastModificationDate);
+        String q = modificationDateField + ":[" + utcIsoDate + "Z TO *]";
+        String offset = null;
+        boolean done = false;
+        query.addSortField(exampleIdField, SolrQuery.ORDER.asc);
+        query.set("rows", batchSize + 1);
+        query.set("fl", exampleIdField + "," + topicUrisField);
+        while (!done) {
+            try {
+                if (offset != null) {
+                    q += " AND " + exampleIdField + ":[" + offset.toString() + 
" TO *]";
+                }
+                query.setQuery(q);
+                QueryResponse response = solrServer.query(query);
+                int count = 0;
+                for (SolrDocument result : response.getResults()) {
+                    if (count == batchSize) {
+                        offset = 
result.getFirstValue(exampleIdField).toString();
+                    } else {
+                        count++;
+                        Collection<Object> values = 
result.getFieldValues(topicUrisField);
+                        if (values == null) {
+                            continue;
+                        }
+                        for (Object value : values) {
+                            collectedTopics.add(value.toString());
+                        }
+                    }
+                }
+                if (count < batchSize) {
+                    done = true;
+                }
+            } catch (SolrServerException e) {
+                String msg = String.format(
+                    "Error while fetching topics for examples modified after 
'%s' on Solr Core '%s'.",
+                    utcIsoDate, solrCoreId);
+                throw new TrainingSetException(msg, e);
+            }
+        }
+        return collectedTopics;
     }
 
     @Override

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java?rev=1227729&r1=1227728&r2=1227729&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java Thu Jan  5 18:13:12 2012
@@ -22,6 +22,8 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
 import java.util.HashSet;
 import java.util.Hashtable;
 import java.util.Set;
@@ -173,6 +175,28 @@ public class TrainingSetTest extends Bas
         assertEquals(expectedCollectedText, collectedText);
     }
 
+    @Test
+    public void testIncrementalQueries() throws Exception {
+        Calendar date0 = new GregorianCalendar();
+        Set<String> updatedTopics = trainingSet.getUpdatedTopics(date0);
+        assertEquals(0, updatedTopics.size());
+
+        trainingSet.registerExample("example1", "Text of example1.", 
Arrays.asList(TOPIC_1));
+        trainingSet.registerExample("example2", "Text of example2.", 
Arrays.asList(TOPIC_1, TOPIC_2));
+
+        updatedTopics = trainingSet.getUpdatedTopics(date0);
+        assertEquals(2, updatedTopics.size());
+        assertTrue(updatedTopics.contains(TOPIC_1));
+        assertTrue(updatedTopics.contains(TOPIC_2));
+
+        // check that the registrations above are no longer reported when querying from a later date:
+        Thread.sleep(1000);
+
+        Calendar date1 = new GregorianCalendar();
+        updatedTopics = trainingSet.getUpdatedTopics(date1);
+        assertEquals(0, updatedTopics.size());
+    }
+
     protected Hashtable<String,Object> getDefaultConfigParams() {
         Hashtable<String,Object> config = new Hashtable<String,Object>();
         config.put(SolrTrainingSet.SOLR_CORE, trainingsetSolrServer);


Reply via email to