Author: ogrisel
Date: Mon Jan 16 17:41:54 2012
New Revision: 1232074
URL: http://svn.apache.org/viewvc?rev=1232074&view=rev
Log:
STANBOL-197: more progress on the evaluation implementation
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1232074&r1=1232073&r2=1232074&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Mon Jan 16 17:41:54 2012
@@ -26,6 +26,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Dictionary;
+import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
@@ -613,10 +614,18 @@ public class TopicClassificationEngine e
}
final boolean incr = incremental;
int updatedTopics = batchOverTopics(new BatchProcessor<SolrDocument>()
{
+ int offset = 0;
+
@Override
public int process(List<SolrDocument> batch) throws
ClassifierException, TrainingSetException {
int processed = 0;
for (SolrDocument result : batch) {
+ offset++;
+ if (cvFoldCount != 0 && offset % cvFoldCount ==
cvFoldIndex) {
+ // we are performing a cross validation session and
this example belongs to the test
+ // fold hence should be skipped
+ continue;
+ }
String topicId =
result.getFirstValue(topicUriField).toString();
List<String> impactedTopics = new ArrayList<String>();
impactedTopics.add(topicId);
@@ -727,12 +736,28 @@ public class TopicClassificationEngine e
}
protected Dictionary<String,Object>
getCanonicalConfiguration(EmbeddedSolrServer server) {
- // TODO
- return null;
+ Hashtable<String,Object> config = new Hashtable<String,Object>();
+ config.put(TopicClassificationEngine.ENGINE_ID, engineId +
"-evaluation");
+ config.put(TopicClassificationEngine.ENTRY_ID_FIELD, "entry_id");
+ config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
+ config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD,
"model_entry_id");
+ config.put(TopicClassificationEngine.SOLR_CORE, server);
+ config.put(TopicClassificationEngine.TOPIC_URI_FIELD, "topic");
+ config.put(TopicClassificationEngine.SIMILARTITY_FIELD,
"classifier_features");
+ config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
+ config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD,
"last_update_dt");
+ config.put(TopicClassificationEngine.MODEL_EVALUATION_DATE_FIELD,
"last_evaluation_dt");
+ config.put(TopicClassificationEngine.PRECISION_FIELD, "precision");
+ config.put(TopicClassificationEngine.RECALL_FIELD, "recall");
+ config.put(TopicClassificationEngine.F1_FIELD, "f1");
+ config.put(TopicClassificationEngine.POSITIVE_SUPPORT_FIELD,
"positive_support");
+ config.put(TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD,
"negative_support");
+ config.put(TopicClassificationEngine.FALSE_POSITIVES_FIELD,
"false_positives");
+ config.put(TopicClassificationEngine.FALSE_NEGATIVES_FIELD,
"false_negatives");
+ return config;
}
protected EmbeddedSolrServer makeTopicClassifierSolrServer(File folder) {
-
// TODO
return null;
}
@@ -768,9 +793,9 @@ public class TopicClassificationEngine e
return updatedTopics;
}
- protected void performCVFold(TopicClassificationEngine classifier, int
cvFoldIndex, int cvFoldCount) throws ConfigurationException,
-
TrainingSetException,
-
ClassifierException {
+ protected void performCVFold(final TopicClassificationEngine classifier,
int cvFoldIndex, int cvFoldCount) throws ConfigurationException,
+
TrainingSetException,
+
ClassifierException {
log.info(String.format("Performing evaluation CV iteration %d/%d on
classifier %s", cvFoldIndex + 1,
cvFoldCount, engineId));
@@ -783,8 +808,21 @@ public class TopicClassificationEngine e
// iterate over all the topics to register them in the evaluation
classifier
batchOverTopics(new BatchProcessor<SolrDocument>() {
@Override
- public int process(List<SolrDocument> batch) {
- return 0;
+ public int process(List<SolrDocument> batch) throws
ClassifierException {
+ for (SolrDocument topicEntry : batch) {
+ String topicId =
topicEntry.getFirstValue(topicUriField).toString();
+ Collection<Object> broader =
topicEntry.getFieldValues(broaderField);
+ if (broader == null) {
+ classifier.addTopic(topicId, null);
+ } else {
+ List<String> broaderTopics = new ArrayList<String>();
+ for (Object broaderTopic : broader) {
+ broaderTopics.add(broaderTopic.toString());
+ }
+ classifier.addTopic(topicId, broaderTopics);
+ }
+ }
+ return batch.size();
}
});