Author: ogrisel
Date: Fri Jan 20 11:27:08 2012
New Revision: 1233873
URL: http://svn.apache.org/viewvc?rev=1233873&view=rev
Log:
STANBOL-197: add missing info on evaluation support
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1233873&r1=1233872&r2=1233873&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Fri Jan 20 11:27:08 2012
@@ -872,6 +872,7 @@ public class TopicClassificationEngine e
List<String> falseNegativeExamples = new
ArrayList<String>();
int truePositives = 0;
int falseNegatives = 0;
+ int positiveSupport = 0;
offset = 0;
Batch<String> examples = Batch.emtpyBatch(String.class);
do {
@@ -882,6 +883,7 @@ public class TopicClassificationEngine e
offset++;
continue;
}
+ positiveSupport++;
offset++;
List<TopicSuggestion> suggestedTopics =
classifier.suggestTopics(example);
boolean match = false;
@@ -901,6 +903,7 @@ public class TopicClassificationEngine e
List<String> falsePositiveExamples = new
ArrayList<String>();
int falsePositives = 0;
+ int negativeSupport = 0;
offset = 0;
examples = Batch.emtpyBatch(String.class);
do {
@@ -912,6 +915,7 @@ public class TopicClassificationEngine e
offset++;
continue;
}
+ negativeSupport++;
offset++;
List<TopicSuggestion> suggestedTopics =
classifier.suggestTopics(example);
for (TopicSuggestion suggestedTopic :
suggestedTopics) {
@@ -934,8 +938,8 @@ public class TopicClassificationEngine e
if (truePositives != 0 || falseNegatives != 0) {
recall = truePositives / (float) (truePositives +
falseNegatives);
}
- updatePerformanceMetadata(topic, precision, recall,
falsePositiveExamples,
- falseNegativeExamples);
+ updatePerformanceMetadata(topic, precision, recall,
positiveSupport, negativeSupport,
+ falsePositiveExamples, falseNegativeExamples);
}
try {
getActiveSolrServer().commit();
@@ -960,6 +964,8 @@ public class TopicClassificationEngine e
protected void updatePerformanceMetadata(String topicId,
float precision,
float recall,
+ int positiveSupport,
+ int negativeSupport,
List<String>
falsePositiveExamples,
List<String>
falseNegativeExamples) throws ClassifierException {
SolrServer solrServer = getActiveSolrServer();
@@ -975,6 +981,8 @@ public class TopicClassificationEngine e
}
addToList(fieldValues, precisionField, precision);
addToList(fieldValues, recallField, recall);
+ increment(fieldValues, positiveSupportField, positiveSupport);
+ increment(fieldValues, negativeSupportField, negativeSupport);
// TODO: handle supports too...
// addToList(fieldValues, falsePositivesField, falsePositiveExamples);
// addToList(fieldValues, falseNegativesField, falseNegativeExamples);
@@ -992,6 +1000,17 @@ public class TopicClassificationEngine e
}
}
+ protected void increment(Map<String,Collection<Object>> fieldValues,
String fieldName, int count) {
+ // this collection is expected to be a singleton for this particular field
+ Collection<Object> oldValues = fieldValues.get(fieldName);
+ if (oldValues != null && !oldValues.isEmpty()) {
+ count += (Integer) oldValues.iterator().next();
+ }
+ Collection<Object> values = new ArrayList<Object>();
+ values.add(count);
+ fieldValues.put(fieldName, values);
+ }
+
@SuppressWarnings("unchecked")
protected void addToList(Map<String,Collection<Object>> fieldValues,
String fieldName, Object value) {
Collection<Object> values = new ArrayList<Object>();
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1233873&r1=1233872&r2=1233873&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Fri Jan 20 11:27:08 2012
@@ -360,18 +360,20 @@ public class TopicEngineTest extends Emb
assertFalse(performanceEstimates.uptodate);
// update the performance metadata manually
- classifier.updatePerformanceMetadata("urn:t/002", 0.76f, 0.60f,
Arrays.asList("ex14", "ex78"),
- Arrays.asList("ex34", "ex23", "ex89"));
+ classifier.updatePerformanceMetadata("urn:t/002", 0.76f, 0.60f, 34, 32,
+ Arrays.asList("ex14", "ex78"), Arrays.asList("ex34", "ex23",
"ex89"));
classifier.getActiveSolrServer().commit();
performanceEstimates = classifier.getPerformanceEstimates("urn:t/002");
assertTrue(performanceEstimates.uptodate);
assertEquals(0.76f, performanceEstimates.precision, 0.01);
assertEquals(0.60f, performanceEstimates.recall, 0.01);
assertEquals(0.67f, performanceEstimates.f1, 0.01);
+ assertEquals(34, performanceEstimates.positiveSupport);
+ assertEquals(32, performanceEstimates.negativeSupport);
assertTrue(classifier.getBroaderTopics("urn:t/002").contains("urn:t/001"));
// accumulate other folds statistics and compute means of statistics
- classifier.updatePerformanceMetadata("urn:t/002", 0.79f, 0.63f,
Arrays.asList("ex1", "ex5"),
+ classifier.updatePerformanceMetadata("urn:t/002", 0.79f, 0.63f, 10,
10, Arrays.asList("ex1", "ex5"),
Arrays.asList("ex3", "ex10", "ex11"));
classifier.getActiveSolrServer().commit();
performanceEstimates = classifier.getPerformanceEstimates("urn:t/002");
@@ -379,6 +381,8 @@ public class TopicEngineTest extends Emb
assertEquals(0.775f, performanceEstimates.precision, 0.01);
assertEquals(0.615f, performanceEstimates.recall, 0.01);
assertEquals(0.695f, performanceEstimates.f1, 0.01);
+ assertEquals(44, performanceEstimates.positiveSupport);
+ assertEquals(42, performanceEstimates.negativeSupport);
}
@Test
@@ -418,8 +422,8 @@ public class TopicEngineTest extends Emb
assertGreater(performanceEstimates.precision, 0.5f);
assertGreater(performanceEstimates.recall, 0.5f);
assertGreater(performanceEstimates.f1, 0.65f);
- // assertGreater(performanceEstimates.positiveSupport, 10);
- // assertGreater(performanceEstimates.negativeSupport, 90);
+ assertGreater(performanceEstimates.positiveSupport, 10);
+ assertGreater(performanceEstimates.negativeSupport, 10);
assertNotNull(performanceEstimates.evaluationDate);
}