Author: ogrisel
Date: Fri Jan 20 17:51:59 2012
New Revision: 1234031
URL: http://svn.apache.org/viewvc?rev=1234031&view=rev
Log:
STANBOL-197: collect example ids of errors
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1234031&r1=1234030&r2=1234031&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Fri Jan 20 17:51:59 2012
@@ -161,7 +161,9 @@ public class TopicClassificationEngine e
public static final String SOLR_NON_EMPTY_FIELD = "[\"\" TO *]";
- // TODO: make the following bounds configurable
+ // TODO: make the following fields configurable
+
+ public int MAX_EVALUATION_SAMPLES = 1000;
public int MAX_CHARS_PER_TOPIC = 100000;
@@ -906,7 +908,7 @@ public class TopicClassificationEngine e
falseNegativeExamples.add(example.id);
}
}
- } while (examples.hasMore); // TODO: put a bound on the number of examples
+ } while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);
List<String> falsePositiveExamples = new ArrayList<String>();
int falsePositives = 0;
@@ -917,7 +919,6 @@ public class TopicClassificationEngine e
examples = trainingSet.getNegativeExamples(topics, examples.nextOffset);
for (Example example : examples.items) {
if (!(offset % foldCount == foldIndex)) {
- // TODO: change the dataset API to include exampleId
// this example is not part of the test fold, skip it
offset++;
continue;
@@ -935,7 +936,7 @@ public class TopicClassificationEngine e
}
// we don't need to collect true negatives
}
- } while (examples.hasMore); // TODO: put a bound on the number of examples
+ } while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);
// compute precision, recall and f1 score for the current test fold and topic
float precision = 0;
@@ -991,9 +992,8 @@ public class TopicClassificationEngine e
addToList(fieldValues, recallField, recall);
increment(fieldValues, positiveSupportField, positiveSupport);
increment(fieldValues, negativeSupportField, negativeSupport);
- // TODO: handle supports too...
- // addToList(fieldValues, falsePositivesField, falsePositiveExamples);
- // addToList(fieldValues, falseNegativesField, falseNegativeExamples);
+ addToList(fieldValues, falsePositivesField, falsePositiveExamples);
+ addToList(fieldValues, falseNegativesField, falseNegativeExamples);
SolrInputDocument newEntry = new SolrInputDocument();
for (Map.Entry<String,Collection<Object>> entry : fieldValues.entrySet()) {
newEntry.addField(entry.getKey(), entry.getValue());
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml?rev=1234031&r1=1234030&r2=1234031&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml Fri Jan 20 17:51:59 2012
@@ -96,7 +96,7 @@
over several CV folds) -->
<field name="false_positives" type="string" indexed="false"
multiValued="true" stored="true" />
- <field name="negative_positives" type="string" indexed="false"
+ <field name="false_negatives" type="string" indexed="false"
multiValued="true" stored="true" />
</fields>
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1234031&r1=1234030&r2=1234031&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Fri Jan 20 17:51:59 2012
@@ -420,6 +420,14 @@ public class TopicEngineTest extends Emb
performanceEstimates = classifier.getPerformanceEstimates(topic);
assertTrue(performanceEstimates.uptodate);
assertGreater(performanceEstimates.precision, 0.5f);
+ assertNotNull(performanceEstimates.falsePositiveExampleIds);
+ assertNotNull(performanceEstimates.falseNegativeExampleIds);
+ if (performanceEstimates.precision < 1) {
+ assertFalse(performanceEstimates.falsePositiveExampleIds.isEmpty());
+ }
+ if (performanceEstimates.recall < 1) {
+ assertFalse(performanceEstimates.falseNegativeExampleIds.isEmpty());
+ }
assertGreater(performanceEstimates.recall, 0.5f);
assertGreater(performanceEstimates.f1, 0.65f);
assertGreater(performanceEstimates.positiveSupport, 10);