Author: ogrisel
Date: Fri Jan 20 17:51:59 2012
New Revision: 1234031

URL: http://svn.apache.org/viewvc?rev=1234031&view=rev
Log:
STANBOL-197: collect example ids of errors

Modified:
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml
    
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1234031&r1=1234030&r2=1234031&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Fri Jan 20 17:51:59 2012
@@ -161,7 +161,9 @@ public class TopicClassificationEngine e
 
     public static final String SOLR_NON_EMPTY_FIELD = "[\"\" TO *]";
 
-    // TODO: make the following bounds configurable
+    // TODO: make the following fields configurable
+
+    public int MAX_EVALUATION_SAMPLES = 1000;
 
     public int MAX_CHARS_PER_TOPIC = 100000;
 
@@ -906,7 +908,7 @@ public class TopicClassificationEngine e
                                 falseNegativeExamples.add(example.id);
                             }
                         }
-                    } while (examples.hasMore); // TODO: put a bound on the number of examples
+                    } while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);
 
                     List<String> falsePositiveExamples = new ArrayList<String>();
                     int falsePositives = 0;
@@ -917,7 +919,6 @@ public class TopicClassificationEngine e
                         examples = trainingSet.getNegativeExamples(topics, examples.nextOffset);
                         for (Example example : examples.items) {
                             if (!(offset % foldCount == foldIndex)) {
-                                // TODO: change the dataset API to include exampleId
                                 // this example is not part of the test fold, skip it
                                 offset++;
                                 continue;
@@ -935,7 +936,7 @@ public class TopicClassificationEngine e
                             }
                             // we don't need to collect true negatives
                         }
-                    } while (examples.hasMore); // TODO: put a bound on the number of examples
+                    } while (examples.hasMore && offset < MAX_EVALUATION_SAMPLES);
 
                     // compute precision, recall and f1 score for the current test fold and topic
                     float precision = 0;
@@ -991,9 +992,8 @@ public class TopicClassificationEngine e
                 addToList(fieldValues, recallField, recall);
                 increment(fieldValues, positiveSupportField, positiveSupport);
                 increment(fieldValues, negativeSupportField, negativeSupport);
-                // TODO: handle supports too...
-                // addToList(fieldValues, falsePositivesField, falsePositiveExamples);
-                // addToList(fieldValues, falseNegativesField, falseNegativeExamples);
+                addToList(fieldValues, falsePositivesField, falsePositiveExamples);
+                addToList(fieldValues, falseNegativesField, falseNegativeExamples);
                 SolrInputDocument newEntry = new SolrInputDocument();
                 for (Map.Entry<String,Collection<Object>> entry : fieldValues.entrySet()) {
                     newEntry.addField(entry.getKey(), entry.getValue());

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml?rev=1234031&r1=1234030&r2=1234031&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/resources/classifier/schema.xml Fri Jan 20 17:51:59 2012
@@ -96,7 +96,7 @@
       over several CV folds) -->
     <field name="false_positives" type="string" indexed="false"
       multiValued="true" stored="true" />
-    <field name="negative_positives" type="string" indexed="false"
+    <field name="false_negatives" type="string" indexed="false"
       multiValued="true" stored="true" />
 
   </fields>

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1234031&r1=1234030&r2=1234031&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Fri Jan 20 17:51:59 2012
@@ -420,6 +420,14 @@ public class TopicEngineTest extends Emb
             performanceEstimates = classifier.getPerformanceEstimates(topic);
             assertTrue(performanceEstimates.uptodate);
             assertGreater(performanceEstimates.precision, 0.5f);
+            assertNotNull(performanceEstimates.falsePositiveExampleIds);
+            assertNotNull(performanceEstimates.falseNegativeExampleIds);
+            if (performanceEstimates.precision < 1) {
+                assertFalse(performanceEstimates.falsePositiveExampleIds.isEmpty());
+            }
+            if (performanceEstimates.recall < 1) {
+                assertFalse(performanceEstimates.falseNegativeExampleIds.isEmpty());
+            }
             assertGreater(performanceEstimates.recall, 0.5f);
             assertGreater(performanceEstimates.f1, 0.65f);
             assertGreater(performanceEstimates.positiveSupport, 10);


Reply via email to