Author: ogrisel
Date: Thu Jan 12 18:14:35 2012
New Revision: 1230679

URL: http://svn.apache.org/viewvc?rev=1230679&view=rev
Log:
STANBOL-197: WIP on classification performance evaluation

Added:
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
Modified:
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1230679&r1=1230678&r2=1230679&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
 Thu Jan 12 18:14:35 2012
@@ -65,6 +65,7 @@ import org.apache.stanbol.enhancer.servi
 import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
 import org.apache.stanbol.enhancer.topic.Batch;
+import org.apache.stanbol.enhancer.topic.ClassificationPerformance;
 import org.apache.stanbol.enhancer.topic.ClassifierException;
 import org.apache.stanbol.enhancer.topic.ConfiguredSolrCoreTracker;
 import org.apache.stanbol.enhancer.topic.TopicClassifier;
@@ -184,6 +185,11 @@ public class TopicClassificationEngine e
 
     protected String modelEntryIdField;
 
+    // customize the behavior of the classifier instance for model evaluation
+    protected int cvFoldIndex = 0;
+
+    protected int cvFoldCount = 0;
+
     @Activate
     protected void activate(ComponentContext context) throws 
ConfigurationException, InvalidSyntaxException {
         @SuppressWarnings("unchecked")
@@ -668,4 +674,32 @@ public class TopicClassificationEngine e
                                   + " training set hence cannot be updated.", 
engineId));
         }
     }
+
+    @Override
+    public void setCrossValidationInfo(int foldIndex, int foldCount) {
+        // TODO Auto-generated method stub
+
+    }
+
+    @Override
+    public TopicClassifier cloneWithEmdeddedModel() throws ClassifierException 
{
+        // TODO Auto-generated method stub
+        return null;
+    }
+
+    @Override
+    public void destroyModel() throws ClassifierException {
+        // TODO Auto-generated method stub
+
+    }
+
+    public void updatePerformanceEstimates(boolean incremental) throws 
ClassifierException, TrainingSetException {
+        
+    }
+
+    @Override
+    public ClassificationPerformance getPerformanceEstimates(String topic) 
throws ClassifierException {
+        // TODO Auto-generated method stub
+        return null;
+    }
 }

Added: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java?rev=1230679&view=auto
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
 (added)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
 Thu Jan 12 18:14:35 2012
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.topic;
+
+/**
+ * Data transfer object to report estimated classification performance of a 
classifier.
+ * 
+ * TODO: explain the metrics and give links to wikipedia
+ */
+public class ClassificationPerformance {
+
+    public final float precision;
+
+    public final float recall;
+
+    public final float f1;
+
+    // TODO: include ids of badly classified positive and negative examples?
+
+    public ClassificationPerformance(float precision, float recall, float f1) {
+        this.precision = precision;
+        this.recall = recall;
+        this.f1 = f1;
+    }
+
+}

Modified: 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java?rev=1230679&r1=1230678&r2=1230679&view=diff
==============================================================================
--- 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
 (original)
+++ 
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
 Thu Jan 12 18:14:35 2012
@@ -108,4 +108,39 @@ public interface TopicClassifier {
      * @return the number of updated topics
      */
     int updateModel(boolean incremental) throws TrainingSetException, 
ClassifierException;
+
+    /**
+     * Perform k-fold cross validation of the model to compute estimates of 
the precision, recall and f1
+     * score.
+     */
+    public void updatePerformanceEstimates(boolean incremental) throws 
ClassifierException,
+                                                               
TrainingSetException;
+
+    /**
+     * Tell the classifier which slice of data to keep aside while training 
for model evaluation using k-fold
+     * cross validation.
+     * 
+     * 
http://en.wikipedia.org/wiki/Cross-validation_%28statistics%29#K-fold_cross-validation
+     * 
+     * @param foldIndex
+     *            the id of the fold kept aside from training by this classifier 
instance.
+     * @param foldCount
+     *            the number of folds used in the cross validation process 
(typically 3 or 5). Set to 0 to
+     *            disable cross validation for this classifier.
+     */
+    void setCrossValidationInfo(int foldIndex, int foldCount);
+
+    /**
+     * Clone the classifier to get a new independent instance with an empty 
embedded model to be trained on a
+     * subsample of the dataset in a cross validation setting for model 
evaluation.
+     */
+    TopicClassifier cloneWithEmdeddedModel() throws ClassifierException;
+
+    /**
+     * Free the backing resources of the model (e.g. indices persisted on the 
hard drive or a DB) once the
+     * cross validation process is completed.
+     */
+    void destroyModel() throws ClassifierException;
+
+    ClassificationPerformance getPerformanceEstimates(String topic) throws 
ClassifierException;
 }


Reply via email to