Author: ogrisel
Date: Thu Jan 12 18:14:35 2012
New Revision: 1230679
URL: http://svn.apache.org/viewvc?rev=1230679&view=rev
Log:
STANBOL-197: WIP on classification performance evaluation
Added:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1230679&r1=1230678&r2=1230679&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
Thu Jan 12 18:14:35 2012
@@ -65,6 +65,7 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.apache.stanbol.enhancer.topic.Batch;
+import org.apache.stanbol.enhancer.topic.ClassificationPerformance;
import org.apache.stanbol.enhancer.topic.ClassifierException;
import org.apache.stanbol.enhancer.topic.ConfiguredSolrCoreTracker;
import org.apache.stanbol.enhancer.topic.TopicClassifier;
@@ -184,6 +185,11 @@ public class TopicClassificationEngine e
protected String modelEntryIdField;
+ // customize the behavior of the classifier instance for model evaluation
+ protected int cvFoldIndex = 0;
+
+ protected int cvFoldCount = 0;
+
@Activate
protected void activate(ComponentContext context) throws
ConfigurationException, InvalidSyntaxException {
@SuppressWarnings("unchecked")
@@ -668,4 +674,32 @@ public class TopicClassificationEngine e
+ " training set hence cannot be updated.",
engineId));
}
}
+
+ @Override
+ public void setCrossValidationInfo(int foldIndex, int foldCount) {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public TopicClassifier cloneWithEmdeddedModel() throws ClassifierException
{
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public void destroyModel() throws ClassifierException {
+ // TODO Auto-generated method stub
+
+ }
+
+ public void updatePerformanceEstimates(boolean incremental) throws
ClassifierException, TrainingSetException {
+
+ }
+
+ @Override
+ public ClassificationPerformance getPerformanceEstimates(String topic)
throws ClassifierException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Added:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java?rev=1230679&view=auto
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
(added)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/ClassificationPerformance.java
Thu Jan 12 18:14:35 2012
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.topic;
+
+/**
+ * Data transfer object to report estimated classification performance of a
classifier.
+ *
+ * TODO: explain the metrics and give links to Wikipedia
+ */
+public class ClassificationPerformance {
+
+ public final float precision;
+
+ public final float recall;
+
+ public final float f1;
+
+ // TODO: include ids of badly classified positive and negative examples?
+
+ public ClassificationPerformance(float precision, float recall, float f1) {
+ this.precision = precision;
+ this.recall = recall;
+ this.f1 = f1;
+ }
+
+}
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
URL:
http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java?rev=1230679&r1=1230678&r2=1230679&view=diff
==============================================================================
---
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
(original)
+++
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
Thu Jan 12 18:14:35 2012
@@ -108,4 +108,39 @@ public interface TopicClassifier {
* @return the number of updated topics
*/
int updateModel(boolean incremental) throws TrainingSetException,
ClassifierException;
+
+ /**
+ * Perform k-fold cross validation of the model to compute estimates of
the precision, recall and f1
+ * score.
+ */
+ public void updatePerformanceEstimates(boolean incremental) throws
ClassifierException,
+
TrainingSetException;
+
+ /**
+ * Tell the classifier which slice of data to keep aside while training
for model evaluation using k-fold
+ * cross validation.
+ *
+ *
http://en.wikipedia.org/wiki/Cross-validation_%28statistics%29#K-fold_cross-validation
+ *
+ * @param foldIndex
+ * the id of the fold kept aside (not used for training) by this classifier
instance.
+ * @param foldCount
+ * the number of folds used in the cross validation process
(typically 3 or 5). Set to 0 to
+ * disable cross validation for this classifier.
+ */
+ void setCrossValidationInfo(int foldIndex, int foldCount);
+
+ /**
+ * Clone the classifier to get a new independent instance with an empty
embedded model to be trained on a
+ * subsample of the dataset in a cross validation setting for model
evaluation.
+ */
+ TopicClassifier cloneWithEmdeddedModel() throws ClassifierException;
+
+ /**
+ * Free the backing resources of the model (e.g. indices persisted on the
hard drive or a DB) once the
+ * cross validation process is completed.
+ */
+ void destroyModel() throws ClassifierException;
+
+ ClassificationPerformance getPerformanceEstimates(String topic) throws
ClassifierException;
}