Author: srowen
Date: Mon Mar  7 08:36:36 2011
New Revision: 1078712

URL: http://svn.apache.org/viewvc?rev=1078712&view=rev
Log:
MAHOUT-619

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java?rev=1078712&r1=1078711&r2=1078712&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
 Mon Mar  7 08:36:36 2011
@@ -63,5 +63,13 @@ public interface IRStatistics {
    * </p>
    */
   double getFNMeasure(double n);
+
+  /**
+   * <p>
+   * See <a 
href="http://en.wikipedia.org/wiki/Discounted_cumulative_gain#Normalized_DCG";>
+   * Normalized Discounted Cumulative Gain</a>.
+   * </p>
+   */
+  double getNormalizedDiscountedCumulativeGain();
   
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=1078712&r1=1078711&r2=1078712&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
 Mon Mar  7 08:36:36 2011
@@ -61,6 +61,8 @@ import com.google.common.base.Preconditi
 public final class GenericRecommenderIRStatsEvaluator implements 
RecommenderIRStatsEvaluator {
   
   private static final Logger log = 
LoggerFactory.getLogger(GenericRecommenderIRStatsEvaluator.class);
+
+  private static final double LOG2 = Math.log(2.0);
   
   /**
    * Pass as "relevanceThreshold" argument to
@@ -83,6 +85,7 @@ public final class GenericRecommenderIRS
                                int at,
                                double relevanceThreshold,
                                double evaluationPercentage) throws 
TasteException {
+
     Preconditions.checkArgument(recommenderBuilder != null, 
"recommenderBuilder is null");
     Preconditions.checkArgument(dataModel != null, "dataModel is null");
     Preconditions.checkArgument(at >= 1, "at must be at least 1");
@@ -93,74 +96,114 @@ public final class GenericRecommenderIRS
     RunningAverage precision = new FullRunningAverage();
     RunningAverage recall = new FullRunningAverage();
     RunningAverage fallOut = new FullRunningAverage();
+    RunningAverage nDCG = new FullRunningAverage();
+
     LongPrimitiveIterator it = dataModel.getUserIDs();
     while (it.hasNext()) {
+
       long userID = it.nextLong();
-      if (random.nextDouble() < evaluationPercentage) {
-        long start = System.currentTimeMillis();
-        FastIDSet relevantItemIDs = new FastIDSet(at);
-        PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
-        int size = prefs.length();
-        if (size < 2 * at) {
-          // Really not enough prefs to meaningfully evaluate this user
-          continue;
+
+      if (random.nextDouble() >= evaluationPercentage) {
+        // Skipped
+        continue;
+      }
+
+      long start = System.currentTimeMillis();
+
+      PreferenceArray prefs = dataModel.getPreferencesFromUser(userID);
+      int size = prefs.length();
+      if (size < 2 * at) {
+        // Really not enough prefs to meaningfully evaluate this user
+        continue;
+      }
+
+      FastIDSet relevantItemIDs = new FastIDSet(at);
+
+      // List some most-preferred items that would count as (most) "relevant" 
results
+      double theRelevanceThreshold = Double.isNaN(relevanceThreshold) ? 
computeThreshold(prefs) : relevanceThreshold;
+
+      prefs.sortByValueReversed();
+
+      for (int i = 0; (i < size) && (relevantItemIDs.size() < at); i++) {
+        if (prefs.getValue(i) >= theRelevanceThreshold) {
+          relevantItemIDs.add(prefs.getItemID(i));
+        }
+      }
+
+      int numRelevantItems = relevantItemIDs.size();
+      if (numRelevantItems <= 0) {
+        continue;
+      }
+
+      FastByIDMap<PreferenceArray> trainingUsers = new 
FastByIDMap<PreferenceArray>(dataModel.getNumUsers());
+      LongPrimitiveIterator it2 = dataModel.getUserIDs();
+      while (it2.hasNext()) {
+        processOtherUser(userID, relevantItemIDs, trainingUsers, 
it2.nextLong(), dataModel);
+      }
+
+      DataModel trainingModel = dataModelBuilder == null ? new 
GenericDataModel(trainingUsers)
+          : dataModelBuilder.buildDataModel(trainingUsers);
+      Recommender recommender = 
recommenderBuilder.buildRecommender(trainingModel);
+
+      try {
+        trainingModel.getPreferencesFromUser(userID);
+      } catch (NoSuchUserException nsee) {
+        continue; // Oops we excluded all prefs for the user -- just move on
+      }
+
+      int intersectionSize = 0;
+      List<RecommendedItem> recommendedItems = recommender.recommend(userID, 
at, rescorer);
+      for (RecommendedItem recommendedItem : recommendedItems) {
+        if (relevantItemIDs.contains(recommendedItem.getItemID())) {
+          intersectionSize++;
         }
+      }
+
+      int numRecommendedItems = recommendedItems.size();
+
+      // Precision
+      if (numRecommendedItems > 0) {
+        precision.addDatum((double) intersectionSize / (double) 
numRecommendedItems);
+      }
 
-        // List some most-preferred items that would count as (most) 
"relevant" results
-        double theRelevanceThreshold =
-            Double.isNaN(relevanceThreshold) ? computeThreshold(prefs) : 
relevanceThreshold;
-        prefs.sortByValueReversed();
-        for (int i = 0; (i < size) && (relevantItemIDs.size() < at); i++) {
-          if (prefs.getValue(i) >= theRelevanceThreshold) {
-            relevantItemIDs.add(prefs.getItemID(i));
-          }
+      // Recall
+      recall.addDatum((double) intersectionSize / (double) numRelevantItems);
+
+      // Fall-out
+      if (numRelevantItems < size) {
+        fallOut.addDatum((double) (numRecommendedItems - intersectionSize)
+                         / (double) (numItems - numRelevantItems));
+      }
+
+      // nDCG
+      // In computing, assume relevant IDs have relevance 1 and others 0
+      double cumulativeGain = 0.0;
+      double idealizedGain = 0.0;
+      for (int i = 0; i < recommendedItems.size(); i++) {
+        RecommendedItem item = recommendedItems.get(i);
+        double discount = i == 0 ? 1.0 : 1.0 / log2(i + 1);
+        if (relevantItemIDs.contains(item.getItemID())) {
+          cumulativeGain += discount;
         }
-        int numRelevantItems = relevantItemIDs.size();
-        if (numRelevantItems > 0) {
-          FastByIDMap<PreferenceArray> trainingUsers = new 
FastByIDMap<PreferenceArray>(dataModel
-              .getNumUsers());
-          LongPrimitiveIterator it2 = dataModel.getUserIDs();
-          while (it2.hasNext()) {
-            processOtherUser(userID, relevantItemIDs, trainingUsers, it2
-                .nextLong(), dataModel);
-          }
-
-          DataModel trainingModel = dataModelBuilder == null ? new 
GenericDataModel(trainingUsers)
-              : dataModelBuilder.buildDataModel(trainingUsers);
-          Recommender recommender = 
recommenderBuilder.buildRecommender(trainingModel);
-
-          try {
-            trainingModel.getPreferencesFromUser(userID);
-          } catch (NoSuchUserException nsee) {
-            continue; // Oops we excluded all prefs for the user -- just move 
on
-          }
-
-          int intersectionSize = 0;
-          List<RecommendedItem> recommendedItems = 
recommender.recommend(userID, at, rescorer);
-          for (RecommendedItem recommendedItem : recommendedItems) {
-            if (relevantItemIDs.contains(recommendedItem.getItemID())) {
-              intersectionSize++;
-            }
-          }
-          int numRecommendedItems = recommendedItems.size();
-          if (numRecommendedItems > 0) {
-            precision.addDatum((double) intersectionSize / (double) 
numRecommendedItems);
-          }
-          recall.addDatum((double) intersectionSize / (double) 
numRelevantItems);
-          if (numRelevantItems < size) {
-            fallOut.addDatum((double) (numRecommendedItems - intersectionSize)
-                             / (double) (numItems - numRelevantItems));
-          }
-
-          long end = System.currentTimeMillis();
-          GenericRecommenderIRStatsEvaluator.log.info("Evaluated with user {} 
in {}ms", userID, end - start);
-          log.info("Precision/recall/fall-out: {} / {} / {}",
-            new Object[] {precision.getAverage(), recall.getAverage(), 
fallOut.getAverage()});
+        // otherwise we're multiplying discount by relevance 0 so it doesn't 
do anything
+
+        // Ideally results would be ordered with all relevant ones first, so 
this theoretical
+        // ideal list starts with number of relevant items equal to the total 
number of relevant items
+        if (i < relevantItemIDs.size()) {
+          idealizedGain += discount;
         }
       }
+      nDCG.addDatum(cumulativeGain / idealizedGain);
+
+      long end = System.currentTimeMillis();
+
+      log.info("Evaluated with user {} in {}ms", userID, end - start);
+      log.info("Precision/recall/fall-out/nDCG: {} / {} / {} / {}", new 
Object[] {
+          precision.getAverage(), recall.getAverage(), fallOut.getAverage(), 
nDCG.getAverage()
+      });
     }
 
-    return new IRStatisticsImpl(precision.getAverage(), recall.getAverage(), 
fallOut.getAverage());
+    return new IRStatisticsImpl(precision.getAverage(), recall.getAverage(), 
fallOut.getAverage(), nDCG.getAverage());
   }
   
   private static void processOtherUser(long id,
@@ -200,5 +243,9 @@ public final class GenericRecommenderIRS
     }
     return stdDev.getAverage() + stdDev.getStandardDeviation();
   }
+
+  private static double log2(double value) {
+    return Math.log(value) / LOG2;
+  }
   
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java?rev=1078712&r1=1078711&r2=1078712&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/IRStatisticsImpl.java
 Mon Mar  7 08:36:36 2011
@@ -28,14 +28,17 @@ public final class IRStatisticsImpl impl
   private final double precision;
   private final double recall;
   private final double fallOut;
+  private final double ndcg;
   
-  IRStatisticsImpl(double precision, double recall, double fallOut) {
+  IRStatisticsImpl(double precision, double recall, double fallOut, double 
ndcg) {
     Preconditions.checkArgument(precision >= 0.0 && precision <= 1.0, "Illegal 
precision: " + precision);
     Preconditions.checkArgument(recall >= 0.0 && recall <= 1.0, "Illegal 
recall: " + recall);
     Preconditions.checkArgument(fallOut >= 0.0 && fallOut <= 1.0, "Illegal 
fallOut: " + fallOut);
+    Preconditions.checkArgument(fallOut >= 0.0 && fallOut <= 1.0, "Illegal 
nDCG: " + ndcg);
     this.precision = precision;
     this.recall = recall;
     this.fallOut = fallOut;
+    this.ndcg = ndcg;
   }
   
   @Override
@@ -63,10 +66,16 @@ public final class IRStatisticsImpl impl
     double sum = n * precision + recall;
     return sum == 0.0 ? Double.NaN : (1.0 + n) * precision * recall / sum;
   }
+
+  @Override
+  public double getNormalizedDiscountedCumulativeGain() {
+    return ndcg;
+  }
   
   @Override
   public String toString() {
-    return "IRStatisticsImpl[precision:" + precision + ",recall:" + recall + 
",fallOut:" + fallOut + ']';
+    return "IRStatisticsImpl[precision:" + precision + ",recall:" + recall + 
",fallOut:"
+        + fallOut + ",nDCG:" + ndcg + ']';
   }
   
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java?rev=1078712&r1=1078711&r2=1078712&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluatorImplTest.java
 Mon Mar  7 08:36:36 2011
@@ -44,6 +44,7 @@ public final class GenericRecommenderIRS
     assertEquals(0.75, stats.getPrecision(), EPSILON);
     assertEquals(0.75, stats.getRecall(), EPSILON);
     assertEquals(0.75, stats.getF1Measure(), EPSILON);
+    assertEquals(0.75, stats.getNormalizedDiscountedCumulativeGain(), EPSILON);
   }
 
 }
\ No newline at end of file


Reply via email to