Author: srowen
Date: Fri Mar 18 17:54:11 2011
New Revision: 1082997

URL: http://svn.apache.org/viewvc?rev=1082997&view=rev
Log:
Iterate further on the KDD Cup support code now that I've tried it out and can 
see where it could be better

Added:
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
      - copied, changed from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
      - copied, changed from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
      - copied, changed from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
      - copied, changed from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
Removed:
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
Modified:
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
 Fri Mar 18 17:54:11 2011
@@ -33,6 +33,8 @@ import org.apache.mahout.cf.taste.model.
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.iterator.SamplingIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * <p>An {@link DataModel} which reads into memory any of the KDD Cup's rating 
files; it is really
@@ -44,6 +46,9 @@ import org.apache.mahout.common.iterator
  */
 public final class KDDCupDataModel implements DataModel {
 
+  private static final Logger log = 
LoggerFactory.getLogger(KDDCupDataModel.class);
+
+  private final File dataFileDirectory;
   private final DataModel delegate;
 
   /**
@@ -62,6 +67,8 @@ public final class KDDCupDataModel imple
 
     Preconditions.checkArgument(!Double.isNaN(samplingRate) && samplingRate > 
0.0 && samplingRate <= 1.0);
 
+    dataFileDirectory = dataFile.getParentFile();
+
     Iterator<Pair<PreferenceArray,long[]>> dataIterator = new 
DataFileIterator(dataFile);
     if (samplingRate < 1.0) {
       dataIterator = new 
SamplingIterator<Pair<PreferenceArray,long[]>>(dataIterator, samplingRate);
@@ -95,19 +102,28 @@ public final class KDDCupDataModel imple
       delegate = new GenericDataModel(userData);
     }
 
+    Runtime runtime = Runtime.getRuntime();
+    log.info("Loaded data model in about {}MB heap", (runtime.totalMemory() - 
runtime.freeMemory()) / 1000000);
   }
 
+  public File getDataFileDirectory() {
+    return dataFileDirectory;
+  }
 
   public static File getTrainingFile(File dataFileDirectory) {
-    return getFile(dataFileDirectory, "train");
+    return getFile(dataFileDirectory, "trainIdx");
   }
 
   public static File getValidationFile(File dataFileDirectory) {
-    return getFile(dataFileDirectory, "validation");
+    return getFile(dataFileDirectory, "validationIdx");
   }
 
   public static File getTestFile(File dataFileDirectory) {
-    return getFile(dataFileDirectory, "test");
+    return getFile(dataFileDirectory, "testIdx");
+  }
+
+  public static File getTrackFile(File dataFileDirectory) {
+    return getFile(dataFileDirectory, "trackData");
   }
 
   private static File getFile(File dataFileDirectory, String prefix) {
@@ -115,9 +131,11 @@ public final class KDDCupDataModel imple
     for (int set : new int[] {1,2}) {
       // Works on sample data from before contest or real data
       for (String firstLinesOrNot : new String[] {"", ".firstLines"}) {
-        File dataFile = new File(dataFileDirectory, prefix + "Idx" + set + 
firstLinesOrNot + ".txt");
-        if (dataFile.exists()) {
-          return dataFile;
+        for (String gzippedOrNot : new String[] {".gz", ""}) {
+          File dataFile = new File(dataFileDirectory, prefix + set + 
firstLinesOrNot + ".txt" + gzippedOrNot);
+          if (dataFile.exists()) {
+            return dataFile;
+          }
         }
       }
     }

Added: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java?rev=1082997&view=auto
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
 (added)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
 Fri Mar 18 17:54:11 2011
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track1;
+
+import java.util.concurrent.Callable;
+
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+final class Track1Callable implements Callable<byte[]> {
+
+  private static final Logger log = 
LoggerFactory.getLogger(Track1Callable.class);
+
+  private final Recommender recommender;
+  private final PreferenceArray userTest;
+
+  Track1Callable(Recommender recommender, PreferenceArray userTest) {
+    this.recommender = recommender;
+    this.userTest = userTest;
+  }
+
+  @Override
+  public byte[] call() throws TasteException {
+    long userID = userTest.get(0).getUserID();
+    byte[] result = new byte[userTest.length()];
+    for (int i = 0; i < userTest.length(); i++) {
+      long itemID = userTest.getItemID(i);
+      double estimate;
+      try {
+        estimate = recommender.estimatePreference(userID, itemID);
+      } catch (NoSuchItemException nsie) {
+        // OK in the sample data provided before the contest, should never 
happen otherwise
+        log.warn("Unknown item {}; OK unless this is the real contest data", 
itemID);
+        continue;
+      }
+
+      int scaledEstimate = (int) (estimate * 2.55);
+      if (scaledEstimate > 255) {
+        scaledEstimate = 255;
+      } else if (scaledEstimate < 0) {
+        scaledEstimate = 0;
+      }
+
+      result[i] = (byte) scaledEstimate;
+    }
+    return result;
+  }
+
+}

Copied: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
 (from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java)
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
 Fri Mar 18 17:54:11 2011
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track1;
 
 import java.util.Collection;
 import java.util.List;
@@ -24,20 +24,20 @@ import org.apache.mahout.cf.taste.common
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
 import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
-import org.apache.mahout.cf.taste.impl.similarity.UncenteredCosineSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.recommender.IDRescorer;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Recommender;
 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
 
-public final class KDDCupRecommender implements Recommender {
+public final class Track1Recommender implements Recommender {
 
   private final Recommender recommender;
 
-  public KDDCupRecommender(DataModel dataModel) throws TasteException {
+  public Track1Recommender(DataModel dataModel) throws TasteException {
     // Change this to whatever you like!
-    ItemSimilarity similarity = new CachingItemSimilarity(new 
UncenteredCosineSimilarity(dataModel), dataModel);
+    ItemSimilarity similarity = new CachingItemSimilarity(new 
LogLikelihoodSimilarity(dataModel), dataModel);
     recommender = new GenericItemBasedRecommender(dataModel, similarity);
   }
   

Copied: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
 (from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java)
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
 Fri Mar 18 17:54:11 2011
@@ -15,18 +15,18 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track1;
 
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.recommender.Recommender;
 
-public final class KDDCupRecommenderBuilder implements RecommenderBuilder {
+final class Track1RecommenderBuilder implements RecommenderBuilder {
   
   @Override
   public Recommender buildRecommender(DataModel dataModel) throws 
TasteException {
-    return new KDDCupRecommender(dataModel);
+    return new Track1Recommender(dataModel);
   }
   
 }
\ No newline at end of file

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
 Fri Mar 18 17:54:11 2011
@@ -24,7 +24,6 @@ import org.apache.commons.cli2.OptionExc
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.example.TasteOptionParser;
 import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupRecommenderBuilder;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -46,7 +45,7 @@ public final class Track1RecommenderEval
     }
     Track1RecommenderEvaluator evaluator = new 
Track1RecommenderEvaluator(dataFileDirectory);
     DataModel model = new 
KDDCupDataModel(KDDCupDataModel.getTrainingFile(dataFileDirectory));
-    double evaluation = evaluator.evaluate(new KDDCupRecommenderBuilder(),
+    double evaluation = evaluator.evaluate(new Track1RecommenderBuilder(),
       null,
       model,
       Float.NaN,

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
 Fri Mar 18 17:54:11 2011
@@ -17,21 +17,26 @@
 
 package org.apache.mahout.cf.taste.example.kddcup.track1;
 
+import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
 import org.apache.mahout.cf.taste.example.kddcup.DataFileIterable;
 import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupRecommender;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.common.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * <p>Runs "track 1" of the KDD Cup competition using whatever recommender is 
inside {@link KDDCupRecommender}
+ * <p>Runs "track 1" of the KDD Cup competition using whatever recommender is 
inside {@link Track1Recommender}
  * and attempts to output the result in the correct contest format.</p>
  *
  * <p>Run as: <code>Track1Runner [track 1 data file directory] [output 
file]</code></p>
@@ -50,45 +55,42 @@ public final class Track1Runner {
       throw new IllegalArgumentException("Bad data file directory: " + 
dataFileDirectory);
     }
 
+    long start = System.currentTimeMillis();
+
     KDDCupDataModel model = new 
KDDCupDataModel(KDDCupDataModel.getTrainingFile(dataFileDirectory));
-    KDDCupRecommender recommender = new KDDCupRecommender(model);
+    Track1Recommender recommender = new Track1Recommender(model);
 
-    File outFile = new File(args[1]);
-    OutputStream out = new FileOutputStream(outFile);
+    long end = System.currentTimeMillis();
+    log.info("Loaded model in {}s", (end - start) / 1000);
+    start = end;
 
+    Collection<Track1Callable> callables = new ArrayList<Track1Callable>();
     for (Pair<PreferenceArray,long[]> tests : new 
DataFileIterable(KDDCupDataModel.getTestFile(dataFileDirectory))) {
-
       PreferenceArray userTest = tests.getFirst();
-      long userID = userTest.get(0).getUserID();
-      for (int i = 0; i < userTest.length(); i++) {
-        long itemID = userTest.getItemID(i);
-        double estimate;
-        try {
-          estimate = recommender.estimatePreference(userID, itemID);
-        } catch (NoSuchItemException nsie) {
-          // OK in the sample data provided before the contest, should never 
happen otherwise
-          log.warn("Unknown item {}; OK unless this is the real contest data", 
itemID);
-          continue;
-        }
-
-        log.info("Estimate for user {}, item {}: ", new Object[] {userID, 
itemID, estimate});
-
-        int scaledEstimate = (int) ((estimate / 100.0) * 255.0);
-        if (scaledEstimate > 255) {
-          scaledEstimate = 255;
-        } else if (scaledEstimate < 0) {
-          scaledEstimate = 0;
-        }
-
-        out.write(scaledEstimate);
+      callables.add(new Track1Callable(recommender, userTest));
+    }
 
+    int cores = Runtime.getRuntime().availableProcessors();
+    log.info("Running on {} cores", cores);
+    ExecutorService executor = Executors.newFixedThreadPool(cores);
+    List<Future<byte[]>> results = executor.invokeAll(callables);
+    executor.shutdown();
+
+    end = System.currentTimeMillis();
+    log.info("Ran recommendations in {}s", (end - start) / 1000);
+    start = end;
+
+    OutputStream out = new BufferedOutputStream(new FileOutputStream(new 
File(args[1])));
+    for (Future<byte[]> result : results) {
+      for (byte estimate : result.get()) {
+        out.write(estimate);
       }
-
     }
-
+    out.flush();
     out.close();
 
+    end = System.currentTimeMillis();
+    log.info("Wrote output in {}s", (end - start) / 1000);
   }
 
-
 }

Added: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java?rev=1082997&view=auto
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
 (added)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
 Fri Mar 18 17:54:11 2011
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.TreeMap;
+import java.util.concurrent.Callable;
+
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+final class Track2Callable implements Callable<UserResult> {
+
+  private static final Logger log = 
LoggerFactory.getLogger(Track2Callable.class);
+
+  private final Recommender recommender;
+  private final PreferenceArray userTest;
+
+  Track2Callable(Recommender recommender, PreferenceArray userTest) {
+    this.recommender = recommender;
+    this.userTest = userTest;
+  }
+
+  @Override
+  public UserResult call() throws TasteException {
+
+    int testSize = userTest.length();
+    if (testSize != 6) {
+      throw new IllegalArgumentException("Expecting 6 items for user but got " 
+ userTest);
+    }
+    long userID = userTest.get(0).getUserID();
+    TreeMap<Double,Long> estimateToItemID = new 
TreeMap<Double,Long>(Collections.reverseOrder());
+
+    for (int i = 0; i < testSize; i++) {
+      long itemID = userTest.getItemID(i);
+      double estimate;
+      try {
+        estimate = recommender.estimatePreference(userID, itemID);
+      } catch (NoSuchItemException nsie) {
+        // OK in the sample data provided before the contest, should never 
happen otherwise
+        log.warn("Unknown item {}; OK unless this is the real contest data", 
itemID);
+        continue;
+      }
+
+      if (!Double.isNaN(estimate)) {
+        estimateToItemID.put(estimate, itemID);
+      }
+    }
+
+    Collection<Long> itemIDs = estimateToItemID.values();
+    List<Long> topThree = new ArrayList<Long>(itemIDs);
+    if (topThree.size() > 3) {
+      topThree = topThree.subList(0, 3);
+    } else if (topThree.size() < 3) {
+      log.warn("Unable to recommend three items for {}", userID);
+      // Some NaNs - just guess at the rest then
+      Collection<Long> newItemIDs = new HashSet<Long>(3);
+      newItemIDs.addAll(itemIDs);
+      int i = 0;
+      while (i < testSize && newItemIDs.size() < 3) {
+        newItemIDs.add(userTest.getItemID(i));
+        i++;
+      }
+      topThree = new ArrayList<Long>(newItemIDs);
+    }
+    if (topThree.size() != 3) {
+      throw new IllegalStateException();
+    }
+
+    boolean[] result = new boolean[testSize];
+    for (int i = 0; i < testSize; i++) {
+      result[i] = topThree.contains(userTest.getItemID(i));
+    }
+    return new UserResult(userID, result);
+  }
+}

Copied: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
 (from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java)
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
 Fri Mar 18 17:54:11 2011
@@ -15,30 +15,39 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track2;
 
+import java.io.File;
+import java.io.IOException;
 import java.util.Collection;
 import java.util.List;
 
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
+import 
org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
 import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
 import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
-import org.apache.mahout.cf.taste.impl.similarity.UncenteredCosineSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.recommender.IDRescorer;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Recommender;
 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
 
-public final class KDDCupRecommender implements Recommender {
+public final class Track2Recommender implements Recommender {
 
   private final Recommender recommender;
 
-  public KDDCupRecommender(DataModel dataModel) throws TasteException {
+  public Track2Recommender(DataModel dataModel, File dataFileDirectory) throws 
TasteException {
     // Change this to whatever you like!
-    ItemSimilarity similarity = new CachingItemSimilarity(new 
UncenteredCosineSimilarity(dataModel), dataModel);
-    recommender = new GenericItemBasedRecommender(dataModel, similarity);
+    ItemSimilarity similarity;
+    try {
+      similarity = new TrackItemSimilarity(dataFileDirectory);
+    } catch (IOException ioe) {
+      throw new TasteException(ioe);
+    }
+    recommender = new GenericBooleanPrefItemBasedRecommender(dataModel, 
similarity);
   }
   
   @Override

Copied: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
 (from r1082143, 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java)
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
 Fri Mar 18 17:54:11 2011
@@ -15,18 +15,19 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track2;
 
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
+import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.recommender.Recommender;
 
-public final class KDDCupRecommenderBuilder implements RecommenderBuilder {
+final class Track2RecommenderBuilder implements RecommenderBuilder {
   
   @Override
   public Recommender buildRecommender(DataModel dataModel) throws 
TasteException {
-    return new KDDCupRecommender(dataModel);
+    return new Track2Recommender(dataModel, ((KDDCupDataModel) 
dataModel).getDataFileDirectory());
   }
   
 }
\ No newline at end of file

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java
 Fri Mar 18 17:54:11 2011
@@ -17,26 +17,26 @@
 
 package org.apache.mahout.cf.taste.example.kddcup.track2;
 
+import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.List;
-import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
 import org.apache.mahout.cf.taste.example.kddcup.DataFileIterable;
 import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupRecommender;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.common.Pair;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * <p>Runs "track 2" of the KDD Cup competition using whatever recommender is 
inside {@link KDDCupRecommender}
+ * <p>Runs "track 2" of the KDD Cup competition using whatever recommender is 
inside {@link Track2Recommender}
  * and attempts to output the result in the correct contest format.</p>
  *
  * <p>Run as: <code>Track2Runner [track 2 data file directory] [output 
file]</code></p>
@@ -55,60 +55,47 @@ public final class Track2Runner {
       throw new IllegalArgumentException("Bad data file directory: " + 
dataFileDirectory);
     }
 
+    long start = System.currentTimeMillis();
+
     KDDCupDataModel model = new 
KDDCupDataModel(KDDCupDataModel.getTrainingFile(dataFileDirectory));
-    KDDCupRecommender recommender = new KDDCupRecommender(model);
+    Track2Recommender recommender = new Track2Recommender(model, 
dataFileDirectory);
 
-    File outFile = new File(args[1]);
-    OutputStream out = new FileOutputStream(outFile);
+    long end = System.currentTimeMillis();
+    log.info("Loaded model in {}s", (end - start) / 1000);
+    start = end;
 
+    Collection<Track2Callable> callables = new ArrayList<Track2Callable>();
     for (Pair<PreferenceArray,long[]> tests : new 
DataFileIterable(KDDCupDataModel.getTestFile(dataFileDirectory))) {
-
       PreferenceArray userTest = tests.getFirst();
-      if (userTest.length() != 6) {
-        throw new IllegalArgumentException("Expecting 6 items for user but got 
" + userTest);
-      }
-      long userID = userTest.get(0).getUserID();
-
-      TreeMap<Double,Long> estimateToItemID = new 
TreeMap<Double,Long>(Collections.reverseOrder());
-
-      for (int i = 0; i < userTest.length(); i++) {
-        long itemID = userTest.getItemID(i);
-        double estimate;
-        try {
-          estimate = recommender.estimatePreference(userID, itemID);
-        } catch (NoSuchItemException nsie) {
-          // OK in the sample data provided before the contest, should never 
happen otherwise
-          log.warn("Unknown item {}; OK unless this is the real contest data", 
itemID);
-          continue;
-        }
-
-        log.debug("Estimate for user {}, item {}: {}", new Object[] {userID, 
itemID, estimate});
-        if (!Double.isNaN(estimate)) {
-          estimateToItemID.put(estimate, itemID);
-        }
-      }
-
-      Collection<Long> itemIDs = estimateToItemID.values();
-      log.debug("Scores are {}", itemIDs);
-      List<Long> topThree = new ArrayList<Long>(itemIDs);
-      if (topThree.size() > 3) {
-        topThree = topThree.subList(0, 3);
-      }
-      log.debug("Top three are {}", topThree);
+      callables.add(new Track2Callable(recommender, userTest));
+    }
 
-      for (int i = 0; i < userTest.length(); i++) {
-        long itemID = userTest.getItemID(i);
-        if (topThree.contains(itemID)) {
-          out.write('1');
-        } else {
-          out.write('0');
-        }
+    int cores = Runtime.getRuntime().availableProcessors();
+    log.info("Running on {} cores", cores);
+    ExecutorService executor = Executors.newFixedThreadPool(cores);
+    List<Future<UserResult>> futures = executor.invokeAll(callables);
+    executor.shutdown();
+
+    end = System.currentTimeMillis();
+    log.info("Ran recommendations in {}s", (end - start) / 1000);
+    start = end;
+
+    OutputStream out = new BufferedOutputStream(new FileOutputStream(new 
File(args[1])));
+    long lastUserID = Long.MIN_VALUE;
+    for (Future<UserResult> future : futures) {
+      UserResult result = future.get();
+      long userID = result.getUserID();
+      if (userID <= lastUserID) {
+        throw new IllegalStateException();
       }
-
+      lastUserID = userID;
+      out.write(result.getResultBytes());
     }
-
+    out.flush();
     out.close();
 
+    end = System.currentTimeMillis();
+    log.info("Wrote output in {}s", (end - start) / 1000);
   }
 
 }

Added: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java?rev=1082997&view=auto
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
 (added)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
 Fri Mar 18 17:54:11 2011
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.util.regex.Pattern;
+
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+
+/**
+ * Metadata for one track, parsed from a single pipe-delimited line of the form
+ * <code>trackID|albumID|artistID[|genreID...]</code>. An album or artist field holding the
+ * literal <code>None</code> is stored as {@link #NO_VALUE_ID}.
+ */
+final class TrackData {
+
+  private static final Pattern PIPE = Pattern.compile("\\|");
+  private static final String NO_VALUE = "None";
+  /** Sentinel ID stored when the album or artist field is "None". */
+  static final long NO_VALUE_ID = Long.MIN_VALUE;
+  /** Empty set shared by every track without genres; callers must not modify it. */
+  private static final FastIDSet NO_GENRES = new FastIDSet();
+
+  private final long trackID;
+  private final long albumID;
+  private final long artistID;
+  private final FastIDSet genreIDs;
+
+  /**
+   * @param line one pipe-delimited line of track data
+   * @throws IllegalArgumentException if the line has fewer than three fields; a field that is
+   *  not a valid ID surfaces as the {@link NumberFormatException} subclass
+   */
+  TrackData(CharSequence line) {
+    String[] tokens = PIPE.split(line);
+    if (tokens.length < 3) {
+      throw new IllegalArgumentException(
+          "Expected at least 3 pipe-delimited fields (track|album|artist) but got: " + line);
+    }
+    trackID = Long.parseLong(tokens[0]);
+    albumID = parse(tokens[1]);
+    artistID = parse(tokens[2]);
+    if (tokens.length > 3) {
+      genreIDs = new FastIDSet(tokens.length - 3);
+      for (int i = 3; i < tokens.length; i++) {
+        genreIDs.add(Long.parseLong(tokens[i]));
+      }
+    } else {
+      genreIDs = NO_GENRES;
+    }
+  }
+
+  /** Parses an ID field, mapping the "None" placeholder to {@link #NO_VALUE_ID}. */
+  private static long parse(String value) {
+    return NO_VALUE.equals(value) ? NO_VALUE_ID : Long.parseLong(value);
+  }
+
+  public long getTrackID() {
+    return trackID;
+  }
+
+  public long getAlbumID() {
+    return albumID;
+  }
+
+  public long getArtistID() {
+    return artistID;
+  }
+
+  /** @return this track's genre IDs; empty, never null, when the line listed none */
+  public FastIDSet getGenreIDs() {
+    return genreIDs;
+  }
+
+}

Added: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java?rev=1082997&view=auto
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
 (added)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
 Fri Mar 18 17:54:11 2011
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.common.FileLineIterable;
+
+/**
+ * Item similarity between tracks, derived from the album, artist and genre metadata that the
+ * constructor reads from the track file. Tracks on the same album score 0.9, tracks by the same
+ * artist 0.7; otherwise the score is the Tanimoto coefficient of the genre sets divided by 4.
+ */
+final class TrackItemSimilarity implements ItemSimilarity {
+
+  /** Arbitrary score for two tracks on the same album: "very similar". */
+  private static final double SAME_ALBUM_SIMILARITY = 0.9;
+  /** Arbitrary score for two tracks by the same artist: "fairly similar". */
+  private static final double SAME_ARTIST_SIMILARITY = 0.7;
+  /** Divisor that caps the genre-based score at 0.25. */
+  private static final double GENRE_DIVISOR = 4.0;
+
+  private final FastByIDMap<TrackData> trackData;
+
+  /** Loads one {@link TrackData} per line of the track file, keyed by track ID. */
+  TrackItemSimilarity(File dataFileDirectory) throws IOException {
+    trackData = new FastByIDMap<TrackData>();
+    File trackFile = KDDCupDataModel.getTrackFile(dataFileDirectory);
+    for (String line : new FileLineIterable(trackFile)) {
+      TrackData parsed = new TrackData(line);
+      trackData.put(parsed.getTrackID(), parsed);
+    }
+  }
+
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) {
+    if (itemID1 == itemID2) {
+      return 1.0;
+    }
+    TrackData first = trackData.get(itemID1);
+    TrackData second = trackData.get(itemID2);
+    if (first == null || second == null) {
+      // No metadata for at least one of the tracks
+      return 0.0;
+    }
+    if (sameKnownID(first.getAlbumID(), second.getAlbumID())) {
+      return SAME_ALBUM_SIMILARITY;
+    }
+    if (sameKnownID(first.getArtistID(), second.getArtistID())) {
+      return SAME_ARTIST_SIMILARITY;
+    }
+    return genreSimilarity(first.getGenreIDs(), second.getGenreIDs());
+  }
+
+  /** @return true if the first ID is a real value (not "None") and equals the second */
+  private static boolean sameKnownID(long id1, long id2) {
+    return id1 != TrackData.NO_VALUE_ID && id1 == id2;
+  }
+
+  /** Tanimoto coefficient of the two genre sets, scaled down to a maximum of 0.25. */
+  private static double genreSimilarity(FastIDSet genres1, FastIDSet genres2) {
+    if (genres1 == null || genres2 == null) {
+      return 0.0;
+    }
+    int shared = genres1.intersectionSize(genres2);
+    if (shared == 0) {
+      return 0.0;
+    }
+    int distinct = genres1.size() + genres2.size() - shared;
+    return (double) shared / (GENRE_DIVISOR * distinct);
+  }
+
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) {
+    double[] similarities = new double[itemID2s.length];
+    int index = 0;
+    for (long itemID2 : itemID2s) {
+      similarities[index++] = itemSimilarity(itemID1, itemID2);
+    }
+    return similarities;
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    // nothing to refresh
+  }
+
+}

Added: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java?rev=1082997&view=auto
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
 (added)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
 Fri Mar 18 17:54:11 2011
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+/**
+ * The answer computed for one user: one flag per tested item, held as the ASCII bytes
+ * '1' (flag set) and '0' (flag clear), in the same order as the flags were supplied.
+ */
+final class UserResult {
+
+  private final long userID;
+  private final byte[] resultBytes;
+
+  /**
+   * @param userID user the result belongs to
+   * @param result one flag per item; exactly three must be true
+   * @throws IllegalStateException if the number of true flags is not three
+   */
+  UserResult(long userID, boolean[] result) {
+    this.userID = userID;
+
+    int trueCount = 0;
+    for (boolean b : result) {
+      if (b) {
+        trueCount++;
+      }
+    }
+    if (trueCount != 3) {
+      throw new IllegalStateException(
+          "Expected exactly 3 true flags for user " + userID + " but got " + trueCount);
+    }
+
+    resultBytes = new byte[result.length];
+    for (int i = 0; i < result.length; i++) {
+      resultBytes[i] = (byte) (result[i] ? '1' : '0');
+    }
+  }
+
+  public long getUserID() {
+    return userID;
+  }
+
+  /** @return a fresh copy of the '1'/'0' bytes, so callers cannot alter this result */
+  public byte[] getResultBytes() {
+    return resultBytes.clone();
+  }
+
+}


Reply via email to