Author: srowen
Date: Sun Mar 20 15:27:02 2011
New Revision: 1083467
URL: http://svn.apache.org/viewvc?rev=1083467&view=rev
Log:
Last of first round of changes for KDD Cup example code
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java?rev=1083467&view=auto
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
(added)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
Sun Mar 20 15:27:02 2011
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.zip.GZIPOutputStream;
+
+
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.Pair;
+
+/**
+ * <p>This class converts a KDD Cup input file into a compressed CSV format. The output format is
+ * <code>userID,itemID,score,timestamp</code>. It can optionally restrict its output to exclude
+ * score and/or timestamp.</p>
+ *
+ * <p>Run as: <code>ToCSV (input file) (output file) [num columns to output]</code></p>
+ */
+public final class ToCSV {
+
+  private ToCSV() {
+    // Only the static entry point is used; instances are never created.
+  }
+
+  /**
+   * Reads every user record from the input file and writes one line per preference to a
+   * GZIP-compressed, UTF-8 encoded output file.
+   *
+   * @param args <code>args[0]</code> is the input file, <code>args[1]</code> is the output file, and the
+   *  optional <code>args[2]</code> is the number of columns to write (default 4): the score column is
+   *  written only when it is greater than 2, the timestamp column only when it is greater than 3
+   * @throws IllegalArgumentException if fewer than two arguments are supplied
+   * @throws Exception if the column count is not an integer, or if reading or writing fails
+   */
+  public static void main(String[] args) throws Exception {
+
+    if (args.length < 2) {
+      throw new IllegalArgumentException(
+          "Usage: ToCSV (input file) (output file) [num columns to output]");
+    }
+
+    File inputFile = new File(args[0]);
+    File outputFile = new File(args[1]);
+    int columnsToOutput = 4;
+    if (args.length >= 3) {
+      columnsToOutput = Integer.parseInt(args[2]);
+    }
+
+    // The raw file stream gets its own try/finally so that the file handle is released even if
+    // building the GZIP/writer chain on top of it fails.
+    OutputStream fileStream = new FileOutputStream(outputFile);
+    try {
+      Writer outWriter = new BufferedWriter(
+          new OutputStreamWriter(new GZIPOutputStream(fileStream), Charset.forName("UTF-8")));
+      try {
+        for (Pair<PreferenceArray,long[]> user : new DataFileIterable(inputFile)) {
+          PreferenceArray prefs = user.getFirst();
+          long[] timestamps = user.getSecond();
+          for (int i = 0; i < prefs.length(); i++) {
+            outWriter.write(String.valueOf(prefs.getUserID(i)));
+            outWriter.write(',');
+            outWriter.write(String.valueOf(prefs.getItemID(i)));
+            if (columnsToOutput > 2) {
+              outWriter.write(',');
+              outWriter.write(String.valueOf(prefs.getValue(i)));
+            }
+            if (columnsToOutput > 3) {
+              outWriter.write(',');
+              outWriter.write(String.valueOf(timestamps[i]));
+            }
+            outWriter.write('\n');
+          }
+        }
+      } finally {
+        // Closing the writer flushes buffered text and closes the wrapped GZIP stream.
+        outWriter.close();
+      }
+    } finally {
+      // Harmless if the writer already closed the underlying stream.
+      fileStream.close();
+    }
+  }
+
+}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
Sun Mar 20 15:27:02 2011
@@ -18,6 +18,7 @@
package org.apache.mahout.cf.taste.example.kddcup.track1;
import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.TasteException;
@@ -29,6 +30,7 @@ import org.slf4j.LoggerFactory;
final class Track1Callable implements Callable<byte[]> {
private static final Logger log =
LoggerFactory.getLogger(Track1Callable.class);
+ private static final AtomicInteger COUNT = new AtomicInteger();
private final Recommender recommender;
private final PreferenceArray userTest;
@@ -53,15 +55,24 @@ final class Track1Callable implements Ca
continue;
}
- int scaledEstimate = (int) (estimate * 2.55);
- if (scaledEstimate > 255) {
- scaledEstimate = 255;
- } else if (scaledEstimate < 0) {
- scaledEstimate = 0;
+ if (Double.isNaN(estimate)) {
+ log.warn("Unable to compute estimate for user {}, item {}", userID,
itemID);
+ result[i] = 0x7F;
+ } else {
+ int scaledEstimate = (int) (estimate * 2.55);
+ if (scaledEstimate > 255) {
+ scaledEstimate = 255;
+ } else if (scaledEstimate < 0) {
+ scaledEstimate = 0;
+ }
+ result[i] = (byte) scaledEstimate;
}
+ }
- result[i] = (byte) scaledEstimate;
+ if (COUNT.incrementAndGet() % 10000 == 0) {
+ log.info("Completed {} users", COUNT.get());
}
+
return result;
}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
Sun Mar 20 15:27:02 2011
@@ -25,6 +25,7 @@ import org.apache.mahout.cf.taste.common
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.UncenteredCosineSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
@@ -37,7 +38,7 @@ public final class Track1Recommender imp
public Track1Recommender(DataModel dataModel) throws TasteException {
// Change this to whatever you like!
- ItemSimilarity similarity = new CachingItemSimilarity(new
LogLikelihoodSimilarity(dataModel), dataModel);
+ ItemSimilarity similarity = new UncenteredCosineSimilarity(dataModel);
recommender = new GenericItemBasedRecommender(dataModel, similarity);
}
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java?rev=1083467&view=auto
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
(added)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
Sun Mar 20 15:27:02 2011
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+
+/**
+ * An {@link ItemSimilarity} whose value for a pair of items is the product of two other similarities:
+ * a {@link LogLikelihoodSimilarity} computed over the supplied {@link DataModel}, and a
+ * {@link TrackItemSimilarity} built from the supplied data file directory.
+ */
+final class HybridSimilarity implements ItemSimilarity {
+
+  private final ItemSimilarity cfSimilarity;
+  private final ItemSimilarity contentSimilarity;
+
+  /**
+   * @param dataModel model handed to the {@link LogLikelihoodSimilarity}
+   * @param dataFileDirectory directory handed to the {@link TrackItemSimilarity}
+   * @throws IOException if constructing the {@link TrackItemSimilarity} fails
+   */
+  HybridSimilarity(DataModel dataModel, File dataFileDirectory) throws IOException {
+    this.cfSimilarity = new LogLikelihoodSimilarity(dataModel);
+    this.contentSimilarity = new TrackItemSimilarity(dataFileDirectory);
+  }
+
+  /**
+   * @return the content similarity of the two items multiplied by their data-model similarity
+   */
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+    double contentScore = this.contentSimilarity.itemSimilarity(itemID1, itemID2);
+    double cfScore = this.cfSimilarity.itemSimilarity(itemID1, itemID2);
+    return contentScore * cfScore;
+  }
+
+  /**
+   * @return the array returned by the content similarity, with each element multiplied in place by the
+   *  data-model similarity at the same index
+   */
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+    double[] combined = this.contentSimilarity.itemSimilarities(itemID1, itemID2s);
+    double[] cfScores = this.cfSimilarity.itemSimilarities(itemID1, itemID2s);
+    for (int index = 0; index < combined.length; index++) {
+      combined[index] = combined[index] * cfScores[index];
+    }
+    return combined;
+  }
+
+  /**
+   * Forwards the refresh to the data-model similarity only; the content similarity is not refreshed here.
+   */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    this.cfSimilarity.refresh(alreadyRefreshed);
+  }
+
+}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
Sun Mar 20 15:27:02 2011
@@ -24,6 +24,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.TreeMap;
import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.common.TasteException;
@@ -35,6 +36,7 @@ import org.slf4j.LoggerFactory;
final class Track2Callable implements Callable<UserResult> {
private static final Logger log =
LoggerFactory.getLogger(Track2Callable.class);
+ private static final AtomicInteger COUNT = new AtomicInteger();
private final Recommender recommender;
private final PreferenceArray userTest;
@@ -94,6 +96,11 @@ final class Track2Callable implements Ca
for (int i = 0; i < testSize; i++) {
result[i] = topThree.contains(userTest.getItemID(i));
}
+
+ if (COUNT.incrementAndGet() % 1000 == 0) {
+ log.info("Completed {} users", COUNT.get());
+ }
+
return new UserResult(userID, result);
}
}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
Sun Mar 20 15:27:02 2011
@@ -24,11 +24,7 @@ import java.util.List;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
import
org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
-import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
-import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
-import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
@@ -43,7 +39,7 @@ public final class Track2Recommender imp
// Change this to whatever you like!
ItemSimilarity similarity;
try {
- similarity = new TrackItemSimilarity(dataFileDirectory);
+ similarity = new HybridSimilarity(dataModel, dataFileDirectory);
} catch (IOException ioe) {
throw new TasteException(ioe);
}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
Sun Mar 20 15:27:02 2011
@@ -22,7 +22,6 @@ import java.io.IOException;
import java.util.Collection;
import org.apache.mahout.cf.taste.common.Refreshable;
-import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;