Author: srowen
Date: Fri Mar 18 17:54:11 2011
New Revision: 1082997
URL: http://svn.apache.org/viewvc?rev=1082997&view=rev
Log:
Iterate more on KDD Cup support code now that I've tried it a bit and see it
could be better
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
- copied, changed from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
- copied, changed from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
- copied, changed from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
- copied, changed from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
Removed:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupDataModel.java
Fri Mar 18 17:54:11 2011
@@ -33,6 +33,8 @@ import org.apache.mahout.cf.taste.model.
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.SamplingIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* <p>An {@link DataModel} which reads into memory any of the KDD Cup's rating
files; it is really
@@ -44,6 +46,9 @@ import org.apache.mahout.common.iterator
*/
public final class KDDCupDataModel implements DataModel {
+ private static final Logger log =
LoggerFactory.getLogger(KDDCupDataModel.class);
+
+ private final File dataFileDirectory;
private final DataModel delegate;
/**
@@ -62,6 +67,8 @@ public final class KDDCupDataModel imple
Preconditions.checkArgument(!Double.isNaN(samplingRate) && samplingRate >
0.0 && samplingRate <= 1.0);
+ dataFileDirectory = dataFile.getParentFile();
+
Iterator<Pair<PreferenceArray,long[]>> dataIterator = new
DataFileIterator(dataFile);
if (samplingRate < 1.0) {
dataIterator = new
SamplingIterator<Pair<PreferenceArray,long[]>>(dataIterator, samplingRate);
@@ -95,19 +102,28 @@ public final class KDDCupDataModel imple
delegate = new GenericDataModel(userData);
}
+ Runtime runtime = Runtime.getRuntime();
+ log.info("Loaded data model in about {}MB heap", (runtime.totalMemory() -
runtime.freeMemory()) / 1000000);
}
+ public File getDataFileDirectory() {
+ return dataFileDirectory;
+ }
public static File getTrainingFile(File dataFileDirectory) {
- return getFile(dataFileDirectory, "train");
+ return getFile(dataFileDirectory, "trainIdx");
}
public static File getValidationFile(File dataFileDirectory) {
- return getFile(dataFileDirectory, "validation");
+ return getFile(dataFileDirectory, "validationIdx");
}
public static File getTestFile(File dataFileDirectory) {
- return getFile(dataFileDirectory, "test");
+ return getFile(dataFileDirectory, "testIdx");
+ }
+
+ public static File getTrackFile(File dataFileDirectory) {
+ return getFile(dataFileDirectory, "trackData");
}
private static File getFile(File dataFileDirectory, String prefix) {
@@ -115,9 +131,11 @@ public final class KDDCupDataModel imple
for (int set : new int[] {1,2}) {
// Works on sample data from before contest or real data
for (String firstLinesOrNot : new String[] {"", ".firstLines"}) {
- File dataFile = new File(dataFileDirectory, prefix + "Idx" + set +
firstLinesOrNot + ".txt");
- if (dataFile.exists()) {
- return dataFile;
+ for (String gzippedOrNot : new String[] {".gz", ""}) {
+ File dataFile = new File(dataFileDirectory, prefix + set +
firstLinesOrNot + ".txt" + gzippedOrNot);
+ if (dataFile.exists()) {
+ return dataFile;
+ }
}
}
}
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java?rev=1082997&view=auto
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
(added)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
Fri Mar 18 17:54:11 2011
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track1;
+
+import java.util.concurrent.Callable;
+
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+final class Track1Callable implements Callable<byte[]> {
+ // Task that produces one byte-scaled preference estimate per entry of userTest.
+ private static final Logger log =
LoggerFactory.getLogger(Track1Callable.class);
+
+ private final Recommender recommender;
+ private final PreferenceArray userTest;
+
+ Track1Callable(Recommender recommender, PreferenceArray userTest) {
+ this.recommender = recommender;
+ this.userTest = userTest;
+ }
+
+ @Override
+ public byte[] call() throws TasteException {
+ long userID = userTest.get(0).getUserID(); // user ID is read from the first entry only; assumes every entry belongs to that user - TODO confirm
+ byte[] result = new byte[userTest.length()]; // one output byte per test entry, initially 0
+ for (int i = 0; i < userTest.length(); i++) {
+ long itemID = userTest.getItemID(i);
+ double estimate;
+ try {
+ estimate = recommender.estimatePreference(userID, itemID);
+ } catch (NoSuchItemException nsie) {
+ // OK in the sample data provided before the contest, should never
happen otherwise
+ log.warn("Unknown item {}; OK unless this is the real contest data",
itemID);
+ continue; // result[i] keeps its default value of 0 for an unknown item
+ }
+ // Scale by 2.55 (so 100 maps to 255; presumably estimates are on a 0-100 scale - TODO confirm), then clamp to 0..255.
+ int scaledEstimate = (int) (estimate * 2.55); // the cast truncates toward zero; a NaN estimate becomes 0
+ if (scaledEstimate > 255) {
+ scaledEstimate = 255;
+ } else if (scaledEstimate < 0) {
+ scaledEstimate = 0;
+ }
+
+ result[i] = (byte) scaledEstimate; // values above 127 become negative as a Java byte but keep the same low 8 bits
+ }
+ return result;
+ }
+
+}
Copied:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
(from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java)
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
Fri Mar 18 17:54:11 2011
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track1;
import java.util.Collection;
import java.util.List;
@@ -24,20 +24,20 @@ import org.apache.mahout.cf.taste.common
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
-import org.apache.mahout.cf.taste.impl.similarity.UncenteredCosineSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
-public final class KDDCupRecommender implements Recommender {
+public final class Track1Recommender implements Recommender {
private final Recommender recommender;
- public KDDCupRecommender(DataModel dataModel) throws TasteException {
+ public Track1Recommender(DataModel dataModel) throws TasteException {
// Change this to whatever you like!
- ItemSimilarity similarity = new CachingItemSimilarity(new
UncenteredCosineSimilarity(dataModel), dataModel);
+ ItemSimilarity similarity = new CachingItemSimilarity(new
LogLikelihoodSimilarity(dataModel), dataModel);
recommender = new GenericItemBasedRecommender(dataModel, similarity);
}
Copied:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
(from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java)
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderBuilder.java
Fri Mar 18 17:54:11 2011
@@ -15,18 +15,18 @@
* limitations under the License.
*/
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track1;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
-public final class KDDCupRecommenderBuilder implements RecommenderBuilder {
+final class Track1RecommenderBuilder implements RecommenderBuilder {
@Override
public Recommender buildRecommender(DataModel dataModel) throws
TasteException {
- return new KDDCupRecommender(dataModel);
+ return new Track1Recommender(dataModel);
}
}
\ No newline at end of file
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1RecommenderEvaluatorRunner.java
Fri Mar 18 17:54:11 2011
@@ -24,7 +24,6 @@ import org.apache.commons.cli2.OptionExc
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.example.TasteOptionParser;
import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupRecommenderBuilder;
import org.apache.mahout.cf.taste.model.DataModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -46,7 +45,7 @@ public final class Track1RecommenderEval
}
Track1RecommenderEvaluator evaluator = new
Track1RecommenderEvaluator(dataFileDirectory);
DataModel model = new
KDDCupDataModel(KDDCupDataModel.getTrainingFile(dataFileDirectory));
- double evaluation = evaluator.evaluate(new KDDCupRecommenderBuilder(),
+ double evaluation = evaluator.evaluate(new Track1RecommenderBuilder(),
null,
model,
Float.NaN,
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
Fri Mar 18 17:54:11 2011
@@ -17,21 +17,26 @@
package org.apache.mahout.cf.taste.example.kddcup.track1;
+import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.example.kddcup.DataFileIterable;
import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupRecommender;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.common.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * <p>Runs "track 1" of the KDD Cup competition using whatever recommender is
inside {@link KDDCupRecommender}
+ * <p>Runs "track 1" of the KDD Cup competition using whatever recommender is
inside {@link Track1Recommender}
* and attempts to output the result in the correct contest format.</p>
*
* <p>Run as: <code>Track1Runner [track 1 data file directory] [output
file]</code></p>
@@ -50,45 +55,42 @@ public final class Track1Runner {
throw new IllegalArgumentException("Bad data file directory: " +
dataFileDirectory);
}
+ long start = System.currentTimeMillis();
+
KDDCupDataModel model = new
KDDCupDataModel(KDDCupDataModel.getTrainingFile(dataFileDirectory));
- KDDCupRecommender recommender = new KDDCupRecommender(model);
+ Track1Recommender recommender = new Track1Recommender(model);
- File outFile = new File(args[1]);
- OutputStream out = new FileOutputStream(outFile);
+ long end = System.currentTimeMillis();
+ log.info("Loaded model in {}s", (end - start) / 1000);
+ start = end;
+ Collection<Track1Callable> callables = new ArrayList<Track1Callable>();
for (Pair<PreferenceArray,long[]> tests : new
DataFileIterable(KDDCupDataModel.getTestFile(dataFileDirectory))) {
-
PreferenceArray userTest = tests.getFirst();
- long userID = userTest.get(0).getUserID();
- for (int i = 0; i < userTest.length(); i++) {
- long itemID = userTest.getItemID(i);
- double estimate;
- try {
- estimate = recommender.estimatePreference(userID, itemID);
- } catch (NoSuchItemException nsie) {
- // OK in the sample data provided before the contest, should never
happen otherwise
- log.warn("Unknown item {}; OK unless this is the real contest data",
itemID);
- continue;
- }
-
- log.info("Estimate for user {}, item {}: ", new Object[] {userID,
itemID, estimate});
-
- int scaledEstimate = (int) ((estimate / 100.0) * 255.0);
- if (scaledEstimate > 255) {
- scaledEstimate = 255;
- } else if (scaledEstimate < 0) {
- scaledEstimate = 0;
- }
-
- out.write(scaledEstimate);
+ callables.add(new Track1Callable(recommender, userTest));
+ }
+ int cores = Runtime.getRuntime().availableProcessors();
+ log.info("Running on {} cores", cores);
+ ExecutorService executor = Executors.newFixedThreadPool(cores);
+ List<Future<byte[]>> results = executor.invokeAll(callables);
+ executor.shutdown();
+
+ end = System.currentTimeMillis();
+ log.info("Ran recommendations in {}s", (end - start) / 1000);
+ start = end;
+
+ OutputStream out = new BufferedOutputStream(new FileOutputStream(new
File(args[1])));
+ for (Future<byte[]> result : results) {
+ for (byte estimate : result.get()) {
+ out.write(estimate);
}
-
}
-
+ out.flush();
out.close();
+ end = System.currentTimeMillis();
+ log.info("Wrote output in {}s", (end - start) / 1000);
}
-
}
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java?rev=1082997&view=auto
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
(added)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
Fri Mar 18 17:54:11 2011
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.TreeMap;
+import java.util.concurrent.Callable;
+
+import org.apache.mahout.cf.taste.common.NoSuchItemException;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+final class Track2Callable implements Callable<UserResult> {
+ // Task that flags which of the entries in userTest rank in the top three by estimated preference.
+ private static final Logger log =
LoggerFactory.getLogger(Track2Callable.class);
+
+ private final Recommender recommender;
+ private final PreferenceArray userTest;
+
+ Track2Callable(Recommender recommender, PreferenceArray userTest) {
+ this.recommender = recommender;
+ this.userTest = userTest;
+ }
+
+ @Override
+ public UserResult call() throws TasteException {
+
+ int testSize = userTest.length();
+ if (testSize != 6) {
+ throw new IllegalArgumentException("Expecting 6 items for user but got "
+ userTest);
+ }
+ long userID = userTest.get(0).getUserID(); // user ID is read from the first entry only; assumes every entry belongs to that user - TODO confirm
+ TreeMap<Double,Long> estimateToItemID = new
TreeMap<Double,Long>(Collections.reverseOrder()); // reverse ordering: largest estimate first
+
+ for (int i = 0; i < testSize; i++) {
+ long itemID = userTest.getItemID(i);
+ double estimate;
+ try {
+ estimate = recommender.estimatePreference(userID, itemID);
+ } catch (NoSuchItemException nsie) {
+ // OK in the sample data provided before the contest, should never
happen otherwise
+ log.warn("Unknown item {}; OK unless this is the real contest data",
itemID);
+ continue; // an unknown item gets no estimate and is left out of the ranking
+ }
+
+ if (!Double.isNaN(estimate)) { // NaN estimates are left out of the ranking
+ estimateToItemID.put(estimate, itemID); // NOTE(review): equal estimates share one key, so a later item replaces an earlier one
+ }
+ }
+ // values() follows the map's key order, i.e. item IDs by descending estimate.
+ Collection<Long> itemIDs = estimateToItemID.values();
+ List<Long> topThree = new ArrayList<Long>(itemIDs);
+ if (topThree.size() > 3) {
+ topThree = topThree.subList(0, 3); // keep only the three highest-ranked items
+ } else if (topThree.size() < 3) {
+ log.warn("Unable to recommend three items for {}", userID);
+ // Some NaNs - just guess at the rest then
+ Collection<Long> newItemIDs = new HashSet<Long>(3); // a set, so an already-ranked item is not counted twice
+ newItemIDs.addAll(itemIDs);
+ int i = 0;
+ while (i < testSize && newItemIDs.size() < 3) { // pad with test items in their original order until three distinct IDs are held
+ newItemIDs.add(userTest.getItemID(i));
+ i++;
+ }
+ topThree = new ArrayList<Long>(newItemIDs); // HashSet iteration order is unspecified; only membership is used below
+ }
+ if (topThree.size() != 3) { // still short only when the test entries hold fewer than three distinct item IDs
+ throw new IllegalStateException();
+ }
+ // result[i] is true exactly when test entry i's item is among the chosen three.
+ boolean[] result = new boolean[testSize];
+ for (int i = 0; i < testSize; i++) {
+ result[i] = topThree.contains(userTest.getItemID(i));
+ }
+ return new UserResult(userID, result);
+ }
+}
Copied:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
(from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java)
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommender.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
Fri Mar 18 17:54:11 2011
@@ -15,30 +15,39 @@
* limitations under the License.
*/
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+import java.io.File;
+import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
+import
org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
-import org.apache.mahout.cf.taste.impl.similarity.UncenteredCosineSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
-public final class KDDCupRecommender implements Recommender {
+public final class Track2Recommender implements Recommender {
private final Recommender recommender;
- public KDDCupRecommender(DataModel dataModel) throws TasteException {
+ public Track2Recommender(DataModel dataModel, File dataFileDirectory) throws
TasteException {
// Change this to whatever you like!
- ItemSimilarity similarity = new CachingItemSimilarity(new
UncenteredCosineSimilarity(dataModel), dataModel);
- recommender = new GenericItemBasedRecommender(dataModel, similarity);
+ ItemSimilarity similarity;
+ try {
+ similarity = new TrackItemSimilarity(dataFileDirectory);
+ } catch (IOException ioe) {
+ throw new TasteException(ioe);
+ }
+ recommender = new GenericBooleanPrefItemBasedRecommender(dataModel,
similarity);
}
@Override
Copied:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
(from r1082143,
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java)
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java?p2=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java&r1=1082143&r2=1082997&rev=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/KDDCupRecommenderBuilder.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2RecommenderBuilder.java
Fri Mar 18 17:54:11 2011
@@ -15,18 +15,19 @@
* limitations under the License.
*/
-package org.apache.mahout.cf.taste.example.kddcup;
+package org.apache.mahout.cf.taste.example.kddcup.track2;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
+import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
-public final class KDDCupRecommenderBuilder implements RecommenderBuilder {
+final class Track2RecommenderBuilder implements RecommenderBuilder {
@Override
public Recommender buildRecommender(DataModel dataModel) throws
TasteException {
- return new KDDCupRecommender(dataModel);
+ return new Track2Recommender(dataModel, ((KDDCupDataModel)
dataModel).getDataFileDirectory());
}
}
\ No newline at end of file
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java?rev=1082997&r1=1082996&r2=1082997&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Runner.java
Fri Mar 18 17:54:11 2011
@@ -17,26 +17,26 @@
package org.apache.mahout.cf.taste.example.kddcup.track2;
+import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.List;
-import java.util.TreeMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
-import org.apache.mahout.cf.taste.common.NoSuchItemException;
import org.apache.mahout.cf.taste.example.kddcup.DataFileIterable;
import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupRecommender;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.common.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * <p>Runs "track 2" of the KDD Cup competition using whatever recommender is
inside {@link KDDCupRecommender}
+ * <p>Runs "track 2" of the KDD Cup competition using whatever recommender is
inside {@link Track2Recommender}
* and attempts to output the result in the correct contest format.</p>
*
* <p>Run as: <code>Track2Runner [track 2 data file directory] [output
file]</code></p>
@@ -55,60 +55,47 @@ public final class Track2Runner {
throw new IllegalArgumentException("Bad data file directory: " +
dataFileDirectory);
}
+ long start = System.currentTimeMillis();
+
KDDCupDataModel model = new
KDDCupDataModel(KDDCupDataModel.getTrainingFile(dataFileDirectory));
- KDDCupRecommender recommender = new KDDCupRecommender(model);
+ Track2Recommender recommender = new Track2Recommender(model,
dataFileDirectory);
- File outFile = new File(args[1]);
- OutputStream out = new FileOutputStream(outFile);
+ long end = System.currentTimeMillis();
+ log.info("Loaded model in {}s", (end - start) / 1000);
+ start = end;
+ Collection<Track2Callable> callables = new ArrayList<Track2Callable>();
for (Pair<PreferenceArray,long[]> tests : new
DataFileIterable(KDDCupDataModel.getTestFile(dataFileDirectory))) {
-
PreferenceArray userTest = tests.getFirst();
- if (userTest.length() != 6) {
- throw new IllegalArgumentException("Expecting 6 items for user but got
" + userTest);
- }
- long userID = userTest.get(0).getUserID();
-
- TreeMap<Double,Long> estimateToItemID = new
TreeMap<Double,Long>(Collections.reverseOrder());
-
- for (int i = 0; i < userTest.length(); i++) {
- long itemID = userTest.getItemID(i);
- double estimate;
- try {
- estimate = recommender.estimatePreference(userID, itemID);
- } catch (NoSuchItemException nsie) {
- // OK in the sample data provided before the contest, should never
happen otherwise
- log.warn("Unknown item {}; OK unless this is the real contest data",
itemID);
- continue;
- }
-
- log.debug("Estimate for user {}, item {}: {}", new Object[] {userID,
itemID, estimate});
- if (!Double.isNaN(estimate)) {
- estimateToItemID.put(estimate, itemID);
- }
- }
-
- Collection<Long> itemIDs = estimateToItemID.values();
- log.debug("Scores are {}", itemIDs);
- List<Long> topThree = new ArrayList<Long>(itemIDs);
- if (topThree.size() > 3) {
- topThree = topThree.subList(0, 3);
- }
- log.debug("Top three are {}", topThree);
+ callables.add(new Track2Callable(recommender, userTest));
+ }
- for (int i = 0; i < userTest.length(); i++) {
- long itemID = userTest.getItemID(i);
- if (topThree.contains(itemID)) {
- out.write('1');
- } else {
- out.write('0');
- }
+ int cores = Runtime.getRuntime().availableProcessors();
+ log.info("Running on {} cores", cores);
+ ExecutorService executor = Executors.newFixedThreadPool(cores);
+ List<Future<UserResult>> futures = executor.invokeAll(callables);
+ executor.shutdown();
+
+ end = System.currentTimeMillis();
+ log.info("Ran recommendations in {}s", (end - start) / 1000);
+ start = end;
+
+ OutputStream out = new BufferedOutputStream(new FileOutputStream(new
File(args[1])));
+ long lastUserID = Long.MIN_VALUE;
+ for (Future<UserResult> future : futures) {
+ UserResult result = future.get();
+ long userID = result.getUserID();
+ if (userID <= lastUserID) {
+ throw new IllegalStateException();
}
-
+ lastUserID = userID;
+ out.write(result.getResultBytes());
}
-
+ out.flush();
out.close();
+ end = System.currentTimeMillis();
+ log.info("Wrote output in {}s", (end - start) / 1000);
}
}
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java?rev=1082997&view=auto
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
(added)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackData.java
Fri Mar 18 17:54:11 2011
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.util.regex.Pattern;
+
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+
+/**
+ * Metadata for a single track, parsed from one pipe-delimited line of the form
+ * {@code trackID|albumID|artistID|genreID|genreID|...}. An album or artist field holding the
+ * literal text {@code None} is recorded as {@link #NO_VALUE_ID}.
+ */
+final class TrackData {
+
+  private static final Pattern PIPE = Pattern.compile("\\|");
+  private static final String NO_VALUE = "None";
+  static final long NO_VALUE_ID = Long.MIN_VALUE;
+  private static final FastIDSet NO_GENRES = new FastIDSet();
+
+  private final long trackID;
+  private final long albumID;
+  private final long artistID;
+  private final FastIDSet genreIDs;
+
+  /**
+   * Parses one line of track data.
+   *
+   * @param line pipe-delimited record; the first three fields are track, album and artist,
+   *  and every further field is a genre ID
+   */
+  TrackData(CharSequence line) {
+    String[] fields = PIPE.split(line);
+    trackID = Long.parseLong(fields[0]);
+    albumID = parse(fields[1]);
+    artistID = parse(fields[2]);
+
+    // Everything after the third field is a genre ID.
+    int genreCount = fields.length - 3;
+    if (genreCount <= 0) {
+      // Tracks without genres all share one empty set instead of allocating their own.
+      genreIDs = NO_GENRES;
+    } else {
+      FastIDSet parsedGenres = new FastIDSet(genreCount);
+      for (int field = 3; field < fields.length; field++) {
+        parsedGenres.add(Long.parseLong(fields[field]));
+      }
+      genreIDs = parsedGenres;
+    }
+  }
+
+  /**
+   * @param value text of an album or artist field
+   * @return {@link #NO_VALUE_ID} when the field is the "None" marker, else the parsed ID
+   */
+  private static long parse(String value) {
+    if (NO_VALUE.equals(value)) {
+      return NO_VALUE_ID;
+    }
+    return Long.parseLong(value);
+  }
+
+  /** @return ID of this track */
+  public long getTrackID() {
+    return trackID;
+  }
+
+  /** @return ID of the track's album, or {@link #NO_VALUE_ID} if the line gave none */
+  public long getAlbumID() {
+    return albumID;
+  }
+
+  /** @return ID of the track's artist, or {@link #NO_VALUE_ID} if the line gave none */
+  public long getArtistID() {
+    return artistID;
+  }
+
+  /** @return genre IDs listed for the track; an empty set when the line listed none */
+  public FastIDSet getGenreIDs() {
+    return genreIDs;
+  }
+
+}
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java?rev=1082997&view=auto
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
(added)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
Fri Mar 18 17:54:11 2011
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.common.FileLineIterable;
+
+/**
+ * An {@link ItemSimilarity} between tracks that is derived from track metadata (album, artist
+ * and genres) loaded once from the track file, rather than from user preferences.
+ */
+final class TrackItemSimilarity implements ItemSimilarity {
+
+  /** Similarity assigned to two distinct tracks on the same album. */
+  private static final double SAME_ALBUM = 0.9;
+  /** Similarity assigned to two tracks by the same artist (but not the same album). */
+  private static final double SAME_ARTIST = 0.7;
+  /** Divisor that caps the genre-based Tanimoto coefficient at 0.25. */
+  private static final double GENRE_SCALE = 4.0;
+
+  private final FastByIDMap<TrackData> trackData;
+
+  /**
+   * Reads every line of the track file and indexes the parsed records by track ID.
+   *
+   * @param dataFileDirectory directory handed to {@code KDDCupDataModel.getTrackFile}
+   * @throws IOException declared for failures while opening or reading the track file
+   */
+  TrackItemSimilarity(File dataFileDirectory) throws IOException {
+    trackData = new FastByIDMap<TrackData>();
+    for (String line : new FileLineIterable(KDDCupDataModel.getTrackFile(dataFileDirectory))) {
+      TrackData parsed = new TrackData(line);
+      trackData.put(parsed.getTrackID(), parsed);
+    }
+  }
+
+  /**
+   * @return 1.0 for identical IDs; 0.0 if either track is unknown; 0.9 for a shared album;
+   *  0.7 for a shared artist; otherwise a genre-overlap score of at most 0.25
+   */
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) {
+    if (itemID1 == itemID2) {
+      return 1.0;
+    }
+    TrackData first = trackData.get(itemID1);
+    TrackData second = trackData.get(itemID2);
+    if (first == null || second == null) {
+      return 0.0;
+    }
+
+    // Arbitrary weights: a shared album counts as "very similar" ...
+    if (sameKnownID(first.getAlbumID(), second.getAlbumID())) {
+      return SAME_ALBUM;
+    }
+    // ... and a shared artist as "fairly similar".
+    if (sameKnownID(first.getArtistID(), second.getArtistID())) {
+      return SAME_ARTIST;
+    }
+    return genreSimilarity(first.getGenreIDs(), second.getGenreIDs());
+  }
+
+  /** @return true if the first ID is not the "no value" marker and equals the second */
+  private static boolean sameKnownID(long id1, long id2) {
+    return id1 != TrackData.NO_VALUE_ID && id1 == id2;
+  }
+
+  /**
+   * Tanimoto coefficient of the two genre sets, divided by four so it never exceeds 0.25.
+   *
+   * @return 0.0 if either set is null or the sets share no genre
+   */
+  private static double genreSimilarity(FastIDSet genres1, FastIDSet genres2) {
+    if (genres1 == null || genres2 == null) {
+      return 0.0;
+    }
+    int shared = genres1.intersectionSize(genres2);
+    if (shared == 0) {
+      return 0.0;
+    }
+    int combined = genres1.size() + genres2.size() - shared;
+    return (double) shared / (GENRE_SCALE * combined);
+  }
+
+  /**
+   * @return similarities of {@code itemID1} to each element of {@code itemID2s}, in the same
+   *  order
+   */
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) {
+    double[] similarities = new double[itemID2s.length];
+    for (int i = 0; i < similarities.length; i++) {
+      similarities[i] = itemSimilarity(itemID1, itemID2s[i]);
+    }
+    return similarities;
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    // Nothing to refresh: the track metadata is loaded once, in the constructor.
+  }
+
+}
Added:
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java?rev=1082997&view=auto
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
(added)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/UserResult.java
Fri Mar 18 17:54:11 2011
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+/**
+ * Immutable result for one user: the user's ID plus one output byte per test item, ASCII
+ * {@code '1'} where the item was selected and {@code '0'} where it was not.
+ */
+final class UserResult {
+
+  /** Number of items that must be selected for a result to be accepted. */
+  private static final int EXPECTED_SELECTED = 3;
+
+  private final long userID;
+  private final byte[] resultBytes;
+
+  /**
+   * @param userID user the result belongs to
+   * @param result one flag per test item, {@code true} where the item was selected
+   * @throws IllegalStateException if the number of {@code true} flags is not exactly three
+   */
+  UserResult(long userID, boolean[] result) {
+    this.userID = userID;
+
+    // Single pass: encode each flag and count the selected ones as we go.
+    byte[] encoded = new byte[result.length];
+    int selected = 0;
+    for (int i = 0; i < result.length; i++) {
+      if (result[i]) {
+        selected++;
+        encoded[i] = (byte) '1';
+      } else {
+        encoded[i] = (byte) '0';
+      }
+    }
+    if (selected != EXPECTED_SELECTED) {
+      throw new IllegalStateException(
+          "Expected exactly " + EXPECTED_SELECTED + " selected items for user " + userID
+              + " but found " + selected + " among " + result.length);
+    }
+    resultBytes = encoded;
+  }
+
+  /** @return ID of the user this result belongs to */
+  public long getUserID() {
+    return userID;
+  }
+
+  /**
+   * @return a fresh copy of the encoded flags, so callers cannot alter this object's state
+   */
+  public byte[] getResultBytes() {
+    return resultBytes.clone();
+  }
+
+}