Author: ssc
Date: Sun Jan 2 10:34:27 2011
New Revision: 1054382
URL: http://svn.apache.org/viewvc?rev=1054382&view=rev
Log:
MAHOUT-570 Make the retrieval of candidate items for the most-similar-items
computation customizable
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java?rev=1054382&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java
Sun Jan 2 10:34:27 2011
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
+
+/**
+ * Abstract base implementation for retrieving candidate items to recommend.
+ *
+ * <p>Both public entry points turn their input into a set of item IDs and delegate to
+ * {@link #doGetCandidateItems(FastIDSet, DataModel)}, so that a subclass implements the
+ * candidate selection only once.</p>
+ */
+public abstract class AbstractCandidateItemsStrategy implements CandidateItemsStrategy,
+    MostSimilarItemsCandidateItemsStrategy {
+
+  /**
+   * Retrieves the candidate items for a user, based on the item IDs the data model reports
+   * for that user.
+   *
+   * @param userID ID of the user whose item IDs are looked up in the data model
+   * @param dataModel data model to read from
+   * @return whatever {@link #doGetCandidateItems(FastIDSet, DataModel)} returns for those item IDs
+   * @throws TasteException propagated from the data model or from the candidate selection
+   */
+  @Override
+  public FastIDSet getCandidateItems(long userID, DataModel dataModel) throws TasteException {
+    return doGetCandidateItems(dataModel.getItemIDsFromUser(userID), dataModel);
+  }
+
+  /**
+   * Retrieves the candidate items for an explicitly given set of items.
+   *
+   * @param itemIDs IDs of the items to start from; copied into a new {@link FastIDSet}
+   * @param dataModel data model to read from
+   * @return whatever {@link #doGetCandidateItems(FastIDSet, DataModel)} returns for those item IDs
+   * @throws TasteException propagated from the candidate selection
+   */
+  @Override
+  public FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel) throws TasteException {
+    FastIDSet preferredItemIDs = new FastIDSet(itemIDs.length);
+    preferredItemIDs.addAll(itemIDs);
+    return doGetCandidateItems(preferredItemIDs, dataModel);
+  }
+
+  /**
+   * Computes the candidate items for the given starting items.
+   *
+   * <p>Declared {@code protected} to match the visibility of the overrides in the concrete
+   * strategies and to allow implementations outside of this package.</p>
+   *
+   * @param preferredItemIDs IDs of the items to start from
+   * @param dataModel data model to read from
+   * @return IDs of the candidate items
+   * @throws TasteException if the candidate items cannot be computed
+   */
+  protected abstract FastIDSet doGetCandidateItems(FastIDSet preferredItemIDs, DataModel dataModel)
+      throws TasteException;
+}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java?rev=1054382&r1=1054381&r2=1054382&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
Sun Jan 2 10:34:27 2011
@@ -17,26 +17,24 @@
package org.apache.mahout.cf.taste.impl.recommender;
-import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.model.DataModel;
-/**
- * returns all items the user has not rated yet
- */
-public final class AllUnknownItemsCandidateItemsStrategy implements
CandidateItemsStrategy {
+public final class AllUnknownItemsCandidateItemsStrategy extends
AbstractCandidateItemsStrategy {
+ /**
+ * return all items the user has not yet seen
+ */
@Override
- public FastIDSet getCandidateItems(long userID, DataModel dataModel) throws
TasteException {
+ protected FastIDSet doGetCandidateItems(FastIDSet preferredItemIDs,
DataModel dataModel) throws TasteException {
FastIDSet possibleItemIDs = new FastIDSet(dataModel.getNumItems());
LongPrimitiveIterator allItemIDs = dataModel.getItemIDs();
while (allItemIDs.hasNext()) {
possibleItemIDs.add(allItemIDs.nextLong());
}
- possibleItemIDs.removeAll(dataModel.getItemIDsFromUser(userID));
+ possibleItemIDs.removeAll(preferredItemIDs);
return possibleItemIDs;
}
-
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java?rev=1054382&r1=1054381&r2=1054382&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java
Sun Jan 2 10:34:27 2011
@@ -21,6 +21,7 @@ import org.apache.mahout.cf.taste.common
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+import
org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
/**
@@ -35,10 +36,10 @@ public final class GenericBooleanPrefIte
super(dataModel, similarity);
}
- public GenericBooleanPrefItemBasedRecommender(DataModel dataModel,
- ItemSimilarity similarity,
- CandidateItemsStrategy
candidateItemsStrategy) {
- super(dataModel, similarity, candidateItemsStrategy);
+ public GenericBooleanPrefItemBasedRecommender(DataModel dataModel,
ItemSimilarity similarity,
+ CandidateItemsStrategy candidateItemsStrategy,
MostSimilarItemsCandidateItemsStrategy
+ mostSimilarItemsCandidateItemsStrategy) {
+ super(dataModel, similarity, candidateItemsStrategy,
mostSimilarItemsCandidateItemsStrategy);
}
/**
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java?rev=1054382&r1=1054381&r2=1054382&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
Sun Jan 2 10:34:27 2011
@@ -33,6 +33,7 @@ import org.apache.mahout.cf.taste.model.
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+import
org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
@@ -70,6 +71,7 @@ public class GenericItemBasedRecommender
private static final Logger log =
LoggerFactory.getLogger(GenericItemBasedRecommender.class);
private final ItemSimilarity similarity;
+ private final MostSimilarItemsCandidateItemsStrategy
mostSimilarItemsCandidateItemsStrategy;
private final RefreshHelper refreshHelper;
private EstimatedPreferenceCapper capper;
@@ -77,10 +79,14 @@ public class GenericItemBasedRecommender
public GenericItemBasedRecommender(DataModel dataModel,
ItemSimilarity similarity,
- CandidateItemsStrategy
candidateItemsStrategy) {
+ CandidateItemsStrategy
candidateItemsStrategy,
+ MostSimilarItemsCandidateItemsStrategy
mostSimilarItemsCandidateItemsStrategy) {
super(dataModel, candidateItemsStrategy);
Preconditions.checkArgument(similarity != null, "similarity is null");
this.similarity = similarity;
+ Preconditions.checkArgument(mostSimilarItemsCandidateItemsStrategy != null,
+ "mostSimilarItemsCandidateItemsStrategy is null");
+ this.mostSimilarItemsCandidateItemsStrategy =
mostSimilarItemsCandidateItemsStrategy;
this.refreshHelper = new RefreshHelper(new Callable<Void>() {
@Override
public Void call() {
@@ -93,9 +99,13 @@ public class GenericItemBasedRecommender
capper = buildCapper();
}
- public GenericItemBasedRecommender(DataModel dataModel,
- ItemSimilarity similarity) {
- this(dataModel, similarity,
AbstractRecommender.getDefaultCandidateItemsStrategy());
+ public GenericItemBasedRecommender(DataModel dataModel, ItemSimilarity
similarity) {
+ this(dataModel, similarity,
AbstractRecommender.getDefaultCandidateItemsStrategy(),
+ getDefaultMostSimilarItemsCandidateItemsStrategy());
+ }
+
+ protected static MostSimilarItemsCandidateItemsStrategy
getDefaultMostSimilarItemsCandidateItemsStrategy() {
+ return new PreferredItemsNeighborhoodCandidateItemsStrategy();
}
public ItemSimilarity getSimilarity() {
@@ -198,18 +208,8 @@ public class GenericItemBasedRecommender
private List<RecommendedItem> doMostSimilarItems(long[] itemIDs,
int howMany,
TopItems.Estimator<Long>
estimator) throws TasteException {
- DataModel model = getDataModel();
- FastIDSet possibleItemsIDs = new FastIDSet();
- for (long itemID : itemIDs) {
- PreferenceArray prefs = model.getPreferencesForItem(itemID);
- int size = prefs.length();
- for (int i = 0; i < size; i++) {
- long userID = prefs.get(i).getUserID();
- possibleItemsIDs.addAll(model.getItemIDsFromUser(userID));
- }
- }
- possibleItemsIDs.removeAll(itemIDs);
- return TopItems.getTopItems(howMany, possibleItemsIDs.iterator(), null,
estimator);
+ FastIDSet possibleItemIDs =
mostSimilarItemsCandidateItemsStrategy.getCandidateItems(itemIDs,
getDataModel());
+ return TopItems.getTopItems(howMany, possibleItemIDs.iterator(), null,
estimator);
}
protected float doEstimatePreference(long userID, long itemID) throws
TasteException {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java?rev=1054382&r1=1054381&r2=1054382&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
Sun Jan 2 10:34:27 2011
@@ -17,24 +17,22 @@
package org.apache.mahout.cf.taste.impl.recommender;
-import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
-/**
- * returns all items that have not been rated by the user and that were
preferred by another user
- * that has preferred at least one item that the current user has preferred too
- */
-public final class PreferredItemsNeighborhoodCandidateItemsStrategy implements
CandidateItemsStrategy {
+public final class PreferredItemsNeighborhoodCandidateItemsStrategy extends
AbstractCandidateItemsStrategy {
+ /**
+ * returns all items that have not been rated by the user and that were
preferred by another user
+ * that has preferred at least one item that the current user has preferred
too
+ */
@Override
- public FastIDSet getCandidateItems(long userID, DataModel dataModel) throws
TasteException {
+ protected FastIDSet doGetCandidateItems(FastIDSet preferredItemIDs,
DataModel dataModel) throws TasteException {
FastIDSet possibleItemsIDs = new FastIDSet();
- FastIDSet itemIDs = dataModel.getItemIDsFromUser(userID);
- LongPrimitiveIterator itemIDIterator = itemIDs.iterator();
+ LongPrimitiveIterator itemIDIterator = preferredItemIDs.iterator();
while (itemIDIterator.hasNext()) {
long itemID = itemIDIterator.nextLong();
PreferenceArray prefs2 = dataModel.getPreferencesForItem(itemID);
@@ -43,8 +41,8 @@ public final class PreferredItemsNeighbo
possibleItemsIDs.addAll(dataModel.getItemIDsFromUser(prefs2.getUserID(j)));
}
}
- possibleItemsIDs.removeAll(itemIDs);
+ possibleItemsIDs.removeAll(preferredItemIDs);
return possibleItemsIDs;
}
-
+
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java?rev=1054382&r1=1054381&r2=1054382&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
Sun Jan 2 10:34:27 2011
@@ -23,7 +23,6 @@ import org.apache.mahout.cf.taste.impl.c
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
import org.apache.mahout.common.iterator.FixedSizeSamplingIterator;
import java.util.Iterator;
@@ -37,7 +36,7 @@ import java.util.Iterator;
* max(defaultMaxPrefsPerItemConsidered, userItemCountFactor *
log(max(N_users, N_items)))
* </pre></p>
*/
-public class SamplingCandidateItemsStrategy implements CandidateItemsStrategy {
+public class SamplingCandidateItemsStrategy extends
AbstractCandidateItemsStrategy {
private final int defaultMaxPrefsPerItemConsidered;
private final int userItemCountMultiplier;
@@ -67,12 +66,11 @@ public class SamplingCandidateItemsStrat
}
@Override
- public FastIDSet getCandidateItems(long userID, DataModel dataModel) throws
TasteException {
+ protected FastIDSet doGetCandidateItems(FastIDSet preferredItemIDs,
DataModel dataModel) throws TasteException {
int maxPrefsPerItemConsidered = (int)
Math.max(defaultMaxPrefsPerItemConsidered,
userItemCountMultiplier * Math.log(Math.max(dataModel.getNumUsers(),
dataModel.getNumItems())));
FastIDSet possibleItemsIDs = new FastIDSet();
- FastIDSet itemIDs = dataModel.getItemIDsFromUser(userID);
- LongPrimitiveIterator itemIDIterator = itemIDs.iterator();
+ LongPrimitiveIterator itemIDIterator = preferredItemIDs.iterator();
while (itemIDIterator.hasNext()) {
long itemID = itemIDIterator.nextLong();
PreferenceArray prefs = dataModel.getPreferencesForItem(itemID);
@@ -82,8 +80,7 @@ public class SamplingCandidateItemsStrat
possibleItemsIDs.addAll(dataModel.getItemIDsFromUser(sampledPrefs.next().getUserID()));
}
}
- possibleItemsIDs.removeAll(itemIDs);
+ possibleItemsIDs.removeAll(preferredItemIDs);
return possibleItemsIDs;
}
-
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java?rev=1054382&r1=1054381&r2=1054382&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
Sun Jan 2 10:34:27 2011
@@ -27,6 +27,7 @@ import org.apache.mahout.cf.taste.impl.r
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy;
+import
org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Rescorer;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
@@ -47,8 +48,9 @@ public final class KnnItemBasedRecommend
ItemSimilarity similarity,
Optimizer optimizer,
CandidateItemsStrategy candidateItemsStrategy,
+ MostSimilarItemsCandidateItemsStrategy
mostSimilarItemsCandidateItemsStrategy,
int neighborhoodSize) {
- super(dataModel, similarity, candidateItemsStrategy);
+ super(dataModel, similarity, candidateItemsStrategy,
mostSimilarItemsCandidateItemsStrategy);
this.optimizer = optimizer;
this.neighborhoodSize = neighborhoodSize;
}
@@ -57,7 +59,8 @@ public final class KnnItemBasedRecommend
ItemSimilarity similarity,
Optimizer optimizer,
int neighborhoodSize) {
- this(dataModel, similarity, optimizer, getDefaultCandidateItemsStrategy(),
neighborhoodSize);
+ this(dataModel, similarity, optimizer, getDefaultCandidateItemsStrategy(),
+ getDefaultMostSimilarItemsCandidateItemsStrategy(), neighborhoodSize);
}
private List<RecommendedItem> mostSimilarItems(long itemID,
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java?rev=1054382&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java
Sun Jan 2 10:34:27 2011
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * Retrieves the candidate items among which the items most similar to the given items are searched.
+ */
+public interface MostSimilarItemsCandidateItemsStrategy {
+
+ FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel) throws
TasteException;
+}