Dear,
Here is a patch that adds a new distance metric to the collaborative
filtering modules: CityBlockDistance (implemented as the class
CityBlockSimilarity). Because preferences are treated as a binary 0/1
split, KLDistance, AHDistance, and symmetric KLDistance don't make
sense here.
Daniel McEnnis.
Index: core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java
===================================================================
--- core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java (revision 0)
+++ core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java (revision 0)
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * Implementation of City Block distance (also known as Manhattan distance) - the absolute
+ * value of the difference in each direction is summed. Preferences are treated as binary
+ * (present or absent), so the distance between two preference sets is computed as
+ * {@code size1 + size2 - 2 * intersectionSize}.
+ *
+ * <p>The resulting unbounded distance is then mapped between 1 and -1: a distance of 0 gives
+ * 1.0, a distance of 1 gives 0.0, and larger distances {@code d} give {@code -1 + 1/d}, which
+ * approaches -1 as the distance grows.</p>
+ *
+ * @author Daniel McEnnis
+ */
+public class CityBlockSimilarity implements UserSimilarity, ItemSimilarity {
+
+  private final DataModel dataModel;
+
+  /**
+   * Creates a CityBlockSimilarity backed by the given data model.
+   *
+   * @param dataModel model that supplies the preference counts and item ID sets used by the
+   *        similarity computations; must not be null
+   * @throws IllegalArgumentException if {@code dataModel} is null
+   */
+  public CityBlockSimilarity(DataModel dataModel) {
+    // Fail fast here instead of with a NullPointerException on the first similarity call.
+    if (dataModel == null) {
+      throw new IllegalArgumentException("dataModel is null");
+    }
+    this.dataModel = dataModel;
+  }
+
+  /**
+   * Not supported by this implementation.
+   *
+   * @param inferrer ignored
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Refreshes the underlying data model via {@link RefreshHelper}.
+   *
+   * @param alreadyRefreshed components already refreshed in this cycle; handed to
+   *        {@link RefreshHelper#buildRefreshed(Collection)} before use
+   */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    Collection<Refreshable> refreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(refreshed, dataModel);
+  }
+
+  /**
+   * Computes the similarity between two items from the number of users with a preference for
+   * each item and the number with a preference for both.
+   *
+   * @param itemID1 first item ID
+   * @param itemID2 second item ID
+   * @return similarity between 1 and -1; see the class description for the mapping
+   * @throws TasteException if the data model cannot supply the counts
+   */
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+    int preferring1 = dataModel.getNumUsersWithPreferenceFor(itemID1);
+    return doItemSimilarity(itemID1, itemID2, preferring1);
+  }
+
+  /**
+   * Computes the similarity between one item and each of several other items.
+   *
+   * @param itemID1 item to compare against
+   * @param itemID2s items to compare with {@code itemID1}
+   * @return similarities in the same order as {@code itemID2s}
+   * @throws TasteException if the data model cannot supply the counts
+   */
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+    // The count for itemID1 does not depend on the loop, so look it up only once.
+    int preferring1 = dataModel.getNumUsersWithPreferenceFor(itemID1);
+    double[] similarities = new double[itemID2s.length];
+    for (int i = 0; i < itemID2s.length; i++) {
+      similarities[i] = doItemSimilarity(itemID1, itemID2s[i], preferring1);
+    }
+    return similarities;
+  }
+
+  /**
+   * Computes the similarity between two users from the sizes of their item ID sets and the
+   * size of the intersection of those sets.
+   *
+   * @param userID1 first user ID
+   * @param userID2 second user ID
+   * @return similarity between 1 and -1; see the class description for the mapping
+   * @throws TasteException if the data model cannot supply the item IDs
+   */
+  @Override
+  public double userSimilarity(long userID1, long userID2) throws TasteException {
+    FastIDSet prefs1 = dataModel.getItemIDsFromUser(userID1);
+    FastIDSet prefs2 = dataModel.getItemIDsFromUser(userID2);
+    int prefs1Size = prefs1.size();
+    int prefs2Size = prefs2.size();
+    // The smaller set is always passed as the argument to the larger set's intersectionSize();
+    // presumably that method iterates over its argument - confirm against FastIDSet.
+    int intersectionSize = prefs1Size < prefs2Size
+        ? prefs2.intersectionSize(prefs1)
+        : prefs1.intersectionSize(prefs2);
+    return doComputeResult(prefs1Size, prefs2Size, intersectionSize);
+  }
+
+  /** Looks up the counts for {@code itemID2} and the item pair, then maps them to a similarity. */
+  private double doItemSimilarity(long itemID1, long itemID2, int preferring1)
+    throws TasteException {
+    int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2);
+    int intersection = dataModel.getNumUsersWithPreferenceFor(itemID1, itemID2);
+    return doComputeResult(preferring1, preferring2, intersection);
+  }
+
+  /** Maps two set sizes and their intersection size to a similarity between 1 and -1. */
+  private static double doComputeResult(int size1, int size2, int intersectionSize) {
+    // Summed in double so that very large counts cannot overflow int arithmetic.
+    double distance = (double) size1 + size2 - 2.0 * intersectionSize;
+    if (distance < 1.0) {
+      return 1.0 - distance;
+    }
+    return -1.0 + 1.0 / distance;
+  }
+
+}