Dear,

Here is a patch adding a new distance metric for the collaborative
filtering modules: City Block distance (CityBlockSimilarity). With the
0-1 binary split on preference, KLDistance, AHDistance, and Symmetric
KLDistance don't make sense.

Daniel McEnnis.
Index: core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java
===================================================================
--- core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java      (revision 0)
+++ core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java      (revision 0)
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * Implementation of City Block distance: the absolute value of the difference along each
+ * dimension is summed. Here that sum is computed as size1 + size2 - 2 * intersection, and
+ * the resulting unbounded distance is then mapped into the range (-1, 1].
+ *
+ * @author Daniel McEnnis
+ */
+public class CityBlockSimilarity implements UserSimilarity, ItemSimilarity {
+
+  private final DataModel dataModel;
+
+  /**
+   * Creates a CityBlockSimilarity that reads preference counts from the given model.
+   *
+   * @param dataModel model queried for preference counts and per-user item IDs
+   */
+  public CityBlockSimilarity(DataModel dataModel) {
+    this.dataModel = dataModel;
+  }
+
+  /**
+   * @throws UnsupportedOperationException always; this implementation accepts no inferrer
+   */
+  @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Delegates refreshing of the data model to {@link RefreshHelper}. */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(alreadyRefreshed, dataModel);
+  }
+
+  /**
+   * @return similarity in (-1, 1] derived from the user counts reported for the two items
+   * @see ItemSimilarity#itemSimilarity(long, long)
+   */
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+    int preferring1 = dataModel.getNumUsersWithPreferenceFor(itemID1);
+    int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2);
+    int intersection = dataModel.getNumUsersWithPreferenceFor(itemID1, itemID2);
+    return toSimilarity(preferring1, preferring2, intersection);
+  }
+
+  /**
+   * @return one similarity per entry of {@code itemID2s}, in the same order
+   * @see ItemSimilarity#itemSimilarities(long, long[])
+   */
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+    int preferring1 = dataModel.getNumUsersWithPreferenceFor(itemID1);
+    double[] similarities = new double[itemID2s.length];
+    for (int i = 0; i < itemID2s.length; i++) {
+      int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2s[i]);
+      int intersection = dataModel.getNumUsersWithPreferenceFor(itemID1, itemID2s[i]);
+      similarities[i] = toSimilarity(preferring1, preferring2, intersection);
+    }
+    return similarities;
+  }
+
+  /**
+   * @return similarity in (-1, 1] derived from the item ID sets of the two users
+   * @see UserSimilarity#userSimilarity(long, long)
+   */
+  @Override
+  public double userSimilarity(long userID1, long userID2) throws TasteException {
+    FastIDSet prefs1 = dataModel.getItemIDsFromUser(userID1);
+    FastIDSet prefs2 = dataModel.getItemIDsFromUser(userID2);
+    int prefs1Size = prefs1.size();
+    int prefs2Size = prefs2.size();
+    int intersectionSize =
+        prefs1Size < prefs2Size ? prefs2.intersectionSize(prefs1) : prefs1.intersectionSize(prefs2);
+    return toSimilarity(prefs1Size, prefs2Size, intersectionSize);
+  }
+
+  /**
+   * Converts two set sizes and their overlap into a similarity. The distance is the number
+   * of elements belonging to exactly one of the two sets; a distance of 0 maps to 1.0, a
+   * distance of 1 maps to 0.0, and larger distances approach -1.0 as -1 + 1/distance.
+   *
+   * @param size1 size of the first set
+   * @param size2 size of the second set
+   * @param intersection number of elements common to both sets
+   * @return the similarity, in (-1, 1] for a non-negative distance
+   */
+  private static double toSimilarity(int size1, int size2, int intersection) {
+    double distance = size1 + size2 - 2 * intersection;
+    if (distance < 1.0) {
+      return 1.0 - distance;
+    }
+    return -1.0 + 1.0 / distance;
+  }
+}

Reply via email to