Author: gsingers
Date: Thu Nov  3 18:20:59 2011
New Revision: 1197253

URL: http://svn.apache.org/viewvc?rev=1197253&view=rev
Log:
added some comments

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java?rev=1197253&r1=1197252&r2=1197253&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java Thu Nov  3 18:20:59 2011
@@ -56,13 +56,13 @@ public class PreparePreferenceMatrixJob 
     addInputOption();
     addOutputOption();
     addOption("maxPrefsPerUser", "mppu", "max number of preferences to 
consider per user, " +
-        "users with more preferences will be sampled down");
+            "users with more preferences will be sampled down");
     addOption("minPrefsPerUser", "mp", "ignore users with less preferences 
than this "
-        + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', 
String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
+            + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', 
String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
     addOption("booleanData", "b", "Treat input as without pref values", 
Boolean.FALSE.toString());
     addOption("ratingShift", "rs", "shift ratings by this value", "0.0");
 
-    Map<String,String> parsedArgs = parseArguments(args);
+    Map<String, String> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
       return -1;
     }
@@ -70,27 +70,27 @@ public class PreparePreferenceMatrixJob 
     int minPrefsPerUser = 
Integer.parseInt(parsedArgs.get("--minPrefsPerUser"));
     boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
     float ratingShift = Float.parseFloat(parsedArgs.get("--ratingShift"));
-
+    //convert items to an internal index
     Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), 
TextInputFormat.class,
-        ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, 
ItemIDIndexReducer.class,
-        VarIntWritable.class, VarLongWritable.class, 
SequenceFileOutputFormat.class);
+            ItemIDIndexMapper.class, VarIntWritable.class, 
VarLongWritable.class, ItemIDIndexReducer.class,
+            VarIntWritable.class, VarLongWritable.class, 
SequenceFileOutputFormat.class);
     itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
     itemIDIndex.waitForCompletion(true);
-
+    //convert user preferences into a vector per user
     Job toUserVectors = prepareJob(getInputPath(), 
getOutputPath(USER_VECTORS), TextInputFormat.class,
-        ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? 
VarLongWritable.class : EntityPrefWritable.class,
-        ToUserVectorsReducer.class, VarLongWritable.class, 
VectorWritable.class, SequenceFileOutputFormat.class);
+            ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? 
VarLongWritable.class : EntityPrefWritable.class,
+            ToUserVectorsReducer.class, VarLongWritable.class, 
VectorWritable.class, SequenceFileOutputFormat.class);
     toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, 
booleanData);
     
toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER,
 minPrefsPerUser);
     toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, 
String.valueOf(ratingShift));
     toUserVectors.waitForCompletion(true);
-
+    //we need the number of users later
     int numberOfUsers = (int) 
toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS).getValue();
     TasteHadoopUtils.writeInt(numberOfUsers, getOutputPath(NUM_USERS), 
getConf());
-
+    //build the rating matrix
     Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), 
getOutputPath(RATING_MATRIX),
-        ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, 
ToItemVectorsReducer.class,
-        IntWritable.class, VectorWritable.class);
+            ToItemVectorsMapper.class, IntWritable.class, 
VectorWritable.class, ToItemVectorsReducer.class,
+            IntWritable.class, VectorWritable.class);
     toItemVectors.setCombinerClass(ToItemVectorsReducer.class);
 
     /* configure sampling regarding the uservectors */


Reply via email to