Author: srowen
Date: Mon Nov 14 21:26:53 2011
New Revision: 1201911

URL: http://svn.apache.org/viewvc?rev=1201911&view=rev
Log:
MAHOUT-885 correct, set FPGrowth default number of reducers to 1000

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java?rev=1201911&r1=1201910&r2=1201911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java Mon Nov 14 21:26:53 2011
@@ -57,7 +57,12 @@ public class Parameters {
   public void set(String key, String value) {
     params.put(key, value);
   }
-  
+
+  public int getInt(String key, int defaultValue) {
+    String ret = params.get(key);
+    return ret == null ? defaultValue : Integer.parseInt(ret);
+  }
+
   @Override
   public String toString() {
     Configuration conf = new Configuration();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java?rev=1201911&r1=1201910&r2=1201911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java Mon Nov 14 21:26:53 2011
@@ -66,7 +66,8 @@ public final class FPGrowthDriver extend
     addOption("minSupport", "s", "(Optional) The minimum number of times a 
co-occurrence must be present. Default Value: 3", "3");
     addOption("maxHeapSize", "k", "(Optional) Maximum Heap Size k, to denote 
the requirement to mine top K items. Default value: 50", "50");
     addOption("numGroups", "g", "(Optional) Number of groups the features 
should be divided in the map-reduce version."
-            + " Doesn't work in sequential version Default Value:1000", 
"1000");
+            + " Doesn't work in sequential version Default Value:" + 
PFPGrowth.NUM_GROUPS_DEFAULT,
+             Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT));
     addOption("splitterPattern", "regex", "Regular Expression pattern used to 
split given string transaction into itemsets."
             + " Default value splits comma separated itemsets.  Default Value:"
             + " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ", "[ ,\t]*[,|\t][ ,\t]*");

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=1201911&r1=1201910&r2=1201911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Mon Nov 14 21:26:53 2011
@@ -67,6 +67,7 @@ public final class PFPGrowth {
   public static final String F_LIST = "fList";
   public static final String G_LIST = "gList";
   public static final String NUM_GROUPS = "numGroups";
+  public static final int NUM_GROUPS_DEFAULT = 1000;
   public static final String OUTPUT = "output";
   public static final String MIN_SUPPORT = "minSupport";
   public static final String MAX_HEAPSIZE = "maxHeapSize";
@@ -282,7 +283,7 @@ public final class PFPGrowth {
    */
   public static void startGroupingItems(Parameters params, Configuration conf) 
throws IOException {
     List<Pair<String,Long>> fList = readFList(params);
-    Integer numGroups = Integer.valueOf(params.get(NUM_GROUPS, "50"));
+    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
     
     Map<String,Long> gList = Maps.newHashMap();
     long maxPerGroup = fList.size() / numGroups;
@@ -353,7 +354,18 @@ public final class PFPGrowth {
     String input = params.get(INPUT);
     Job job = new Job(conf, "PFP Transaction Sorting running over input" + 
input);
     job.setJarByClass(PFPGrowth.class);
-    
+
+    Integer numGroups = Integer.valueOf(params.get(NUM_GROUPS, "-1"));
+    int numRed = job.getNumReduceTasks();
+    if (numGroups < 0) {
+      if (NUM_GROUPS_DEFAULT < numRed) {
+        params.set(NUM_GROUPS, Integer.toString(numRed));
+      }
+    }
+    if (numRed > numGroups) {
+      log.info("Warning: running with less groups of work than  reducers!");
+    }
+
     job.setMapOutputKeyClass(LongWritable.class);
     job.setMapOutputValueClass(TransactionTree.class);
     


Reply via email to