Author: srowen
Date: Mon Nov 14 21:26:53 2011
New Revision: 1201911
URL: http://svn.apache.org/viewvc?rev=1201911&view=rev
Log:
MAHOUT-885 correct, set FPGrowth default number of reducers to 1000
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java?rev=1201911&r1=1201910&r2=1201911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java Mon Nov 14 21:26:53 2011
@@ -57,7 +57,12 @@ public class Parameters {
public void set(String key, String value) {
params.put(key, value);
}
-
+
+ public int getInt(String key, int defaultValue) {
+ String ret = params.get(key);
+ return ret == null ? defaultValue : Integer.parseInt(ret);
+ }
+
@Override
public String toString() {
Configuration conf = new Configuration();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java?rev=1201911&r1=1201910&r2=1201911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java Mon Nov 14 21:26:53 2011
@@ -66,7 +66,8 @@ public final class FPGrowthDriver extend
addOption("minSupport", "s", "(Optional) The minimum number of times a
co-occurrence must be present. Default Value: 3", "3");
addOption("maxHeapSize", "k", "(Optional) Maximum Heap Size k, to denote
the requirement to mine top K items. Default value: 50", "50");
addOption("numGroups", "g", "(Optional) Number of groups the features
should be divided in the map-reduce version."
- + " Doesn't work in sequential version Default Value:1000",
"1000");
+ + " Doesn't work in sequential version Default Value:" +
PFPGrowth.NUM_GROUPS_DEFAULT,
+ Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT));
addOption("splitterPattern", "regex", "Regular Expression pattern used to
split given string transaction into itemsets."
+ " Default value splits comma separated itemsets. Default Value:"
+ " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ", "[ ,\t]*[,|\t][ ,\t]*");
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=1201911&r1=1201910&r2=1201911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Mon Nov 14 21:26:53 2011
@@ -67,6 +67,7 @@ public final class PFPGrowth {
public static final String F_LIST = "fList";
public static final String G_LIST = "gList";
public static final String NUM_GROUPS = "numGroups";
+ public static final int NUM_GROUPS_DEFAULT = 1000;
public static final String OUTPUT = "output";
public static final String MIN_SUPPORT = "minSupport";
public static final String MAX_HEAPSIZE = "maxHeapSize";
@@ -282,7 +283,7 @@ public final class PFPGrowth {
*/
public static void startGroupingItems(Parameters params, Configuration conf)
throws IOException {
List<Pair<String,Long>> fList = readFList(params);
- Integer numGroups = Integer.valueOf(params.get(NUM_GROUPS, "50"));
+ int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
Map<String,Long> gList = Maps.newHashMap();
long maxPerGroup = fList.size() / numGroups;
@@ -353,7 +354,18 @@ public final class PFPGrowth {
String input = params.get(INPUT);
Job job = new Job(conf, "PFP Transaction Sorting running over input" +
input);
job.setJarByClass(PFPGrowth.class);
-
+
+ Integer numGroups = Integer.valueOf(params.get(NUM_GROUPS, "-1"));
+ int numRed = job.getNumReduceTasks();
+ if (numGroups < 0) {
+ if (NUM_GROUPS_DEFAULT < numRed) {
+ params.set(NUM_GROUPS, Integer.toString(numRed));
+ }
+ }
+ if (numRed > numGroups) {
+ log.info("Warning: running with less groups of work than reducers!");
+ }
+
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(TransactionTree.class);