Author: robinanil Date: Fri Jan 8 08:23:22 2010 New Revision: 897134 URL: http://svn.apache.org/viewvc?rev=897134&view=rev Log: MAHOUT-221 Missed out two files while checking in FP-Bonsai
Added: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java Added: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java?rev=897134&view=auto ============================================================================== --- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java (added) +++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeapTest.java Fri Jan 8 08:23:22 2010 @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.mahout.fpm.pfpgrowth.fpgrowth;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+/**
+ * Smoke test for {@link FrequentPatternMaxHeap}: fills heaps with seeded pseudo-random
+ * {@link Pattern}s and merges them into one heap. It contains no assertions, so it
+ * passes as long as no call throws.
+ */
+public class FrequentPatternMaxHeapTest extends TestCase {
+
+  /**
+   * Runs 20 rounds; each round inserts 1000 random patterns into a fresh heap and then
+   * re-inserts everything that heap still exposes through {@code getHeap()} into a
+   * single heap shared by all rounds.
+   */
+  public void testMapHeap() {
+    // Fixed seed keeps the generated patterns identical from run to run.
+    Random gen = new Random(123L);
+    FrequentPatternMaxHeap merged = new FrequentPatternMaxHeap(50, true);
+
+    int round = 0;
+    while (round < 20) {
+      FrequentPatternMaxHeap partial = new FrequentPatternMaxHeap(50, false);
+      for (int remaining = 1000; remaining > 0; remaining--) {
+        partial.insert(generateRandomPattern(gen));
+      }
+      for (Pattern kept : partial.getHeap()) {
+        merged.insert(kept);
+      }
+      round++;
+    }
+  }
+
+  /**
+   * Builds a pattern of 1 to 6 distinct ids, each in the range 0..19.
+   *
+   * @param gen source of randomness; the draws it yields fully determine the result
+   * @return the populated pattern, after sorting the array returned by
+   *         {@code getPattern()}
+   */
+  public Pattern generateRandomPattern(Random gen) {
+    int length = 1 + Math.abs(gen.nextInt() % 6);
+    Pattern result = new Pattern();
+    Set<Integer> seen = new HashSet<Integer>();
+    for (int remaining = length; remaining > 0; remaining--) {
+      // Redraw until the id is new; Set.add returns false for a duplicate.
+      int id;
+      do {
+        id = Math.abs(gen.nextInt() % 20);
+      } while (!seen.add(id));
+      // Java's % keeps the sign of the dividend, so this value ranges over 2..8.
+      int value = 5 + gen.nextInt() % 4;
+      result.add(id, value);
+    }
+    // NOTE(review): presumably getPattern() exposes the pattern's own array, so this
+    // orders the ids in place - confirm against Pattern.
+    Arrays.sort(result.getPattern());
+    return result;
+  }
+}
Added: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java?rev=897134&view=auto
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java (added)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/example/dataset/KeyBasedStringTupleReducer.java Fri Jan 8 08:23:22 2010
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.fpm.pfpgrowth.example.dataset;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.common.Parameters;
+import org.apache.mahout.common.StringTuple;
+
+/**
+ * Reducer that collects the distinct entries of every {@link StringTuple} received for
+ * a key and writes them out as tab-separated lines holding at most
+ * {@code maxTransactionLength} items each.
+ *
+ * <p>Keys with fewer than two distinct items produce no output. Every record is written
+ * with a {@code null} key, so only the value text carries data.
+ */
+public class KeyBasedStringTupleReducer extends Reducer<Text, StringTuple, Text, Text> {
+
+  /** Maximum number of items per output line; replaced by the job setting in setup. */
+  private int maxTransactionLength = 100;
+
+  /**
+   * Merges the entries of all tuples for {@code key} into a set and emits that set in
+   * chunks of at most {@code maxTransactionLength} tab-separated items.
+   *
+   * @param key the grouping key; it is not used when building the output
+   * @param values the tuples grouped under {@code key}
+   * @param context the task context that receives the output lines
+   * @throws IOException if writing to the context fails
+   * @throws InterruptedException if writing to the context is interrupted
+   */
+  @Override
+  protected void reduce(Text key, Iterable<StringTuple> values, Context context) throws IOException,
+      InterruptedException {
+    Set<String> items = new HashSet<String>();
+    for (StringTuple value : values) {
+      for (String field : value.getEntries()) {
+        items.add(field);
+      }
+    }
+
+    // Fewer than two distinct items: nothing is written for this key.
+    if (items.size() <= 1) {
+      return;
+    }
+
+    StringBuilder sb = new StringBuilder();
+    String sep = "";
+    int i = 0;
+    for (String field : items) {
+      // A full chunk is flushed only when one more item arrives, so the last chunk
+      // always goes through the final write below.
+      if (i > 0 && i % maxTransactionLength == 0) {
+        // NOTE(review): the key is written as null; confirm the job's output format
+        // accepts null keys.
+        context.write(null, new Text(sb.toString()));
+        sb.setLength(0);
+        sep = "";
+      }
+      sb.append(sep).append(field);
+      sep = "\t";
+      i++;
+    }
+    if (sb.length() > 0) {
+      context.write(null, new Text(sb.toString()));
+    }
+  }
+
+  /**
+   * Reads {@code maxTransactionLength} from the {@code job.parameters} configuration
+   * entry, defaulting to 100 when the entry or the parameter is absent.
+   *
+   * @param context the task context supplying the job configuration
+   * @throws IOException if the superclass setup fails
+   * @throws InterruptedException if the superclass setup is interrupted
+   * @throws NumberFormatException if the configured value is not an integer
+   * @throws IllegalArgumentException if the configured value is smaller than 1
+   */
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
+    Parameters params = Parameters.fromString(context.getConfiguration().get("job.parameters", ""));
+    int configured = Integer.parseInt(params.get("maxTransactionLength", "100"));
+    // Fail here with a clear message: reduce() uses this value as a modulus, so zero
+    // would otherwise surface as an ArithmeticException in the middle of the job.
+    if (configured < 1) {
+      throw new IllegalArgumentException("maxTransactionLength must be at least 1, got " + configured);
+    }
+    maxTransactionLength = configured;
+  }
+}