Author: stack Date: Tue Apr 15 14:39:20 2008 New Revision: 648422 URL: http://svn.apache.org/viewvc?rev=648422&view=rev Log: HBASE-559 MR example job to count table rows
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties Modified: hadoop/hbase/trunk/CHANGES.txt hadoop/hbase/trunk/build.xml hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java Modified: hadoop/hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=648422&r1=648421&r2=648422&view=diff ============================================================================== --- hadoop/hbase/trunk/CHANGES.txt (original) +++ hadoop/hbase/trunk/CHANGES.txt Tue Apr 15 14:39:20 2008 @@ -7,6 +7,10 @@ HBASE-12 When hbase regionserver restarts, it says "impossible state for createLease()" + IMPROVEMENTS + HBASE-559 MR example job to count table rows + + Release 0.1.1 - 04/11/2008 INCOMPATIBLE CHANGES Modified: hadoop/hbase/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/build.xml?rev=648422&r1=648421&r2=648422&view=diff ============================================================================== --- hadoop/hbase/trunk/build.xml (original) +++ hadoop/hbase/trunk/build.xml Tue Apr 15 14:39:20 2008 @@ -148,12 +148,20 @@ </javac> </target> - <!-- Override jar target to specify main class --> <target name="jar" depends="compile"> + <!--Copy over any properties under src--> + <copy todir="${build.classes}"> + <fileset dir="${src.dir}"> + <include name="**/*.properties" /> + </fileset> + </copy> <jar jarfile="${build.dir}/${final.name}.jar" basedir="${build.classes}" > <fileset file="${basedir}/conf/hbase-default.xml"/> <zipfileset dir="${build.webapps}" prefix="webapps"/> + <manifest> + <attribute name="Main-Class" value="org/apache/hadoop/hbase/mapred/Driver" /> + </manifest> </jar> </target> Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java URL: 
http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java?rev=648422&view=auto ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java (added) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/Driver.java Tue Apr 15 14:39:20 2008 @@ -0,0 +1,39 @@ +/** + * Copyright 2008 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.mapred; + +import org.apache.hadoop.util.ProgramDriver; + +/** + * Driver for hbase mapreduce jobs. Select which to run by passing + * name of job to this main. 
+ */ +public class Driver { + /** + * @param args command line handed unchanged to ProgramDriver; names the job to run + * @throws Throwable anything thrown by the ProgramDriver calls below + */ + public static void main(String[] args) throws Throwable { + ProgramDriver pgd = new ProgramDriver(); + pgd.addClass(RowCounter.NAME, RowCounter.class, + "Count rows in HBase table"); + pgd.driver(args); + } +} Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java?rev=648422&view=auto ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java (added) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter.java Tue Apr 15 14:39:20 2008 @@ -0,0 +1,126 @@ +/** + * Copyright 2008 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.mapred; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.io.Cell; +import org.apache.hadoop.hbase.io.RowResult; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.lib.IdentityReducer; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +/** + * A job with a map to count rows. + * Map outputs table rows IF the input row has columns that have content, i.e. at least one non-null cell with a non-empty value; each row output also increments the ROWS counter. + * Uses an {@link IdentityReducer} + */ +public class RowCounter extends TableMap<Text, RowResult> implements Tool { + /* Name of this 'program'; doubles as the MapReduce job name and the usage-message prefix + */ + static final String NAME = "rowcounter"; + + private Configuration conf; + private final RowResult EMPTY_RESULT_VALUE = new RowResult(); + private static enum Counters {ROWS} + + @Override + public void map(Text row, RowResult value, + OutputCollector<Text, RowResult> output, + @SuppressWarnings("unused") Reporter reporter) + throws IOException { + boolean content = false; + for (Map.Entry<Text, Cell> e: value.entrySet()) { + Cell cell = e.getValue(); + if (cell != null && cell.getValue().length > 0) { + content = true; + break; + } + } + if (!content) { + return; + } + // Give out same value every time. 
We're only interested in the row/key + reporter.incrCounter(Counters.ROWS, 1); + output.collect(row, EMPTY_RESULT_VALUE); + } + + @SuppressWarnings({ "unused", "deprecation" }) + public JobConf createSubmittableJob(String[] args) throws IOException { + JobConf c = new JobConf(getConf(), RowCounter.class); + c.setJobName(NAME); + // Columns are space delimited + StringBuilder sb = new StringBuilder(); + final int columnoffset = 2; + for (int i = columnoffset; i < args.length; i++) { + if (i > columnoffset) { + sb.append(" "); + } + sb.append(args[i]); + } + // Second argument is the table name. + TableMap.initJob(args[1], sb.toString(), this.getClass(), Text.class, + RowResult.class, c); + c.setReducerClass(IdentityReducer.class); + // First arg is the output directory. NOTE(review): main() also passes args[0] to hbase.master; confirm which meaning is intended. + c.setOutputPath(new Path(args[0])); + return c; + } + + static int printUsage() { + System.out.println(NAME + + " <outputdir> <tablename> <column1> [<column2>...]"); + return -1; + } + + public int run(final String[] args) throws Exception { + // Make sure there are at least 3 parameters + if (args.length < 3) { + System.err.println("ERROR: Wrong number of parameters: " + args.length); + return printUsage(); + } + JobClient.runJob(createSubmittableJob(args)); + return 0; + } + + public Configuration getConf() { + return this.conf; + } + + public void setConf(final Configuration c) { + this.conf = c; + } + + public static void main(String[] args) throws Exception { + HBaseConfiguration c = new HBaseConfiguration(); + c.set("hbase.master", args[0]); + int errCode = ToolRunner.run(c, new RowCounter(), args); + System.exit(errCode); + } +} \ No newline at end of file Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties?rev=648422&view=auto ============================================================================== --- 
hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties (added) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/RowCounter_Counters.properties Tue Apr 15 14:39:20 2008 @@ -0,0 +1,6 @@ + +# ResourceBundle properties file for RowCounter MR job + +CounterGroupName= RowCounter + +ROWS.name= Rows Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java?rev=648422&r1=648421&r2=648422&view=diff ============================================================================== --- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java (original) +++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/mapred/package-info.java Tue Apr 15 14:39:20 2008 @@ -75,7 +75,15 @@ reducers so load is spread across the hbase cluster. </p> -<h2> Sample MR Bulk Uploader </h2> +<h2>Example Code</h2> +<h3>Sample Row Counter</h3> +<p>See {@link org.apache.hadoop.hbase.mapred.RowCounter}. You should be able to run +it by doing: <code>% ./bin/hadoop jar hbase-X.X.X.jar</code>. This will invoke +the hbase MapReduce Driver class. Select 'rowcounter' from the choice of jobs +offered. +</p> + +<h3> Sample MR Bulk Uploader </h3> <p>Read the class comment below for specification of inputs, prerequisites, etc. </p> <blockquote><pre>package org.apache.hadoop.hbase.mapred;