CRUNCH-4: Create profiles for Hadoop 1.0.3 and Hadoop 2.0.0-alpha
Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/629f5734 Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/629f5734 Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/629f5734 Branch: refs/heads/master Commit: 629f57342bb8fb41b1484de7869c90509fdbfb9d Parents: 38d7b3a Author: jwills <[email protected]> Authored: Fri Jul 27 11:14:11 2012 -0700 Committer: jwills <[email protected]> Committed: Sun Aug 12 17:24:29 2012 -0700 ---------------------------------------------------------------------- .../java/org/apache/crunch/WordCountHBaseIT.java | 13 +- .../it/java/org/apache/crunch/lib/AggregateIT.java | 4 +- crunch/src/it/resources/log4j.properties | 2 + .../org/apache/crunch/lib/join/MapsideJoin.java | 2 +- pom.xml | 213 ++++++++++----- 5 files changed, 162 insertions(+), 72 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java ---------------------------------------------------------------------- diff --git a/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java b/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java index f7c9836..b7531b9 100644 --- a/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java +++ b/crunch/src/it/java/org/apache/crunch/WordCountHBaseIT.java @@ -98,6 +98,7 @@ public class WordCountHBaseIT { conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1"); conf.setInt("hbase.master.info.port", -1); conf.setInt("hbase.regionserver.info.port", -1); + conf.set("dfs.datanode.data.dir.perm", "775"); hbaseTestUtil.startMiniZKCluster(); hbaseTestUtil.startMiniCluster(); @@ -116,16 +117,16 @@ public class WordCountHBaseIT { } // Create a programmatic container for this jar. 
- JarOutputStream jos = new JarOutputStream(new FileOutputStream("WordCountHBaseTest.jar")); + JarOutputStream jos = new JarOutputStream(new FileOutputStream("WordCountHBaseIT.jar")); File baseDir = new File("target/test-classes"); String prefix = "org/apache/crunch/"; - jarUp(jos, baseDir, prefix + "WordCountHBaseTest.class"); - jarUp(jos, baseDir, prefix + "WordCountHBaseTest$1.class"); - jarUp(jos, baseDir, prefix + "WordCountHBaseTest$2.class"); + jarUp(jos, baseDir, prefix + "WordCountHBaseIT.class"); + jarUp(jos, baseDir, prefix + "WordCountHBaseIT$1.class"); + jarUp(jos, baseDir, prefix + "WordCountHBaseIT$2.class"); jos.close(); - Path target = new Path(tmpPath, "WordCountHBaseTest.jar"); - fs.copyFromLocalFile(true, new Path("WordCountHBaseTest.jar"), target); + Path target = new Path(tmpPath, "WordCountHBaseIT.jar"); + fs.copyFromLocalFile(true, new Path("WordCountHBaseIT.jar"), target); DistributedCache.addFileToClassPath(target, conf, fs); } } http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java ---------------------------------------------------------------------- diff --git a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java b/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java index 0202d09..4f47d8f 100644 --- a/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java +++ b/crunch/src/it/java/org/apache/crunch/lib/AggregateIT.java @@ -148,7 +148,7 @@ public class AggregateIT { assertEquals(1, collectionMap.size()); - assertEquals(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a")), collectionMap.get(1)); + assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(new Text("c"), new Text("d"), new Text("a")))); } @Test @@ -166,7 +166,7 @@ public class AggregateIT { Employee empD = mapFn.map("d").second(); Employee empA = mapFn.map("a").second(); - assertEquals(Lists.newArrayList(empC, empD, empA), collectionMap.get(1)); + 
assertTrue(collectionMap.get(1).containsAll(Lists.newArrayList(empC, empD, empA))); } private static class MapStringToTextPair extends MapFn<String, Pair<Integer, Text>> { http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/it/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/crunch/src/it/resources/log4j.properties b/crunch/src/it/resources/log4j.properties index a04cec8..5d144a0 100644 --- a/crunch/src/it/resources/log4j.properties +++ b/crunch/src/it/resources/log4j.properties @@ -19,6 +19,8 @@ log4j.logger.org.apache.crunch=info, A # Log warnings on Hadoop for the local runner when testing log4j.logger.org.apache.hadoop=warn, A +# Except for Configuration, which is chatty. +log4j.logger.org.apache.hadoop.conf.Configuration=error, A # ***** A is set to be a ConsoleAppender. log4j.appender.A=org.apache.log4j.ConsoleAppender http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java b/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java index 226ad90..0ca1ab3 100644 --- a/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java +++ b/crunch/src/main/java/org/apache/crunch/lib/join/MapsideJoin.java @@ -88,7 +88,7 @@ public class MapsideJoin { Path path = sourcePathTarget.getPath(); DistributedCache.addCacheFile(path.toUri(), pipeline.getConfiguration()); - MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.toString(), right.getPType()); + MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.getName(), right.getPType()); PTypeFamily typeFamily = left.getTypeFamily(); return left.parallelDo("mapjoin", mapJoinDoFn, typeFamily.tableOf(left.getKeyType(), typeFamily.pairs(left.getValueType(), 
right.getValueType()))); http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/629f5734/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 9afd875..c0a6526 100644 --- a/pom.xml +++ b/pom.xml @@ -46,8 +46,6 @@ under the License. <java.target.version>1.6</java.target.version> <scala.version>2.9.2</scala.version> <avro.version>1.7.0</avro.version> - <hadoop.version>0.20.2-cdh3u4</hadoop.version> - <hbase.version>0.90.6-cdh3u4</hbase.version> </properties> <scm> @@ -120,12 +118,6 @@ under the License. </dependency> <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>${hadoop.version}</version> - </dependency> - - <dependency> <groupId>org.apache.avro</groupId> <artifactId>avro</artifactId> <version>${avro.version}</version> @@ -180,12 +172,6 @@ under the License. </dependency> <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-minicluster</artifactId> - <version>${hadoop.version}</version> - </dependency> - - <dependency> <groupId>org.hamcrest</groupId> <artifactId>hamcrest-all</artifactId> <version>1.1</version> @@ -228,55 +214,6 @@ under the License. 
</dependency> <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase</artifactId> - <version>${hbase.version}</version> - <exclusions> - <exclusion> - <groupId>org.apache.hadoop</groupId> - <artifactId>avro</artifactId> - </exclusion> - <exclusion> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-core</artifactId> - </exclusion> - <exclusion> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - </exclusion> - <exclusion> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> - </exclusion> - <exclusion> - <groupId>log4j</groupId> - <artifactId>log4j</artifactId> - </exclusion> - <exclusion> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-log4j12</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> - <groupId>org.apache.hbase</groupId> - <artifactId>hbase</artifactId> - <version>${hbase.version}</version> - <type>test-jar</type> - <exclusions> - <exclusion> - <groupId>org.apache.hadoop</groupId> - <artifactId>avro</artifactId> - </exclusion> - <exclusion> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-log4j12</artifactId> - </exclusion> - </exclusions> - </dependency> - - <dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-library</artifactId> <version>${scala.version}</version> @@ -320,6 +257,156 @@ under the License. 
</site> </distributionManagement> + <profiles> + <profile> + <id>hadoop-1</id> + <activation> + <property> + <name>!crunch.platform</name> + </property> + </activation> + <properties> + <hadoop.version>1.0.3</hadoop.version> + <hbase.version>0.90.4</hbase.version> + </properties> + <dependencyManagement> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-minicluster</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase</artifactId> + <version>${hbase.version}</version> + <exclusions> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>avro</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-core</artifactId> + </exclusion> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase</artifactId> + <version>${hbase.version}</version> + <type>test-jar</type> + <exclusions> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>avro</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + </exclusions> + </dependency> + </dependencies> + </dependencyManagement> + </profile> + <profile> + <id>hadoop-2</id> + <activation> + <property> + <name>crunch.platform</name> + <value>2</value> + </property> + </activation> + 
<properties> + <hadoop.version>2.0.0-alpha</hadoop.version> + <hbase.version>0.92.1-cdh4.0.0</hbase.version> + </properties> + <dependencyManagement> + <dependencies> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-minicluster</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase</artifactId> + <version>${hbase.version}</version> + <exclusions> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>avro</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-core</artifactId> + </exclusion> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase</artifactId> + <version>${hbase.version}</version> + <type>test-jar</type> + <exclusions> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>avro</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + </exclusions> + </dependency> + </dependencies> + </dependencyManagement> + </profile> + </profiles> + <build> <plugins> <plugin>
