Author: umamahesh
Date: Mon May 12 12:43:59 2014
New Revision: 1593948
URL: http://svn.apache.org/r1593948
Log: Merge from trunk to HDFS-2006 branch
Added: hadoop/common/branches/HDFS-2006/hadoop-assemblies/src/main/resources/assemblies/hadoop-kms-dist.xml - copied unchanged from r1593927, hadoop/common/trunk/hadoop-assemblies/src/main/resources/assemblies/hadoop-kms-dist.xml Modified: hadoop/common/branches/HDFS-2006/ (props changed) hadoop/common/branches/HDFS-2006/.gitignore hadoop/common/branches/HDFS-2006/hadoop-client/pom.xml hadoop/common/branches/HDFS-2006/hadoop-dist/pom.xml hadoop/common/branches/HDFS-2006/hadoop-project/pom.xml hadoop/common/branches/HDFS-2006/hadoop-project/src/site/site.xml hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java Propchange: hadoop/common/branches/HDFS-2006/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk:r1588992-1593927 Modified: hadoop/common/branches/HDFS-2006/.gitignore URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/.gitignore?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/.gitignore (original) +++ hadoop/common/branches/HDFS-2006/.gitignore Mon May 12 12:43:59 2014 @@ -7,5 +7,6 @@ .project .settings target +hadoop-common-project/hadoop-kms/downloads/ hadoop-hdfs-project/hadoop-hdfs/downloads hadoop-hdfs-project/hadoop-hdfs-httpfs/downloads Modified: hadoop/common/branches/HDFS-2006/hadoop-client/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-client/pom.xml?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- 
hadoop/common/branches/HDFS-2006/hadoop-client/pom.xml (original) +++ hadoop/common/branches/HDFS-2006/hadoop-client/pom.xml Mon May 12 12:43:59 2014 @@ -40,22 +40,10 @@ <scope>compile</scope> <exclusions> <exclusion> - <groupId>tomcat</groupId> - <artifactId>jasper-compiler</artifactId> - </exclusion> - <exclusion> - <groupId>tomcat</groupId> - <artifactId>jasper-runtime</artifactId> - </exclusion> - <exclusion> <groupId>javax.servlet</groupId> <artifactId>servlet-api</artifactId> </exclusion> <exclusion> - <groupId>javax.servlet.jsp</groupId> - <artifactId>jsp-api</artifactId> - </exclusion> - <exclusion> <groupId>commons-logging</groupId> <artifactId>commons-logging-api</artifactId> </exclusion> @@ -73,10 +61,6 @@ </exclusion> <exclusion> <groupId>org.mortbay.jetty</groupId> - <artifactId>jsp-api-2.1</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> <artifactId>servlet-api-2.5</artifactId> </exclusion> <exclusion> @@ -111,10 +95,6 @@ <groupId>com.jcraft</groupId> <artifactId>jsch</artifactId> </exclusion> - <exclusion> - <groupId>commons-el</groupId> - <artifactId>commons-el</artifactId> - </exclusion> </exclusions> </dependency> @@ -147,14 +127,6 @@ <groupId>javax.servlet</groupId> <artifactId>servlet-api</artifactId> </exclusion> - <exclusion> - <groupId>javax.servlet.jsp</groupId> - <artifactId>jsp-api</artifactId> - </exclusion> - <exclusion> - <groupId>tomcat</groupId> - <artifactId>jasper-runtime</artifactId> - </exclusion> </exclusions> </dependency> Modified: hadoop/common/branches/HDFS-2006/hadoop-dist/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-dist/pom.xml?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-dist/pom.xml (original) +++ hadoop/common/branches/HDFS-2006/hadoop-dist/pom.xml Mon May 12 12:43:59 2014 @@ -118,6 +118,7 @@ run cp -r 
$ROOT/hadoop-common-project/hadoop-nfs/target/hadoop-nfs-${project.version}/* . run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* . run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* . + run cp -r $ROOT/hadoop-common-project/hadoop-kms/target/hadoop-kms-${project.version}/* . run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-nfs/target/hadoop-hdfs-nfs-${project.version}/* . run cp -r $ROOT/hadoop-yarn-project/target/hadoop-yarn-project-${project.version}/* . run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* . Modified: hadoop/common/branches/HDFS-2006/hadoop-project/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-project/pom.xml?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-project/pom.xml (original) +++ hadoop/common/branches/HDFS-2006/hadoop-project/pom.xml Mon May 12 12:43:59 2014 @@ -483,36 +483,6 @@ <version>6.1.26</version> </dependency> <dependency> - <groupId>tomcat</groupId> - <artifactId>jasper-compiler</artifactId> - <version>5.5.23</version> - <exclusions> - <exclusion> - <groupId>javax.servlet</groupId> - <artifactId>jsp-api</artifactId> - </exclusion> - <exclusion> - <groupId>ant</groupId> - <artifactId>ant</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>tomcat</groupId> - <artifactId>jasper-runtime</artifactId> - <version>5.5.23</version> - </dependency> - <dependency> - <groupId>javax.servlet.jsp</groupId> - <artifactId>jsp-api</artifactId> - <version>2.1</version> - </dependency> - <dependency> - <groupId>commons-el</groupId> - <artifactId>commons-el</artifactId> - <version>1.0</version> - </dependency> - <dependency> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> <version>1.1.3</version> @@ -619,6 +589,11 @@ 
<version>1.7.5</version> </dependency> <dependency> + <groupId>org.slf4j</groupId> + <artifactId>jul-to-slf4j</artifactId> + <version>1.7.5</version> + </dependency> + <dependency> <groupId>org.eclipse.jdt</groupId> <artifactId>core</artifactId> <version>3.1.1</version> @@ -745,7 +720,7 @@ <dependency> <groupId>com.codahale.metrics</groupId> <artifactId>metrics-core</artifactId> - <version>3.0.0</version> + <version>3.0.1</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> @@ -790,6 +765,7 @@ <artifactId>leveldbjni-all</artifactId> <version>1.8</version> </dependency> + </dependencies> </dependencyManagement> @@ -875,11 +851,6 @@ <version>${avro.version}</version> </plugin> <plugin> - <groupId>org.codehaus.mojo.jspc</groupId> - <artifactId>jspc-maven-plugin</artifactId> - <version>2.0-alpha-3</version> - </plugin> - <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-project-info-reports-plugin</artifactId> <version>2.4</version> Modified: hadoop/common/branches/HDFS-2006/hadoop-project/src/site/site.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-project/src/site/site.xml?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-project/src/site/site.xml (original) +++ hadoop/common/branches/HDFS-2006/hadoop-project/src/site/site.xml Mon May 12 12:43:59 2014 @@ -62,6 +62,7 @@ <item name="Secure Mode" href="hadoop-project-dist/hadoop-common/SecureMode.html"/> <item name="Service Level Authorization" href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html"/> <item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/> + <item name="Hadoop KMS" href="hadoop-kms/index.html"/> </menu> <menu name="HDFS" inherit="top"> @@ -88,10 +89,13 @@ </menu> <menu name="MapReduce" inherit="top"> + <item name="MapReduce Tutorial" 
href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html"/> <item name="Compatibilty between Hadoop 1.x and Hadoop 2.x" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html"/> <item name="Encrypted Shuffle" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html"/> <item name="Pluggable Shuffle/Sort" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html"/> <item name="Distributed Cache Deploy" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html"/> + <item name="Hadoop Streaming" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/HadoopStreaming.html"/> + <item name="Hadoop Archives" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/HadoopArchives.html"/> <item name="DistCp" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistCp.html"/> </menu> @@ -100,6 +104,7 @@ <item name="Capacity Scheduler" href="hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html"/> <item name="Fair Scheduler" href="hadoop-yarn/hadoop-yarn-site/FairScheduler.html"/> <item name="ResourceManager Restart" href="hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html"/> + <item name="ResourceManager HA" href="hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html"/> <item name="Web Application Proxy" href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html"/> <item name="YARN Timeline Server" href="hadoop-yarn/hadoop-yarn-site/TimelineServer.html"/> <item name="Writing YARN Applications" href="hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html"/> Modified: hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java?rev=1593948&r1=1593947&r2=1593948&view=diff 
============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java (original) +++ hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java Mon May 12 12:43:59 2014 @@ -51,7 +51,16 @@ public class DistCpConstants { public static final String CONF_LABEL_SKIP_CRC = "distcp.skip.crc"; public static final String CONF_LABEL_OVERWRITE = "distcp.copy.overwrite"; public static final String CONF_LABEL_BANDWIDTH_MB = "distcp.map.bandwidth.mb"; - + + public static final String CONF_LABEL_MAX_CHUNKS_TOLERABLE = + "distcp.dynamic.max.chunks.tolerable"; + public static final String CONF_LABEL_MAX_CHUNKS_IDEAL = + "distcp.dynamic.max.chunks.ideal"; + public static final String CONF_LABEL_MIN_RECORDS_PER_CHUNK = + "distcp.dynamic.min.records_per_chunk"; + public static final String CONF_LABEL_SPLIT_RATIO = + "distcp.dynamic.split.ratio"; + /* Total bytes to be copied. Updated by copylisting. 
Unfiltered count */ public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected"; @@ -107,4 +116,13 @@ public class DistCpConstants { public static final int INVALID_ARGUMENT = -1; public static final int DUPLICATE_INPUT = -2; public static final int UNKNOWN_ERROR = -999; + + /** + * Constants for DistCp default values of configurable values + */ + public static final int MAX_CHUNKS_TOLERABLE_DEFAULT = 400; + public static final int MAX_CHUNKS_IDEAL_DEFAULT = 100; + public static final int MIN_RECORDS_PER_CHUNK_DEFAULT = 5; + public static final int SPLIT_RATIO_DEFAULT = 2; + } Modified: hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java (original) +++ hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java Mon May 12 12:43:59 2014 @@ -57,7 +57,7 @@ public class DynamicInputFormat<K, V> ex = "mapred.num.splits"; private static final String CONF_LABEL_NUM_ENTRIES_PER_CHUNK = "mapred.num.entries.per.chunk"; - + /** * Implementation of InputFormat::getSplits(). This method splits up the * copy-listing file into chunks, and assigns the first batch to different @@ -91,7 +91,7 @@ public class DynamicInputFormat<K, V> ex // Setting non-zero length for FileSplit size, to avoid a possible // future when 0-sized file-splits are considered "empty" and skipped // over. 
- MIN_RECORDS_PER_CHUNK, + getMinRecordsPerChunk(jobContext.getConfiguration()), null)); } DistCpUtils.publish(jobContext.getConfiguration(), @@ -107,9 +107,11 @@ public class DynamicInputFormat<K, V> ex final Configuration configuration = context.getConfiguration(); int numRecords = getNumberOfRecords(configuration); int numMaps = getNumMapTasks(configuration); + int maxChunksTolerable = getMaxChunksTolerable(configuration); + // Number of chunks each map will process, on average. int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords); - validateNumChunksUsing(splitRatio, numMaps); + validateNumChunksUsing(splitRatio, numMaps, maxChunksTolerable); int numEntriesPerChunk = (int)Math.ceil((float)numRecords /(splitRatio * numMaps)); @@ -168,9 +170,9 @@ public class DynamicInputFormat<K, V> ex return chunksFinal; } - private static void validateNumChunksUsing(int splitRatio, int numMaps) - throws IOException { - if (splitRatio * numMaps > MAX_CHUNKS_TOLERABLE) + private static void validateNumChunksUsing(int splitRatio, int numMaps, + int maxChunksTolerable) throws IOException { + if (splitRatio * numMaps > maxChunksTolerable) throw new IOException("Too many chunks created with splitRatio:" + splitRatio + ", numMaps:" + numMaps + ". Reduce numMaps or decrease split-ratio to proceed."); @@ -238,14 +240,61 @@ public class DynamicInputFormat<K, V> ex int numMaps, int numPaths) { return configuration.getInt( CONF_LABEL_LISTING_SPLIT_RATIO, - getSplitRatio(numMaps, numPaths)); + getSplitRatio(numMaps, numPaths, configuration)); + } + + private static int getMaxChunksTolerable(Configuration conf) { + int maxChunksTolerable = conf.getInt( + DistCpConstants.CONF_LABEL_MAX_CHUNKS_TOLERABLE, + DistCpConstants.MAX_CHUNKS_TOLERABLE_DEFAULT); + if (maxChunksTolerable <= 0) { + LOG.warn(DistCpConstants.CONF_LABEL_MAX_CHUNKS_TOLERABLE + + " should be positive. 
Fall back to default value: " + + DistCpConstants.MAX_CHUNKS_TOLERABLE_DEFAULT); + maxChunksTolerable = DistCpConstants.MAX_CHUNKS_TOLERABLE_DEFAULT; + } + return maxChunksTolerable; + } + + private static int getMaxChunksIdeal(Configuration conf) { + int maxChunksIdeal = conf.getInt( + DistCpConstants.CONF_LABEL_MAX_CHUNKS_IDEAL, + DistCpConstants.MAX_CHUNKS_IDEAL_DEFAULT); + if (maxChunksIdeal <= 0) { + LOG.warn(DistCpConstants.CONF_LABEL_MAX_CHUNKS_IDEAL + + " should be positive. Fall back to default value: " + + DistCpConstants.MAX_CHUNKS_IDEAL_DEFAULT); + maxChunksIdeal = DistCpConstants.MAX_CHUNKS_IDEAL_DEFAULT; + } + return maxChunksIdeal; + } + + private static int getMinRecordsPerChunk(Configuration conf) { + int minRecordsPerChunk = conf.getInt( + DistCpConstants.CONF_LABEL_MIN_RECORDS_PER_CHUNK, + DistCpConstants.MIN_RECORDS_PER_CHUNK_DEFAULT); + if (minRecordsPerChunk <= 0) { + LOG.warn(DistCpConstants.CONF_LABEL_MIN_RECORDS_PER_CHUNK + + " should be positive. Fall back to default value: " + + DistCpConstants.MIN_RECORDS_PER_CHUNK_DEFAULT); + minRecordsPerChunk = DistCpConstants.MIN_RECORDS_PER_CHUNK_DEFAULT; + } + return minRecordsPerChunk; } - private static final int MAX_CHUNKS_TOLERABLE = 400; - private static final int MAX_CHUNKS_IDEAL = 100; - private static final int MIN_RECORDS_PER_CHUNK = 5; - private static final int SPLIT_RATIO_DEFAULT = 2; - + private static int getSplitRatio(Configuration conf) { + int splitRatio = conf.getInt( + DistCpConstants.CONF_LABEL_SPLIT_RATIO, + DistCpConstants.SPLIT_RATIO_DEFAULT); + if (splitRatio <= 0) { + LOG.warn(DistCpConstants.CONF_LABEL_SPLIT_RATIO + + " should be positive. Fall back to default value: " + + DistCpConstants.SPLIT_RATIO_DEFAULT); + splitRatio = DistCpConstants.SPLIT_RATIO_DEFAULT; + } + return splitRatio; + } + /** * Package private, for testability. * @param nMaps The number of maps requested for. 
@@ -253,19 +302,34 @@ public class DynamicInputFormat<K, V> ex * @return The number of splits each map should handle, ideally. */ static int getSplitRatio(int nMaps, int nRecords) { + return getSplitRatio(nMaps, nRecords,new Configuration()); + } + + /** + * Package private, for testability. + * @param nMaps The number of maps requested for. + * @param nRecords The number of records to be copied. + * @param conf The configuration set by users. + * @return The number of splits each map should handle, ideally. + */ + static int getSplitRatio(int nMaps, int nRecords, Configuration conf) { + int maxChunksIdeal = getMaxChunksIdeal(conf); + int minRecordsPerChunk = getMinRecordsPerChunk(conf); + int splitRatio = getSplitRatio(conf); + if (nMaps == 1) { LOG.warn("nMaps == 1. Why use DynamicInputFormat?"); return 1; } - if (nMaps > MAX_CHUNKS_IDEAL) - return SPLIT_RATIO_DEFAULT; + if (nMaps > maxChunksIdeal) + return splitRatio; - int nPickups = (int)Math.ceil((float)MAX_CHUNKS_IDEAL/nMaps); + int nPickups = (int)Math.ceil((float)maxChunksIdeal/nMaps); int nRecordsPerChunk = (int)Math.ceil((float)nRecords/(nMaps*nPickups)); - return nRecordsPerChunk < MIN_RECORDS_PER_CHUNK ? - SPLIT_RATIO_DEFAULT : nPickups; + return nRecordsPerChunk < minRecordsPerChunk ? 
+ splitRatio : nPickups; } static int getNumEntriesPerChunk(Configuration configuration) { Modified: hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java (original) +++ hadoop/common/branches/HDFS-2006/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java Mon May 12 12:43:59 2014 @@ -18,6 +18,7 @@ package org.apache.hadoop.tools.mapred.lib; +import org.apache.hadoop.tools.DistCpConstants; import org.junit.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -160,5 +161,25 @@ public class TestDynamicInputFormat { Assert.assertEquals(2, DynamicInputFormat.getSplitRatio(11000000, 10)); Assert.assertEquals(4, DynamicInputFormat.getSplitRatio(30, 700)); Assert.assertEquals(2, DynamicInputFormat.getSplitRatio(30, 200)); + + // Tests with negative value configuration + Configuration conf = new Configuration(); + conf.setInt(DistCpConstants.CONF_LABEL_MAX_CHUNKS_TOLERABLE, -1); + conf.setInt(DistCpConstants.CONF_LABEL_MAX_CHUNKS_IDEAL, -1); + conf.setInt(DistCpConstants.CONF_LABEL_MIN_RECORDS_PER_CHUNK, -1); + conf.setInt(DistCpConstants.CONF_LABEL_SPLIT_RATIO, -1); + Assert.assertEquals(1, + DynamicInputFormat.getSplitRatio(1, 1000000000, conf)); + Assert.assertEquals(2, + DynamicInputFormat.getSplitRatio(11000000, 10, conf)); + Assert.assertEquals(4, DynamicInputFormat.getSplitRatio(30, 700, conf)); + Assert.assertEquals(2, DynamicInputFormat.getSplitRatio(30, 200, 
conf)); + + // Tests with valid configuration + conf.setInt(DistCpConstants.CONF_LABEL_MAX_CHUNKS_TOLERABLE, 100); + conf.setInt(DistCpConstants.CONF_LABEL_MAX_CHUNKS_IDEAL, 30); + conf.setInt(DistCpConstants.CONF_LABEL_MIN_RECORDS_PER_CHUNK, 10); + conf.setInt(DistCpConstants.CONF_LABEL_SPLIT_RATIO, 53); + Assert.assertEquals(53, DynamicInputFormat.getSplitRatio(3, 200, conf)); } }