Author: tgraves Date: Tue Mar 26 17:42:29 2013 New Revision: 1461236 URL: http://svn.apache.org/r1461236 Log: MAPREDUCE-2722. [Gridmix] Gridmix simulated job's map's hdfsBytesRead counter is wrong when compressed input is used.(ravigummadi via tgraves)
Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestCompressionEmulationUtils.java Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java?rev=1461236&r1=1461235&r2=1461236&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java (original) +++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/CompressionEmulationUtil.java Tue Mar 26 17:42:29 2013 @@ -571,4 +571,25 @@ class CompressionEmulationUtil { } setInputCompressionEmulationEnabled(target, needsCompressedInput); } + + /** + * Get the uncompressed input bytes count from the given possibly compressed + * input bytes count. + * @param possiblyCompressedInputBytes input bytes count. This is compressed + * input size if compression emulation is on. + * @param conf configuration of the Gridmix simulated job + * @return uncompressed input bytes count. 
Computed only when compressed + * input was used + */ + static long getUncompressedInputBytes(long possiblyCompressedInputBytes, + Configuration conf) { + long uncompressedInputBytes = possiblyCompressedInputBytes; + + if (CompressionEmulationUtil.isInputCompressionEmulationEnabled(conf)) { + float inputCompressionRatio = + CompressionEmulationUtil.getMapInputCompressionEmulationRatio(conf); + uncompressedInputBytes /= inputCompressionRatio; + } + return uncompressedInputBytes; + } } Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java?rev=1461236&r1=1461235&r2=1461236&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java (original) +++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/LoadJob.java Tue Mar 26 17:42:29 2013 @@ -536,9 +536,14 @@ class LoadJob extends GridmixJob { } } final TaskInfo info = jobdesc.getTaskInfo(TaskType.MAP, i); + long possiblyCompressedInputBytes = info.getInputBytes(); + Configuration conf = job.getConfiguration(); + long uncompressedInputBytes = + CompressionEmulationUtil.getUncompressedInputBytes( + possiblyCompressedInputBytes, conf); splits.add( - new LoadSplit(striper.splitFor(inputDir, info.getInputBytes(), 3), - maps, i, info.getInputBytes(), info.getInputRecords(), + new LoadSplit(striper.splitFor(inputDir, uncompressedInputBytes, 3), + maps, i, uncompressedInputBytes, info.getInputRecords(), info.getOutputBytes(), info.getOutputRecords(), reduceByteRatio, reduceRecordRatio, specBytes, specRecords, info.getResourceUsageMetrics(), @@ -546,4 +551,4 @@ class LoadJob extends GridmixJob { 
} pushDescription(id(), splits); } -} \ No newline at end of file +} Modified: hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestCompressionEmulationUtils.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestCompressionEmulationUtils.java?rev=1461236&r1=1461235&r2=1461236&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestCompressionEmulationUtils.java (original) +++ hadoop/common/branches/branch-2/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestCompressionEmulationUtils.java Tue Mar 26 17:42:29 2013 @@ -19,7 +19,6 @@ package org.apache.hadoop.mapred.gridmix import java.io.BufferedReader; import java.io.BufferedWriter; -import java.io.DataInput; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -31,13 +30,11 @@ import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; -import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Utils; @@ -561,4 +558,30 @@ public class TestCompressionEmulationUti String readLine = new String(bytes); assertEquals("Compression/Decompression error", inputLine, readLine); } + + /** + * Tests the computation logic of uncompressed input bytes by + * {@link CompressionEmulationUtil#getUncompressedInputBytes(long, Configuration)} + */ + @Test + public void 
testComputeUncompressedInputBytes() { + long possiblyCompressedInputBytes = 100000; + float compressionRatio = 0.45F; + Configuration conf = new Configuration(); + CompressionEmulationUtil.setMapInputCompressionEmulationRatio(conf, + compressionRatio); + + // By default, input compression emulation is disabled. Verify the + // computation of uncompressed input bytes. + long result = CompressionEmulationUtil.getUncompressedInputBytes( + possiblyCompressedInputBytes, conf); + assertEquals(possiblyCompressedInputBytes, result); + + // Enable input compression emulation and verify uncompressed + // input bytes computation logic + CompressionEmulationUtil.setInputCompressionEmulationEnabled(conf, true); + result = CompressionEmulationUtil.getUncompressedInputBytes( + possiblyCompressedInputBytes, conf); + assertEquals((long)(possiblyCompressedInputBytes/compressionRatio), result); + } }