Author: shv
Date: Fri Sep 30 19:21:11 2011
New Revision: 1177787

URL: http://svn.apache.org/viewvc?rev=1177787&view=rev
Log:
MAPREDUCE-2779. JobSplitWriter.java can't handle large job.split file. Contributed by Ming Ma.

Modified:
    hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt
    hadoop/common/branches/branch-0.22/mapreduce/src/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java

Modified: hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt?rev=1177787&r1=1177786&r2=1177787&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt (original)
+++ hadoop/common/branches/branch-0.22/mapreduce/CHANGES.txt Fri Sep 30 19:21:11 2011
@@ -613,6 +613,9 @@ Release 0.22.0 - Unreleased
     MAPREDUCE-3026. Fix NPE in mapred queue -list with hierarchical queues.
     (Mayank Bansal via shv)
 
+    MAPREDUCE-2779. JobSplitWriter.java can't handle large job.split file.
+    (Ming Ma via shv)
+
 Release 0.21.1 - Unreleased
 
   NEW FEATURES

Modified: hadoop/common/branches/branch-0.22/mapreduce/src/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.22/mapreduce/src/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java?rev=1177787&r1=1177786&r2=1177787&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.22/mapreduce/src/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java (original)
+++ hadoop/common/branches/branch-0.22/mapreduce/src/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java Fri Sep 30 19:21:11 2011
@@ -114,15 +114,15 @@ public class JobSplitWriter {
     if (array.length != 0) {
       SerializationFactory factory = new SerializationFactory(conf);
       int i = 0;
-      long offset = out.size();
+      long offset = out.getPos();
       for(T split: array) {
-        int prevCount = out.size();
+        long prevCount = out.getPos();
         Text.writeString(out, split.getClass().getName());
         Serializer<T> serializer =
           factory.getSerializer((Class<T>) split.getClass());
         serializer.open(out);
         serializer.serialize(split);
-        int currCount = out.size();
+        long currCount = out.getPos();
         info[i++] =
           new JobSplit.SplitMetaInfo(
               split.getLocations(), offset,
@@ -139,12 +139,12 @@ public class JobSplitWriter {
     SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
     if (splits.length != 0) {
       int i = 0;
-      long offset = out.size();
+      long offset = out.getPos();
       for(org.apache.hadoop.mapred.InputSplit split: splits) {
-        int prevLen = out.size();
+        long prevLen = out.getPos();
         Text.writeString(out, split.getClass().getName());
         split.write(out);
-        int currLen = out.size();
+        long currLen = out.getPos();
         info[i++] = new JobSplit.SplitMetaInfo(
             split.getLocations(), offset,
             split.getLength());