Author: sandy
Date: Fri Aug 23 21:28:05 2013
New Revision: 1517054

URL: http://svn.apache.org/r1517054
Log:
MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit subclass (Sandy Ryza)

Modified: hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java Modified: hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/CHANGES.txt?rev=1517054&r1=1517053&r2=1517054&view=diff ============================================================================== --- hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/CHANGES.txt Fri Aug 23 21:28:05 2013 @@ -8,6 +8,9 @@ Release 2.1.1-beta - UNRELEASED IMPROVEMENTS + MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit + subclass (Sandy Ryza) + OPTIMIZATIONS MAPREDUCE-5446. 
TestJobHistoryEvents and TestJobHistoryParsing have race Modified: hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java?rev=1517054&r1=1517053&r2=1517054&view=diff ============================================================================== --- hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java (original) +++ hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraInputFormat.java Fri Aug 23 21:28:05 2013 @@ -60,48 +60,6 @@ public class TeraInputFormat extends Fil private static MRJobConfig lastContext = null; private static List<InputSplit> lastResult = null; - static class TeraFileSplit extends FileSplit { - static private String[] ZERO_LOCATIONS = new String[0]; - - private String[] locations; - - public TeraFileSplit() { - locations = ZERO_LOCATIONS; - } - public TeraFileSplit(Path file, long start, long length, String[] hosts) { - super(file, start, length, hosts); - try { - locations = super.getLocations(); - } catch (IOException e) { - locations = ZERO_LOCATIONS; - } - } - - // XXXXXX should this also be null-protected? 
- protected void setLocations(String[] hosts) { - locations = hosts; - } - - @Override - public String[] getLocations() { - return locations; - } - - public String toString() { - StringBuffer result = new StringBuffer(); - result.append(getPath()); - result.append(" from "); - result.append(getStart()); - result.append(" length "); - result.append(getLength()); - for(String host: getLocations()) { - result.append(" "); - result.append(host); - } - return result.toString(); - } - } - static class TextSampler implements IndexedSortable { private ArrayList<Text> records = new ArrayList<Text>(); @@ -325,11 +283,6 @@ public class TeraInputFormat extends Fil return new TeraRecordReader(); } - protected FileSplit makeSplit(Path file, long start, long length, - String[] hosts) { - return new TeraFileSplit(file, start, length, hosts); - } - @Override public List<InputSplit> getSplits(JobContext job) throws IOException { if (job == lastContext) { @@ -343,7 +296,7 @@ public class TeraInputFormat extends Fil System.out.println("Spent " + (t2 - t1) + "ms computing base-splits."); if (job.getConfiguration().getBoolean(TeraScheduler.USE, true)) { TeraScheduler scheduler = new TeraScheduler( - lastResult.toArray(new TeraFileSplit[0]), job.getConfiguration()); + lastResult.toArray(new FileSplit[0]), job.getConfiguration()); lastResult = scheduler.getNewFileSplits(); t3 = System.currentTimeMillis(); System.out.println("Spent " + (t3 - t2) + "ms computing TeraScheduler splits."); Modified: hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java?rev=1517054&r1=1517053&r2=1517054&view=diff ============================================================================== --- 
hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java (original) +++ hadoop/common/branches/branch-2.1-beta/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java Fri Aug 23 21:28:05 2013 @@ -24,7 +24,6 @@ import java.util.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.examples.terasort.TeraInputFormat.TeraFileSplit; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig; @@ -214,8 +213,9 @@ class TeraScheduler { for(int i=0; i < splits.length; ++i) { if (splits[i].isAssigned) { // copy the split and fix up the locations - ((TeraFileSplit) realSplits[i]).setLocations - (new String[]{splits[i].locations.get(0).hostname}); + String[] newLocations = {splits[i].locations.get(0).hostname}; + realSplits[i] = new FileSplit(realSplits[i].getPath(), + realSplits[i].getStart(), realSplits[i].getLength(), newLocations); result[left++] = realSplits[i]; } else { result[right--] = realSplits[i];