Author: cutting
Date: Tue Jul 19 11:21:59 2005
New Revision: 219745

URL: http://svn.apache.org/viewcvs?rev=219745&view=rev
Log:
Sort splits to minimize tail when mapping.

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java?rev=219745&r1=219744&r2=219745&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java Tue Jul 19 11:21:59 2005
@@ -588,6 +588,15 @@
     FileSplit[] splits =
       jd.getInputFormat().getSplits(fs, jd, numMapTasks);
 
+    // sort splits by decreasing length, to reduce job's tail
+    Arrays.sort(splits, new Comparator() {
+        public int compare(Object a, Object b) {
+          long diff =
+            ((FileSplit)b).getLength() - ((FileSplit)a).getLength();
+          return diff==0 ? 0 : (diff > 0 ? 1 : -1);
+        }
+      });
+
     // adjust number of map tasks to actual number of splits
     numMapTasks = splits.length;