Author: cutting
Date: Tue Jul 19 11:21:59 2005
New Revision: 219745

URL: http://svn.apache.org/viewcvs?rev=219745&view=rev
Log:
Sort splits to minimize tail when mapping.

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java?rev=219745&r1=219744&r2=219745&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobTracker.java Tue Jul 19 11:21:59 2005
@@ -588,6 +588,15 @@
             FileSplit[] splits =
               jd.getInputFormat().getSplits(fs, jd, numMapTasks);
 
+            // sort splits by decreasing length, to reduce job's tail
+            Arrays.sort(splits, new Comparator() {
+                public int compare(Object a, Object b) {
+                  long diff =
+                    ((FileSplit)b).getLength() - ((FileSplit)a).getLength();
+                  return diff==0 ? 0 : (diff > 0 ? 1 : -1);
+                }
+              });
+
             // adjust number of map tasks to actual number of splits
             numMapTasks = splits.length;
 


Reply via email to