Author: jbellis
Date: Sun May 16 22:29:11 2010
New Revision: 944927
URL: http://svn.apache.org/viewvc?rev=944927&view=rev
Log:
add Collections.shuffle(splits) so Hadoop doesn't send all the jobs to the same nodes at once. patch by Joost Ouwerkerk; reviewed by jbellis for CASSANDRA-1096
Modified: cassandra/branches/cassandra-0.6/CHANGES.txt cassandra/branches/cassandra-0.6/src/java/org/apache/cassandra/hadoop/ColumnFamilyInputFormat.java Modified: cassandra/branches/cassandra-0.6/CHANGES.txt URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/CHANGES.txt?rev=944927&r1=944926&r2=944927&view=diff ============================================================================== --- cassandra/branches/cassandra-0.6/CHANGES.txt (original) +++ cassandra/branches/cassandra-0.6/CHANGES.txt Sun May 16 22:29:11 2010 @@ -16,6 +16,8 @@ * install json2sstable, sstable2json, and sstablekeys to Debian package * StreamingService.StreamDestinations wouldn't empty itself after streaming finished (CASSANDRA-1076) + * added Collections.shuffle(splits) before returning the splits in + ColumnFamilyInputFormat (CASSANDRA-1096) 0.6.1 Modified: cassandra/branches/cassandra-0.6/src/java/org/apache/cassandra/hadoop/ColumnFamilyInputFormat.java URL: http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.6/src/java/org/apache/cassandra/hadoop/ColumnFamilyInputFormat.java?rev=944927&r1=944926&r2=944927&view=diff ============================================================================== --- cassandra/branches/cassandra-0.6/src/java/org/apache/cassandra/hadoop/ColumnFamilyInputFormat.java (original) +++ cassandra/branches/cassandra-0.6/src/java/org/apache/cassandra/hadoop/ColumnFamilyInputFormat.java Sun May 16 22:29:11 2010 @@ -119,7 +119,7 @@ public class ColumnFamilyInputFormat ext } assert splits.size() > 0; - + Collections.shuffle(splits, new Random(System.nanoTime())); return splits; }