HDFS-316. Balancer should run for a configurable # of iterations (Xiaoyu Yao via aw)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b94c1117
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b94c1117
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b94c1117

Branch: refs/heads/YARN-2928
Commit: b94c1117a28e996adee68fe0e181eb6f536289f4
Parents: b015fec
Author: Allen Wittenauer <a...@apache.org>
Authored: Wed Feb 11 08:10:34 2015 -0800
Committer: Allen Wittenauer <a...@apache.org>
Committed: Wed Feb 11 08:10:34 2015 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt      |  3 +++
 .../hadoop/hdfs/server/balancer/Balancer.java    | 28 +++++++++++++++-----
 .../hdfs/server/balancer/NameNodeConnector.java  | 26 ++++++++++++------
 .../apache/hadoop/hdfs/server/mover/Mover.java   |  3 ++-
 .../src/site/apt/HDFSCommands.apt.vm             |  5 +++-
 .../hdfs/server/balancer/TestBalancer.java       |  6 ++++-
 .../hadoop/hdfs/server/mover/TestMover.java      |  8 +++++-
 7 files changed, 61 insertions(+), 18 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b94c1117/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 1d2a76a..ed5db9b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -139,6 +139,9 @@ Trunk (Unreleased)
     HDFS-7546. Document, and set an accepting default for
     dfs.namenode.kerberos.principal.pattern (Harsh J via aw)
 
+    HDFS-316. Balancer should run for a configurable # of iterations (Xiaoyu
+    Yao via aw)
+
   OPTIMIZATIONS
 
   BUG FIXES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b94c1117/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
index dba1e2d..5b87cb5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
@@ -74,6 +74,10 @@ import com.google.common.base.Preconditions;
  *            start the balancer with a default threshold of 10%
  *      bin/ start-balancer.sh -threshold 5
  *            start the balancer with a threshold of 5%
+ *      bin/ start-balancer.sh -idleiterations 20
+ *            start the balancer with maximum 20 consecutive idle iterations
+ *      bin/ start-balancer.sh -idleiterations -1
+ *            run the balancer with default threshold infinitely
  * To stop:
  *      bin/ stop-balancer.sh
  * </pre>
@@ -136,7 +140,7 @@ import com.google.common.base.Preconditions;
  * <ol>
  * <li>The cluster is balanced;
  * <li>No block can be moved;
- * <li>No block has been moved for five consecutive iterations;
+ * <li>No block has been moved for specified consecutive iterations (5 by default);
  * <li>An IOException occurs while communicating with the namenode;
  * <li>Another balancer is running.
  * </ol>
@@ -147,7 +151,7 @@ import com.google.common.base.Preconditions;
  * <ol>
  * <li>The cluster is balanced. Exiting
  * <li>No block can be moved. Exiting...
- * <li>No block has been moved for 5 iterations. Exiting...
+ * <li>No block has been moved for specified iterations (5 by default). Exiting...
  * <li>Received an IO exception: failure reason. Exiting...
  * <li>Another balancer is running. Exiting...
  * </ol>
@@ -175,7 +179,9 @@ public class Balancer {
       + "\n\t[-exclude [-f <hosts-file> | comma-sperated list of hosts]]"
       + "\tExcludes the specified datanodes."
       + "\n\t[-include [-f <hosts-file> | comma-sperated list of hosts]]"
-      + "\tIncludes only the specified datanodes.";
+      + "\tIncludes only the specified datanodes."
+      + "\n\t[-idleiterations <idleiterations>]"
+      + "\tNumber of consecutive idle iterations (-1 for Infinite) before exit.";
 
   private final Dispatcher dispatcher;
   private final BalancingPolicy policy;
@@ -572,7 +578,7 @@ public class Balancer {
     List<NameNodeConnector> connectors = Collections.emptyList();
     try {
       connectors = NameNodeConnector.newNameNodeConnectors(namenodes,
-          Balancer.class.getSimpleName(), BALANCER_ID_PATH, conf);
+          Balancer.class.getSimpleName(), BALANCER_ID_PATH, conf, p.maxIdleIteration);
 
       boolean done = false;
       for(int iteration = 0; !done; iteration++) {
@@ -628,19 +634,22 @@ public class Balancer {
   static class Parameters {
     static final Parameters DEFAULT = new Parameters(
         BalancingPolicy.Node.INSTANCE, 10.0,
+        NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS,
         Collections.<String> emptySet(), Collections.<String> emptySet());
 
     final BalancingPolicy policy;
     final double threshold;
+    final int maxIdleIteration;
     // exclude the nodes in this set from balancing operations
     Set<String> nodesToBeExcluded;
     //include only these nodes in balancing operations
     Set<String> nodesToBeIncluded;
 
-    Parameters(BalancingPolicy policy, double threshold,
+    Parameters(BalancingPolicy policy, double threshold, int maxIdleIteration,
         Set<String> nodesToBeExcluded, Set<String> nodesToBeIncluded) {
       this.policy = policy;
       this.threshold = threshold;
+      this.maxIdleIteration = maxIdleIteration;
       this.nodesToBeExcluded = nodesToBeExcluded;
       this.nodesToBeIncluded = nodesToBeIncluded;
     }
@@ -649,6 +658,7 @@ public class Balancer {
     public String toString() {
       return Balancer.class.getSimpleName() + "."
           + getClass().getSimpleName() + "[" + policy + ", threshold=" + threshold
+          + ", max idle iteration = " + maxIdleIteration
           + ", number of nodes to be excluded = "+ nodesToBeExcluded.size()
           + ", number of nodes to be included = "+ nodesToBeIncluded.size() +"]";
     }
@@ -687,6 +697,7 @@ public class Balancer {
     static Parameters parse(String[] args) {
       BalancingPolicy policy = Parameters.DEFAULT.policy;
       double threshold = Parameters.DEFAULT.threshold;
+      int maxIdleIteration = Parameters.DEFAULT.maxIdleIteration;
       Set<String> nodesTobeExcluded = Parameters.DEFAULT.nodesToBeExcluded;
       Set<String> nodesTobeIncluded = Parameters.DEFAULT.nodesToBeIncluded;
 
@@ -742,6 +753,11 @@ public class Balancer {
             } else {
               nodesTobeIncluded = Util.parseHostList(args[i]);
             }
+          } else if ("-idleiterations".equalsIgnoreCase(args[i])) {
+            checkArgument(++i < args.length,
+                "idleiterations value is missing: args = " + Arrays.toString(args));
+            maxIdleIteration = Integer.parseInt(args[i]);
+            LOG.info("Using a idleiterations of " + maxIdleIteration);
           } else {
             throw new IllegalArgumentException("args = "
                 + Arrays.toString(args));
@@ -755,7 +771,7 @@ public class Balancer {
         }
       }
 
-      return new Parameters(policy, threshold, nodesTobeExcluded, nodesTobeIncluded);
+      return new Parameters(policy, threshold, maxIdleIteration, nodesTobeExcluded, nodesTobeIncluded);
     }
 
     private static void printUsage(PrintStream out) {
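For reference, the new limit can also be supplied programmatically rather than through the -idleiterations flag. The class below is a minimal sketch, not part of this patch: the name RunBalancerWithIdleLimit is hypothetical, it assumes the cluster's configuration files are on the classpath, and it sits in the org.apache.hadoop.hdfs.server.balancer package because Parameters and Balancer.run are package-visible there, just as the updated TestBalancer code further down relies on.

package org.apache.hadoop.hdfs.server.balancer;

import java.net.URI;
import java.util.Collection;
import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;

public class RunBalancerWithIdleLimit {
  public static void main(String[] args) throws Exception {
    // Assumes core-site.xml / hdfs-site.xml for the target cluster are on the classpath.
    Configuration conf = new Configuration();
    Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
    Balancer.Parameters p = new Balancer.Parameters(
        BalancingPolicy.Node.INSTANCE,     // default datanode-level policy
        10.0,                              // default threshold, in percent
        20,                                // stop after 20 consecutive idle iterations
        Collections.<String> emptySet(),   // no datanodes excluded
        Collections.<String> emptySet());  // empty set means include all datanodes
    System.exit(Balancer.run(namenodes, p, conf));
  }
}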
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b94c1117/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java
index e01d57d..cf5f36f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java
@@ -60,18 +60,18 @@ import com.google.common.annotations.VisibleForTesting;
 public class NameNodeConnector implements Closeable {
   private static final Log LOG = LogFactory.getLog(NameNodeConnector.class);
 
-  private static final int MAX_NOT_CHANGED_ITERATIONS = 5;
+  public static final int DEFAULT_MAX_IDLE_ITERATIONS = 5;
   private static boolean write2IdFile = true;
 
   /** Create {@link NameNodeConnector} for the given namenodes. */
   public static List<NameNodeConnector> newNameNodeConnectors(
-      Collection<URI> namenodes, String name, Path idPath, Configuration conf)
-      throws IOException {
+      Collection<URI> namenodes, String name, Path idPath, Configuration conf,
+      int maxIdleIterations) throws IOException {
     final List<NameNodeConnector> connectors = new ArrayList<NameNodeConnector>(
         namenodes.size());
     for (URI uri : namenodes) {
       NameNodeConnector nnc = new NameNodeConnector(name, uri, idPath,
-          null, conf);
+          null, conf, maxIdleIterations);
       nnc.getKeyManager().startBlockKeyUpdater();
       connectors.add(nnc);
     }
@@ -80,12 +80,12 @@ public class NameNodeConnector implements Closeable {
 
   public static List<NameNodeConnector> newNameNodeConnectors(
       Map<URI, List<Path>> namenodes, String name, Path idPath,
-      Configuration conf) throws IOException {
+      Configuration conf, int maxIdleIterations) throws IOException {
     final List<NameNodeConnector> connectors = new ArrayList<NameNodeConnector>(
         namenodes.size());
     for (Map.Entry<URI, List<Path>> entry : namenodes.entrySet()) {
       NameNodeConnector nnc = new NameNodeConnector(name, entry.getKey(),
-          idPath, entry.getValue(), conf);
+          idPath, entry.getValue(), conf, maxIdleIterations);
       nnc.getKeyManager().startBlockKeyUpdater();
       connectors.add(nnc);
     }
@@ -111,15 +111,18 @@ public class NameNodeConnector implements Closeable {
   private final List<Path> targetPaths;
   private final AtomicLong bytesMoved = new AtomicLong();
 
+  private final int maxNotChangedIterations;
   private int notChangedIterations = 0;
 
   public NameNodeConnector(String name, URI nameNodeUri, Path idPath,
-      List<Path> targetPaths, Configuration conf)
+      List<Path> targetPaths, Configuration conf,
+      int maxNotChangedIterations)
       throws IOException {
     this.nameNodeUri = nameNodeUri;
     this.idPath = idPath;
     this.targetPaths = targetPaths == null || targetPaths.isEmpty() ? Arrays
         .asList(new Path("/")) : targetPaths;
+    this.maxNotChangedIterations = maxNotChangedIterations;
 
     this.namenode = NameNodeProxies.createProxy(conf, nameNodeUri,
         NamenodeProtocol.class).getProxy();
@@ -182,7 +185,14 @@ public class NameNodeConnector implements Closeable {
       notChangedIterations = 0;
     } else {
       notChangedIterations++;
-      if (notChangedIterations >= MAX_NOT_CHANGED_ITERATIONS) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("No block has been moved for " +
+            notChangedIterations + " iterations, " +
+            "maximum notChangedIterations before exit is: " +
+            ((maxNotChangedIterations >= 0) ? maxNotChangedIterations : "Infinite"));
+      }
+      if ((maxNotChangedIterations >= 0) &&
+          (notChangedIterations >= maxNotChangedIterations)) {
         System.out.println("No block has been moved for "
             + notChangedIterations + " iterations. Exiting...");
         return false;
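The rule the new maxNotChangedIterations field implements is: reset the counter whenever an iteration moves data, increment it when nothing moves, stop once the counter reaches the limit, and never stop on idleness when the limit is negative. The class below is a standalone restatement of that rule for illustration only; it is not the Hadoop class above.

public class IdleIterationRule {
  private final int maxNotChangedIterations; // a negative value means "never exit on idleness"
  private int notChangedIterations = 0;

  public IdleIterationRule(int maxNotChangedIterations) {
    this.maxNotChangedIterations = maxNotChangedIterations;
  }

  /** @return true if the caller should run another iteration. */
  public boolean shouldContinue(long bytesMovedThisIteration) {
    if (bytesMovedThisIteration > 0) {
      notChangedIterations = 0;  // progress was made, reset the idle counter
      return true;
    }
    notChangedIterations++;
    // Stop only when a non-negative limit has been reached.
    return maxNotChangedIterations < 0
        || notChangedIterations < maxNotChangedIterations;
  }

  public static void main(String[] args) {
    IdleIterationRule rule = new IdleIterationRule(5); // the default carried over from the old constant
    for (int i = 1; i <= 5; i++) {
      // The fifth consecutive idle iteration exhausts the limit and prints "false".
      System.out.println("idle iteration " + i + ", continue = " + rule.shouldContinue(0));
    }
  }
}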
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b94c1117/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
index a22f920..6fa6963 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java
@@ -530,7 +530,8 @@ public class Mover {
     List<NameNodeConnector> connectors = Collections.emptyList();
     try {
       connectors = NameNodeConnector.newNameNodeConnectors(namenodes,
-          Mover.class.getSimpleName(), MOVER_ID_PATH, conf);
+          Mover.class.getSimpleName(), MOVER_ID_PATH, conf,
+          NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
 
       while (connectors.size() > 0) {
         Collections.shuffle(connectors);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b94c1117/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm
index 941a8ee..846b0b4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm
@@ -324,7 +324,7 @@ HDFS Commands Guide
 
 ** <<<balancer>>>
 
-   Usage: <<<hdfs balancer [-threshold <threshold>] [-policy <policy>]>>>
+   Usage: <<<hdfs balancer [-threshold <threshold>] [-policy <policy>] [-idleiterations <idleiterations>]>>>
 
 *------------------------+----------------------------------------------------+
 || COMMAND_OPTION        | Description
 *------------------------+----------------------------------------------------+
@@ -337,6 +337,9 @@ HDFS Commands Guide
 | -threshold <threshold> | Percentage of disk capacity. This overwrites the
 |                        | default threshold.
 *------------------------+----------------------------------------------------+
+| -idleiterations <iterations> | Maximum number of idle iterations before exit.
+|                        | This overwrites the default idleiterations(5).
+*------------------------+----------------------------------------------------+
 
    Runs a cluster balancing utility. An administrator can simply press Ctrl-C to
    stop the rebalancing process. See
http://git-wip-us.apache.org/repos/asf/hadoop/blob/b94c1117/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
index 6955fcd..153baeb 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
@@ -564,6 +564,7 @@ public class TestBalancer {
       p = new Balancer.Parameters(
           Balancer.Parameters.DEFAULT.policy,
           Balancer.Parameters.DEFAULT.threshold,
+          Balancer.Parameters.DEFAULT.maxIdleIteration,
           nodes.getNodesToBeExcluded(), nodes.getNodesToBeIncluded());
     }
 
@@ -629,7 +630,8 @@ public class TestBalancer {
     List<NameNodeConnector> connectors = Collections.emptyList();
     try {
       connectors = NameNodeConnector.newNameNodeConnectors(namenodes,
-          Balancer.class.getSimpleName(), Balancer.BALANCER_ID_PATH, conf);
+          Balancer.class.getSimpleName(), Balancer.BALANCER_ID_PATH, conf,
+          Balancer.Parameters.DEFAULT.maxIdleIteration);
 
       boolean done = false;
       for(int iteration = 0; !done; iteration++) {
@@ -801,6 +803,7 @@ public class TestBalancer {
       Balancer.Parameters p = new Balancer.Parameters(
           Balancer.Parameters.DEFAULT.policy,
           Balancer.Parameters.DEFAULT.threshold,
+          Balancer.Parameters.DEFAULT.maxIdleIteration,
           datanodes, Balancer.Parameters.DEFAULT.nodesToBeIncluded);
       final int r = Balancer.run(namenodes, p, conf);
       assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
@@ -1233,6 +1236,7 @@ public class TestBalancer {
       Balancer.Parameters p = new Balancer.Parameters(
           Parameters.DEFAULT.policy,
           Parameters.DEFAULT.threshold,
+          Balancer.Parameters.DEFAULT.maxIdleIteration,
           Parameters.DEFAULT.nodesToBeExcluded,
           Parameters.DEFAULT.nodesToBeIncluded);
       final int r = Balancer.run(namenodes, p, conf);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b94c1117/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
index c9fc5ba..f35e1c8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.util.*;
 
+import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
@@ -39,9 +40,14 @@ public class TestMover {
   static Mover newMover(Configuration conf) throws IOException {
     final Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
     Assert.assertEquals(1, namenodes.size());
+    Map<URI, List<Path>> nnMap = Maps.newHashMap();
+    for (URI nn : namenodes) {
+      nnMap.put(nn, null);
+    }
     final List<NameNodeConnector> nncs = NameNodeConnector.newNameNodeConnectors(
-        namenodes, Mover.class.getSimpleName(), Mover.MOVER_ID_PATH, conf);
+        nnMap, Mover.class.getSimpleName(), Mover.MOVER_ID_PATH, conf,
+        NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
     return new Mover(nncs.get(0), conf);
   }
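Callers that never pass -idleiterations are expected to behave exactly as before the patch. A small hypothetical JUnit check, not included in this commit and placed alongside the existing balancer tests so the package-private Parameters class is visible, could pin that down:

package org.apache.hadoop.hdfs.server.balancer;

import org.junit.Assert;
import org.junit.Test;

public class TestIdleIterationDefault {
  @Test
  public void defaultMatchesPreviousConstant() {
    // The extracted constant keeps the historical value of 5 idle iterations...
    Assert.assertEquals(5, NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS);
    // ...and Parameters.DEFAULT carries it, so -idleiterations stays purely opt-in.
    Assert.assertEquals(NameNodeConnector.DEFAULT_MAX_IDLE_ITERATIONS,
        Balancer.Parameters.DEFAULT.maxIdleIteration);
  }
}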