This is an automated email from the ASF dual-hosted git repository.

elserj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase-operator-tools.git


The following commit(s) were added to refs/heads/master by this push:
     new 9aa27fe  HBASE-23562 [operator tools] Add a RegionsMerge tool that allows for merging multiple adjacent regions until a desired number of regions is reached.
9aa27fe is described below

commit 9aa27feea4f6b214a59842685a500b7c05c682d8
Author: Wellington Chevreuil <wchevre...@apache.org>
AuthorDate: Wed Dec 11 14:33:13 2019 +0000

    HBASE-23562 [operator tools] Add a RegionsMerge tool that allows for merging multiple adjacent regions until a desired number of regions is reached.
    
    Co-authored-by: BukrosSzabolcs <bukros.szabo...@gmail.com>
    
    Closes #56
    
    Signed-off-by: Josh Elser <els...@apache.org>
---
 hbase-operator-tools-assembly/pom.xml              |   5 +
 hbase-tools/README.md                              |  87 +++++++
 hbase-tools/pom.xml                                | 203 +++++++++++++++++
 .../main/java/org/apache/hbase/RegionsMerger.java  | 253 +++++++++++++++++++++
 hbase-tools/src/main/resources/log4j2.xml          |  35 +++
 .../java/org/apache/hbase/TestRegionsMerger.java   | 130 +++++++++++
 pom.xml                                            |   6 +
 7 files changed, 719 insertions(+)

diff --git a/hbase-operator-tools-assembly/pom.xml b/hbase-operator-tools-assembly/pom.xml
index 1f28edd..8412fce 100644
--- a/hbase-operator-tools-assembly/pom.xml
+++ b/hbase-operator-tools-assembly/pom.xml
@@ -60,5 +60,10 @@
       <artifactId>hbase-hbck2</artifactId>
       <version>${project.version}</version>
     </dependency>
+  <dependency>
+    <groupId>org.apache.hbase.operator.tools</groupId>
+    <artifactId>hbase-tools</artifactId>
+    <version>${project.version}</version>
+  </dependency>
   </dependencies>
 </project>
diff --git a/hbase-tools/README.md b/hbase-tools/README.md
new file mode 100644
index 0000000..bb0a2cd
--- /dev/null
+++ b/hbase-tools/README.md
@@ -0,0 +1,87 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+# Apache HBase Tool for merging regions
+
+_RegionsMerger_ is a utility tool for manually merging a group of regions of
+a given table. It is mainly useful in situations where an HBase cluster has too
+many regions per RegionServer, and many of these regions are small enough that
+they can be merged together, reducing the total number of regions in the cluster
+and freeing overall RegionServer memory resources.
+
+This may happen after mistaken pre-splits, or after a purge of table
+data, since regions are not merged automatically.
+
+## Setup
+Make sure the hbase-tools jar is added to the HBase classpath:
+
+```
+export HBASE_CLASSPATH=$HBASE_CLASSPATH:./hbase-tools-1.1.0-SNAPSHOT.jar
+```
+
+## Usage
+
+_RegionsMerger_ requires two arguments: 1) the name of the table whose
+regions should be merged; 2) the desired total number of regions for that
+table. For example, to merge regions of table `my-table` until it reaches a
+total of 5 regions, assuming the _setup_ step above has been performed:
+
+```
+$ hbase org.apache.hbase.RegionsMerger my-table 5
+```
+
+## Implementation Details
+
+_RegionsMerger_ uses the client API
+_org.apache.hadoop.hbase.client.Admin.getRegions_ to fetch the list of regions
+for the specified table, then iterates through the resulting list, identifying
+pairs of adjacent regions. For each pair found, it submits a merge request
+using the _org.apache.hadoop.hbase.client.Admin.mergeRegionsAsync_ client API
+method. This means multiple merge requests will have been sent by the time the
+whole list has been iterated.
+
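+For example, a region covering the key range `[a, b)` and another covering
+`[b, c)` are adjacent and can be merged into a single region covering `[a, c)`;
+pairs whose key ranges are not contiguous are skipped.
+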
+Assuming that all merges issued by _RegionsMerger_ are successful, a single
+pass over the region list can at best roughly halve the number of regions. The
+resulting total may therefore still be higher than the target value passed as
+parameter, in which case _RegionsMerger_ performs another round of merge
+requests, this time over the currently existing regions (it fetches a fresh
+list of regions via _org.apache.hadoop.hbase.client.Admin.getRegions_).
+
+Merge requests are processed asynchronously. HBase may take some time to
+complete a given merge request, so _RegionsMerger_ sleeps between rounds of
+region iteration before sending further requests. The sleep period is
+configured by the `hbase.tools.merge.sleep` property, in milliseconds, and
+defaults to `2000` (2 seconds).
+
+While iterating through the list of regions, once a pair of adjacent regions is
+detected, _RegionsMerger_ checks the current file system size of each region
+(excluding MOB data) before deciding to submit the merge request for the given
+regions. If the sum of both region sizes exceeds a threshold, the merge is not
+attempted. This threshold is a configurable percentage of the
+`hbase.hregion.max.filesize` value, and is applied to avoid merged regions from
+getting immediately split after the merge completes, which would happen
+automatically once the resulting region size reaches the
+`hbase.hregion.max.filesize` value. The percentage of
+`hbase.hregion.max.filesize` is a double value configurable via the
+`hbase.tools.merge.upper.mark` property and defaults to `0.9`.
+
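+As a concrete illustration: with the default `hbase.hregion.max.filesize` of
+10GB and the default `hbase.tools.merge.upper.mark` of `0.9`, the merge size
+threshold is 9GB, so two adjacent regions of 5GB each (10GB combined) would not
+be merged, while two regions of 4GB each would be.
+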
+Given this `hbase.hregion.max.filesize` restriction on merge results, it may be
+impossible to achieve the desired total number of regions.
+_RegionsMerger_ keeps track of merge progress on each round. If no progress is
+observed after a configurable number of rounds, _RegionsMerger_ aborts
+automatically. The limit of rounds without progress is an integer value
+configured via the `hbase.tools.max.iterations.blocked` property, and defaults
+to `10`.
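+
+## Configuration Example
+
+The tuning properties above are read from the standard HBase configuration, so
+one way to adjust them is to define them in `hbase-site.xml` on the host where
+the tool runs. The values below are illustrative only, not recommendations:
+
+```
+<property>
+  <name>hbase.tools.merge.sleep</name>
+  <value>5000</value>
+</property>
+<property>
+  <name>hbase.tools.merge.upper.mark</name>
+  <value>0.8</value>
+</property>
+<property>
+  <name>hbase.tools.max.iterations.blocked</name>
+  <value>5</value>
+</property>
+```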
diff --git a/hbase-tools/pom.xml b/hbase-tools/pom.xml
new file mode 100644
index 0000000..c5659ce
--- /dev/null
+++ b/hbase-tools/pom.xml
@@ -0,0 +1,203 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <!--
+    /**
+     * Licensed to the Apache Software Foundation (ASF) under one
+     * or more contributor license agreements.  See the NOTICE file
+     * distributed with this work for additional information
+     * regarding copyright ownership.  The ASF licenses this file
+     * to you under the Apache License, Version 2.0 (the
+     * "License"); you may not use this file except in compliance
+     * with the License.  You may obtain a copy of the License at
+     *
+     *     http://www.apache.org/licenses/LICENSE-2.0
+     *
+     * Unless required by applicable law or agreed to in writing, software
+     * distributed under the License is distributed on an "AS IS" BASIS,
+     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     * See the License for the specific language governing permissions and
+     * limitations under the License.
+     */
+    -->
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <artifactId>hbase-operator-tools</artifactId>
+        <groupId>org.apache.hbase.operator.tools</groupId>
+        <version>1.1.0-SNAPSHOT</version>
+        <relativePath>..</relativePath>
+    </parent>
+
+
+    <artifactId>hbase-tools</artifactId>
+    <name>Apache HBase - HBase Tools</name>
+    <description>Utility Maintenance tools for HBase 2+</description>
+    <properties>
+        <hbase-thirdparty.version>2.2.1</hbase-thirdparty.version>
+        <log4j2.version>2.11.1</log4j2.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.12</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-slf4j-impl</artifactId>
+            <version>${log4j2.version}</version>
+        </dependency>
+
+        <!--We want to use the shaded client but for testing, we need to rely on hbase-server.
+            HBASE-15666 is about how shaded-client and hbase-server won't work together.
+            TODO: Fix.-->
+
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-server</artifactId>
+            <version>${hbase.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-shaded-testing-util</artifactId>
+            <version>${hbase.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-zookeeper</artifactId>
+            <version>${hbase.version}</version>
+            <scope>provided</scope>
+            <type>test-jar</type>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-common</artifactId>
+            <version>${hbase.version}</version>
+            <scope>provided</scope>
+            <type>test-jar</type>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hbase</groupId>
+            <artifactId>hbase-testing-util</artifactId>
+            <version>${hbase.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <version>2.1.0</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+            </resource>
+        </resources>
+        <testResources>
+            <testResource>
+                <directory>src/test/resources/META-INF/</directory>
+                <targetPath>META-INF/</targetPath>
+                <includes>
+                    <include>NOTICE</include>
+                </includes>
+                <filtering>true</filtering>
+            </testResource>
+        </testResources>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-remote-resources-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <artifactId>maven-surefire-plugin</artifactId>
+            </plugin>
+            <!-- Make a jar and put the sources in the jar -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-source-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-jar-plugin</artifactId>
+            </plugin>
+            <!--Used packaging a fat jar-->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.2.0</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <artifactSet>
+                                <excludes>
+                                    <exclude>classworlds:classworlds</exclude>
+                                    <exclude>junit:junit</exclude>
+                                    <exclude>jmock:*</exclude>
+                                    <exclude>*:xml-apis</exclude>
+                                    <exclude>org.apache.maven:lib:tests</exclude>
+                                    <exclude>log4j:log4j:jar:</exclude>
+                                </excludes>
+                            </artifactSet>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-checkstyle-plugin</artifactId>
+                <configuration>
+                    <failOnViolation>true</failOnViolation>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+    <profiles>
+        <!-- Needs to match the profile in apache parent pom -->
+        <profile>
+            <id>apache-release</id>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-resources-plugin</artifactId>
+                        <executions>
+                            <execution>
+                                <id>license-javadocs</id>
+                                <phase>prepare-package</phase>
+                                <goals>
+                                    <goal>copy-resources</goal>
+                                </goals>
+                                <configuration>
+                                    <outputDirectory>${project.build.directory}/apidocs</outputDirectory>
+                                    <resources>
+                                        <resource>
+                                            <directory>src/main/javadoc/META-INF/</directory>
+                                            <targetPath>META-INF/</targetPath>
+                                            <includes>
+                                                <include>NOTICE</include>
+                                            </includes>
+                                            <filtering>true</filtering>
+                                        </resource>
+                                    </resources>
+                                </configuration>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+</project>
diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java b/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java
new file mode 100644
index 0000000..afe3ab4
--- /dev/null
+++ b/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java
@@ -0,0 +1,253 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hbase;
+
+import static org.apache.hadoop.hbase.HConstants.CATALOG_FAMILY;
+import static org.apache.hadoop.hbase.HConstants.REGIONINFO_QUALIFIER;
+import static org.apache.hadoop.hbase.HConstants.STATE_QUALIFIER;
+import static org.apache.hadoop.hbase.TableName.META_TABLE_NAME;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.LongAdder;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CompareOperator;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.hbase.filter.RowFilter;
+import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
+import org.apache.hadoop.hbase.filter.SubstringComparator;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * HBase maintenance tool for merging regions of a specific table, until a target number of regions
+ * for the table is reached, or no more merges can complete due to the limit on the resulting merged
+ * region size.
+ */
+public class RegionsMerger extends Configured implements org.apache.hadoop.util.Tool {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RegionsMerger.class.getName());
+  public static final String RESULTING_REGION_UPPER_MARK = "hbase.tools.merge.upper.mark";
+  public static final String SLEEP = "hbase.tools.merge.sleep";
+  public static final String MAX_ROUNDS_IDLE = "hbase.tools.max.iterations.blocked";
+
+  private final Configuration conf;
+  private final FileSystem fs;
+  private final double resultSizeThreshold;
+  private final int sleepBetweenCycles;
+  private final long maxRoundsStuck;
+
+  public RegionsMerger(Configuration conf) throws IOException {
+    this.conf = conf;
+    Path basePath = new Path(conf.get(HConstants.HBASE_DIR));
+    fs = basePath.getFileSystem(conf);
+    resultSizeThreshold = this.conf.getDouble(RESULTING_REGION_UPPER_MARK, 0.9) *
+      this.conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
+    sleepBetweenCycles = this.conf.getInt(SLEEP, 2000);
+    this.maxRoundsStuck = this.conf.getInt(MAX_ROUNDS_IDLE, 10);
+  }
+
+  private Path getTablePath(TableName table){
+    Path basePath = new Path(conf.get(HConstants.HBASE_DIR));
+    basePath = new Path(basePath, "data");
+    Path tablePath = new Path(basePath, table.getNamespaceAsString());
+    return new Path(tablePath, table.getNameAsString());
+  }
+
+  private long sumSizeInFS(Path parentPath) throws IOException {
+    long size = 0;
+    FileStatus[] files = this.fs.listStatus(parentPath);
+    for(FileStatus f : files) {
+      if(f.isFile()) {
+        size += f.getLen();
+      } else if(f.isDirectory()) {
+        size += sumSizeInFS(f.getPath());
+      }
+    }
+    return size;
+  }
+
+  private List<RegionInfo> getOpenRegions(Connection connection, TableName table) throws Exception {
+    List<RegionInfo> regions = new ArrayList<>();
+    Table metaTbl = connection.getTable(META_TABLE_NAME);
+    String tblName = table.getNameAsString();
+    RowFilter rowFilter = new RowFilter(CompareOperator.EQUAL,
+      new SubstringComparator(tblName+","));
+    SingleColumnValueFilter colFilter = new SingleColumnValueFilter(CATALOG_FAMILY,
+      STATE_QUALIFIER, CompareOperator.EQUAL, Bytes.toBytes("OPEN"));
+    Scan scan = new Scan();
+    FilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+    filter.addFilter(rowFilter);
+    filter.addFilter(colFilter);
+    scan.setFilter(filter);
+    try(ResultScanner rs = metaTbl.getScanner(scan)){
+      Result r;
+      while ((r = rs.next()) != null) {
+        RegionInfo region = RegionInfo.parseFrom(r.getValue(CATALOG_FAMILY, REGIONINFO_QUALIFIER));
+        regions.add(region);
+      }
+    }
+    return regions;
+  }
+
+  private boolean canMerge(Path path, RegionInfo region1, RegionInfo region2,
+      Collection<Pair<RegionInfo, RegionInfo>> alreadyMerging) throws IOException {
+    if(alreadyMerging.stream().anyMatch(regionPair ->
+        region1.equals(regionPair.getFirst()) ||
+        region2.equals(regionPair.getFirst()) ||
+        region1.equals(regionPair.getSecond()) ||
+        region2.equals(regionPair.getSecond()))){
+      return false;
+    }
+    if (RegionInfo.areAdjacent(region1, region2)) {
+      long size1 = sumSizeInFS(new Path(path, region1.getEncodedName()));
+      long size2 = sumSizeInFS(new Path(path, region2.getEncodedName()));
+      boolean mergeable = (resultSizeThreshold > (size1 + size2));
+      if (!mergeable) {
+        LOG.warn("Not merging regions {} and {} because resulting region size 
would get close to " +
+            "the {} limit. {} total size: {}; {} total size:{}", 
region1.getEncodedName(),
+          region2.getEncodedName(), resultSizeThreshold, 
region1.getEncodedName(), size1,
+          region2.getEncodedName(), size2);
+      }
+      return mergeable;
+    } else {
+      LOG.warn(
+        "WARNING: Can't merge regions {} and {} because those are not 
adjacent.",
+        region1.getEncodedName(),
+        region2.getEncodedName());
+      return false;
+    }
+  }
+
+  public void mergeRegions(String tblName, int targetRegions) throws Exception {
+    TableName table = TableName.valueOf(tblName);
+    Path tableDir = getTablePath(table);
+    try(Connection conn = ConnectionFactory.createConnection(conf)) {
+      Admin admin = conn.getAdmin();
+      LongAdder counter = new LongAdder();
+      LongAdder lastTimeProgessed = new LongAdder();
+      //need to get all regions for the table, regardless of region state
+      List<RegionInfo> regions = admin.getRegions(table);
+      Map<Future, Pair<RegionInfo, RegionInfo>> regionsMerging = new HashMap<>();
+      long roundsNoProgress = 0;
+      while (regions.size() > targetRegions) {
+        LOG.info("Iteration: {}", counter);
+        RegionInfo previous = null;
+        int regionSize = regions.size();
+        LOG.info("Attempting to merge {} regions to reach the target {} ...", 
regionSize, targetRegions);
+        //to request merge, regions must be OPEN, though
+        regions = getOpenRegions(conn, table);
+        for (RegionInfo current : regions) {
+          if (!current.isSplit()) {
+            if (previous != null && canMerge(tableDir, previous, current, regionsMerging.values())) {
+              Future f = admin.mergeRegionsAsync(current.getEncodedNameAsBytes(),
+                  previous.getEncodedNameAsBytes(), true);
+              Pair<RegionInfo, RegionInfo> regionPair = new Pair<>(previous, current);
+              regionsMerging.put(f,regionPair);
+              previous = null;
+              if ((regionSize - regionsMerging.size()) <= targetRegions) {
+                break;
+              }
+            } else {
+              previous = current;
+            }
+          }
+          else{
+            LOG.debug("Skipping split region: {}", current.getEncodedName());
+          }
+        }
+        counter.increment();
+        LOG.info("Sleeping for {} seconds before next iteration...", 
(sleepBetweenCycles/1000));
+        Thread.sleep(sleepBetweenCycles);
+        regionsMerging.forEach((f, currentPair)-> {
+          if (f.isDone()) {
+            LOG.info("Merged regions {} and {} together.",
+              currentPair.getFirst().getEncodedName(),
+              currentPair.getSecond().getEncodedName());
+            regionsMerging.remove(f);
+            lastTimeProgessed.reset();
+            lastTimeProgessed.add(counter.longValue());
+          } else {
+            LOG.warn("Merge of regions {} and {} isn't completed yet.",
+              currentPair.getFirst(),
+              currentPair.getSecond());
+          }
+        });
+        roundsNoProgress = counter.longValue() - lastTimeProgessed.longValue();
+        if(roundsNoProgress == this.maxRoundsStuck){
+          LOG.warn("Reached {} iterations without progressing with new merges. 
Aborting...",
+            roundsNoProgress);
+          break;
+        }
+
+        //again, get all regions, regardless of the state,
+        // in order to avoid breaking the loop prematurely
+        regions = admin.getRegions(table);
+      }
+    }
+  }
+
+  @Override
+  public int run(String[] args) {
+    if(args.length!=2){
+      LOG.error("Wrong number of arguments. "
+        + "Arguments are: <TABLE_NAME> <TARGET_NUMBER_OF_REGIONS>");
+      return 1;
+    }
+    try {
+      this.mergeRegions(args[0], Integer.parseInt(args[1]));
+    } catch(Exception e){
+      LOG.error("Merging regions failed:", e);
+      return 2;
+    }
+    return 0;
+  }
+
+  public static void main(String [] args) throws Exception {
+    Configuration conf = HBaseConfiguration.create();
+    int errCode = ToolRunner.run(new RegionsMerger(conf), args);
+    if (errCode != 0) {
+      System.exit(errCode);
+    }
+  }
+}
diff --git a/hbase-tools/src/main/resources/log4j2.xml b/hbase-tools/src/main/resources/log4j2.xml
new file mode 100644
index 0000000..5084c97
--- /dev/null
+++ b/hbase-tools/src/main/resources/log4j2.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+-->
+<Configuration status="INFO">
+  <Appenders>
+    <Console name="Console" target="SYSTEM_OUT">
+      <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
+    </Console>
+  </Appenders>
+  <Loggers>
+    <Logger name="org.apache.zookeeper" level="warn" additivity="false">
+      <AppenderRef ref="Console"/>
+    </Logger>
+    <Root level="info">
+      <AppenderRef ref="Console"/>
+    </Root>
+  </Loggers>
+</Configuration>
diff --git a/hbase-tools/src/test/java/org/apache/hbase/TestRegionsMerger.java b/hbase-tools/src/test/java/org/apache/hbase/TestRegionsMerger.java
new file mode 100644
index 0000000..ec43bfc
--- /dev/null
+++ b/hbase-tools/src/test/java/org/apache/hbase/TestRegionsMerger.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hbase;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+
+public class TestRegionsMerger {
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final TableName TABLE_NAME =
+    TableName.valueOf(TestRegionsMerger.class.getSimpleName());
+  private static final byte[] family = Bytes.toBytes("f");
+  private Table table;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    TEST_UTIL.getConfiguration().set(HConstants.HREGION_MAX_FILESIZE,
+      Long.toString(1024*1024*3));
+    TEST_UTIL.startMiniCluster(3);
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void setup() throws Exception {
+    table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, family, 15);
+    TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    TEST_UTIL.deleteTable(TABLE_NAME);
+  }
+
+  @Test
+  public void testMergeRegionsCanMergeToTarget() throws Exception {
+    final int originalCount = TEST_UTIL.countRows(table);
+    TEST_UTIL.getConfiguration().setInt(RegionsMerger.MAX_ROUNDS_IDLE, 10);
+    RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration());
+    merger.mergeRegions(TABLE_NAME.getNameWithNamespaceInclAsString(), 3);
+    List<RegionInfo> result = TEST_UTIL.getAdmin().getRegions(TABLE_NAME);
+    assertEquals(3, result.size());
+    assertEquals("Row count before and after merge should be equal",
+        originalCount, TEST_UTIL.countRows(table));
+  }
+
+  @Test
+  public void testMergeRegionsCanMergeSomeButNotToTarget() throws Exception {
+    TEST_UTIL.getConfiguration().setInt(RegionsMerger.MAX_ROUNDS_IDLE, 3);
+    RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration());
+    generateTableData();
+    final int originalCount = TEST_UTIL.countRows(table);
+    merger.mergeRegions(TABLE_NAME.getNameWithNamespaceInclAsString(), 3);
+    List<RegionInfo> result = TEST_UTIL.getAdmin().getRegions(TABLE_NAME);
+    assertEquals(8, result.size());
+    assertEquals("Row count before and after merge should be equal",
+        originalCount, TEST_UTIL.countRows(table));
+  }
+
+  @Test
+  public void testMergeRegionsCannotMergeAny() throws Exception {
+    TEST_UTIL.getConfiguration().setDouble(RegionsMerger.RESULTING_REGION_UPPER_MARK, 0.5);
+    TEST_UTIL.getConfiguration().setInt(RegionsMerger.MAX_ROUNDS_IDLE, 2);
+    RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration());
+    generateTableData();
+    TEST_UTIL.getAdmin().flush(TABLE_NAME);
+    final int originalCount = TEST_UTIL.countRows(table);
+    merger.mergeRegions(TABLE_NAME.getNameWithNamespaceInclAsString(), 3);
+    List<RegionInfo> result = TEST_UTIL.getAdmin().getRegions(TABLE_NAME);
+    assertEquals(15, result.size());
+    assertEquals("Row count before and after merge should be equal",
+        originalCount, TEST_UTIL.countRows(table));
+  }
+
+  @Test
+  public void testMergeRegionsInvalidParams() throws Exception {
+    final int originalCount = TEST_UTIL.countRows(table);
+    RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration());
+    assertEquals(1, merger.run(new String[]{}));
+    assertEquals("Row count before and after merge should be equal",
+        originalCount, TEST_UTIL.countRows(table));
+  }
+
+  private void generateTableData() throws Exception {
+    TEST_UTIL.getAdmin().getRegions(TABLE_NAME).forEach(r -> {
+      byte[] key = r.getStartKey().length == 0 ? new byte[]{0} : r.getStartKey();
+      Put put = new Put(key);
+      put.addColumn(family, Bytes.toBytes("c"), new byte[1024*1024]);
+      try {
+        table.put(put);
+      } catch (IOException e) {
+        throw new Error("Failed to put row");
+      }
+    });
+  }
+}
diff --git a/pom.xml b/pom.xml
index c759c75..74c6403 100644
--- a/pom.xml
+++ b/pom.xml
@@ -58,6 +58,7 @@
    <!--Add an assembly module because of http://maven.apache.org/plugins/maven-assembly-plugin/faq.html#module-binaries
          -->
     <module>hbase-operator-tools-assembly</module>
+      <module>hbase-tools</module>
   </modules>
  <scm>
    <connection>scm:git:git://gitbox.apache.org/repos/asf/hbase-operator-tools.git</connection>
@@ -147,6 +148,11 @@
         <artifactId>hbase-hbck2</artifactId>
         <version>${project.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.hbase.operator.tools</groupId>
+        <artifactId>hbase-tools</artifactId>
+        <version>${project.version}</version>
+      </dependency>
     </dependencies>
   </dependencyManagement>
   <build>
