This is an automated email from the ASF dual-hosted git repository. elserj pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hbase-operator-tools.git
The following commit(s) were added to refs/heads/master by this push: new 9aa27fe HBASE-23562 [operator tools] Add a RegionsMerge tool that allows for merging multiple adjacent regions until a desired number of regions is reached. 9aa27fe is described below commit 9aa27feea4f6b214a59842685a500b7c05c682d8 Author: Wellington Chevreuil <wchevre...@apache.org> AuthorDate: Wed Dec 11 14:33:13 2019 +0000 HBASE-23562 [operator tools] Add a RegionsMerge tool that allows for merging multiple adjacent regions until a desired number of regions is reached. Co-authored-by: BukrosSzabolcs <bukros.szabo...@gmail.com> Closes #56 Signed-off-by: Josh Elser <els...@apache.org> --- hbase-operator-tools-assembly/pom.xml | 5 + hbase-tools/README.md | 87 +++++++ hbase-tools/pom.xml | 203 +++++++++++++++++ .../main/java/org/apache/hbase/RegionsMerger.java | 253 +++++++++++++++++++++ hbase-tools/src/main/resources/log4j2.xml | 35 +++ .../java/org/apache/hbase/TestRegionsMerger.java | 130 +++++++++++ pom.xml | 6 + 7 files changed, 719 insertions(+) diff --git a/hbase-operator-tools-assembly/pom.xml b/hbase-operator-tools-assembly/pom.xml index 1f28edd..8412fce 100644 --- a/hbase-operator-tools-assembly/pom.xml +++ b/hbase-operator-tools-assembly/pom.xml @@ -60,5 +60,10 @@ <artifactId>hbase-hbck2</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.hbase.operator.tools</groupId> + <artifactId>hbase-tools</artifactId> + <version>${project.version}</version> + </dependency> </dependencies> </project> diff --git a/hbase-tools/README.md b/hbase-tools/README.md new file mode 100644 index 0000000..bb0a2cd --- /dev/null +++ b/hbase-tools/README.md @@ -0,0 +1,87 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +# Apache HBase Tool for merging regions + +_RegionsMerger_ is a utility tool for manually merging a bunch of regions of +a given table. It's mainly useful in situations when an HBase cluster has too +many regions per RegionServer, and many of these regions are small enough that +they can be merged together, reducing the total number of regions in the cluster +and releasing RegionServers' overall memory resources. + +This may happen due to mistaken pre-splits, or after a purge of table +data, as regions would not be automatically merged. + +## Setup +Make sure HBase tools jar is added to HBase classpath: + +``` +export HBASE_CLASSPATH=$HBASE_CLASSPATH:./hbase-tools-1.1.0-SNAPSHOT.jar +``` + +## Usage + +_RegionsMerger_ requires two arguments as parameters: 1) The name of the table +to have regions merged; 2) The desired total number of regions for the informed +table. For example, to merge all regions of table `my-table` until it gets to a +total of 5 regions, assuming the _setup_ step above has been performed: + +``` +$ hbase org.apache.hbase.RegionsMerger my-table 5 +``` + +## Implementation Details + +_RegionsMerger_ uses client API +_org.apache.hadoop.hbase.client.Admin.getRegions_ to fetch the list of regions +for the specified table, iterates through the resulting list, identifying pairs +of adjacent regions. 
For each pair found, it submits a merge request using +_org.apache.hadoop.hbase.client.Admin.mergeRegionsAsync_ client API method. +This means multiple merge requests will have been sent once the whole list has been +iterated. + +Assuming that all merges issued by the RegionsMerger are successful, the resulting number of +regions will be no more than half the original number of regions. This resulting total +might not be equal to the target value passed as parameter, in which case +_RegionsMerger_ will perform another round of merge requests, this time over +the current existing regions (it fetches another list of regions from + _org.apache.hadoop.hbase.client.Admin.getRegions_). + +Merge requests are processed asynchronously. HBase may take a certain time to +complete some merge requests, so _RegionsMerger_ may perform some sleep between +rounds of regions iteration for sending requests. The specific amount of time is +configured by `hbase.tools.merge.sleep` property, in milliseconds, and it +defaults to `2000` (2 seconds). + +While iterating through the list of regions, once a pair of adjacent regions is +detected, _RegionsMerger_ checks the current file system size of each region (excluding MOB data), +before deciding to submit the merge request for the given regions. If the sum of +both regions' sizes exceeds a threshold, merge will not be attempted. +This threshold is a configurable percentage of `hbase.hregion.max.filesize` +value, and is applied to prevent merged regions from getting immediately split +after the merge completes, which would happen automatically if the resulting +region size reaches `hbase.hregion.max.filesize` value. The percentage of +`hbase.hregion.max.filesize` is a double value configurable via +`hbase.tools.merge.upper.mark` property and it defaults to `0.9`. + +Given this `hbase.hregion.max.filesize` restriction for merge results, it may be +impossible to achieve the desired total number of regions. 
+_RegionsMerger_ keeps track of the progress of region merges on each round. +If no progress is observed after a configurable number of rounds, +_RegionsMerger_ aborts automatically. The limit of rounds without progress is an +integer value configured via `hbase.tools.max.iterations.blocked` property. diff --git a/hbase-tools/pom.xml b/hbase-tools/pom.xml new file mode 100644 index 0000000..c5659ce --- /dev/null +++ b/hbase-tools/pom.xml @@ -0,0 +1,203 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <!-- + /** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + --> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>hbase-operator-tools</artifactId> + <groupId>org.apache.hbase.operator.tools</groupId> + <version>1.1.0-SNAPSHOT</version> + <relativePath>..</relativePath> + </parent> + + + <artifactId>hbase-tools</artifactId> + <name>Apache HBase - HBase Tools</name> + <description>Utility Maintenance tools for HBase 2+</description> + <properties> + <hbase-thirdparty.version>2.2.1</hbase-thirdparty.version> + <log4j2.version>2.11.1</log4j2.version> + </properties> + + <dependencies> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>4.12</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.logging.log4j</groupId> + <artifactId>log4j-slf4j-impl</artifactId> + <version>${log4j2.version}</version> + </dependency> + + <!--We want to use the shaded client but for testing, we need to rely on hbase-server. + HBASE-15666 is about how shaded-client and hbase-server won't work together. 
+ TODO: Fix.--> + + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-server</artifactId> + <version>${hbase.version}</version> + <scope>provided</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-shaded-testing-util</artifactId> + <version>${hbase.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-zookeeper</artifactId> + <version>${hbase.version}</version> + <scope>provided</scope> + <type>test-jar</type> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-common</artifactId> + <version>${hbase.version}</version> + <scope>provided</scope> + <type>test-jar</type> + </dependency> + <dependency> + <groupId>org.apache.hbase</groupId> + <artifactId>hbase-testing-util</artifactId> + <version>${hbase.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <version>2.1.0</version> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <resources> + <resource> + <directory>src/main/resources</directory> + <filtering>true</filtering> + </resource> + </resources> + <testResources> + <testResource> + <directory>src/test/resources/META-INF/</directory> + <targetPath>META-INF/</targetPath> + <includes> + <include>NOTICE</include> + </includes> + <filtering>true</filtering> + </testResource> + </testResources> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-remote-resources-plugin</artifactId> + </plugin> + <plugin> + <artifactId>maven-surefire-plugin</artifactId> + </plugin> + <!-- Make a jar and put the sources in the jar --> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-source-plugin</artifactId> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + </plugin> + 
<!--Used packaging a fat jar--> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-shade-plugin</artifactId> + <version>3.2.0</version> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>shade</goal> + </goals> + <configuration> + <artifactSet> + <excludes> + <exclude>classworlds:classworlds</exclude> + <exclude>junit:junit</exclude> + <exclude>jmock:*</exclude> + <exclude>*:xml-apis</exclude> + <exclude>org.apache.maven:lib:tests</exclude> + <exclude>log4j:log4j:jar:</exclude> + </excludes> + </artifactSet> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-checkstyle-plugin</artifactId> + <configuration> + <failOnViolation>true</failOnViolation> + </configuration> + </plugin> + </plugins> + </build> + <profiles> + <!-- Needs to match the profile in apache parent pom --> + <profile> + <id>apache-release</id> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-resources-plugin</artifactId> + <executions> + <execution> + <id>license-javadocs</id> + <phase>prepare-package</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>${project.build.directory}/apidocs</outputDirectory> + <resources> + <resource> + <directory>src/main/javadoc/META-INF/</directory> + <targetPath>META-INF/</targetPath> + <includes> + <include>NOTICE</include> + </includes> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> +</project> diff --git a/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java b/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java new file mode 100644 index 0000000..afe3ab4 --- /dev/null +++ b/hbase-tools/src/main/java/org/apache/hbase/RegionsMerger.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hbase; + +import static org.apache.hadoop.hbase.HConstants.CATALOG_FAMILY; +import static org.apache.hadoop.hbase.HConstants.REGIONINFO_QUALIFIER; +import static org.apache.hadoop.hbase.HConstants.STATE_QUALIFIER; +import static org.apache.hadoop.hbase.TableName.META_TABLE_NAME; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.LongAdder; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.CompareOperator; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Result; 
+import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.filter.FilterList; +import org.apache.hadoop.hbase.filter.RowFilter; +import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; +import org.apache.hadoop.hbase.filter.SubstringComparator; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.util.ToolRunner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * HBase maintenance tool for merging regions of a specific table, until a target number of regions + * for the table is reached, or no more merges can complete due to limit in resulting merged + * region size. + */ +public class RegionsMerger extends Configured implements org.apache.hadoop.util.Tool { + + private static final Logger LOG = LoggerFactory.getLogger(RegionsMerger.class.getName()); + public static final String RESULTING_REGION_UPPER_MARK = "hbase.tools.merge.upper.mark"; + public static final String SLEEP = "hbase.tools.merge.sleep"; + public static final String MAX_ROUNDS_IDLE = "hbase.tools.max.iterations.blocked"; + + private final Configuration conf; + private final FileSystem fs; + private final double resultSizeThreshold; + private final int sleepBetweenCycles; + private final long maxRoundsStuck; + + public RegionsMerger(Configuration conf) throws IOException { + this.conf = conf; + Path basePath = new Path(conf.get(HConstants.HBASE_DIR)); + fs = basePath.getFileSystem(conf); + resultSizeThreshold = this.conf.getDouble(RESULTING_REGION_UPPER_MARK, 0.9) * + this.conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE); + sleepBetweenCycles = this.conf.getInt(SLEEP, 2000); + this.maxRoundsStuck = this.conf.getInt(MAX_ROUNDS_IDLE, 10); + } + + private Path getTablePath(TableName table){ + Path basePath = new Path(conf.get(HConstants.HBASE_DIR)); + basePath = new 
Path(basePath, "data"); + Path tablePath = new Path(basePath, table.getNamespaceAsString()); + return new Path(tablePath, table.getNameAsString()); + } + + private long sumSizeInFS(Path parentPath) throws IOException { + long size = 0; + FileStatus[] files = this.fs.listStatus(parentPath); + for(FileStatus f : files) { + if(f.isFile()) { + size += f.getLen(); + } else if(f.isDirectory()) { + size += sumSizeInFS(f.getPath()); + } + } + return size; + } + + private List<RegionInfo> getOpenRegions(Connection connection, TableName table) throws Exception { + List<RegionInfo> regions = new ArrayList<>(); + Table metaTbl = connection.getTable(META_TABLE_NAME); + String tblName = table.getNameAsString(); + RowFilter rowFilter = new RowFilter(CompareOperator.EQUAL, + new SubstringComparator(tblName+",")); + SingleColumnValueFilter colFilter = new SingleColumnValueFilter(CATALOG_FAMILY, + STATE_QUALIFIER, CompareOperator.EQUAL, Bytes.toBytes("OPEN")); + Scan scan = new Scan(); + FilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL); + filter.addFilter(rowFilter); + filter.addFilter(colFilter); + scan.setFilter(filter); + try(ResultScanner rs = metaTbl.getScanner(scan)){ + Result r; + while ((r = rs.next()) != null) { + RegionInfo region = RegionInfo.parseFrom(r.getValue(CATALOG_FAMILY, REGIONINFO_QUALIFIER)); + regions.add(region); + } + } + return regions; + } + + private boolean canMerge(Path path, RegionInfo region1, RegionInfo region2, + Collection<Pair<RegionInfo, RegionInfo>> alreadyMerging) throws IOException { + if(alreadyMerging.stream().anyMatch(regionPair -> + region1.equals(regionPair.getFirst()) || + region2.equals(regionPair.getFirst()) || + region1.equals(regionPair.getSecond()) || + region2.equals(regionPair.getSecond()))){ + return false; + } + if (RegionInfo.areAdjacent(region1, region2)) { + long size1 = sumSizeInFS(new Path(path, region1.getEncodedName())); + long size2 = sumSizeInFS(new Path(path, region2.getEncodedName())); + boolean 
mergeable = (resultSizeThreshold > (size1 + size2)); + if (!mergeable) { + LOG.warn("Not merging regions {} and {} because resulting region size would get close to " + + "the {} limit. {} total size: {}; {} total size:{}", region1.getEncodedName(), + region2.getEncodedName(), resultSizeThreshold, region1.getEncodedName(), size1, + region2.getEncodedName(), size2); + } + return mergeable; + } else { + LOG.warn( + "WARNING: Can't merge regions {} and {} because those are not adjacent.", + region1.getEncodedName(), + region2.getEncodedName()); + return false; + } + } + + public void mergeRegions(String tblName, int targetRegions) throws Exception { + TableName table = TableName.valueOf(tblName); + Path tableDir = getTablePath(table); + try(Connection conn = ConnectionFactory.createConnection(conf)) { + Admin admin = conn.getAdmin(); + LongAdder counter = new LongAdder(); + LongAdder lastTimeProgessed = new LongAdder(); + //need to get all regions for the table, regardless of region state + List<RegionInfo> regions = admin.getRegions(table); + Map<Future, Pair<RegionInfo, RegionInfo>> regionsMerging = new HashMap<>(); + long roundsNoProgress = 0; + while (regions.size() > targetRegions) { + LOG.info("Iteration: {}", counter); + RegionInfo previous = null; + int regionSize = regions.size(); + LOG.info("Attempting to merge {} regions to reach the target {} ...", regionSize, targetRegions); + //to request merge, regions must be OPEN, though + regions = getOpenRegions(conn, table); + for (RegionInfo current : regions) { + if (!current.isSplit()) { + if (previous != null && canMerge(tableDir, previous, current, regionsMerging.values())) { + Future f = admin.mergeRegionsAsync(current.getEncodedNameAsBytes(), + previous.getEncodedNameAsBytes(), true); + Pair<RegionInfo, RegionInfo> regionPair = new Pair<>(previous, current); + regionsMerging.put(f,regionPair); + previous = null; + if ((regionSize - regionsMerging.size()) <= targetRegions) { + break; + } + } else { + previous 
= current; + } + } + else{ + LOG.debug("Skipping split region: {}", current.getEncodedName()); + } + } + counter.increment(); + LOG.info("Sleeping for {} seconds before next iteration...", (sleepBetweenCycles/1000)); + Thread.sleep(sleepBetweenCycles); + regionsMerging.forEach((f, currentPair)-> { + if (f.isDone()) { + LOG.info("Merged regions {} and {} together.", + currentPair.getFirst().getEncodedName(), + currentPair.getSecond().getEncodedName()); + regionsMerging.remove(f); + lastTimeProgessed.reset(); + lastTimeProgessed.add(counter.longValue()); + } else { + LOG.warn("Merge of regions {} and {} isn't completed yet.", + currentPair.getFirst(), + currentPair.getSecond()); + } + }); + roundsNoProgress = counter.longValue() - lastTimeProgessed.longValue(); + if(roundsNoProgress == this.maxRoundsStuck){ + LOG.warn("Reached {} iterations without progressing with new merges. Aborting...", + roundsNoProgress); + break; + } + + //again, get all regions, regardless of the state, + // in order to avoid breaking the loop prematurely + regions = admin.getRegions(table); + } + } + } + + @Override + public int run(String[] args) { + if(args.length!=2){ + LOG.error("Wrong number of arguments. 
" + + "Arguments are: <TABLE_NAME> <TARGET_NUMBER_OF_REGIONS>"); + return 1; + } + try { + this.mergeRegions(args[0], Integer.parseInt(args[1])); + } catch(Exception e){ + LOG.error("Merging regions failed:", e); + return 2; + } + return 0; + } + + public static void main(String [] args) throws Exception { + Configuration conf = HBaseConfiguration.create(); + int errCode = ToolRunner.run(new RegionsMerger(conf), args); + if (errCode != 0) { + System.exit(errCode); + } + } +} diff --git a/hbase-tools/src/main/resources/log4j2.xml b/hbase-tools/src/main/resources/log4j2.xml new file mode 100644 index 0000000..5084c97 --- /dev/null +++ b/hbase-tools/src/main/resources/log4j2.xml @@ -0,0 +1,35 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +--> +<Configuration status="INFO"> + <Appenders> + <Console name="Console" target="SYSTEM_OUT"> + <PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/> + </Console> + </Appenders> + <Loggers> + <Logger name="org.apache.zookeeper" level="warn" additivity="false"> + <AppenderRef ref="Console"/> + </Logger> + <Root level="info"> + <AppenderRef ref="Console"/> + </Root> + </Loggers> +</Configuration> diff --git a/hbase-tools/src/test/java/org/apache/hbase/TestRegionsMerger.java b/hbase-tools/src/test/java/org/apache/hbase/TestRegionsMerger.java new file mode 100644 index 0000000..ec43bfc --- /dev/null +++ b/hbase-tools/src/test/java/org/apache/hbase/TestRegionsMerger.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hbase; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.List; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class TestRegionsMerger { + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final TableName TABLE_NAME = + TableName.valueOf(TestRegionsMerger.class.getSimpleName()); + private static final byte[] family = Bytes.toBytes("f"); + private Table table; + + @BeforeClass + public static void beforeClass() throws Exception { + TEST_UTIL.getConfiguration().set(HConstants.HREGION_MAX_FILESIZE, + Long.toString(1024*1024*3)); + TEST_UTIL.startMiniCluster(3); + } + + @AfterClass + public static void afterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void setup() throws Exception { + table = TEST_UTIL.createMultiRegionTable(TABLE_NAME, family, 15); + TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME); + } + + @After + public void tearDown() throws Exception { + TEST_UTIL.deleteTable(TABLE_NAME); + } + + @Test + public void testMergeRegionsCanMergeToTarget() throws Exception { + final int originalCount = TEST_UTIL.countRows(table); + TEST_UTIL.getConfiguration().setInt(RegionsMerger.MAX_ROUNDS_IDLE, 10); + RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration()); + merger.mergeRegions(TABLE_NAME.getNameWithNamespaceInclAsString(), 3); + List<RegionInfo> result = TEST_UTIL.getAdmin().getRegions(TABLE_NAME); + assertEquals(3, result.size()); + assertEquals("Row count before and 
after merge should be equal", + originalCount, TEST_UTIL.countRows(table)); + } + + @Test + public void testMergeRegionsCanMergeSomeButNotToTarget() throws Exception { + TEST_UTIL.getConfiguration().setInt(RegionsMerger.MAX_ROUNDS_IDLE, 3); + RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration()); + generateTableData(); + final int originalCount = TEST_UTIL.countRows(table); + merger.mergeRegions(TABLE_NAME.getNameWithNamespaceInclAsString(), 3); + List<RegionInfo> result = TEST_UTIL.getAdmin().getRegions(TABLE_NAME); + assertEquals(8, result.size()); + assertEquals("Row count before and after merge should be equal", + originalCount, TEST_UTIL.countRows(table)); + } + + @Test + public void testMergeRegionsCannotMergeAny() throws Exception { + TEST_UTIL.getConfiguration().setDouble(RegionsMerger.RESULTING_REGION_UPPER_MARK, 0.5); + TEST_UTIL.getConfiguration().setInt(RegionsMerger.MAX_ROUNDS_IDLE, 2); + RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration()); + generateTableData(); + TEST_UTIL.getAdmin().flush(TABLE_NAME); + final int originalCount = TEST_UTIL.countRows(table); + merger.mergeRegions(TABLE_NAME.getNameWithNamespaceInclAsString(), 3); + List<RegionInfo> result = TEST_UTIL.getAdmin().getRegions(TABLE_NAME); + assertEquals(15, result.size()); + assertEquals("Row count before and after merge should be equal", + originalCount, TEST_UTIL.countRows(table)); + } + + @Test + public void testMergeRegionsInvalidParams() throws Exception { + final int originalCount = TEST_UTIL.countRows(table); + RegionsMerger merger = new RegionsMerger(TEST_UTIL.getConfiguration()); + assertEquals(1, merger.run(new String[]{})); + assertEquals("Row count before and after merge should be equal", + originalCount, TEST_UTIL.countRows(table)); + } + + private void generateTableData() throws Exception { + TEST_UTIL.getAdmin().getRegions(TABLE_NAME).forEach(r -> { + byte[] key = r.getStartKey().length == 0 ? 
new byte[]{0} : r.getStartKey(); + Put put = new Put(key); + put.addColumn(family, Bytes.toBytes("c"), new byte[1024*1024]); + try { + table.put(put); + } catch (IOException e) { + throw new Error("Failed to put row"); + } + }); + } +} diff --git a/pom.xml b/pom.xml index c759c75..74c6403 100644 --- a/pom.xml +++ b/pom.xml @@ -58,6 +58,7 @@ <!--Add an assembly module because of http://maven.apache.org/plugins/maven-assembly-plugin/faq.html#module-binaries --> <module>hbase-operator-tools-assembly</module> + <module>hbase-tools</module> </modules> <scm> <connection>scm:git:git://gitbox.apache.org/repos/asf/hbase-operator-tools.git</connection> @@ -147,6 +148,11 @@ <artifactId>hbase-hbck2</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.hbase.operator.tools</groupId> + <artifactId>hbase-tools</artifactId> + <version>${project.version}</version> + </dependency> </dependencies> </dependencyManagement> <build>