This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.5 by this push:
     new 51576ae45a9 HBASE-27305 add an option to skip file splitting when 
bulkload hfiles (#4709)
51576ae45a9 is described below

commit 51576ae45a97036e9b0a578d742d27c89ea0e808
Author: Ruanhui <32773751+frostr...@users.noreply.github.com>
AuthorDate: Mon Aug 22 21:14:30 2022 +0800

    HBASE-27305 add an option to skip file splitting when bulkload hfiles 
(#4709)
    
    Co-authored-by: huiruan <huir...@tencent.com>
    Signed-off-by: Duo Zhang <zhang...@apache.org>
    (cherry picked from commit 00a719e76f16b7380f6695fc986f003e0e5f47fe)
    
    Conflicts:
            
hbase-server/src/main/java/org/apache/hadoop/hbase/tool/BulkLoadHFilesTool.java
---
 .../hadoop/hbase/tool/LoadIncrementalHFiles.java   | 11 +++++++++
 .../hbase/tool/TestLoadIncrementalHFiles.java      | 26 ++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
index 7842e21d076..5e3f2e9468f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
@@ -141,6 +141,9 @@ public class LoadIncrementalHFiles extends Configured 
implements Tool {
     BulkLoadHFiles.IGNORE_UNMATCHED_CF_CONF_KEY;
   public final static String ALWAYS_COPY_FILES = 
BulkLoadHFiles.ALWAYS_COPY_FILES;
 
+  public static final String FAIL_IF_NEED_SPLIT_HFILE =
+    "hbase.loadincremental.fail.if.need.split.hfile";
+
   // We use a '.' prefix which is ignored when walking directory trees
   // above. It is invalid family name.
   static final String TMP_DIR = ".tmp";
@@ -162,6 +165,8 @@ public class LoadIncrementalHFiles extends Configured 
implements Tool {
 
   private boolean replicate = true;
 
+  private boolean failIfNeedSplitHFile = false;
+
   /**
    * Represents an HFile waiting to be loaded. A queue is used in this class 
in order to support
    * the case where a region has split during the process of the load. When 
this happens, the HFile
@@ -195,6 +200,7 @@ public class LoadIncrementalHFiles extends Configured 
implements Tool {
     assignSeqIds = conf.getBoolean(ASSIGN_SEQ_IDS, true);
     maxFilesPerRegionPerFamily = conf.getInt(MAX_FILES_PER_REGION_PER_FAMILY, 
32);
     bulkLoadByFamily = 
conf.getBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, false);
+    failIfNeedSplitHFile = conf.getBoolean(FAIL_IF_NEED_SPLIT_HFILE, false);
     nrThreads =
       conf.getInt("hbase.loadincremental.threads.max", 
Runtime.getRuntime().availableProcessors());
     numRetries = new AtomicInteger(0);
@@ -803,6 +809,11 @@ public class LoadIncrementalHFiles extends Configured 
implements Tool {
       Bytes.compareTo(last.get(), startEndKeys.getSecond()[firstKeyRegionIdx]) 
< 0
         || Bytes.equals(startEndKeys.getSecond()[firstKeyRegionIdx], 
HConstants.EMPTY_BYTE_ARRAY);
     if (!lastKeyInRange) {
+      if (failIfNeedSplitHFile) {
+        throw new IOException(
+          "The key range of hfile=" + hfilePath + " fits into no region. " + 
"And because "
+            + FAIL_IF_NEED_SPLIT_HFILE + " was set to true, we just skip the 
next steps.");
+      }
       int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get());
       int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) >>> 1;
       // make sure the splitPoint is valid in case region overlap occur, maybe 
the splitPoint bigger
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java
index a842c1d223f..cef666942f8 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFiles.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.tool;
 
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -793,4 +794,29 @@ public class TestLoadIncrementalHFiles {
       
util.getConfiguration().setBoolean(BulkLoadHFiles.BULK_LOAD_HFILES_BY_FAMILY, 
false);
     }
   }
+
+  @Test
+  public void testFailIfNeedSplitHFile() throws IOException {
+    TableName tableName = TableName.valueOf(tn.getMethodName());
+    Table table = util.createTable(tableName, FAMILY);
+
+    util.loadTable(table, FAMILY);
+
+    FileSystem fs = util.getTestFileSystem();
+    Path sfPath = new Path(fs.getWorkingDirectory(), new 
Path(Bytes.toString(FAMILY), "file"));
+    HFileTestUtil.createHFile(util.getConfiguration(), fs, sfPath, FAMILY, 
QUALIFIER,
+      Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), 1000);
+
+    util.getAdmin().split(tableName);
+    util.waitFor(10000, 1000, () -> 
util.getAdmin().getRegions(tableName).size() > 1);
+
+    Configuration config = new Configuration(util.getConfiguration());
+    config.setBoolean(BulkLoadHFilesTool.FAIL_IF_NEED_SPLIT_HFILE, true);
+    BulkLoadHFilesTool tool = new BulkLoadHFilesTool(config);
+
+    String[] args = new String[] { fs.getWorkingDirectory().toString(), 
tableName.toString() };
+    assertThrows(IOException.class, () -> tool.run(args));
+    util.getHBaseCluster().getRegions(tableName)
+      .forEach(r -> assertEquals(1, 
r.getStore(FAMILY).getStorefiles().size()));
+  }
 }

Reply via email to