This is an automated email from the ASF dual-hosted git repository.

zghao pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 8e3727e  HBASE-25281 Bulkload split hfile too many times due to 
unreasonable split point (#2692)
8e3727e is described below

commit 8e3727ea06e85f37247f060598a54d37b303f21a
Author: niuyulin <nyl...@163.com>
AuthorDate: Wed Nov 25 15:27:09 2020 +0800

    HBASE-25281 Bulkload split hfile too many times due to unreasonable split 
point (#2692)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../hadoop/hbase/tool/LoadIncrementalHFiles.java   | 92 ++++++++++++++--------
 .../TestLoadIncrementalHFilesSplitRecovery.java    | 28 +++++++
 2 files changed, 85 insertions(+), 35 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
index ddc857c..866faf2 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/LoadIncrementalHFiles.java
@@ -723,6 +723,45 @@ public class LoadIncrementalHFiles extends Configured 
implements Tool {
   }
 
   /**
+   * @param startEndKeys the start/end keys of regions belonging to this table, 
the list in ascending
+   *          order by start key
+   * @param key the key used to find which region it belongs to
+   * @return region index
+   */
+  private int getRegionIndex(final Pair<byte[][], byte[][]> startEndKeys, 
byte[] key) {
+    int idx = Arrays.binarySearch(startEndKeys.getFirst(), key, 
Bytes.BYTES_COMPARATOR);
+    if (idx < 0) {
+      // not on boundary, returns -(insertion index). Calculate region it
+      // would be in.
+      idx = -(idx + 1) - 1;
+    }
+    return idx;
+  }
+
+  /**
+   * we can consider there is a region hole in following conditions. 1) if idx 
< 0, then first
+   * region info is lost. 2) if the endkey of a region is not equal to the 
startkey of the next
+   * region. 3) if the endkey of the last region is not empty.
+   */
+  private void checkRegionIndexValid(int idx, final Pair<byte[][], byte[][]> 
startEndKeys,
+    TableName tableName) throws IOException {
+    if (idx < 0) {
+      throw new IOException("The first region info for table " + tableName +
+        " can't be found in hbase:meta.Please use hbck tool to fix it first.");
+    } else if ((idx == startEndKeys.getFirst().length - 1) &&
+      !Bytes.equals(startEndKeys.getSecond()[idx], 
HConstants.EMPTY_BYTE_ARRAY)) {
+      throw new IOException("The last region info for table " + tableName +
+        " can't be found in hbase:meta.Please use hbck tool to fix it first.");
+    } else if (idx + 1 < startEndKeys.getFirst().length &&
+      !(Bytes.compareTo(startEndKeys.getSecond()[idx],
+        startEndKeys.getFirst()[idx + 1]) == 0)) {
+      throw new IOException("The endkey of one region for table " + tableName +
+        " is not equal to the startkey of the next region in hbase:meta." +
+        "Please use hbck tool to fix it first.");
+    }
+  }
+
+  /**
    * Attempt to assign the given load queue item into its target region group. 
If the hfile boundary
    * no longer fits into a region, physically splits the hfile such that the 
new bottom half will
    * fit and returns the list of LQI's corresponding to the resultant hfiles.
@@ -745,8 +784,8 @@ public class LoadIncrementalHFiles extends Configured 
implements Tool {
       return new Pair<>(null, hfilePath.getName());
     }
 
-    LOG.info("Trying to load hfile=" + hfilePath + " first=" + 
first.map(Bytes::toStringBinary) +
-        " last=" + last.map(Bytes::toStringBinary));
+    LOG.info("Trying to load hfile=" + hfilePath + " first=" + 
first.map(Bytes::toStringBinary)
+        + " last=" + last.map(Bytes::toStringBinary));
     if (!first.isPresent() || !last.isPresent()) {
       assert !first.isPresent() && !last.isPresent();
       // TODO what if this is due to a bad HFile?
@@ -754,47 +793,30 @@ public class LoadIncrementalHFiles extends Configured 
implements Tool {
       return null;
     }
     if (Bytes.compareTo(first.get(), last.get()) > 0) {
-      throw new IllegalArgumentException("Invalid range: " + 
Bytes.toStringBinary(first.get()) +
-          " > " + Bytes.toStringBinary(last.get()));
-    }
-    int idx = Arrays.binarySearch(startEndKeys.getFirst(), first.get(), 
Bytes.BYTES_COMPARATOR);
-    if (idx < 0) {
-      // not on boundary, returns -(insertion index). Calculate region it
-      // would be in.
-      idx = -(idx + 1) - 1;
-    }
-    int indexForCallable = idx;
-
-    /**
-     * we can consider there is a region hole in following conditions. 1) if 
idx < 0,then first
-     * region info is lost. 2) if the endkey of a region is not equal to the 
startkey of the next
-     * region. 3) if the endkey of the last region is not empty.
-     */
-    if (indexForCallable < 0) {
-      throw new IOException("The first region info for table " + 
table.getName() +
-          " can't be found in hbase:meta.Please use hbck tool to fix it 
first.");
-    } else if ((indexForCallable == startEndKeys.getFirst().length - 1) &&
-        !Bytes.equals(startEndKeys.getSecond()[indexForCallable], 
HConstants.EMPTY_BYTE_ARRAY)) {
-      throw new IOException("The last region info for table " + 
table.getName() +
-          " can't be found in hbase:meta.Please use hbck tool to fix it 
first.");
-    } else if (indexForCallable + 1 < startEndKeys.getFirst().length &&
-        !(Bytes.compareTo(startEndKeys.getSecond()[indexForCallable],
-          startEndKeys.getFirst()[indexForCallable + 1]) == 0)) {
-      throw new IOException("The endkey of one region for table " + 
table.getName() +
-          " is not equal to the startkey of the next region in hbase:meta." +
-          "Please use hbck tool to fix it first.");
+      throw new IllegalArgumentException("Invalid range: " + 
Bytes.toStringBinary(first.get())
+          + " > " + Bytes.toStringBinary(last.get()));
     }
 
-    boolean lastKeyInRange = Bytes.compareTo(last.get(), 
startEndKeys.getSecond()[idx]) < 0 ||
-        Bytes.equals(startEndKeys.getSecond()[idx], 
HConstants.EMPTY_BYTE_ARRAY);
+    int firstKeyRegionIdx = getRegionIndex(startEndKeys, first.get());
+    checkRegionIndexValid(firstKeyRegionIdx, startEndKeys, table.getName());
+    boolean lastKeyInRange =
+        Bytes.compareTo(last.get(), 
startEndKeys.getSecond()[firstKeyRegionIdx]) < 0 || Bytes
+            .equals(startEndKeys.getSecond()[firstKeyRegionIdx], 
HConstants.EMPTY_BYTE_ARRAY);
     if (!lastKeyInRange) {
+      int lastKeyRegionIdx = getRegionIndex(startEndKeys, last.get());
+      int splitIdx = (firstKeyRegionIdx + lastKeyRegionIdx) >>> 1;
+      // make sure the splitPoint is valid in case region overlap occur, maybe 
the splitPoint bigger
+      // than hfile.endkey w/o this check
+      if (splitIdx != firstKeyRegionIdx) {
+        checkRegionIndexValid(splitIdx, startEndKeys, table.getName());
+      }
       List<LoadQueueItem> lqis = splitStoreFile(item, table,
-        startEndKeys.getFirst()[indexForCallable], 
startEndKeys.getSecond()[indexForCallable]);
+        startEndKeys.getFirst()[firstKeyRegionIdx], 
startEndKeys.getSecond()[splitIdx]);
       return new Pair<>(lqis, null);
     }
 
     // group regions.
-    regionGroups.put(ByteBuffer.wrap(startEndKeys.getFirst()[idx]), item);
+    
regionGroups.put(ByteBuffer.wrap(startEndKeys.getFirst()[firstKeyRegionIdx]), 
item);
     return null;
   }
 
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFilesSplitRecovery.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFilesSplitRecovery.java
index 05c2457..c65e3f2 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFilesSplitRecovery.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestLoadIncrementalHFilesSplitRecovery.java
@@ -470,6 +470,34 @@ public class TestLoadIncrementalHFilesSplitRecovery {
     }
   }
 
+  @Test
+  public void testCorrectSplitPoint() throws Exception {
+    final TableName table = TableName.valueOf(name.getMethodName());
+    byte[][] SPLIT_KEYS = new byte[][] { Bytes.toBytes("row_00000010"),
+        Bytes.toBytes("row_00000020"), Bytes.toBytes("row_00000030"), 
Bytes.toBytes("row_00000040"),
+        Bytes.toBytes("row_00000050"), Bytes.toBytes("row_00000060"),
+        Bytes.toBytes("row_00000070") };
+    setupTableWithSplitkeys(table, NUM_CFS, SPLIT_KEYS);
+
+    final AtomicInteger bulkloadRpcTimes = new AtomicInteger();
+    BulkLoadHFilesTool loader = new 
BulkLoadHFilesTool(util.getConfiguration()) {
+
+      @Override
+      protected void bulkLoadPhase(Table table, Connection conn, 
ExecutorService pool,
+          Deque<LoadIncrementalHFiles.LoadQueueItem> queue,
+          Multimap<ByteBuffer, LoadIncrementalHFiles.LoadQueueItem> 
regionGroups, boolean copyFile,
+          Map<LoadIncrementalHFiles.LoadQueueItem, ByteBuffer> item2RegionMap) 
throws IOException {
+        bulkloadRpcTimes.addAndGet(1);
+        super.bulkLoadPhase(table, conn, pool, queue, regionGroups, copyFile, 
item2RegionMap);
+      }
+    };
+
+    Path dir = buildBulkFiles(table, 1);
+    loader.bulkLoad(table, dir);
+    // before HBASE-25281 we need invoke bulkload rpc 8 times
+    assertEquals(4, bulkloadRpcTimes.get());
+  }
+
   /**
    * This test creates a table with many small regions. The bulk load files 
would be splitted
    * multiple times before all of them can be loaded successfully.

Reply via email to