This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2.5
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.5 by this push:
new f05cd710028 HBASE-28806 ExportSnapshot failed if reference file
presented (#7118) (#7120)
f05cd710028 is described below
commit f05cd7100285d7358b4ef967d63a00b738132883
Author: Ray Mattingly <[email protected]>
AuthorDate: Thu Jun 26 12:01:32 2025 -0400
HBASE-28806 ExportSnapshot failed if reference file presented (#7118)
(#7120)
Signed-off-by: Ray Mattingly <[email protected]>
Co-authored-by: Alex Hughes <[email protected]>
Co-authored-by: Alex Hughes <[email protected]>
(cherry picked from commit 71d68e8e65823562212b22f6944dddb15b43597b)
---
.../hadoop/hbase/snapshot/ExportSnapshot.java | 26 ++++++++----
.../hadoop/hbase/snapshot/TestExportSnapshot.java | 48 ++++++++++++++++++++++
2 files changed, 65 insertions(+), 9 deletions(-)
diff --git
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
index 856cb5e9a67..e320e9b31d0 100644
---
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
+++
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/snapshot/ExportSnapshot.java
@@ -26,13 +26,16 @@ import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
+import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.BiConsumer;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -52,6 +55,10 @@ import
org.apache.hadoop.hbase.io.hadoopbackport.ThrottledInputStream;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -74,18 +81,12 @@ import
org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
-import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
-
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import
org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
-import
org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotFileInfo;
-import
org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
-
/**
* Export the specified snapshot to a given FileSystem. The .snapshot/name
folder is copied to the
* destination cluster and then all the hfiles/wals are copied using a
Map-Reduce Job in the
@@ -662,6 +663,7 @@ public class ExportSnapshot extends AbstractHBaseTool
implements Tool {
// Get snapshot files
LOG.info("Loading Snapshot '" + snapshotDesc.getName() + "' hfile list");
+ Set<String> addedFiles = new HashSet<>();
SnapshotReferenceUtil.visitReferencedFiles(conf, fs, snapshotDir,
snapshotDesc,
new SnapshotReferenceUtil.SnapshotVisitor() {
@Override
@@ -681,7 +683,13 @@ public class ExportSnapshot extends AbstractHBaseTool
implements Tool {
snapshotFileAndSize = getSnapshotFileAndSize(fs, conf, table,
referencedRegion, family,
referencedHFile, storeFile.hasFileSize() ?
storeFile.getFileSize() : -1);
}
- files.add(snapshotFileAndSize);
+ String fileToExport = snapshotFileAndSize.getFirst().getHfile();
+ if (!addedFiles.contains(fileToExport)) {
+ files.add(snapshotFileAndSize);
+ addedFiles.add(fileToExport);
+ } else {
+ LOG.debug("Skip the existing file: {}.", fileToExport);
+ }
}
});
diff --git
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
index 18d1a7eb9c4..0cea6f6b8e9 100644
---
a/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
+++
b/hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/snapshot/TestExportSnapshot.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
@@ -51,10 +52,12 @@ import
org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
+import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.HFileTestUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.AfterClass;
@@ -204,6 +207,51 @@ public class TestExportSnapshot {
TEST_UTIL.deleteTable(tableName0);
}
+ @Test
+ public void testExportFileSystemStateWithSplitRegion() throws Exception {
+ // disable compaction
+ admin.compactionSwitch(false,
admin.getRegionServers().stream().map(ServerName::getServerName)
+ .collect(Collectors.toList()));
+ // create Table
+ TableName splitTableName = TableName.valueOf(testName.getMethodName());
+ String splitTableSnap = "snapshot-" + testName.getMethodName();
+
admin.createTable(TableDescriptorBuilder.newBuilder(splitTableName).setColumnFamilies(
+
Lists.newArrayList(ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).build())).build());
+
+ Path output = TEST_UTIL.getDataTestDir("output/cf");
+ TEST_UTIL.getTestFileSystem().mkdirs(output);
+ // Create and load a large hfile so that the MR job takes long enough to run.
+ HFileTestUtil.createHFile(TEST_UTIL.getConfiguration(),
TEST_UTIL.getTestFileSystem(),
+ new Path(output, "test_file"), FAMILY, Bytes.toBytes("q"),
Bytes.toBytes("1"),
+ Bytes.toBytes("9"), 9999999);
+ BulkLoadHFilesTool tool = new
BulkLoadHFilesTool(TEST_UTIL.getConfiguration());
+ tool.run(new String[] { output.getParent().toString(),
splitTableName.getNameAsString() });
+
+ List<RegionInfo> regions = admin.getRegions(splitTableName);
+ assertEquals(1, regions.size());
+ tableNumFiles = regions.size();
+
+ // split region
+ admin.splitRegionAsync(regions.get(0).getEncodedNameAsBytes(),
Bytes.toBytes("5")).get();
+ regions = admin.getRegions(splitTableName);
+ assertEquals(2, regions.size());
+
+ // take a snapshot
+ admin.snapshot(splitTableSnap, splitTableName);
+ // export snapshot and verify
+ Configuration tmpConf = TEST_UTIL.getConfiguration();
+ // Decrease the copier's buffer size so that the export task does not finish too quickly
+ tmpConf.setInt("snapshot.export.buffer.size", 1);
+ // Decrease the maximum number of files per mapper to ensure the three files (1 hfile + 2
+ // reference files) are copied in different mappers concurrently.
+ tmpConf.setInt("snapshot.export.default.map.group", 1);
+ testExportFileSystemState(tmpConf, splitTableName,
Bytes.toBytes(splitTableSnap),
+ Bytes.toBytes(splitTableSnap), tableNumFiles,
TEST_UTIL.getDefaultRootDirPath(),
+ getHdfsDestinationDir(), false, false, getBypassRegionPredicate(), true,
false);
+ // delete table
+ TEST_UTIL.deleteTable(splitTableName);
+ }
+
@Test
public void testExportFileSystemStateWithSkipTmp() throws Exception {
TEST_UTIL.getConfiguration().setBoolean(ExportSnapshot.CONF_SKIP_TMP,
true);