This is an automated email from the ASF dual-hosted git repository.
apurtell pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-3 by this push:
new 634a7ace917 HBASE-28919 Soft drop for destructive table actions (#7183)
634a7ace917 is described below
commit 634a7ace9178dfad4f35f8abaa990033fc830efa
Author: Andrew Purtell <[email protected]>
AuthorDate: Tue Aug 5 13:54:46 2025 -0700
HBASE-28919 Soft drop for destructive table actions (#7183)
Although HFiles are copied to the archive in a destructive schema change, recovery
scenarios are not automatic and involve some operator labor to reconstruct the
table and re-import the archived data. We can easily prevent the deletion of the
HFiles of a deleted table or column family by taking a snapshot of the table
immediately prior to any destructive schema action. We also set a TTL on the
snapshot so that housekeeping of unwanted HFiles remains no-touch. Because we take
a table snapshot, all table structure and metadata are also captured and saved, so
fast recovery is possible, either as a restore from snapshot or as a clone from
snapshot to a new table.
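A minimal operator-side recovery sketch, assuming the public Admin API and the
auto_<table>_<timestamp> naming convention added by this change; the table and
clone names below are hypothetical:

import java.util.List;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.SnapshotDescription;

public class RecoverDroppedTableSketch {
  public static void main(String[] args) throws Exception {
    TableName dropped = TableName.valueOf("my_table");            // hypothetical dropped table
    TableName restored = TableName.valueOf("my_table_restored");  // hypothetical clone target
    try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
      Admin admin = conn.getAdmin()) {
      // Recovery snapshots are named auto_<table>_<timestamp>; pick the most recent one.
      String latest = null;
      for (SnapshotDescription sd : admin.listSnapshots()) {
        String name = sd.getName();
        if (name.startsWith("auto_" + dropped.getNameAsString() + "_")
          && (latest == null || name.compareTo(latest) > 0)) {
          latest = name;
        }
      }
      if (latest != null) {
        // Clone the snapshot to a new table. For an in-place restore of an existing
        // (disabled) table, Admin#restoreSnapshot could be used instead.
        admin.cloneSnapshot(latest, restored);
      }
    }
  }
}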
Existing site configuration property prerequisites:
* hbase.snapshot.enabled = true (default is true)
New site configuration properties:
* hbase.snapshot.before.destructive.action.enabled = true (default is false)
* hbase.snapshot.before.destructive.action.ttl = <integer>, in seconds (default 86400, one day)
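A minimal sketch of setting the same properties programmatically on a Configuration,
assuming standard hbase-common APIs; the values are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class RecoverySnapshotConfigSketch {
  public static Configuration enableRecoverySnapshots() {
    Configuration conf = HBaseConfiguration.create();
    // Prerequisite: snapshot support must be enabled (true by default).
    conf.setBoolean("hbase.snapshot.enabled", true);
    // Opt in to recovery snapshots before destructive actions (false by default).
    conf.setBoolean("hbase.snapshot.before.destructive.action.enabled", true);
    // Keep recovery snapshots for seven days instead of the default 86400 seconds (one day).
    conf.setLong("hbase.snapshot.before.destructive.action.ttl", 7L * 86400L);
    return conf;
  }
}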
Signed-off-by: Viraj Jasani <[email protected]>
---
.../java/org/apache/hadoop/hbase/HConstants.java | 12 ++
.../protobuf/server/master/MasterProcedure.proto | 13 ++
.../master/procedure/DeleteTableProcedure.java | 54 ++++-
.../master/procedure/ModifyTableProcedure.java | 61 ++++--
.../master/procedure/RecoverySnapshotUtils.java | 206 +++++++++++++++++++
.../master/procedure/TruncateRegionProcedure.java | 98 +++++++--
.../master/procedure/TruncateTableProcedure.java | 52 ++++-
.../TestDeleteTableProcedureWithRecovery.java | 159 +++++++++++++++
.../TestModifyTableProcedureWithRecovery.java | 177 ++++++++++++++++
.../procedure/TestRecoverySnapshotUtils.java | 96 +++++++++
.../TestTruncateRegionProcedureWithRecovery.java | 224 +++++++++++++++++++++
.../TestTruncateTableProcedureWithRecovery.java | 166 +++++++++++++++
12 files changed, 1273 insertions(+), 45 deletions(-)
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 29def997818..1051686d32e 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -1533,6 +1533,18 @@ public final class HConstants {
// User defined Default TTL config key
public static final String DEFAULT_SNAPSHOT_TTL_CONFIG_KEY =
"hbase.master.snapshot.ttl";
+ // Soft drop for destructive table actions configuration
+ public static final String SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY =
+ "hbase.snapshot.before.destructive.action.enabled";
+ public static final boolean DEFAULT_SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED = false;
+
+ public static final String SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_TTL_KEY =
+ "hbase.snapshot.before.destructive.action.ttl";
+ public static final long DEFAULT_SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_TTL = 86400; // 1 day
+
+ // Table-level attribute name for recovery snapshot TTL override
+ public static final String TABLE_RECOVERY_SNAPSHOT_TTL_KEY = "RECOVERY_SNAPSHOT_TTL";
+
// Regions Recovery based on high storeFileRefCount threshold value
public static final String STORE_FILE_REF_COUNT_THRESHOLD =
"hbase.regions.recovery.store.file.ref.count";
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
index 6323f00ecb3..e3b43afd66a 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
@@ -77,6 +77,7 @@ enum ModifyTableState {
MODIFY_TABLE_CLOSE_EXCESS_REPLICAS = 8;
MODIFY_TABLE_ASSIGN_NEW_REPLICAS = 9;
MODIFY_TABLE_SYNC_ERASURE_CODING_POLICY = 10;
+ MODIFY_TABLE_SNAPSHOT = 11;
}
message ModifyTableStateData {
@@ -86,6 +87,7 @@ message ModifyTableStateData {
required bool delete_column_family_in_modify = 4;
optional bool should_check_descriptor = 5;
optional bool reopen_regions = 6;
+ optional string snapshot_name = 7;
}
enum TruncateTableState {
@@ -96,6 +98,7 @@ enum TruncateTableState {
TRUNCATE_TABLE_ADD_TO_META = 5;
TRUNCATE_TABLE_ASSIGN_REGIONS = 6;
TRUNCATE_TABLE_POST_OPERATION = 7;
+ TRUNCATE_TABLE_SNAPSHOT = 8;
}
message TruncateTableStateData {
@@ -104,6 +107,7 @@ message TruncateTableStateData {
optional TableName table_name = 3;
optional TableSchema table_schema = 4;
repeated RegionInfo region_info = 5;
+ optional string snapshot_name = 6;
}
enum TruncateRegionState {
@@ -112,6 +116,13 @@ enum TruncateRegionState {
TRUNCATE_REGION_REMOVE = 3;
TRUNCATE_REGION_MAKE_ONLINE = 4;
TRUNCATE_REGION_POST_OPERATION = 5;
+ TRUNCATE_REGION_SNAPSHOT = 6;
+}
+
+message TruncateRegionStateData {
+ required UserInformation user_info = 1;
+ required RegionInfo region_info = 2;
+ optional string snapshot_name = 3;
}
enum DeleteTableState {
@@ -121,12 +132,14 @@ enum DeleteTableState {
DELETE_TABLE_UPDATE_DESC_CACHE = 4;
DELETE_TABLE_UNASSIGN_REGIONS = 5;
DELETE_TABLE_POST_OPERATION = 6;
+ DELETE_TABLE_SNAPSHOT = 7;
}
message DeleteTableStateData {
required UserInformation user_info = 1;
required TableName table_name = 2;
repeated RegionInfo region_info = 3;
+ optional string snapshot_name = 4;
}
enum CreateNamespaceState {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
index fc259433784..e199f6d5971 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java
@@ -67,6 +67,7 @@ public class DeleteTableProcedure extends
AbstractStateMachineTableProcedure<Del
private List<RegionInfo> regions;
private TableName tableName;
private RetryCounter retryCounter;
+ private String recoverySnapshotName;
public DeleteTableProcedure() {
// Required by the Procedure framework to create the procedure on replay
@@ -110,6 +111,23 @@ public class DeleteTableProcedure extends
AbstractStateMachineTableProcedure<Del
// Call coprocessors
preDelete(env);
+ // Check if we should create a recovery snapshot
+ if (RecoverySnapshotUtils.isRecoveryEnabled(env)) {
+ setNextState(DeleteTableState.DELETE_TABLE_SNAPSHOT);
+ } else {
+ setNextState(DeleteTableState.DELETE_TABLE_CLEAR_FS_LAYOUT);
+ }
+ break;
+ case DELETE_TABLE_SNAPSHOT:
+ // Create recovery snapshot procedure as child procedure
+ recoverySnapshotName =
RecoverySnapshotUtils.generateSnapshotName(getTableName());
+ SnapshotProcedure snapshotProcedure =
+ RecoverySnapshotUtils.createSnapshotProcedure(env, getTableName(),
recoverySnapshotName,
+ env.getMasterServices().getTableDescriptors().get(tableName));
+ // Submit snapshot procedure as child procedure
+ addChildProcedure(snapshotProcedure);
+ LOG.debug("Creating recovery snapshot {} for table {} before
deletion",
+ recoverySnapshotName, getTableName());
setNextState(DeleteTableState.DELETE_TABLE_CLEAR_FS_LAYOUT);
break;
case DELETE_TABLE_CLEAR_FS_LAYOUT:
@@ -171,22 +189,34 @@ public class DeleteTableProcedure extends
AbstractStateMachineTableProcedure<Del
@Override
protected void rollbackState(final MasterProcedureEnv env, final
DeleteTableState state) {
- if (state == DeleteTableState.DELETE_TABLE_PRE_OPERATION) {
- // nothing to rollback, pre-delete is just table-state checks.
- // We can fail if the table does not exist or is not disabled.
- // TODO: coprocessor rollback semantic is still undefined.
- releaseSyncLatch();
- return;
+ switch (state) {
+ case DELETE_TABLE_PRE_OPERATION:
+ // nothing to rollback, pre-delete is just table-state checks.
+ // We can fail if the table does not exist or is not disabled.
+ // TODO: coprocessor rollback semantic is still undefined.
+ releaseSyncLatch();
+ return;
+ case DELETE_TABLE_SNAPSHOT:
+ // Handle recovery snapshot rollback. There is no
DeleteSnapshotProcedure as such to use
+ // here directly as a child procedure, so we call a utility method to
delete the snapshot
+ // which uses the SnapshotManager to delete the snapshot.
+ if (recoverySnapshotName != null) {
+ RecoverySnapshotUtils.deleteRecoverySnapshot(env,
recoverySnapshotName, getTableName());
+ recoverySnapshotName = null;
+ }
+ return;
+ default:
+ // Delete from other states doesn't have a rollback. The execution
will succeed, at some
+ // point.
+ throw new UnsupportedOperationException("unhandled state=" + state);
}
-
- // The delete doesn't have a rollback. The execution will succeed, at some
point.
- throw new UnsupportedOperationException("unhandled state=" + state);
}
@Override
protected boolean isRollbackSupported(final DeleteTableState state) {
switch (state) {
case DELETE_TABLE_PRE_OPERATION:
+ case DELETE_TABLE_SNAPSHOT:
return true;
default:
return false;
@@ -236,6 +266,9 @@ public class DeleteTableProcedure extends
AbstractStateMachineTableProcedure<Del
state.addRegionInfo(ProtobufUtil.toRegionInfo(hri));
}
}
+ if (recoverySnapshotName != null) {
+ state.setSnapshotName(recoverySnapshotName);
+ }
serializer.serialize(state.build());
}
@@ -255,6 +288,9 @@ public class DeleteTableProcedure extends
AbstractStateMachineTableProcedure<Del
regions.add(ProtobufUtil.toRegionInfo(hri));
}
}
+ if (state.hasSnapshotName()) {
+ recoverySnapshotName = state.getSnapshotName();
+ }
}
private boolean prepareDelete(final MasterProcedureEnv env) throws
IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
index 03ad19799cd..2b6933cca09 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java
@@ -60,6 +60,8 @@ public class ModifyTableProcedure extends
AbstractStateMachineTableProcedure<Mod
private boolean deleteColumnFamilyInModify;
private boolean shouldCheckDescriptor;
private boolean reopenRegions;
+ private String recoverySnapshotName;
+
/**
* List of column families that cannot be deleted from the hbase:meta table.
They are critical to
* cluster operation. This is a bit of an odd place to keep this list but
then this is the tooling
@@ -188,11 +190,27 @@ public class ModifyTableProcedure extends
AbstractStateMachineTableProcedure<Mod
// We cannot allow changes to region replicas when
'reopenRegions==false',
// as this mode bypasses the state management required for modifying
region replicas.
if (reopenRegions) {
- setNextState(ModifyTableState.MODIFY_TABLE_CLOSE_EXCESS_REPLICAS);
+ // Check if we should create a recovery snapshot for column family
deletion
+ if (deleteColumnFamilyInModify &&
RecoverySnapshotUtils.isRecoveryEnabled(env)) {
+ setNextState(ModifyTableState.MODIFY_TABLE_SNAPSHOT);
+ } else {
+
setNextState(ModifyTableState.MODIFY_TABLE_CLOSE_EXCESS_REPLICAS);
+ }
} else {
setNextState(ModifyTableState.MODIFY_TABLE_UPDATE_TABLE_DESCRIPTOR);
}
break;
+ case MODIFY_TABLE_SNAPSHOT:
+ // Create recovery snapshot procedure as child procedure
+ recoverySnapshotName =
RecoverySnapshotUtils.generateSnapshotName(getTableName());
+ SnapshotProcedure snapshotProcedure =
RecoverySnapshotUtils.createSnapshotProcedure(env,
+ getTableName(), recoverySnapshotName, unmodifiedTableDescriptor);
+ // Submit snapshot procedure as child procedure
+ addChildProcedure(snapshotProcedure);
+ LOG.debug("Creating recovery snapshot {} for table {} before column
deletion",
+ recoverySnapshotName, getTableName());
+ setNextState(ModifyTableState.MODIFY_TABLE_CLOSE_EXCESS_REPLICAS);
+ break;
case MODIFY_TABLE_CLOSE_EXCESS_REPLICAS:
if (isTableEnabled(env)) {
closeExcessReplicasIfNeeded(env);
@@ -275,24 +293,35 @@ public class ModifyTableProcedure extends
AbstractStateMachineTableProcedure<Mod
@Override
protected void rollbackState(final MasterProcedureEnv env, final
ModifyTableState state)
throws IOException {
- if (
- state == ModifyTableState.MODIFY_TABLE_PREPARE
- || state == ModifyTableState.MODIFY_TABLE_PRE_OPERATION
- ) {
- // nothing to rollback, pre-modify is just checks.
- // TODO: coprocessor rollback semantic is still undefined.
- return;
+ switch (state) {
+ case MODIFY_TABLE_PREPARE:
+ case MODIFY_TABLE_PRE_OPERATION:
+ // Nothing to roll back.
+ // TODO: Coprocessor rollback semantic is still undefined.
+ break;
+ case MODIFY_TABLE_SNAPSHOT:
+ // Handle recovery snapshot rollback. There is no
DeleteSnapshotProcedure as such to use
+ // here directly as a child procedure, so we call a utility method to
delete the snapshot
+ // which uses the SnapshotManager to delete the snapshot.
+ if (recoverySnapshotName != null) {
+ RecoverySnapshotUtils.deleteRecoverySnapshot(env,
recoverySnapshotName, getTableName());
+ recoverySnapshotName = null;
+ }
+ break;
+ default:
+ // Modify from other states doesn't have a rollback. The execution
will succeed, at some
+ // point.
+ throw new UnsupportedOperationException("unhandled state=" + state);
}
-
- // The delete doesn't have a rollback. The execution will succeed, at some
point.
- throw new UnsupportedOperationException("unhandled state=" + state);
}
@Override
protected boolean isRollbackSupported(final ModifyTableState state) {
switch (state) {
- case MODIFY_TABLE_PRE_OPERATION:
case MODIFY_TABLE_PREPARE:
+ case MODIFY_TABLE_PRE_OPERATION:
+ case MODIFY_TABLE_SNAPSHOT:
+ case MODIFY_TABLE_CLOSE_EXCESS_REPLICAS:
return true;
default:
return false;
@@ -335,6 +364,10 @@ public class ModifyTableProcedure extends
AbstractStateMachineTableProcedure<Mod
.setUnmodifiedTableSchema(ProtobufUtil.toTableSchema(unmodifiedTableDescriptor));
}
+ if (recoverySnapshotName != null) {
+ modifyTableMsg.setSnapshotName(recoverySnapshotName);
+ }
+
serializer.serialize(modifyTableMsg.build());
}
@@ -356,6 +389,10 @@ public class ModifyTableProcedure extends
AbstractStateMachineTableProcedure<Mod
unmodifiedTableDescriptor =
ProtobufUtil.toTableDescriptor(modifyTableMsg.getUnmodifiedTableSchema());
}
+
+ if (modifyTableMsg.hasSnapshotName()) {
+ recoverySnapshotName = modifyTableMsg.getSnapshotName();
+ }
}
@Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverySnapshotUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverySnapshotUtils.java
new file mode 100644
index 00000000000..7e4dba9a26c
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RecoverySnapshotUtils.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import java.io.IOException;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
+
+/**
+ * Utility class for recovery snapshot functionality, which automatically
creates snapshots before
+ * dropping tables, truncating tables, or deleting column families.
+ */
+@InterfaceAudience.Private
+public final class RecoverySnapshotUtils {
+ private static final Logger LOG =
LoggerFactory.getLogger(RecoverySnapshotUtils.class);
+
+ private RecoverySnapshotUtils() {
+
+ }
+
+ /**
+ * Checks if recovery snapshots are enabled for destructive table actions.
+ * @param env MasterProcedureEnv
+ * @return true if recovery snapshot functionality is enabled, false
otherwise
+ */
+ public static boolean isRecoveryEnabled(final MasterProcedureEnv env) {
+ return env.getMasterConfiguration().getBoolean(
+ HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY,
+ HConstants.DEFAULT_SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED)
+ &&
env.getMasterConfiguration().getBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED,
true);
+ }
+
+ /**
+ * Gets the TTL that should be used for snapshots created before destructive
schema actions.
+ * Checks for table-level override first, then falls back to site
configuration.
+ * @param env MasterProcedureEnv
+ * @param tableDescriptor the table descriptor to check for table-level TTL
override
+ * @return TTL in seconds
+ */
+ public static long getRecoverySnapshotTtl(final MasterProcedureEnv env,
+ final TableDescriptor tableDescriptor) {
+ // Check table-level override first
+ if (tableDescriptor != null) {
+ String tableLevelTtl =
tableDescriptor.getValue(HConstants.TABLE_RECOVERY_SNAPSHOT_TTL_KEY);
+ if (tableLevelTtl != null) {
+ try {
+ long ttl = Long.parseLong(tableLevelTtl);
+ LOG.debug("Using table-level recovery snapshot TTL {} seconds for
table {}", ttl,
+ tableDescriptor.getTableName());
+ return ttl;
+ } catch (NumberFormatException e) {
+ LOG.warn("Invalid table-level recovery snapshot TTL '{}' for table
{}, using default",
+ tableLevelTtl, tableDescriptor.getTableName());
+ }
+ }
+ }
+
+ // Fall back to site configuration
+ return env.getMasterConfiguration().getLong(
+ HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_TTL_KEY,
+ HConstants.DEFAULT_SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_TTL);
+ }
+
+ /**
+ * Generates a recovery snapshot name.
+ * <p>
+ * The naming convention is: <tt>auto_{table}_{timestamp}</tt>
+ * @param tableName the table name
+ * @return the generated snapshot name
+ */
+ public static String generateSnapshotName(final TableName tableName) {
+ return generateSnapshotName(tableName,
EnvironmentEdgeManager.currentTime());
+ }
+
+ /**
+ * Generates a recovery snapshot name.
+ * <p>
+ * The naming convention is: <tt>auto_{table}_{timestamp}</tt>
+ * @param tableName the table name
+ * @param timestamp the timestamp when the snapshot was initiated
+ * @return the generated snapshot name
+ */
+ public static String generateSnapshotName(final TableName tableName, final
long timestamp) {
+ return "auto_" + tableName.getNameAsString() + "_" + timestamp;
+ }
+
+ /**
+ * Creates a SnapshotDescription for the recovery snapshot for a given
operation.
+ * @param tableName the table name
+ * @param snapshotName the snapshot name
+ * @return SnapshotDescription for the recovery snapshot
+ */
+ public static SnapshotProtos.SnapshotDescription
+ buildSnapshotDescription(final TableName tableName, final String
snapshotName) {
+ return buildSnapshotDescription(tableName, snapshotName, 0,
+ SnapshotProtos.SnapshotDescription.Type.FLUSH);
+ }
+
+ /**
+ * Creates a SnapshotDescription for the recovery snapshot for a given
operation.
+ * @param tableName the table name
+ * @param snapshotName the snapshot name
+ * @param ttl the TTL for the snapshot in seconds (0 means no TTL)
+ * @param type the type of snapshot to create
+ * @return SnapshotDescription for the recovery snapshot
+ */
+ public static SnapshotProtos.SnapshotDescription buildSnapshotDescription(
+ final TableName tableName, final String snapshotName, final long ttl,
+ final SnapshotProtos.SnapshotDescription.Type type) {
+ SnapshotProtos.SnapshotDescription.Builder builder =
+ SnapshotProtos.SnapshotDescription.newBuilder();
+ builder.setVersion(SnapshotDescriptionUtils.SNAPSHOT_LAYOUT_VERSION);
+ builder.setName(snapshotName);
+ builder.setTable(tableName.getNameAsString());
+ builder.setType(type);
+ builder.setCreationTime(EnvironmentEdgeManager.currentTime());
+ builder.setTtl(ttl);
+ return builder.build();
+ }
+
+ /**
+ * Creates a SnapshotProcedure for soft drop functionality.
+ * <p>
+ * This method should be called from procedures that need to create a
snapshot before performing
+ * destructive operations. It will check for table-level TTL overrides.
+ * @param env MasterProcedureEnv
+ * @param tableName the table name
+ * @param snapshotName the name for the snapshot
+ * @param tableDescriptor the table descriptor to check for table-level TTL
override
+ * @return SnapshotProcedure that can be added as a child procedure
+ * @throws IOException if snapshot creation fails
+ */
+ public static SnapshotProcedure createSnapshotProcedure(final
MasterProcedureEnv env,
+ final TableName tableName, final String snapshotName, final
TableDescriptor tableDescriptor)
+ throws IOException {
+ return new SnapshotProcedure(env,
+ buildSnapshotDescription(tableName, snapshotName,
+ getRecoverySnapshotTtl(env, tableDescriptor),
+ env.getMasterServices().getTableStateManager().isTableState(tableName,
+ org.apache.hadoop.hbase.client.TableState.State.DISABLED)
+ ? SnapshotProtos.SnapshotDescription.Type.SKIPFLUSH
+ : SnapshotProtos.SnapshotDescription.Type.FLUSH));
+ }
+
+ /**
+ * Deletes a recovery snapshot during rollback scenarios.
+ * <p>
+ * This method should be called during procedure rollback to clean up any
snapshots that were
+ * created before the failure.
+ * @param env MasterProcedureEnv
+ * @param snapshotName the name of the snapshot to delete
+ * @param tableName the table name (for logging)
+ */
+ public static void deleteRecoverySnapshot(final MasterProcedureEnv env,
final String snapshotName,
+ final TableName tableName) {
+ try {
+ LOG.debug("Deleting recovery snapshot {} for table {} during rollback",
snapshotName,
+ tableName);
+ SnapshotManager snapshotManager =
env.getMasterServices().getSnapshotManager();
+ if (snapshotManager == null) {
+ LOG.warn("SnapshotManager is not available, cannot delete recovery
snapshot {}",
+ snapshotName);
+ return;
+ }
+ // Delete the snapshot using the snapshot manager. The SnapshotManager
will handle existence
+ // checks.
+ snapshotManager.deleteSnapshot(buildSnapshotDescription(tableName,
snapshotName));
+ LOG.info("Successfully deleted recovery snapshot {} for table {} during
rollback",
+ snapshotName, tableName);
+ } catch (SnapshotDoesNotExistException e) {
+ // Expected during rollback if the snapshot was never created or already
cleaned up.
+ LOG.debug("Recovery snapshot {} for table {} does not exist, skipping",
snapshotName,
+ tableName);
+ } catch (Exception e) {
+ // During rollback, we don't want to fail the rollback process due to
snapshot cleanup
+ // issues. Log the error and continue. The snapshot can be manually
cleaned up later.
+ LOG.warn("Failed to delete recovery snapshot {} for table {} during
rollback: {}. "
+ + "Manual cleanup may be required.", snapshotName, tableName,
e.getMessage());
+ }
+ }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateRegionProcedure.java
index 83722d6c1dc..2a3732c9998 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateRegionProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateRegionProcedure.java
@@ -26,19 +26,24 @@ import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateRegionState;
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateRegionStateData;
@InterfaceAudience.Private
public class TruncateRegionProcedure
extends AbstractStateMachineRegionProcedure<TruncateRegionState> {
private static final Logger LOG =
LoggerFactory.getLogger(TruncateRegionProcedure.class);
+ private String recoverySnapshotName;
+
@SuppressWarnings("unused")
public TruncateRegionProcedure() {
// Required by the Procedure framework to create the procedure on replay
@@ -75,6 +80,24 @@ public class TruncateRegionProcedure
: "Can't truncate replicas directly. "
+ "Replicas are auto-truncated when their primary is truncated.";
preTruncate(env);
+
+ // Check if we should create a recovery snapshot
+ if (RecoverySnapshotUtils.isRecoveryEnabled(env)) {
+ setNextState(TruncateRegionState.TRUNCATE_REGION_SNAPSHOT);
+ } else {
+ setNextState(TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE);
+ }
+ break;
+ case TRUNCATE_REGION_SNAPSHOT:
+ // Create recovery snapshot procedure as child procedure
+ recoverySnapshotName =
RecoverySnapshotUtils.generateSnapshotName(getTableName());
+ SnapshotProcedure snapshotProcedure =
+ RecoverySnapshotUtils.createSnapshotProcedure(env, getTableName(),
recoverySnapshotName,
+
env.getMasterServices().getTableDescriptors().get(getTableName()));
+ // Submit snapshot procedure as child procedure
+ addChildProcedure(snapshotProcedure);
+ LOG.debug("Creating recovery snapshot {} for table {} before
truncating region {}",
+ recoverySnapshotName, getTableName(),
getRegion().getRegionNameAsString());
setNextState(TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE);
break;
case TRUNCATE_REGION_MAKE_OFFLINE:
@@ -124,22 +147,32 @@ public class TruncateRegionProcedure
@Override
protected void rollbackState(final MasterProcedureEnv env, final
TruncateRegionState state)
throws IOException {
- if (state == TruncateRegionState.TRUNCATE_REGION_PRE_OPERATION) {
- // Nothing to rollback, pre-truncate is just table-state checks.
- return;
- }
- if (state == TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE) {
- RegionStateNode regionNode =
-
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
- if (regionNode == null) {
- // Region was unassigned by state TRUNCATE_REGION_MAKE_OFFLINE.
- // So Assign it back
- addChildProcedure(createAssignProcedures(env));
- }
- return;
+ switch (state) {
+ case TRUNCATE_REGION_PRE_OPERATION:
+ // Nothing to rollback, pre-truncate is just table-state checks.
+ return;
+ case TRUNCATE_REGION_SNAPSHOT:
+ // Handle recovery snapshot rollback. There is no
DeleteSnapshotProcedure as such to use
+ // here directly as a child procedure, so we call a utility method to
delete the snapshot
+ // which uses the SnapshotManager to delete the snapshot.
+ if (recoverySnapshotName != null) {
+ RecoverySnapshotUtils.deleteRecoverySnapshot(env,
recoverySnapshotName, getTableName());
+ recoverySnapshotName = null;
+ }
+ return;
+ case TRUNCATE_REGION_MAKE_OFFLINE:
+ RegionStateNode regionNode =
+
env.getAssignmentManager().getRegionStates().getRegionStateNode(getRegion());
+ if (regionNode == null) {
+ // Region was unassigned by state TRUNCATE_REGION_MAKE_OFFLINE.
+ // So Assign it back
+ addChildProcedure(createAssignProcedures(env));
+ }
+ return;
+ default:
+ // The truncate doesn't have a rollback. The execution will succeed,
at some point.
+ throw new UnsupportedOperationException("unhandled state=" + state);
}
- // The truncate doesn't have a rollback. The execution will succeed, at
some point.
- throw new UnsupportedOperationException("unhandled state=" + state);
}
@Override
@@ -151,7 +184,7 @@ public class TruncateRegionProcedure
protected boolean isRollbackSupported(final TruncateRegionState state) {
switch (state) {
case TRUNCATE_REGION_PRE_OPERATION:
- return true;
+ case TRUNCATE_REGION_SNAPSHOT:
case TRUNCATE_REGION_MAKE_OFFLINE:
return true;
default:
@@ -208,6 +241,29 @@ public class TruncateRegionProcedure
return TableOperationType.REGION_TRUNCATE;
}
+ @Override
+ protected void serializeStateData(ProcedureStateSerializer serializer)
throws IOException {
+ super.serializeStateData(serializer);
+ TruncateRegionStateData.Builder state =
TruncateRegionStateData.newBuilder()
+ .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
+ .setRegionInfo(ProtobufUtil.toRegionInfo(getRegion()));
+ if (recoverySnapshotName != null) {
+ state.setSnapshotName(recoverySnapshotName);
+ }
+ serializer.serialize(state.build());
+ }
+
+ @Override
+ protected void deserializeStateData(ProcedureStateSerializer serializer)
throws IOException {
+ super.deserializeStateData(serializer);
+ TruncateRegionStateData state =
serializer.deserialize(TruncateRegionStateData.class);
+ setUser(MasterProcedureUtil.toUserInfo(state.getUserInfo()));
+ setRegion(ProtobufUtil.toRegionInfo(state.getRegionInfo()));
+ if (state.hasSnapshotName()) {
+ recoverySnapshotName = state.getSnapshotName();
+ }
+ }
+
private TransitRegionStateProcedure
createUnAssignProcedures(MasterProcedureEnv env)
throws IOException {
return env.getAssignmentManager().createOneUnassignProcedure(getRegion(),
true, true);
@@ -216,4 +272,14 @@ public class TruncateRegionProcedure
private TransitRegionStateProcedure
createAssignProcedures(MasterProcedureEnv env) {
return env.getAssignmentManager().createOneAssignProcedure(getRegion(),
true, true);
}
+
+ @Override
+ protected boolean holdLock(MasterProcedureEnv env) {
+ if (RecoverySnapshotUtils.isRecoveryEnabled(env)) {
+ // If we are to take a recovery snapshot before deleting the region we
will need to allow the
+ // snapshot procedure to lock the table.
+ return false;
+ }
+ return super.holdLock(env);
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
index 78d8fdf3bbc..028eff1821a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
@@ -49,6 +49,7 @@ public class TruncateTableProcedure extends
AbstractStateMachineTableProcedure<T
private List<RegionInfo> regions;
private TableDescriptor tableDescriptor;
private TableName tableName;
+ private String recoverySnapshotName;
public TruncateTableProcedure() {
// Required by the Procedure framework to create the procedure on replay
@@ -97,6 +98,23 @@ public class TruncateTableProcedure extends
AbstractStateMachineTableProcedure<T
// the procedure stage and can get recovered if the procedure
crashes between
// TRUNCATE_TABLE_REMOVE_FROM_META and
TRUNCATE_TABLE_CREATE_FS_LAYOUT
tableDescriptor =
env.getMasterServices().getTableDescriptors().get(tableName);
+
+ // Check if we should create a recovery snapshot
+ if (RecoverySnapshotUtils.isRecoveryEnabled(env)) {
+ setNextState(TruncateTableState.TRUNCATE_TABLE_SNAPSHOT);
+ } else {
+ setNextState(TruncateTableState.TRUNCATE_TABLE_CLEAR_FS_LAYOUT);
+ }
+ break;
+ case TRUNCATE_TABLE_SNAPSHOT:
+ // Create recovery snapshot procedure as child procedure
+ recoverySnapshotName =
RecoverySnapshotUtils.generateSnapshotName(tableName);
+ SnapshotProcedure snapshotProcedure =
RecoverySnapshotUtils.createSnapshotProcedure(env,
+ tableName, recoverySnapshotName, tableDescriptor);
+ // Submit snapshot procedure as child procedure
+ addChildProcedure(snapshotProcedure);
+ LOG.debug("Creating recovery snapshot {} for table {} before
truncation",
+ recoverySnapshotName, tableName);
setNextState(TruncateTableState.TRUNCATE_TABLE_CLEAR_FS_LAYOUT);
break;
case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
@@ -160,15 +178,26 @@ public class TruncateTableProcedure extends
AbstractStateMachineTableProcedure<T
@Override
protected void rollbackState(final MasterProcedureEnv env, final
TruncateTableState state) {
- if (state == TruncateTableState.TRUNCATE_TABLE_PRE_OPERATION) {
- // nothing to rollback, pre-truncate is just table-state checks.
- // We can fail if the table does not exist or is not disabled.
- // TODO: coprocessor rollback semantic is still undefined.
- return;
+ switch (state) {
+ case TRUNCATE_TABLE_PRE_OPERATION:
+ // nothing to rollback, pre-truncate is just table-state checks.
+ // We can fail if the table does not exist or is not disabled.
+ // TODO: coprocessor rollback semantic is still undefined.
+ break;
+ case TRUNCATE_TABLE_SNAPSHOT:
+ // Handle recovery snapshot rollback. There is no
DeleteSnapshotProcedure as such to use
+ // here directly as a child procedure, so we call a utility method to
delete the snapshot
+ // which uses the SnapshotManager to delete the snapshot.
+ if (recoverySnapshotName != null) {
+ RecoverySnapshotUtils.deleteRecoverySnapshot(env,
recoverySnapshotName, tableName);
+ recoverySnapshotName = null;
+ }
+ break;
+ default:
+ // Truncate from other states doesn't have a rollback. The execution
will succeed, at some
+ // point.
+ throw new UnsupportedOperationException("unhandled state=" + state);
}
-
- // The truncate doesn't have a rollback. The execution will succeed, at
some point.
- throw new UnsupportedOperationException("unhandled state=" + state);
}
@Override
@@ -180,6 +209,7 @@ public class TruncateTableProcedure extends
AbstractStateMachineTableProcedure<T
protected boolean isRollbackSupported(final TruncateTableState state) {
switch (state) {
case TRUNCATE_TABLE_PRE_OPERATION:
+ case TRUNCATE_TABLE_SNAPSHOT:
return true;
default:
return false;
@@ -244,6 +274,9 @@ public class TruncateTableProcedure extends
AbstractStateMachineTableProcedure<T
state.addRegionInfo(ProtobufUtil.toRegionInfo(hri));
}
}
+ if (recoverySnapshotName != null) {
+ state.setSnapshotName(recoverySnapshotName);
+ }
serializer.serialize(state.build());
}
@@ -269,6 +302,9 @@ public class TruncateTableProcedure extends
AbstractStateMachineTableProcedure<T
regions.add(ProtobufUtil.toRegionInfo(hri));
}
}
+ if (state.hasSnapshotName()) {
+ recoverySnapshotName = state.getSnapshotName();
+ }
}
private static List<RegionInfo> recreateRegionInfo(final List<RegionInfo>
regions) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedureWithRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedureWithRecovery.java
new file mode 100644
index 00000000000..e17fb517105
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestDeleteTableProcedureWithRecovery.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.DeleteTableState;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestDeleteTableProcedureWithRecovery extends
TestTableDDLProcedureBase {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestDeleteTableProcedureWithRecovery.class);
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ // Enable recovery snapshots
+
UTIL.getConfiguration().setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY,
+ true);
+ TestTableDDLProcedureBase.setupCluster();
+ }
+
+ @Test
+ public void testRecoverySnapshotRollback() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final String[] families = new String[] { "f1", "f2" };
+ final ProcedureExecutor<MasterProcedureEnv> procExec =
getMasterProcedureExecutor();
+
+ // Create table with data
+ MasterProcedureTestingUtility.createTable(procExec, tableName, null,
families);
+ MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName,
100, new byte[0][],
+ families);
+ UTIL.getAdmin().disableTable(tableName);
+
+ // Submit the failing procedure
+ long procId = procExec
+ .submitProcedure(new
FailingDeleteTableProcedure(procExec.getEnvironment(), tableName));
+
+ // Wait for procedure to complete (should fail)
+ ProcedureTestingUtility.waitProcedure(procExec, procId);
+ Procedure<MasterProcedureEnv> result = procExec.getResult(procId);
+ assertTrue("Procedure should have failed", result.isFailed());
+
+ // Verify no recovery snapshots remain after rollback
+ boolean snapshotFound = false;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" +
tableName.getNameAsString())) {
+ snapshotFound = true;
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should have been cleaned up during
rollback", !snapshotFound);
+ }
+
+ @Test
+ public void testRecoverySnapshotAndRestore() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final TableName restoredTableName = TableName.valueOf(name.getMethodName()
+ "_restored");
+ final String[] families = new String[] { "f1", "f2" };
+
+ final ProcedureExecutor<MasterProcedureEnv> procExec =
getMasterProcedureExecutor();
+
+ // Create table with data
+ MasterProcedureTestingUtility.createTable(procExec, tableName, null,
families);
+ MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName,
100, new byte[0][],
+ families);
+ UTIL.getAdmin().disableTable(tableName);
+
+ // Delete the table (this should create a recovery snapshot)
+ long procId = ProcedureTestingUtility.submitAndWait(procExec,
+ new DeleteTableProcedure(procExec.getEnvironment(), tableName));
+ ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+
+ // Verify table is deleted
+ MasterProcedureTestingUtility.validateTableDeletion(getMaster(),
tableName);
+
+ // Find the recovery snapshot
+ String recoverySnapshotName = null;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" +
tableName.getNameAsString())) {
+ recoverySnapshotName = snapshot.getName();
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should exist", recoverySnapshotName != null);
+
+ // Restore from snapshot by cloning to a new table
+ UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName);
+ UTIL.waitUntilAllRegionsAssigned(restoredTableName);
+
+ // Verify restored table has original data
+ assertEquals(100, UTIL.countRows(restoredTableName));
+
+ // Clean up the cloned table
+ UTIL.getAdmin().disableTable(restoredTableName);
+ UTIL.getAdmin().deleteTable(restoredTableName);
+ }
+
+ // Create a procedure that will fail after snapshot creation
+ public static class FailingDeleteTableProcedure extends DeleteTableProcedure
{
+ private boolean failOnce = false;
+
+ public FailingDeleteTableProcedure() {
+ super();
+ }
+
+ public FailingDeleteTableProcedure(MasterProcedureEnv env, TableName
tableName) {
+ super(env, tableName);
+ }
+
+ @Override
+ protected Flow executeFromState(MasterProcedureEnv env, DeleteTableState
state)
+ throws InterruptedException, ProcedureSuspendedException {
+ if (!failOnce && state == DeleteTableState.DELETE_TABLE_CLEAR_FS_LAYOUT)
{
+ failOnce = true;
+ throw new RuntimeException("Simulated failure");
+ }
+ return super.executeFromState(env, state);
+ }
+ }
+
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyTableProcedureWithRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyTableProcedureWithRecovery.java
new file mode 100644
index 00000000000..1eccfc82ecd
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestModifyTableProcedureWithRecovery.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ModifyTableState;
+
+@Category({ MasterTests.class, LargeTests.class })
+public class TestModifyTableProcedureWithRecovery extends
TestTableDDLProcedureBase {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestModifyTableProcedureWithRecovery.class);
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ // Enable recovery snapshots
+
UTIL.getConfiguration().setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY,
+ true);
+ TestTableDDLProcedureBase.setupCluster();
+ }
+
+ @Test
+ public void testRecoverySnapshotRollback() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final String cf1 = "cf1";
+ final String cf2 = "cf2";
+ final ProcedureExecutor<MasterProcedureEnv> procExec =
getMasterProcedureExecutor();
+
+ // Create table with multiple column families
+ MasterProcedureTestingUtility.createTable(procExec, tableName, null, cf1,
cf2);
+ MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName,
100, new byte[0][],
+ new String[] { cf1, cf2 });
+ UTIL.getAdmin().disableTable(tableName);
+
+ // Create a procedure that will fail - modify to delete a column family
+ // but simulate failure after snapshot creation
+ // Modify table to remove cf2 (which should trigger recovery snapshot)
+ TableDescriptor originalHtd = UTIL.getAdmin().getDescriptor(tableName);
+ TableDescriptor modifiedHtd =
+
TableDescriptorBuilder.newBuilder(originalHtd).removeColumnFamily(cf2.getBytes()).build();
+
+ // Submit the failing procedure
+ long procId = procExec
+ .submitProcedure(new
FailingModifyTableProcedure(procExec.getEnvironment(), modifiedHtd));
+
+ // Wait for procedure to complete (should fail)
+ ProcedureTestingUtility.waitProcedure(procExec, procId);
+ Procedure<MasterProcedureEnv> result = procExec.getResult(procId);
+ assertTrue("Procedure should have failed", result.isFailed());
+
+ // Verify no recovery snapshots remain after rollback
+ boolean snapshotFound = false;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" +
tableName.getNameAsString())) {
+ snapshotFound = true;
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should have been cleaned up during
rollback", !snapshotFound);
+ }
+
+ @Test
+ public void testRecoverySnapshotAndRestore() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final TableName restoredTableName = TableName.valueOf(name.getMethodName()
+ "_restored");
+ final String cf1 = "cf1";
+ final String cf2 = "cf2";
+ final ProcedureExecutor<MasterProcedureEnv> procExec =
getMasterProcedureExecutor();
+
+ // Create table with multiple column families
+ MasterProcedureTestingUtility.createTable(procExec, tableName, null, cf1,
cf2);
+ MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName,
100, new byte[0][],
+ new String[] { cf1, cf2 });
+ UTIL.getAdmin().disableTable(tableName);
+
+ // Modify table to remove cf2 (which should trigger recovery snapshot)
+ TableDescriptor originalHtd = UTIL.getAdmin().getDescriptor(tableName);
+ TableDescriptor modifiedHtd =
+
TableDescriptorBuilder.newBuilder(originalHtd).removeColumnFamily(cf2.getBytes()).build();
+
+ long procId = ProcedureTestingUtility.submitAndWait(procExec,
+ new ModifyTableProcedure(procExec.getEnvironment(), modifiedHtd));
+ ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+
+ // Verify table modification was successful
+ TableDescriptor currentHtd = UTIL.getAdmin().getDescriptor(tableName);
+ assertEquals("Should have one column family", 1,
currentHtd.getColumnFamilyNames().size());
+ assertTrue("Should only have cf1",
currentHtd.hasColumnFamily(cf1.getBytes()));
+
+ // Find the recovery snapshot
+ String recoverySnapshotName = null;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" +
tableName.getNameAsString())) {
+ recoverySnapshotName = snapshot.getName();
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should exist", recoverySnapshotName != null);
+
+ // Restore from snapshot by cloning to a new table
+ UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName);
+ UTIL.waitUntilAllRegionsAssigned(restoredTableName);
+
+ // Verify restored table has original structure with both column families
+ TableDescriptor restoredHtd =
UTIL.getAdmin().getDescriptor(restoredTableName);
+ assertEquals("Should have two column families", 2,
restoredHtd.getColumnFamilyNames().size());
+ assertTrue("Should have cf1", restoredHtd.hasColumnFamily(cf1.getBytes()));
+ assertTrue("Should have cf2", restoredHtd.hasColumnFamily(cf2.getBytes()));
+
+ // Clean up the cloned table
+ UTIL.getAdmin().disableTable(restoredTableName);
+ UTIL.getAdmin().deleteTable(restoredTableName);
+ }
+
+ public static class FailingModifyTableProcedure extends ModifyTableProcedure
{
+ private boolean failOnce = false;
+
+ public FailingModifyTableProcedure() {
+ super();
+ }
+
+ public FailingModifyTableProcedure(MasterProcedureEnv env, TableDescriptor
newTableDescriptor)
+ throws HBaseIOException {
+ super(env, newTableDescriptor);
+ }
+
+ @Override
+ protected Flow executeFromState(MasterProcedureEnv env, ModifyTableState
state)
+ throws InterruptedException {
+ if (!failOnce && state ==
ModifyTableState.MODIFY_TABLE_CLOSE_EXCESS_REPLICAS) {
+ failOnce = true;
+ throw new RuntimeException("Simulated failure");
+ }
+ return super.executeFromState(env, state);
+ }
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverySnapshotUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverySnapshotUtils.java
new file mode 100644
index 00000000000..57cfe57716b
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRecoverySnapshotUtils.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, SmallTests.class })
+public class TestRecoverySnapshotUtils {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRecoverySnapshotUtils.class);
+
+ @Test
+ public void testRecoverySnapshotTtlNoDescriptor() {
+ // Create a mock MasterProcedureEnv with a known site configuration TTL
+ long siteLevelTtl = 7200; // 2 hours
+ Configuration conf = new Configuration();
+ conf.setLong(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_TTL_KEY,
siteLevelTtl);
+
+ MasterProcedureEnv env = mock(MasterProcedureEnv.class);
+ when(env.getMasterConfiguration()).thenReturn(conf);
+
+ // Test with null table descriptor - should return site configuration
+ long actualTtl = RecoverySnapshotUtils.getRecoverySnapshotTtl(env, null);
+ assertEquals("Should return site-level TTL when no table descriptor
provided", siteLevelTtl,
+ actualTtl);
+ }
+
+ @Test
+ public void testRecoverySnapshotTtlWithDescriptor() {
+ // Create a mock MasterProcedureEnv with a known site configuration TTL
+ long siteLevelTtl = 7200; // 2 hours
+ Configuration conf = new Configuration();
+ conf.setLong(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_TTL_KEY,
siteLevelTtl);
+
+ MasterProcedureEnv env = mock(MasterProcedureEnv.class);
+ when(env.getMasterConfiguration()).thenReturn(conf);
+
+ // Create a table descriptor with a different TTL override
+ long tableLevelTtl = 3600; // 1 hour
+ TableDescriptor tableDescriptor =
TableDescriptorBuilder.newBuilder(TableName.valueOf("test"))
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf"))
+ .setValue(HConstants.TABLE_RECOVERY_SNAPSHOT_TTL_KEY,
String.valueOf(tableLevelTtl)).build();
+
+ // Test with table descriptor override - should return table-level TTL
+ long actualTtl = RecoverySnapshotUtils.getRecoverySnapshotTtl(env,
tableDescriptor);
+ assertEquals("Should return table-level TTL when table descriptor provides
override",
+ tableLevelTtl, actualTtl);
+ }
+
+ @Test
+ public void testRecoverySnapshotTtlUsesDefault() {
+ // Create a mock MasterProcedureEnv with default configuration (no
explicit TTL set)
+ Configuration conf = new Configuration();
+ // Don't set the TTL key, so it should use the default
+
+ MasterProcedureEnv env = mock(MasterProcedureEnv.class);
+ when(env.getMasterConfiguration()).thenReturn(conf);
+
+ // Test with null table descriptor - should return default TTL
+ long actualTtl = RecoverySnapshotUtils.getRecoverySnapshotTtl(env, null);
+ assertEquals("Should return default TTL when no site configuration
provided",
+ HConstants.DEFAULT_SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_TTL, actualTtl);
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateRegionProcedureWithRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateRegionProcedureWithRecovery.java
new file mode 100644
index 00000000000..15023d48f24
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateRegionProcedureWithRecovery.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static
org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateRegionState;
+
+@Category({ MasterTests.class, LargeTests.class })
+public class TestTruncateRegionProcedureWithRecovery extends TestTableDDLProcedureBase {
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestTruncateRegionProcedureWithRecovery.class);
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TestTruncateRegionProcedureWithRecovery.class);
+
+ @Rule
+ public TestName name = new TestName();
+
+ private static void setupConf(Configuration conf) {
+ conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
+ conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0);
+ conf.setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY, true);
+ conf.setInt("hbase.client.sync.wait.timeout.msec", 60000);
+ }
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ setupConf(UTIL.getConfiguration());
+ UTIL.startMiniCluster(3);
+ }
+
+ @AfterClass
+ public static void cleanupTest() throws Exception {
+ try {
+ UTIL.shutdownMiniCluster();
+ } catch (Exception e) {
+ LOG.warn("failure shutting down cluster", e);
+ }
+ }
+
+ @Before
+ public void setup() throws Exception {
+ ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false);
+
+ // Turn off balancer, so it doesn't cut in and mess up our placements.
+ UTIL.getAdmin().balancerSwitch(false, true);
+ // Turn off the catalog janitor, so it doesn't remove parent regions out from under us.
+ UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(getMasterProcedureExecutor(), false);
+ for (TableDescriptor htd : UTIL.getAdmin().listTableDescriptors()) {
+ UTIL.deleteTable(htd.getTableName());
+ }
+ }
+
+ @Test
+ public void testRecoverySnapshotRollback() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final String[] families = new String[] { "f1", "f2" };
+ final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
+
+ // Create table with split keys
+ final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
+ MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);
+
+ // Insert data
+ insertData(UTIL, tableName, 2, 20, families);
+ insertData(UTIL, tableName, 2, 31, families);
+ insertData(UTIL, tableName, 2, 61, families);
+
+ // Get a region to truncate
+ MasterProcedureEnv environment = procExec.getEnvironment();
+ RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
+ .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
+ .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
+
+ // Use a custom procedure that fails after the recovery snapshot has been taken.
+ // Submit the failing procedure
+ long procId =
+ procExec.submitProcedure(new FailingTruncateRegionProcedure(environment, regionToTruncate));
+
+ // Wait for procedure to complete (should fail)
+ ProcedureTestingUtility.waitProcedure(procExec, procId);
+ Procedure<MasterProcedureEnv> result = procExec.getResult(procId);
+ assertTrue("Procedure should have failed", result.isFailed());
+
+ // Verify no recovery snapshots remain after rollback
+ boolean snapshotFound = false;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
+ snapshotFound = true;
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should have been cleaned up during
rollback", !snapshotFound);
+ }
+
+ @Test
+ public void testRecoverySnapshotAndRestore() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final TableName restoredTableName = TableName.valueOf(name.getMethodName() + "_restored");
+ final String[] families = new String[] { "f1", "f2" };
+ final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
+
+ // Create table with split keys
+ final byte[][] splitKeys = new byte[][] { Bytes.toBytes("30"), Bytes.toBytes("60") };
+ MasterProcedureTestingUtility.createTable(procExec, tableName, splitKeys, families);
+
+ // Insert data
+ insertData(UTIL, tableName, 2, 20, families);
+ insertData(UTIL, tableName, 2, 31, families);
+ insertData(UTIL, tableName, 2, 61, families);
+ int initialRowCount = UTIL.countRows(tableName);
+
+ // Get a region to truncate
+ MasterProcedureEnv environment = procExec.getEnvironment();
+ RegionInfo regionToTruncate = environment.getAssignmentManager().getAssignedRegions().stream()
+ .filter(r -> tableName.getNameAsString().equals(r.getTable().getNameAsString()))
+ .min((o1, o2) -> Bytes.compareTo(o1.getStartKey(), o2.getStartKey())).get();
+
+ // Truncate the region (this should create a recovery snapshot)
+ long procId =
+ procExec.submitProcedure(new TruncateRegionProcedure(environment, regionToTruncate));
+ ProcedureTestingUtility.waitProcedure(procExec, procId);
+ ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+
+ // Verify region is truncated (should have fewer rows)
+ int rowsAfterTruncate = UTIL.countRows(tableName);
+ assertTrue("Should have fewer rows after truncate", rowsAfterTruncate <
initialRowCount);
+
+ // Find the recovery snapshot
+ String recoverySnapshotName = null;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
+ recoverySnapshotName = snapshot.getName();
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should exist", recoverySnapshotName != null);
+
+ // Restore from snapshot by cloning to a new table
+ UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName);
+ UTIL.waitUntilAllRegionsAssigned(restoredTableName);
+
+ // Verify restored table has original data
+ assertEquals("Restored table should have original data", initialRowCount,
+ UTIL.countRows(restoredTableName));
+
+ // Clean up the cloned table
+ UTIL.getAdmin().disableTable(restoredTableName);
+ UTIL.getAdmin().deleteTable(restoredTableName);
+ }
+
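+ /**
+  * Test-only subclass of TruncateRegionProcedure that throws once at
+  * TRUNCATE_REGION_MAKE_OFFLINE, after the recovery snapshot has been taken, so the test can
+  * verify that rollback also cleans up the recovery snapshot.
+  */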
+ public static class FailingTruncateRegionProcedure extends TruncateRegionProcedure {
+ private boolean failOnce = false;
+
+ public FailingTruncateRegionProcedure() {
+ super();
+ }
+
+ public FailingTruncateRegionProcedure(MasterProcedureEnv env, RegionInfo region)
+ throws HBaseIOException {
+ super(env, region);
+ }
+
+ @Override
+ protected Flow executeFromState(MasterProcedureEnv env, TruncateRegionState state)
+ throws InterruptedException {
+ if (!failOnce && state == TruncateRegionState.TRUNCATE_REGION_MAKE_OFFLINE) {
+ failOnce = true;
+ throw new RuntimeException("Simulated failure");
+ }
+ return super.executeFromState(env, state);
+ }
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedureWithRecovery.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedureWithRecovery.java
new file mode 100644
index 00000000000..34ffabc5854
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedureWithRecovery.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseIOException;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.SnapshotDescription;
+import org.apache.hadoop.hbase.procedure2.Procedure;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.TruncateTableState;
+
+@Category({ MasterTests.class, LargeTests.class })
+public class TestTruncateTableProcedureWithRecovery extends TestTableDDLProcedureBase {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestTruncateTableProcedureWithRecovery.class);
+
+ @Rule
+ public TestName name = new TestName();
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ // Enable recovery snapshots
+ UTIL.getConfiguration().setBoolean(HConstants.SNAPSHOT_BEFORE_DESTRUCTIVE_ACTION_ENABLED_KEY,
+ true);
+ TestTableDDLProcedureBase.setupCluster();
+ }
+
+ @Test
+ public void testRecoverySnapshotRollback() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final String[] families = new String[] { "f1", "f2" };
+ final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
+
+ // Create table with data
+ MasterProcedureTestingUtility.createTable(getMasterProcedureExecutor(), tableName, null,
+ families);
+ MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName, 100, new byte[0][],
+ families);
+ assertEquals(100, UTIL.countRows(tableName));
+
+ // Disable the table
+ UTIL.getAdmin().disableTable(tableName);
+
+ // Submit the failing procedure
+ long procId = procExec.submitProcedure(
+ new FailingTruncateTableProcedure(procExec.getEnvironment(), tableName, false));
+
+ // Wait for procedure to complete (should fail)
+ ProcedureTestingUtility.waitProcedure(procExec, procId);
+ Procedure<MasterProcedureEnv> result = procExec.getResult(procId);
+ assertTrue("Procedure should have failed", result.isFailed());
+
+ // Verify no recovery snapshots remain after rollback
+ boolean snapshotFound = false;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
+ snapshotFound = true;
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should have been cleaned up during
rollback", !snapshotFound);
+ }
+
+ @Test
+ public void testRecoverySnapshotAndRestore() throws Exception {
+ final TableName tableName = TableName.valueOf(name.getMethodName());
+ final TableName restoredTableName = TableName.valueOf(name.getMethodName() + "_restored");
+ final String[] families = new String[] { "f1", "f2" };
+
+ // Create table with data
+ MasterProcedureTestingUtility.createTable(getMasterProcedureExecutor(), tableName, null,
+ families);
+ MasterProcedureTestingUtility.loadData(UTIL.getConnection(), tableName, 100, new byte[0][],
+ families);
+ assertEquals(100, UTIL.countRows(tableName));
+
+ // Disable the table
+ UTIL.getAdmin().disableTable(tableName);
+
+ // Truncate the table (this should create a recovery snapshot)
+ final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
+ long procId = ProcedureTestingUtility.submitAndWait(procExec,
+ new TruncateTableProcedure(procExec.getEnvironment(), tableName, false));
+ ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+
+ // Verify table is truncated
+ UTIL.waitUntilAllRegionsAssigned(tableName);
+ assertEquals(0, UTIL.countRows(tableName));
+
+ // Find the recovery snapshot
+ String recoverySnapshotName = null;
+ for (SnapshotDescription snapshot : UTIL.getAdmin().listSnapshots()) {
+ if (snapshot.getName().startsWith("auto_" + tableName.getNameAsString())) {
+ recoverySnapshotName = snapshot.getName();
+ break;
+ }
+ }
+ assertTrue("Recovery snapshot should exist", recoverySnapshotName != null);
+
+ // Restore from snapshot by cloning to a new table
+ UTIL.getAdmin().cloneSnapshot(recoverySnapshotName, restoredTableName);
+ UTIL.waitUntilAllRegionsAssigned(restoredTableName);
+
+ // Verify restored table has original data
+ assertEquals(100, UTIL.countRows(restoredTableName));
+
+ // Clean up the cloned table
+ UTIL.getAdmin().disableTable(restoredTableName);
+ UTIL.getAdmin().deleteTable(restoredTableName);
+ }
+
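+ /**
+  * Test-only subclass of TruncateTableProcedure that throws once at
+  * TRUNCATE_TABLE_CLEAR_FS_LAYOUT to force a rollback, so the test can verify that the recovery
+  * snapshot taken earlier in the procedure is cleaned up.
+  */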
+ public static class FailingTruncateTableProcedure extends TruncateTableProcedure {
+ private boolean failOnce = false;
+
+ public FailingTruncateTableProcedure() {
+ super();
+ }
+
+ public FailingTruncateTableProcedure(MasterProcedureEnv env, TableName tableName,
+ boolean preserveSplits) throws HBaseIOException {
+ super(env, tableName, preserveSplits);
+ }
+
+ @Override
+ protected Flow executeFromState(MasterProcedureEnv env, TruncateTableState state)
+ throws InterruptedException {
+ if (!failOnce && state == TruncateTableState.TRUNCATE_TABLE_CLEAR_FS_LAYOUT) {
+ failOnce = true;
+ throw new RuntimeException("Simulated failure");
+ }
+ return super.executeFromState(env, state);
+ }
+ }
+}