This is an automated email from the ASF dual-hosted git repository.

andor pushed a commit to branch HBASE-29081_rebased
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 39a2e42f4fdd162b55dfe5f5efd709190323b2ce
Author: Kota-SH <[email protected]>
AuthorDate: Thu Sep 11 09:22:19 2025 -0500

    HBASE-29291: Add a command to refresh/sync hbase:meta table (#7058)

    Change-Id: Ia04bb12cdaf580f26cb14d9a34b5963105065faa
---
 .../java/org/apache/hadoop/hbase/client/Admin.java |   5 +
 .../hadoop/hbase/client/AdminOverAsyncAdmin.java   |   5 +
 .../org/apache/hadoop/hbase/client/AsyncAdmin.java |   5 +
 .../hadoop/hbase/client/AsyncHBaseAdmin.java       |   5 +
 .../hadoop/hbase/client/RawAsyncHBaseAdmin.java    |  13 +
 .../src/main/protobuf/server/master/Master.proto   |  11 +
 .../protobuf/server/master/MasterProcedure.proto   |  12 +
 .../org/apache/hadoop/hbase/MetaTableAccessor.java |   2 +-
 .../org/apache/hadoop/hbase/TableDescriptors.java  |   7 +
 .../org/apache/hadoop/hbase/master/HMaster.java    |  16 +
 .../hadoop/hbase/master/MasterRpcServices.java     |  11 +
 .../master/procedure/RefreshMetaProcedure.java     | 480 +++++++++++++++++++++
 .../hbase/security/access/ReadOnlyController.java  |  19 +-
 .../hadoop/hbase/util/FSTableDescriptors.java      |  10 +
 .../master/procedure/TestRefreshMetaProcedure.java | 121 ++++++
 .../TestRefreshMetaProcedureIntegration.java       | 285 ++++++++++++
 .../hbase/rsgroup/VerifyingRSGroupAdmin.java       |   4 +
 hbase-shell/src/main/ruby/hbase/admin.rb           |   6 +
 hbase-shell/src/main/ruby/shell.rb                 |   1 +
 .../src/main/ruby/shell/commands/refresh_meta.rb   |  43 ++
 .../hadoop/hbase/thrift2/client/ThriftAdmin.java   |   5 +
 21 files changed, 1061 insertions(+), 5 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java
index 65b3abcd413..8622e1c8877 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Admin.java
@@ -2703,6 +2703,11 @@ public interface Admin extends Abortable, Closeable {
    */
   List<String> getCachedFilesList(ServerName serverName) throws IOException;
 
+  /**
+   * Perform hbase:meta table refresh
+   */
+  Long refreshMeta() throws IOException;
+
   @InterfaceAudience.Private
   void restoreBackupSystemTable(String snapshotName) throws IOException;
 }
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java
index 7117fd4fd33..37d46feb41a 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AdminOverAsyncAdmin.java
@@ -1153,6 +1153,11 @@ class AdminOverAsyncAdmin implements Admin {
     return get(admin.getCachedFilesList(serverName));
   }
 
+  @Override
+  public Long refreshMeta() throws IOException {
+    return get(admin.refreshMeta());
+  }
+
   @Override
   public void restoreBackupSystemTable(String snapshotName) throws IOException {
     get(admin.restoreBackupSystemTable(snapshotName));
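For reference, a minimal client-side sketch of how the new API is intended to be called (the connection setup is illustrative only and not part of this change; assumes the enclosing method throws IOException):

    // Sketch only: submit a meta refresh and print the id of the resulting procedure.
    try (Connection conn = ConnectionFactory.createConnection(HBaseConfiguration.create());
        Admin admin = conn.getAdmin()) {
      Long procId = admin.refreshMeta();
      System.out.println("refresh_meta submitted as procedure " + procId);
    }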
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java
index 56211cedc49..e1d3aadfed3 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncAdmin.java
@@ -1890,6 +1890,11 @@ public interface AsyncAdmin {
    */
   CompletableFuture<List<String>> getCachedFilesList(ServerName serverName);
 
+  /**
+   * Perform hbase:meta table refresh
+   */
+  CompletableFuture<Long> refreshMeta();
+
   @InterfaceAudience.Private
   CompletableFuture<Void> restoreBackupSystemTable(String snapshotName);
 }
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java
index 8132b184809..26abe68402c 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncHBaseAdmin.java
@@ -1021,6 +1021,11 @@ class AsyncHBaseAdmin implements AsyncAdmin {
     return wrap(rawAdmin.getCachedFilesList(serverName));
   }
 
+  @Override
+  public CompletableFuture<Long> refreshMeta() {
+    return wrap(rawAdmin.refreshMeta());
+  }
+
   @Override
   public CompletableFuture<Void> restoreBackupSystemTable(String snapshotName) {
     return wrap(rawAdmin.restoreBackupSystemTable(snapshotName));
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java
index ea51d27b99a..9454d282653 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RawAsyncHBaseAdmin.java
@@ -263,6 +263,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineReg
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineRegionResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RefreshMetaRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RefreshMetaResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReopenTableRegionsRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReopenTableRegionsResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotRequest;
@@ -4697,4 +4699,15 @@ class RawAsyncHBaseAdmin implements AsyncAdmin {
       MasterProtos.RestoreBackupSystemTableResponse::getProcId,
       new RestoreBackupSystemTableProcedureBiConsumer());
   }
+
+  @Override
+  public CompletableFuture<Long> refreshMeta() {
+    RefreshMetaRequest.Builder request = RefreshMetaRequest.newBuilder();
+    request.setNonceGroup(ng.getNonceGroup()).setNonce(ng.newNonce());
+    return this.<Long> newMasterCaller()
+      .action((controller, stub) -> this.<RefreshMetaRequest, RefreshMetaResponse, Long> call(
+        controller, stub, request.build(), MasterService.Interface::refreshMeta,
+        RefreshMetaResponse::getProcId))
+      .call();
+  }
 }
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
index f475d26060d..40f6a1518f1 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
@@ -819,6 +819,14 @@ message RollAllWALWritersResponse {
   optional uint64 proc_id = 1;
 }
 
+message RefreshMetaRequest {
+  optional uint64 nonce_group = 1 [default = 0];
+  optional uint64 nonce = 2 [default = 0];
+}
+message RefreshMetaResponse {
+  optional uint64 proc_id = 1;
+}
+
 service MasterService {
   /** Used by the client to get the number of regions that have received the updated schema */
   rpc GetSchemaAlterStatus(GetSchemaAlterStatusRequest)
@@ -1303,6 +1311,9 @@ service MasterService {
   rpc rollAllWALWriters(RollAllWALWritersRequest)
     returns(RollAllWALWritersResponse);
+
+  rpc RefreshMeta(RefreshMetaRequest)
+    returns(RefreshMetaResponse);
 }
 
 // HBCK Service definitions.
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
index 7e6c6c8e2fc..aa79ff474c3 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
@@ -864,3 +864,15 @@ message LogRollRemoteProcedureResult {
   optional ServerName server_name = 1;
   optional uint64 last_highest_wal_filenum = 2;
 }
+
+enum RefreshMetaState {
+  REFRESH_META_INIT = 1;
+  REFRESH_META_SCAN_STORAGE = 2;
+  REFRESH_META_PREPARE = 3;
+  REFRESH_META_APPLY = 4;
+  REFRESH_META_FOLLOWUP = 5;
+  REFRESH_META_FINISH = 6;
+}
+
+message RefreshMetaStateData {
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
index 05b049e27db..38aa054fd7c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java
@@ -753,7 +753,7 @@ public final class MetaTableAccessor {
    * @param connection connection we're using
    * @param deletes    Deletes to add to hbase:meta. This list should support #remove.
    */
-  private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes)
+  public static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes)
     throws IOException {
     try (Table t = getMetaHTable(connection)) {
       debugLogMutations(deletes);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/TableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/TableDescriptors.java
index d22e46383d3..32594ffce48 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/TableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/TableDescriptors.java
@@ -78,4 +78,11 @@ public interface TableDescriptors extends Closeable {
 
   /** Returns Instance of table descriptor or null if none found. */
   TableDescriptor remove(TableName tablename) throws IOException;
+
+  /**
+   * Invalidates the table descriptor cache.
+   */
+  default void invalidateTableDescriptorCache() {
+    // do nothing by default
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index b86644d5eda..caca0fcef73 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -170,6 +170,7 @@ import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
 import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
 import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
 import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher;
+import org.apache.hadoop.hbase.master.procedure.RefreshMetaProcedure;
 import org.apache.hadoop.hbase.master.procedure.ReloadQuotasProcedure;
 import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
 import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
@@ -4652,4 +4653,19 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
     return mobFileCleanerChore;
   }
 
+  public Long refreshMeta(long nonceGroup, long nonce) throws IOException {
+    return MasterProcedureUtil
+      .submitProcedure(new MasterProcedureUtil.NonceProcedureRunnable(this, nonceGroup, nonce) {
+        @Override
+        protected void run() throws IOException {
+          LOG.info("Submitting RefreshMetaProcedure");
+          submitProcedure(new RefreshMetaProcedure(procedureExecutor.getEnvironment()));
+        }
+
+        @Override
+        protected String getDescription() {
+          return "RefreshMetaProcedure";
+        }
+      });
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 85336eed2fb..472a8f86398 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -3742,4 +3742,15 @@ public class MasterRpcServices extends HBaseRpcServicesBase<HMaster>
       throw new ServiceException(ioe);
     }
   }
+
+  @Override
+  public MasterProtos.RefreshMetaResponse refreshMeta(RpcController controller,
+    MasterProtos.RefreshMetaRequest request) throws ServiceException {
+    try {
+      Long procId = server.refreshMeta(request.getNonceGroup(), request.getNonce());
+      return MasterProtos.RefreshMetaResponse.newBuilder().setProcId(procId).build();
+    } catch (IOException ioe) {
+      throw new ServiceException(ioe);
+    }
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RefreshMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RefreshMetaProcedure.java
new file mode 100644
index 00000000000..b2e458cd495
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RefreshMetaProcedure.java
@@ -0,0 +1,480 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState.WAITING_TIMEOUT;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Mutation;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableState;
+import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
+import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
+import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.RetryCounter;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RefreshMetaState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RefreshMetaStateData;
+
+@InterfaceAudience.Private
+public class RefreshMetaProcedure extends AbstractStateMachineTableProcedure<RefreshMetaState> {
+  private static final Logger LOG = LoggerFactory.getLogger(RefreshMetaProcedure.class);
+  private static final String HIDDEN_DIR_PATTERN = "^[._-].*";
+
+  private List<RegionInfo> currentRegions;
+  private List<RegionInfo> latestRegions;
+  private List<Mutation> pendingMutations;
+  private RetryCounter retryCounter;
+  private static final int MUTATION_BATCH_SIZE = 100;
+  private List<RegionInfo> newlyAddedRegions;
+  private List<TableName> deletedTables;
+
+  public RefreshMetaProcedure() {
+    super();
+  }
+
+  public RefreshMetaProcedure(MasterProcedureEnv env) {
+    super(env);
+  }
+
+  @Override
+  public TableName getTableName() {
+    return TableName.META_TABLE_NAME;
+  }
+
+  @Override
+  public TableOperationType getTableOperationType() {
+    return TableOperationType.EDIT;
+  }
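+
+  /*
+   * Overall state flow (see executeFromState below): REFRESH_META_INIT ->
+   * REFRESH_META_SCAN_STORAGE -> REFRESH_META_PREPARE -> REFRESH_META_APPLY ->
+   * REFRESH_META_FOLLOWUP -> REFRESH_META_FINISH. PREPARE skips straight to FINISH when meta
+   * is already in sync, APPLY skips FOLLOWUP when no regions were added and no tables removed,
+   * and a failed APPLY suspends the procedure and retries with backoff.
+   */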
{}", refreshMetaState); + + try { + return switch (refreshMetaState) { + case REFRESH_META_INIT -> executeInit(env); + case REFRESH_META_SCAN_STORAGE -> executeScanStorage(env); + case REFRESH_META_PREPARE -> executePrepare(); + case REFRESH_META_APPLY -> executeApply(env); + case REFRESH_META_FOLLOWUP -> executeFollowup(env); + case REFRESH_META_FINISH -> executeFinish(env); + default -> throw new UnsupportedOperationException("Unhandled state: " + refreshMetaState); + }; + } catch (Exception ex) { + LOG.error("Error in RefreshMetaProcedure state {}", refreshMetaState, ex); + setFailure("RefreshMetaProcedure", ex); + return Flow.NO_MORE_STATE; + } + } + + private Flow executeInit(MasterProcedureEnv env) throws IOException { + LOG.trace("Getting current regions from hbase:meta table"); + try { + currentRegions = getCurrentRegions(env.getMasterServices().getConnection()); + LOG.info("Found {} current regions in meta table", currentRegions.size()); + setNextState(RefreshMetaState.REFRESH_META_SCAN_STORAGE); + return Flow.HAS_MORE_STATE; + } catch (IOException ioe) { + LOG.error("Failed to get current regions from meta table", ioe); + throw ioe; + } + } + + private Flow executeScanStorage(MasterProcedureEnv env) throws IOException { + try { + latestRegions = scanBackingStorage(env.getMasterServices().getConnection()); + LOG.info("Found {} regions in backing storage", latestRegions.size()); + setNextState(RefreshMetaState.REFRESH_META_PREPARE); + return Flow.HAS_MORE_STATE; + } catch (IOException ioe) { + LOG.error("Failed to scan backing storage", ioe); + throw ioe; + } + } + + private Flow executePrepare() throws IOException { + if (currentRegions == null || latestRegions == null) { + LOG.error( + "Can not execute update on null lists. " + "Meta Table Regions - {}, Storage Regions - {}", + currentRegions, latestRegions); + throw new IOException( + (currentRegions == null ? "current regions" : "latest regions") + " list is null"); + } + LOG.info("Comparing regions. 
+    LOG.info("Comparing regions. Current regions: {}, Latest regions: {}", currentRegions.size(),
+      latestRegions.size());
+
+    this.newlyAddedRegions = new ArrayList<>();
+    this.deletedTables = new ArrayList<>();
+
+    pendingMutations = prepareMutations(
+      currentRegions.stream()
+        .collect(Collectors.toMap(RegionInfo::getEncodedName, Function.identity())),
+      latestRegions.stream()
+        .collect(Collectors.toMap(RegionInfo::getEncodedName, Function.identity())));
+
+    if (pendingMutations.isEmpty()) {
+      LOG.info("RefreshMetaProcedure completed; no update needed.");
+      setNextState(RefreshMetaState.REFRESH_META_FINISH);
+    } else {
+      LOG.info("Prepared {} region mutations and {} tables for cleanup.", pendingMutations.size(),
+        deletedTables.size());
+      setNextState(RefreshMetaState.REFRESH_META_APPLY);
+    }
+    return Flow.HAS_MORE_STATE;
+  }
+
+  private Flow executeApply(MasterProcedureEnv env) throws ProcedureSuspendedException {
+    try {
+      if (pendingMutations != null && !pendingMutations.isEmpty()) {
+        applyMutations(env.getMasterServices().getConnection(), pendingMutations);
+        LOG.debug("RefreshMetaProcedure applied {} mutations to meta table",
+          pendingMutations.size());
+      }
+    } catch (IOException ioe) {
+      if (retryCounter == null) {
+        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+      }
+      long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+      LOG.warn("Failed to apply mutations to meta table, suspending for {} ms", backoff, ioe);
+      setTimeout(Math.toIntExact(backoff));
+      setState(WAITING_TIMEOUT);
+      skipPersistence();
+      throw new ProcedureSuspendedException();
+    }
+
+    if (
+      (this.newlyAddedRegions != null && !this.newlyAddedRegions.isEmpty())
+        || (this.deletedTables != null && !this.deletedTables.isEmpty())
+    ) {
+      setNextState(RefreshMetaState.REFRESH_META_FOLLOWUP);
+    } else {
+      LOG.info("RefreshMetaProcedure completed. No follow-up actions were required.");
+      setNextState(RefreshMetaState.REFRESH_META_FINISH);
+    }
+    return Flow.HAS_MORE_STATE;
+  }
+
+  private Flow executeFollowup(MasterProcedureEnv env) throws IOException {
+    LOG.info("Submitting assignment for new regions: {}", this.newlyAddedRegions);
+    addChildProcedure(env.getAssignmentManager().createAssignProcedures(newlyAddedRegions));
+
+    for (TableName tableName : this.deletedTables) {
+      LOG.debug("Submitting deletion for empty table {}", tableName);
+      env.getMasterServices().getAssignmentManager().deleteTable(tableName);
+      env.getMasterServices().getTableStateManager().setDeletedTable(tableName);
+      env.getMasterServices().getTableDescriptors().remove(tableName);
+    }
+    setNextState(RefreshMetaState.REFRESH_META_FINISH);
+    return Flow.HAS_MORE_STATE;
+  }
+
+  private Flow executeFinish(MasterProcedureEnv env) {
+    invalidateTableDescriptorCache(env);
+    LOG.info("RefreshMetaProcedure completed successfully. All follow-up actions finished.");
+    currentRegions = null;
+    latestRegions = null;
+    pendingMutations = null;
+    deletedTables = null;
+    newlyAddedRegions = null;
+    return Flow.NO_MORE_STATE;
+  }
+
+  private void invalidateTableDescriptorCache(MasterProcedureEnv env) {
+    LOG.debug("Invalidating the table descriptor cache to ensure new tables are discovered");
+    env.getMasterServices().getTableDescriptors().invalidateTableDescriptorCache();
+  }
+
+  /**
+   * Prepares mutations by comparing the current regions in hbase:meta with the latest regions from
+   * backing storage. Also populates newlyAddedRegions and deletedTables lists for follow-up
+   * actions.
+   * @param currentMap Current regions from hbase:meta
+   * @param latestMap  Latest regions from backing storage
+   * @return List of mutations to apply to the meta table
+   * @throws IOException If there is an error creating mutations
+   */
+  private List<Mutation> prepareMutations(Map<String, RegionInfo> currentMap,
+    Map<String, RegionInfo> latestMap) throws IOException {
+    List<Mutation> mutations = new ArrayList<>();
+
+    for (String regionId : Stream.concat(currentMap.keySet().stream(), latestMap.keySet().stream())
+      .collect(Collectors.toSet())) {
+      RegionInfo currentRegion = currentMap.get(regionId);
+      RegionInfo latestRegion = latestMap.get(regionId);
+
+      if (latestRegion != null) {
+        if (currentRegion == null || hasBoundaryChanged(currentRegion, latestRegion)) {
+          mutations.add(MetaTableAccessor.makePutFromRegionInfo(latestRegion));
+          newlyAddedRegions.add(latestRegion);
+        }
+      } else {
+        mutations.add(MetaTableAccessor.makeDeleteFromRegionInfo(currentRegion,
+          EnvironmentEdgeManager.currentTime()));
+      }
+    }
+
+    if (!currentMap.isEmpty() || !latestMap.isEmpty()) {
+      Set<TableName> currentTables =
+        currentMap.values().stream().map(RegionInfo::getTable).collect(Collectors.toSet());
+      Set<TableName> latestTables =
+        latestMap.values().stream().map(RegionInfo::getTable).collect(Collectors.toSet());
+
+      Set<TableName> tablesToDeleteState = new HashSet<>(currentTables);
+      tablesToDeleteState.removeAll(latestTables);
+      if (!tablesToDeleteState.isEmpty()) {
+        LOG.warn(
+          "The following tables have no regions on storage and WILL BE REMOVED from the meta: {}",
+          tablesToDeleteState);
+        this.deletedTables.addAll(tablesToDeleteState);
+      }
+
+      Set<TableName> tablesToRestoreState = new HashSet<>(latestTables);
+      tablesToRestoreState.removeAll(currentTables);
+      if (!tablesToRestoreState.isEmpty()) {
+        LOG.info("Adding missing table:state entry for recovered tables: {}", tablesToRestoreState);
+        for (TableName tableName : tablesToRestoreState) {
+          TableState tableState = new TableState(tableName, TableState.State.ENABLED);
+          mutations.add(MetaTableAccessor.makePutFromTableState(tableState,
+            EnvironmentEdgeManager.currentTime()));
+        }
+      }
+    }
+    return mutations;
+  }
+
+  private void applyMutations(Connection connection, List<Mutation> mutations) throws IOException {
+    List<List<Mutation>> chunks = Lists.partition(mutations, MUTATION_BATCH_SIZE);
+
+    for (int i = 0; i < chunks.size(); i++) {
+      List<Mutation> chunk = chunks.get(i);
+
+      List<Put> puts =
+        chunk.stream().filter(m -> m instanceof Put).map(m -> (Put) m).collect(Collectors.toList());
+
+      List<Delete> deletes = chunk.stream().filter(m -> m instanceof Delete).map(m -> (Delete) m)
+        .collect(Collectors.toList());
+
+      if (!puts.isEmpty()) {
+        MetaTableAccessor.putsToMetaTable(connection, puts);
+      }
+      if (!deletes.isEmpty()) {
+        MetaTableAccessor.deleteFromMetaTable(connection, deletes);
+      }
+      LOG.debug("Successfully processed batch {}/{}", i + 1, chunks.size());
+    }
+  }
+
+  boolean hasBoundaryChanged(RegionInfo region1, RegionInfo region2) {
+    return !Arrays.equals(region1.getStartKey(), region2.getStartKey())
+      || !Arrays.equals(region1.getEndKey(), region2.getEndKey());
+  }
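+
+  /*
+   * Worked example of the reconciliation above: if hbase:meta currently lists regions {A, B} and
+   * the storage scan finds {B', C} (B' being B with changed boundaries), prepareMutations emits
+   * Puts for B' and C (both also queued for assignment), a Delete for A, a table:state ENABLED
+   * Put for C's table if meta has no other region of it, and marks A's table for cleanup if it
+   * has no regions left on storage.
+   */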
+
+  /**
+   * Scans the backing storage for all regions and returns a list of RegionInfo objects. This
+   * method scans the filesystem for region directories and reads their .regioninfo files.
+   * @param connection The HBase connection to use.
+   * @return List of RegionInfo objects found in the backing storage.
+   * @throws IOException If there is an error accessing the filesystem or reading region info
+   *           files.
+   */
+  List<RegionInfo> scanBackingStorage(Connection connection) throws IOException {
+    List<RegionInfo> regions = new ArrayList<>();
+    Configuration conf = connection.getConfiguration();
+    FileSystem fs = FileSystem.get(conf);
+    Path rootDir = CommonFSUtils.getRootDir(conf);
+    Path dataDir = new Path(rootDir, HConstants.BASE_NAMESPACE_DIR);
+
+    LOG.info("Scanning backing storage under: {}", dataDir);
+
+    if (!fs.exists(dataDir)) {
+      LOG.warn("Data directory does not exist: {}", dataDir);
+      return regions;
+    }
+
+    FileStatus[] namespaceDirs =
+      fs.listStatus(dataDir, path -> !path.getName().matches(HIDDEN_DIR_PATTERN));
+    LOG.debug("Found {} namespace directories in data dir", namespaceDirs.length);
+
+    for (FileStatus nsDir : namespaceDirs) {
+      String namespaceName = nsDir.getPath().getName();
+      if (NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR.equals(namespaceName)) {
+        LOG.info("Skipping system namespace {}", namespaceName);
+        continue;
+      }
+      try {
+        List<RegionInfo> namespaceRegions = scanTablesInNamespace(fs, nsDir.getPath());
+        regions.addAll(namespaceRegions);
+        LOG.debug("Found {} regions in namespace {}", namespaceRegions.size(),
+          nsDir.getPath().getName());
+      } catch (IOException e) {
+        LOG.error("Failed to scan namespace directory: {}", nsDir.getPath(), e);
+      }
+    }
+    LOG.info("Scanned backing storage and found {} regions", regions.size());
+    return regions;
+  }
+
+  private List<RegionInfo> scanTablesInNamespace(FileSystem fs, Path namespacePath)
+    throws IOException {
+    LOG.debug("Scanning namespace {}", namespacePath.getName());
+    List<Path> tableDirs = FSUtils.getLocalTableDirs(fs, namespacePath);
+
+    return tableDirs.parallelStream().flatMap(tableDir -> {
+      try {
+        List<RegionInfo> tableRegions = scanRegionsInTable(fs, FSUtils.getRegionDirs(fs, tableDir));
+        LOG.debug("Found {} regions in table {} in namespace {}", tableRegions.size(),
+          tableDir.getName(), namespacePath.getName());
+        return tableRegions.stream();
+      } catch (IOException e) {
+        LOG.warn("Failed to scan table directory: {} for namespace {}", tableDir,
+          namespacePath.getName(), e);
+        return Stream.empty();
+      }
+    }).toList();
+  }
+
+  private List<RegionInfo> scanRegionsInTable(FileSystem fs, List<Path> regionDirs)
+    throws IOException {
+    return regionDirs.stream().map(regionDir -> {
+      String encodedRegionName = regionDir.getName();
+      try {
+        Path regionInfoPath = new Path(regionDir, HRegionFileSystem.REGION_INFO_FILE);
+        if (fs.exists(regionInfoPath)) {
+          RegionInfo ri = readRegionInfo(fs, regionInfoPath);
+          if (ri != null && isValidRegionInfo(ri, encodedRegionName)) {
+            LOG.debug("Found region: {} -> {}", encodedRegionName, ri.getRegionNameAsString());
+            return ri;
+          } else {
+            LOG.warn("Invalid RegionInfo in file: {}", regionInfoPath);
+          }
+        } else {
+          LOG.debug("No .regioninfo file found in region directory: {}", regionDir);
+        }
+      } catch (Exception e) {
+        LOG.warn("Failed to read region info from directory: {}", encodedRegionName, e);
+      }
+      return null;
+    }).filter(Objects::nonNull).collect(Collectors.toList());
+  }
+
+  private boolean isValidRegionInfo(RegionInfo regionInfo, String expectedEncodedName) {
+    if (!expectedEncodedName.equals(regionInfo.getEncodedName())) {
+      LOG.warn("RegionInfo encoded name mismatch: directory={}, regioninfo={}", expectedEncodedName,
+        regionInfo.getEncodedName());
+      return false;
+    }
+    return true;
+  }
+
+  private RegionInfo readRegionInfo(FileSystem fs, Path regionInfoPath) {
+    try (FSDataInputStream inputStream = fs.open(regionInfoPath);
+      DataInputStream dataInputStream = new DataInputStream(inputStream)) {
+      return RegionInfo.parseFrom(dataInputStream);
+    } catch (Exception e) {
+      LOG.warn("Failed to parse .regioninfo file: {}", regionInfoPath, e);
+      return null;
+    }
+  }
+
+  /**
+   * Retrieves the current regions from the hbase:meta table.
+   * @param connection The HBase connection to use.
+   * @return List of RegionInfo objects representing the current regions in meta.
+   * @throws IOException If there is an error accessing the meta table.
+   */
+  List<RegionInfo> getCurrentRegions(Connection connection) throws IOException {
+    LOG.info("Getting all regions from meta table");
+    return MetaTableAccessor.getAllRegions(connection, true);
+  }
+
+  @Override
+  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
+    setState(
+      org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState.RUNNABLE);
+    env.getProcedureScheduler().addFront(this);
+    return false;
+  }
+
+  @Override
+  protected void rollbackState(MasterProcedureEnv env, RefreshMetaState refreshMetaState)
+    throws IOException, InterruptedException {
+    // No specific rollback needed as it is generally safe to re-run the procedure.
+    LOG.trace("Rollback not implemented for RefreshMetaProcedure state: {}", refreshMetaState);
+  }
+
+  @Override
+  protected RefreshMetaState getState(int stateId) {
+    return RefreshMetaState.forNumber(stateId);
+  }
+
+  @Override
+  protected int getStateId(RefreshMetaState refreshMetaState) {
+    return refreshMetaState.getNumber();
+  }
+
+  @Override
+  protected RefreshMetaState getInitialState() {
+    return RefreshMetaState.REFRESH_META_INIT;
+  }
+
+  @Override
+  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
+    // For now, we'll use a simple approach since we do not need to store any state data
+    RefreshMetaStateData.Builder builder = RefreshMetaStateData.newBuilder();
+    serializer.serialize(builder.build());
+  }
+
+  @Override
+  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
+    // For now, we'll use a simple approach since we do not need to store any state data
+    serializer.deserialize(RefreshMetaStateData.class);
+  }
+}
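For reference, the directory layout scanBackingStorage() walks is the standard HBase data tree (names below are illustrative):

    <hbase.rootdir>/data/<namespace>/<table>/<encoded-region-name>/.regioninfo

Namespace directories matching ^[._-].* and the hbase system namespace are skipped, and each .regioninfo file found is parsed back into a RegionInfo.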
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/ReadOnlyController.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/ReadOnlyController.java
index 13f458299b9..5b7ab67df0b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/ReadOnlyController.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/ReadOnlyController.java
@@ -109,6 +109,9 @@ public class ReadOnlyController implements MasterCoprocessor, RegionCoprocessor,
   @Override
   public void preDelete(ObserverContext<? extends RegionCoprocessorEnvironment> c, Delete delete,
     WALEdit edit) throws IOException {
+    if (c.getEnvironment().getRegionInfo().getTable().isSystemTable()) {
+      return;
+    }
     internalReadOnlyGuard();
   }
 
@@ -166,7 +169,9 @@ public class ReadOnlyController implements MasterCoprocessor, RegionCoprocessor,
   public boolean preCheckAndDelete(ObserverContext<? extends RegionCoprocessorEnvironment> c,
     byte[] row, byte[] family, byte[] qualifier, CompareOperator op,
     ByteArrayComparable comparator, Delete delete, boolean result) throws IOException {
-    internalReadOnlyGuard();
+    if (!c.getEnvironment().getRegionInfo().getTable().isSystemTable()) {
+      internalReadOnlyGuard();
+    }
     return RegionObserver.super.preCheckAndDelete(c, row, family, qualifier, op, comparator,
       delete, result);
   }
 
@@ -174,7 +179,9 @@ public class ReadOnlyController implements MasterCoprocessor, RegionCoprocessor,
   @Override
   public boolean preCheckAndDelete(ObserverContext<? extends RegionCoprocessorEnvironment> c,
     byte[] row, Filter filter, Delete delete, boolean result) throws IOException {
-    internalReadOnlyGuard();
+    if (!c.getEnvironment().getRegionInfo().getTable().isSystemTable()) {
+      internalReadOnlyGuard();
+    }
     return RegionObserver.super.preCheckAndDelete(c, row, filter, delete, result);
   }
 
@@ -183,7 +190,9 @@ public class ReadOnlyController implements MasterCoprocessor, RegionCoprocessor,
     ObserverContext<? extends RegionCoprocessorEnvironment> c, byte[] row, byte[] family,
     byte[] qualifier, CompareOperator op, ByteArrayComparable comparator, Delete delete,
     boolean result) throws IOException {
-    internalReadOnlyGuard();
+    if (!c.getEnvironment().getRegionInfo().getTable().isSystemTable()) {
+      internalReadOnlyGuard();
+    }
     return RegionObserver.super.preCheckAndDeleteAfterRowLock(c, row, family, qualifier, op,
       comparator, delete, result);
   }
 
@@ -192,7 +201,9 @@ public class ReadOnlyController implements MasterCoprocessor, RegionCoprocessor,
   public boolean preCheckAndDeleteAfterRowLock(
     ObserverContext<? extends RegionCoprocessorEnvironment> c, byte[] row, Filter filter,
     Delete delete, boolean result) throws IOException {
-    internalReadOnlyGuard();
+    if (!c.getEnvironment().getRegionInfo().getTable().isSystemTable()) {
+      internalReadOnlyGuard();
+    }
     return RegionObserver.super.preCheckAndDeleteAfterRowLock(c, row, filter, delete, result);
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
index 75bf721ef41..b32fad50f0f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
@@ -706,4 +706,14 @@ public class FSTableDescriptors implements TableDescriptors {
     }
     return writeTableDescriptor(fs, htd, tableDir, opt.map(Pair::getFirst).orElse(null)) != null;
   }
+
+  /**
+   * Invalidates the table descriptor cache.
+   */
+  @Override
+  public void invalidateTableDescriptorCache() {
+    LOG.info("Invalidating table descriptor cache.");
+    this.fsvisited = false;
+    this.cache.clear();
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRefreshMetaProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRefreshMetaProcedure.java
new file mode 100644
index 00000000000..e419d1df6ad
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRefreshMetaProcedure.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcNotFailed;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, MediumTests.class })
+public class TestRefreshMetaProcedure {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestRefreshMetaProcedure.class);
+
+  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+  private ProcedureExecutor<MasterProcedureEnv> procExecutor;
+  List<RegionInfo> activeRegions;
+  TableName tableName = TableName.valueOf("testRefreshMeta");
+
+  @Before
+  public void setup() throws Exception {
+    TEST_UTIL.getConfiguration().set("USE_META_REPLICAS", "false");
+    TEST_UTIL.startMiniCluster();
+    procExecutor = TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
+    byte[][] splitKeys =
+      new byte[][] { Bytes.toBytes("split1"), Bytes.toBytes("split2"), Bytes.toBytes("split3") };
+    TEST_UTIL.createTable(tableName, Bytes.toBytes("cf"), splitKeys);
+    TEST_UTIL.waitTableAvailable(tableName);
+    TEST_UTIL.getAdmin().flush(tableName);
+    activeRegions = TEST_UTIL.getAdmin().getRegions(tableName);
+    assertFalse(activeRegions.isEmpty());
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testRefreshMetaProcedureExecutesSuccessfully() {
+    RefreshMetaProcedure procedure = new RefreshMetaProcedure(procExecutor.getEnvironment());
+    long procId = procExecutor.submitProcedure(procedure);
+    ProcedureTestingUtility.waitProcedure(procExecutor, procId);
+    assertProcNotFailed(procExecutor.getResult(procId));
+  }
+
+  @Test
+  public void testGetCurrentRegions() throws Exception {
+    RefreshMetaProcedure procedure = new RefreshMetaProcedure(procExecutor.getEnvironment());
+    List<RegionInfo> regions = procedure.getCurrentRegions(TEST_UTIL.getConnection());
+    assertFalse("Should have found regions in meta", regions.isEmpty());
+    assertTrue("Should include test table region",
+      regions.stream().anyMatch(r -> r.getTable().getNameAsString().equals("testRefreshMeta")));
+  }
+
+  @Test
+  public void testScanBackingStorage() throws Exception {
+    RefreshMetaProcedure procedure = new RefreshMetaProcedure(procExecutor.getEnvironment());
+
+    List<RegionInfo> fsRegions = procedure.scanBackingStorage(TEST_UTIL.getConnection());
+
+    assertTrue("All regions from meta should be found in the storage",
+      activeRegions.stream().allMatch(reg -> fsRegions.stream()
+        .anyMatch(r -> r.getRegionNameAsString().equals(reg.getRegionNameAsString()))));
+  }
+
+  @Test
+  public void testHasBoundaryChanged() throws Exception {
+    RefreshMetaProcedure procedure = new RefreshMetaProcedure(procExecutor.getEnvironment());
+    RegionInfo region1 = RegionInfoBuilder.newBuilder(tableName)
+      .setStartKey(Bytes.toBytes("start1")).setEndKey(Bytes.toBytes("end1")).build();
+
+    RegionInfo region2 = RegionInfoBuilder.newBuilder(tableName)
+      .setStartKey(Bytes.toBytes("start2")).setEndKey(Bytes.toBytes("end1")).build();
+
+    RegionInfo region3 = RegionInfoBuilder.newBuilder(tableName)
+      .setStartKey(Bytes.toBytes("start1")).setEndKey(Bytes.toBytes("end2")).build();
+
+    assertTrue("Different start keys should have been detected",
+      procedure.hasBoundaryChanged(region1, region2));
+
+    assertTrue("Different end keys should have been detected",
+      procedure.hasBoundaryChanged(region1, region3));
+
+    assertFalse("Identical boundaries should not have been flagged as changed",
+      procedure.hasBoundaryChanged(region1, region1));
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRefreshMetaProcedureIntegration.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRefreshMetaProcedureIntegration.java
new file mode 100644
index 00000000000..917c12c6513
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestRefreshMetaProcedureIntegration.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.procedure;
+
+import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcNotFailed;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtil;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.client.TableState;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.security.access.ReadOnlyController;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ MasterTests.class, LargeTests.class })
+public class TestRefreshMetaProcedureIntegration {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestRefreshMetaProcedureIntegration.class);
+
+  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
+  private Admin admin;
+  private ProcedureExecutor<MasterProcedureEnv> procExecutor;
+  private HMaster master;
+  private HRegionServer regionServer;
+
+  @Before
+  public void setup() throws Exception {
+    // Configure the cluster with ReadOnlyController
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
+      ReadOnlyController.class.getName());
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY,
+      ReadOnlyController.class.getName());
+    TEST_UTIL.getConfiguration().set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
+      ReadOnlyController.class.getName());
+
+    // Start in active mode
+    TEST_UTIL.getConfiguration().setBoolean(HConstants.HBASE_GLOBAL_READONLY_ENABLED_KEY, false);
+
+    TEST_UTIL.startMiniCluster();
+    admin = TEST_UTIL.getAdmin();
+    procExecutor = TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
+    master = TEST_UTIL.getHBaseCluster().getMaster();
+    regionServer = TEST_UTIL.getHBaseCluster().getRegionServerThreads().get(0).getRegionServer();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (admin != null) {
+      admin.close();
+    }
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testRestoreMissingRegionInMeta() throws Exception {
TableName.valueOf("replicaTestTable"); + + createTableWithData(tableName); + + List<RegionInfo> activeRegions = admin.getRegions(tableName); + assertTrue("Should have at least 2 regions after split", activeRegions.size() >= 2); + + Table metaTable = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME); + RegionInfo regionToRemove = activeRegions.get(0); + admin.unassign(regionToRemove.getRegionName(), false); + Thread.sleep(1000); + + org.apache.hadoop.hbase.client.Delete delete = + new org.apache.hadoop.hbase.client.Delete(regionToRemove.getRegionName()); + metaTable.delete(delete); + metaTable.close(); + + List<RegionInfo> regionsAfterDrift = admin.getRegions(tableName); + assertEquals("Should have one less region in meta after simulating drift", + activeRegions.size() - 1, regionsAfterDrift.size()); + + setReadOnlyMode(true); + + boolean writeBlocked = false; + try { + Table readOnlyTable = TEST_UTIL.getConnection().getTable(tableName); + Put testPut = new Put(Bytes.toBytes("test_readonly")); + testPut.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("qual"), Bytes.toBytes("should_fail")); + readOnlyTable.put(testPut); + readOnlyTable.close(); + } catch (Exception e) { + if (e.getMessage().contains("Operation not allowed in Read-Only Mode")) { + writeBlocked = true; + } + } + assertTrue("Write operations should be blocked in read-only mode", writeBlocked); + + Long procId = admin.refreshMeta(); + + waitForProcedureCompletion(procId); + + List<RegionInfo> regionsAfterRefresh = admin.getRegions(tableName); + assertEquals("Missing regions should be restored by refresh_meta", activeRegions.size(), + regionsAfterRefresh.size()); + + boolean regionRestored = regionsAfterRefresh.stream() + .anyMatch(r -> r.getRegionNameAsString().equals(regionToRemove.getRegionNameAsString())); + assertTrue("Missing region should be restored by refresh_meta", regionRestored); + + setReadOnlyMode(false); + + Table activeTable = TEST_UTIL.getConnection().getTable(tableName); + Put testPut = new Put(Bytes.toBytes("test_active_again")); + testPut.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("qual"), + Bytes.toBytes("active_mode_again")); + activeTable.put(testPut); + activeTable.close(); + } + + @Test + public void testPhantomTableCleanup() throws Exception { + TableName table1 = TableName.valueOf("table1"); + TableName phantomTable = TableName.valueOf("phantomTable"); + createTableWithData(table1); + createTableWithData(phantomTable); + + assertTrue("Table1 should have multiple regions", admin.getRegions(table1).size() >= 2); + assertTrue("phantomTable should have multiple regions", + admin.getRegions(phantomTable).size() >= 2); + + deleteTableFromFilesystem(phantomTable); + List<TableName> tablesBeforeRefresh = Arrays.asList(admin.listTableNames()); + assertTrue("phantomTable should still be listed before refresh_meta", + tablesBeforeRefresh.contains(phantomTable)); + assertTrue("Table1 should still be listed", tablesBeforeRefresh.contains(table1)); + + setReadOnlyMode(true); + Long procId = admin.refreshMeta(); + waitForProcedureCompletion(procId); + + List<TableName> tablesAfterRefresh = Arrays.asList(admin.listTableNames()); + + assertFalse("phantomTable should be removed after refresh_meta", + tablesAfterRefresh.contains(phantomTable)); + assertTrue("Table1 should still be listed", tablesAfterRefresh.contains(table1)); + assertTrue("phantomTable should have no regions after refresh_meta", + admin.getRegions(phantomTable).isEmpty()); + setReadOnlyMode(false); + } + + @Test + public void 
+  public void testRestoreTableStateForOrphanRegions() throws Exception {
+    TableName tableName = TableName.valueOf("t1");
+    createTableInFilesystem(tableName);
+
+    assertEquals("No tables should exist", 0,
+      Stream.of(admin.listTableNames()).filter(tn -> tn.equals(tableName)).count());
+
+    setReadOnlyMode(true);
+    Long procId = admin.refreshMeta();
+    waitForProcedureCompletion(procId);
+
+    TableState tableState = MetaTableAccessor.getTableState(admin.getConnection(), tableName);
+    assertTrue("Table state should exist after refresh_meta", tableState != null);
+    assertEquals("Table state should be ENABLED", TableState.State.ENABLED, tableState.getState());
+    assertEquals("The list should show the new table from the FS", 1,
+      Stream.of(admin.listTableNames()).filter(tn -> tn.equals(tableName)).count());
+    assertFalse("Should have at least 1 region", admin.getRegions(tableName).isEmpty());
+    setReadOnlyMode(false);
+  }
+
+  private void createTableInFilesystem(TableName tableName) throws IOException {
+    FileSystem fs = TEST_UTIL.getTestFileSystem();
+    Path rootDir = CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration());
+    Path tableDir = CommonFSUtils.getTableDir(rootDir, tableName);
+    fs.mkdirs(tableDir);
+
+    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
+    TEST_UTIL.getHBaseCluster().getMaster().getTableDescriptors()
+      .update(builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf1")).build(), false);
+
+    Path regionDir = new Path(tableDir, "dab6d1e1c88787c13b97647f11b2c907");
+    Path regionInfoFile = new Path(regionDir, HRegionFileSystem.REGION_INFO_FILE);
+    fs.mkdirs(regionDir);
+
+    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).setStartKey(new byte[0])
+      .setEndKey(new byte[0]).setRegionId(1757100253228L).build();
+    byte[] regionInfoContent = RegionInfo.toDelimitedByteArray(regionInfo);
+    try (FSDataOutputStream out = fs.create(regionInfoFile, true)) {
+      out.write(regionInfoContent);
+    }
+  }
+
+  private void deleteTableFromFilesystem(TableName tableName) throws IOException {
+    FileSystem fs = TEST_UTIL.getTestFileSystem();
+    Path rootDir = CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration());
+    Path tableDir = CommonFSUtils.getTableDir(rootDir, tableName);
+    if (fs.exists(tableDir)) {
+      fs.delete(tableDir, true);
+    }
+  }
+
+  private void createTableWithData(TableName tableName) throws Exception {
+    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
+    builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf1"));
+    byte[] splitKeyBytes = Bytes.toBytes("split_key");
+    admin.createTable(builder.build(), new byte[][] { splitKeyBytes });
+    TEST_UTIL.waitTableAvailable(tableName);
+    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
+      for (int i = 0; i < 100; i++) {
+        Put put = new Put(Bytes.toBytes("row_" + String.format("%05d", i)));
+        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("qual"), Bytes.toBytes("value_" + i));
+        table.put(put);
+      }
+    }
+    admin.flush(tableName);
+  }
+
+  private void waitForProcedureCompletion(Long procId) {
+    assertTrue("Procedure ID should be positive", procId > 0);
+    TEST_UTIL.waitFor(1000, () -> {
+      try {
+        return procExecutor.isFinished(procId);
+      } catch (Exception e) {
+        return false;
+      }
+    });
+    assertProcNotFailed(procExecutor.getResult(procId));
+  }
+
+  private void setReadOnlyMode(boolean isReadOnly) {
+    TEST_UTIL.getConfiguration().setBoolean(HConstants.HBASE_GLOBAL_READONLY_ENABLED_KEY,
+      isReadOnly);
+    notifyConfigurationObservers();
+  }
+
+  private void notifyConfigurationObservers() {
+    master.getConfigurationManager().notifyAllObservers(TEST_UTIL.getConfiguration());
+    regionServer.getConfigurationManager().notifyAllObservers(TEST_UTIL.getConfiguration());
+  }
+}
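A condensed sketch of the wait pattern these tests rely on (it assumes in-process access to the master's ProcedureExecutor, as the mini-cluster tests above have; a remote client would instead poll with its own tooling, and exception handling is elided):

    // Sketch only: submit refresh_meta and poll the master's procedure executor.
    ProcedureExecutor<MasterProcedureEnv> exec = master.getMasterProcedureExecutor();
    long procId = admin.refreshMeta();
    while (!exec.isFinished(procId)) {
      Thread.sleep(100); // poll until the RefreshMetaProcedure completes
    }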
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java
index b0e3eb051fc..1ac785e7276 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdmin.java
@@ -1011,4 +1011,8 @@ public class VerifyingRSGroupAdmin implements Admin, Closeable {
     return admin.isReplicationPeerModificationEnabled();
   }
 
+  @Override
+  public Long refreshMeta() throws IOException {
+    return admin.refreshMeta();
+  }
 }
diff --git a/hbase-shell/src/main/ruby/hbase/admin.rb b/hbase-shell/src/main/ruby/hbase/admin.rb
index 8827db0daef..60f1c72cda2 100644
--- a/hbase-shell/src/main/ruby/hbase/admin.rb
+++ b/hbase-shell/src/main/ruby/hbase/admin.rb
@@ -1927,6 +1927,12 @@ module Hbase
     def list_tables_by_state(isEnabled)
       @admin.listTableNamesByState(isEnabled).map(&:getNameAsString)
     end
+
+    #----------------------------------------------------------------------------------------------
+    # Refresh hbase:meta table by syncing with the backing storage
+    def refresh_meta()
+      @admin.refreshMeta()
+    end
   end
   # rubocop:enable Metrics/ClassLength
 end
diff --git a/hbase-shell/src/main/ruby/shell.rb b/hbase-shell/src/main/ruby/shell.rb
index c408bf06b22..aadf2b255f1 100644
--- a/hbase-shell/src/main/ruby/shell.rb
+++ b/hbase-shell/src/main/ruby/shell.rb
@@ -496,6 +496,7 @@ Shell.load_command_group(
     decommission_regionservers
     recommission_regionserver
     truncate_region
+    refresh_meta
   ],
   # TODO: remove older hlog_roll command
   aliases: {
diff --git a/hbase-shell/src/main/ruby/shell/commands/refresh_meta.rb b/hbase-shell/src/main/ruby/shell/commands/refresh_meta.rb
new file mode 100644
index 00000000000..8c5acb49dc5
--- /dev/null
+++ b/hbase-shell/src/main/ruby/shell/commands/refresh_meta.rb
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+module Shell
+  module Commands
+    class RefreshMeta < Command
+      def help
+        <<-EOF
+Refresh the hbase:meta table by syncing with backing storage.
+This command is used in Read Replica clusters to pick up new
+tables and regions from the shared storage.
+Examples:
+
+  hbase> refresh_meta
+
+The command returns a procedure ID that can be used to track the progress
+of the meta table refresh operation.
+EOF
+      end
+
+      def command
+        proc_id = admin.refresh_meta
+        formatter.row(["Refresh meta procedure submitted. Procedure ID: #{proc_id}"])
+        proc_id
+      end
+    end
+  end
+end
diff --git a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java
index 76a8b41481b..cfd53156cc4 100644
--- a/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java
+++ b/hbase-thrift/src/main/java/org/apache/hadoop/hbase/thrift2/client/ThriftAdmin.java
@@ -1374,6 +1374,11 @@ public class ThriftAdmin implements Admin {
     throw new NotImplementedException("restoreBackupSystemTable not supported in ThriftAdmin");
   }
 
+  @Override
+  public Long refreshMeta() throws IOException {
+    throw new NotImplementedException("refreshMeta not supported in ThriftAdmin");
+  }
+
   @Override
   public boolean replicationPeerModificationSwitch(boolean on, boolean drainProcedures)
     throws IOException {
